diff options
Diffstat (limited to 'src/gallium/drivers')
87 files changed, 9530 insertions, 1057 deletions
diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index 411f204f15a..143eca848f1 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -88,14 +88,14 @@ cell_draw_create(struct cell_context *cell) static const struct debug_named_value cell_debug_flags[] = { - {"checker", CELL_DEBUG_CHECKER},/**< modulate tile clear color by SPU ID */ - {"asm", CELL_DEBUG_ASM}, /**< dump SPU asm code */ - {"sync", CELL_DEBUG_SYNC}, /**< SPUs do synchronous DMA */ - {"fragops", CELL_DEBUG_FRAGMENT_OPS}, /**< SPUs emit fragment ops debug messages*/ - {"fragopfallback", CELL_DEBUG_FRAGMENT_OP_FALLBACK}, /**< SPUs use reference implementation for fragment ops*/ - {"cmd", CELL_DEBUG_CMD}, /**< SPUs dump command buffer info */ - {"cache", CELL_DEBUG_CACHE}, /**< report texture cache stats on exit */ - {NULL, 0} + {"checker", CELL_DEBUG_CHECKER, NULL},/**< modulate tile clear color by SPU ID */ + {"asm", CELL_DEBUG_ASM, NULL}, /**< dump SPU asm code */ + {"sync", CELL_DEBUG_SYNC, NULL}, /**< SPUs do synchronous DMA */ + {"fragops", CELL_DEBUG_FRAGMENT_OPS, NULL}, /**< SPUs emit fragment ops debug messages*/ + {"fragopfallback", CELL_DEBUG_FRAGMENT_OP_FALLBACK, NULL}, /**< SPUs use reference implementation for fragment ops*/ + {"cmd", CELL_DEBUG_CMD, NULL}, /**< SPUs dump command buffer info */ + {"cache", CELL_DEBUG_CACHE, NULL}, /**< report texture cache stats on exit */ + DEBUG_NAMED_VALUE_END }; static unsigned int diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index 3e2d0824ff6..f82426520cd 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -55,7 +55,7 @@ i915_get_name(struct pipe_screen *screen) static char buffer[128]; const char *chipset; - switch (i915_screen(screen)->pci_id) { + switch (i915_screen(screen)->iws->pci_id) { case PCI_CHIP_I915_G: chipset = "915G"; break; @@ -280,14 +280,14 @@ i915_destroy_screen(struct pipe_screen *screen) * Create a new i915_screen object */ struct pipe_screen * -i915_create_screen(struct i915_winsys *iws, uint pci_id) +i915_screen_create(struct i915_winsys *iws) { struct i915_screen *is = CALLOC_STRUCT(i915_screen); if (!is) return NULL; - switch (pci_id) { + switch (iws->pci_id) { case PCI_CHIP_I915_G: case PCI_CHIP_I915_GM: is->is_i945 = FALSE; @@ -304,12 +304,11 @@ i915_create_screen(struct i915_winsys *iws, uint pci_id) default: debug_printf("%s: unknown pci id 0x%x, cannot create screen\n", - __FUNCTION__, pci_id); + __FUNCTION__, iws->pci_id); FREE(is); return NULL; } - is->pci_id = pci_id; is->iws = iws; is->base.winsys = NULL; diff --git a/src/gallium/drivers/i915/i915_screen.h b/src/gallium/drivers/i915/i915_screen.h index 7f9e02fc0f6..0c4186c68ee 100644 --- a/src/gallium/drivers/i915/i915_screen.h +++ b/src/gallium/drivers/i915/i915_screen.h @@ -45,7 +45,6 @@ struct i915_screen struct i915_winsys *iws; boolean is_i945; - uint pci_id; }; /** diff --git a/src/gallium/drivers/i915/i915_winsys.h b/src/gallium/drivers/i915/i915_winsys.h index 8a6f579ad97..3aba19fe6a3 100644 --- a/src/gallium/drivers/i915/i915_winsys.h +++ b/src/gallium/drivers/i915/i915_winsys.h @@ -81,6 +81,8 @@ struct i915_winsys_batchbuffer { struct i915_winsys { + unsigned pci_id; /**< PCI ID for the device */ + /** * Batchbuffer functions. */ @@ -224,7 +226,7 @@ struct i915_winsys { /** * Create i915 pipe_screen. */ -struct pipe_screen *i915_create_screen(struct i915_winsys *iws, unsigned pci_id); +struct pipe_screen *i915_screen_create(struct i915_winsys *iws); #endif diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c index 36b0eb6954e..50a446db917 100644 --- a/src/gallium/drivers/i965/brw_screen.c +++ b/src/gallium/drivers/i965/brw_screen.c @@ -39,38 +39,38 @@ #ifdef DEBUG static const struct debug_named_value debug_names[] = { - { "tex", DEBUG_TEXTURE}, - { "state", DEBUG_STATE}, - { "ioctl", DEBUG_IOCTL}, - { "blit", DEBUG_BLIT}, - { "curbe", DEBUG_CURBE}, - { "fall", DEBUG_FALLBACKS}, - { "verb", DEBUG_VERBOSE}, - { "bat", DEBUG_BATCH}, - { "pix", DEBUG_PIXEL}, - { "wins", DEBUG_WINSYS}, - { "min", DEBUG_MIN_URB}, - { "dis", DEBUG_DISASSEM}, - { "sync", DEBUG_SYNC}, - { "prim", DEBUG_PRIMS }, - { "vert", DEBUG_VERTS }, - { "dma", DEBUG_DMA }, - { "san", DEBUG_SANITY }, - { "sleep", DEBUG_SLEEP }, - { "stats", DEBUG_STATS }, - { "sing", DEBUG_SINGLE_THREAD }, - { "thre", DEBUG_SINGLE_THREAD }, - { "wm", DEBUG_WM }, - { "urb", DEBUG_URB }, - { "vs", DEBUG_VS }, - { NULL, 0 } + { "tex", DEBUG_TEXTURE, NULL }, + { "state", DEBUG_STATE, NULL }, + { "ioctl", DEBUG_IOCTL, NULL }, + { "blit", DEBUG_BLIT, NULL }, + { "curbe", DEBUG_CURBE, NULL }, + { "fall", DEBUG_FALLBACKS, NULL }, + { "verb", DEBUG_VERBOSE, NULL }, + { "bat", DEBUG_BATCH, NULL }, + { "pix", DEBUG_PIXEL, NULL }, + { "wins", DEBUG_WINSYS, NULL }, + { "min", DEBUG_MIN_URB, NULL }, + { "dis", DEBUG_DISASSEM, NULL }, + { "sync", DEBUG_SYNC, NULL }, + { "prim", DEBUG_PRIMS, NULL }, + { "vert", DEBUG_VERTS, NULL }, + { "dma", DEBUG_DMA, NULL }, + { "san", DEBUG_SANITY, NULL }, + { "sleep", DEBUG_SLEEP, NULL }, + { "stats", DEBUG_STATS, NULL }, + { "sing", DEBUG_SINGLE_THREAD, NULL }, + { "thre", DEBUG_SINGLE_THREAD, NULL }, + { "wm", DEBUG_WM, NULL }, + { "urb", DEBUG_URB, NULL }, + { "vs", DEBUG_VS, NULL }, + DEBUG_NAMED_VALUE_END }; static const struct debug_named_value dump_names[] = { - { "asm", DUMP_ASM}, - { "state", DUMP_STATE}, - { "batch", DUMP_BATCH}, - { NULL, 0 } + { "asm", DUMP_ASM, NULL }, + { "state", DUMP_STATE, NULL }, + { "batch", DUMP_BATCH, NULL }, + DEBUG_NAMED_VALUE_END }; int BRW_DEBUG = 0; diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README index 9c874acdedc..8b5539d2c51 100644 --- a/src/gallium/drivers/llvmpipe/README +++ b/src/gallium/drivers/llvmpipe/README @@ -82,9 +82,10 @@ Requirements - scons (optional) - - udis86, http://udis86.sourceforge.net/ (optional): + - udis86, http://udis86.sourceforge.net/ (optional). My personal repository + supports more opcodes which haven't been merged upstream yet: - git clone git://udis86.git.sourceforge.net/gitroot/udis86/udis86 + git clone git://anongit.freedesktop.org/~jrfonseca/udis86 cd udis86 ./autogen.sh ./configure --with-pic diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index 838691e14b0..90d2b26f9f2 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -37,7 +37,7 @@ #include "util/u_debug.h" #include "util/u_memory.h" #include "util/u_math.h" -#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_scan.h" #include "gallivm/lp_bld_debug.h" #include "gallivm/lp_bld_const.h" #include "gallivm/lp_bld_arit.h" @@ -75,6 +75,10 @@ */ +static const unsigned char quad_offset_x[4] = {0, 1, 0, 1}; +static const unsigned char quad_offset_y[4] = {0, 0, 1, 1}; + + static void attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix) { @@ -95,104 +99,143 @@ coeffs_init(struct lp_build_interp_soa_context *bld, LLVMValueRef dadx_ptr, LLVMValueRef dady_ptr) { - LLVMBuilderRef builder = bld->base.builder; + struct lp_build_context *coeff_bld = &bld->coeff_bld; + LLVMBuilderRef builder = coeff_bld->builder; + LLVMValueRef zero = LLVMConstNull(coeff_bld->elem_type); + LLVMValueRef one = LLVMConstReal(coeff_bld->elem_type, 1.0); + LLVMValueRef i0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + LLVMValueRef i1 = LLVMConstInt(LLVMInt32Type(), 1, 0); + LLVMValueRef i2 = LLVMConstInt(LLVMInt32Type(), 2, 0); + LLVMValueRef i3 = LLVMConstInt(LLVMInt32Type(), 3, 0); + LLVMValueRef oow = NULL; unsigned attrib; unsigned chan; - for(attrib = 0; attrib < bld->num_attribs; ++attrib) { + /* TODO: Use more vector operations */ + + for (attrib = 0; attrib < bld->num_attribs; ++attrib) { const unsigned mask = bld->mask[attrib]; const unsigned interp = bld->interp[attrib]; - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - if(mask & (1 << chan)) { + for (chan = 0; chan < NUM_CHANNELS; ++chan) { + if (mask & (1 << chan)) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), attrib*NUM_CHANNELS + chan, 0); - LLVMValueRef a0 = NULL; - LLVMValueRef dadx = NULL; - LLVMValueRef dady = NULL; - - switch( interp ) { - case TGSI_INTERPOLATE_PERSPECTIVE: + LLVMValueRef a0 = zero; + LLVMValueRef dadx = zero; + LLVMValueRef dady = zero; + LLVMValueRef dadxy = zero; + LLVMValueRef dadq; + LLVMValueRef dadq2; + LLVMValueRef a; + + switch (interp) { + case LP_INTERP_PERSPECTIVE: /* fall-through */ - case TGSI_INTERPOLATE_LINEAR: - dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), ""); - dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), ""); - dadx = lp_build_broadcast_scalar(&bld->base, dadx); - dady = lp_build_broadcast_scalar(&bld->base, dady); - attrib_name(dadx, attrib, chan, ".dadx"); - attrib_name(dady, attrib, chan, ".dady"); + case LP_INTERP_LINEAR: + if (attrib == 0 && chan == 0) { + dadxy = dadx = one; + } + else if (attrib == 0 && chan == 1) { + dadxy = dady = one; + } + else { + dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), ""); + dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), ""); + dadxy = LLVMBuildAdd(builder, dadx, dady, ""); + attrib_name(dadx, attrib, chan, ".dadx"); + attrib_name(dady, attrib, chan, ".dady"); + attrib_name(dadxy, attrib, chan, ".dadxy"); + } /* fall-through */ - case TGSI_INTERPOLATE_CONSTANT: + case LP_INTERP_CONSTANT: + case LP_INTERP_FACING: a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), ""); - a0 = lp_build_broadcast_scalar(&bld->base, a0); attrib_name(a0, attrib, chan, ".a0"); break; + case LP_INTERP_POSITION: + /* Nothing to do as the position coeffs are already setup in slot 0 */ + continue; + default: assert(0); break; } - bld->a0 [attrib][chan] = a0; - bld->dadx[attrib][chan] = dadx; - bld->dady[attrib][chan] = dady; - } - } - } -} + /* + * dadq = {0, dadx, dady, dadx + dady} + */ + dadq = coeff_bld->undef; + dadq = LLVMBuildInsertElement(builder, dadq, zero, i0, ""); + dadq = LLVMBuildInsertElement(builder, dadq, dadx, i1, ""); + dadq = LLVMBuildInsertElement(builder, dadq, dady, i2, ""); + dadq = LLVMBuildInsertElement(builder, dadq, dadxy, i3, ""); -/** - * Emit LLVM code to compute the fragment shader input attribute values. - * For example, for a color input, we'll compute red, green, blue and alpha - * values for the four pixels in a quad. - * Recall that we're operating on 4-element vectors so each arithmetic - * operation is operating on the four pixels in a quad. - */ -static void -attribs_init(struct lp_build_interp_soa_context *bld) -{ - LLVMValueRef x = bld->pos[0]; - LLVMValueRef y = bld->pos[1]; - LLVMValueRef oow = NULL; - unsigned attrib; - unsigned chan; + /* + * dadq2 = 2 * dq + */ - for(attrib = 0; attrib < bld->num_attribs; ++attrib) { - const unsigned mask = bld->mask[attrib]; - const unsigned interp = bld->interp[attrib]; - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - if(mask & (1 << chan)) { - LLVMValueRef a0 = bld->a0 [attrib][chan]; - LLVMValueRef dadx = bld->dadx[attrib][chan]; - LLVMValueRef dady = bld->dady[attrib][chan]; - LLVMValueRef res; - - res = a0; - - if (interp != TGSI_INTERPOLATE_CONSTANT) { - /* res = res + x * dadx */ - res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, x, dadx)); - /* res = res + y * dady */ - res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, y, dady)); + dadq2 = LLVMBuildAdd(builder, dadq, dadq, ""); + + /* + * a = a0 + x * dadx + y * dady + */ + + if (attrib == 0 && chan == 0) { + a = bld->x; + } + else if (attrib == 0 && chan == 1) { + a = bld->y; + } + else { + a = a0; + if (interp != LP_INTERP_CONSTANT && + interp != LP_INTERP_FACING) { + a = LLVMBuildAdd(builder, a, + LLVMBuildMul(builder, bld->x, dadx, ""), + ""); + a = LLVMBuildAdd(builder, a, + LLVMBuildMul(builder, bld->y, dady, ""), + ""); + } } - /* Keep the value of the attribute before perspective divide - * for faster updates. + /* + * a = {a, a, a, a} + */ + + a = lp_build_broadcast(builder, coeff_bld->vec_type, a); + + /* + * Compute the attrib values on the upper-left corner of each quad. + */ + + a = LLVMBuildAdd(builder, a, dadq2, ""); + + /* + * a *= 1 / w + * dadq *= 1 / w */ - bld->attribs_pre[attrib][chan] = res; - if (interp == TGSI_INTERPOLATE_PERSPECTIVE) { - LLVMValueRef w = bld->pos[3]; + if (interp == LP_INTERP_PERSPECTIVE) { + LLVMValueRef w = bld->a[0][3]; assert(attrib != 0); - if(!oow) - oow = lp_build_rcp(&bld->base, w); - res = lp_build_mul(&bld->base, res, oow); + assert(bld->mask[0] & TGSI_WRITEMASK_W); + if (!oow) { + oow = lp_build_rcp(coeff_bld, w); + lp_build_name(oow, "oow"); + } + a = lp_build_mul(coeff_bld, a, oow); + dadq = lp_build_mul(coeff_bld, dadq, oow); } - attrib_name(res, attrib, chan, ""); + attrib_name(a, attrib, chan, ".a"); + attrib_name(dadq, attrib, chan, ".dadq"); - bld->attribs[attrib][chan] = res; + bld->a [attrib][chan] = a; + bld->dadq[attrib][chan] = dadq; } } } @@ -206,7 +249,8 @@ attribs_init(struct lp_build_interp_soa_context *bld) static void attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) { - LLVMValueRef oow = NULL; + struct lp_build_context *coeff_bld = &bld->coeff_bld; + LLVMValueRef shuffle = lp_build_const_int_vec(coeff_bld->type, quad_index); unsigned attrib; unsigned chan; @@ -215,42 +259,36 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) for(attrib = 0; attrib < bld->num_attribs; ++attrib) { const unsigned mask = bld->mask[attrib]; const unsigned interp = bld->interp[attrib]; + for(chan = 0; chan < NUM_CHANNELS; ++chan) { + if(mask & (1 << chan)) { + LLVMValueRef a = coeff_bld->undef; + if (interp == LP_INTERP_CONSTANT || + interp == LP_INTERP_FACING) { + a = bld->a[attrib][chan]; + } + else if (interp == LP_INTERP_POSITION) { + assert(attrib > 0); + a = bld->attribs[0][chan]; + } + else { + a = bld->a[attrib][chan]; - if (interp != TGSI_INTERPOLATE_CONSTANT) { - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - if(mask & (1 << chan)) { - LLVMValueRef dadx = bld->dadx[attrib][chan]; - LLVMValueRef dady = bld->dady[attrib][chan]; - LLVMValueRef res; - - res = bld->attribs_pre[attrib][chan]; + /* + * Broadcast the attribute value for this quad into all elements + */ - if (quad_index == 1 || quad_index == 3) { - /* top-right or bottom-right quad */ - /* build res = res + dadx + dadx */ - res = lp_build_add(&bld->base, res, dadx); - res = lp_build_add(&bld->base, res, dadx); - } + a = LLVMBuildShuffleVector(coeff_bld->builder, + a, coeff_bld->undef, shuffle, ""); - if (quad_index == 2 || quad_index == 3) { - /* bottom-left or bottom-right quad */ - /* build res = res + dady + dady */ - res = lp_build_add(&bld->base, res, dady); - res = lp_build_add(&bld->base, res, dady); - } - - if (interp == TGSI_INTERPOLATE_PERSPECTIVE) { - LLVMValueRef w = bld->pos[3]; - assert(attrib != 0); - if(!oow) - oow = lp_build_rcp(&bld->base, w); - res = lp_build_mul(&bld->base, res, oow); - } + /* + * Add the derivatives + */ - attrib_name(res, attrib, chan, ""); + a = lp_build_add(coeff_bld, a, bld->dadq[attrib][chan]); - bld->attribs[attrib][chan] = res; + attrib_name(a, attrib, chan, ""); } + bld->attribs[attrib][chan] = a; } } } @@ -260,53 +298,17 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) /** * Generate the position vectors. * - * Parameter x0, y0 are the integer values with the quad upper left coordinates. + * Parameter x0, y0 are the integer values with upper left coordinates. */ static void pos_init(struct lp_build_interp_soa_context *bld, LLVMValueRef x0, LLVMValueRef y0) { - lp_build_name(x0, "pos.x"); - lp_build_name(y0, "pos.y"); + struct lp_build_context *coeff_bld = &bld->coeff_bld; - bld->attribs[0][0] = x0; - bld->attribs[0][1] = y0; -} - - -/** - * Update quad position values when moving to the next quad. - */ -static void -pos_update(struct lp_build_interp_soa_context *bld, int quad_index) -{ - LLVMValueRef x = bld->attribs[0][0]; - LLVMValueRef y = bld->attribs[0][1]; - const int xstep = 2, ystep = 2; - - if (quad_index == 1 || quad_index == 3) { - /* top-right or bottom-right quad in block */ - /* build x += xstep */ - x = lp_build_add(&bld->base, x, - lp_build_const_vec(bld->base.type, xstep)); - } - - if (quad_index == 2) { - /* bottom-left quad in block */ - /* build y += ystep */ - y = lp_build_add(&bld->base, y, - lp_build_const_vec(bld->base.type, ystep)); - /* build x -= xstep */ - x = lp_build_sub(&bld->base, x, - lp_build_const_vec(bld->base.type, xstep)); - } - - lp_build_name(x, "pos.x"); - lp_build_name(y, "pos.y"); - - bld->attribs[0][0] = x; - bld->attribs[0][1] = y; + bld->x = LLVMBuildSIToFP(coeff_bld->builder, x0, coeff_bld->elem_type, ""); + bld->y = LLVMBuildSIToFP(coeff_bld->builder, y0, coeff_bld->elem_type, ""); } @@ -315,8 +317,8 @@ pos_update(struct lp_build_interp_soa_context *bld, int quad_index) */ void lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, - const struct tgsi_token *tokens, - boolean flatshade, + unsigned num_inputs, + const struct lp_shader_input *inputs, LLVMBuilderRef builder, struct lp_type type, LLVMValueRef a0_ptr, @@ -325,12 +327,22 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, LLVMValueRef x0, LLVMValueRef y0) { - struct tgsi_parse_context parse; - struct tgsi_full_declaration *decl; + struct lp_type coeff_type; + unsigned attrib; + unsigned chan; memset(bld, 0, sizeof *bld); - lp_build_context_init(&bld->base, builder, type); + memset(&coeff_type, 0, sizeof coeff_type); + coeff_type.floating = TRUE; + coeff_type.sign = TRUE; + coeff_type.width = 32; + coeff_type.length = QUAD_SIZE; + + /* XXX: we don't support interpolating into any other types */ + assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0); + + lp_build_context_init(&bld->coeff_bld, builder, coeff_type); /* For convenience */ bld->pos = bld->attribs[0]; @@ -338,58 +350,28 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, /* Position */ bld->num_attribs = 1; - bld->mask[0] = TGSI_WRITEMASK_ZW; - bld->interp[0] = TGSI_INTERPOLATE_LINEAR; + bld->mask[0] = TGSI_WRITEMASK_XYZW; + bld->interp[0] = LP_INTERP_LINEAR; /* Inputs */ - tgsi_parse_init( &parse, tokens ); - while( !tgsi_parse_end_of_tokens( &parse ) ) { - tgsi_parse_token( &parse ); - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - decl = &parse.FullToken.FullDeclaration; - if( decl->Declaration.File == TGSI_FILE_INPUT ) { - unsigned first, last, mask; - unsigned attrib; - - first = decl->Range.First; - last = decl->Range.Last; - mask = decl->Declaration.UsageMask; - - for( attrib = first; attrib <= last; ++attrib ) { - bld->mask[1 + attrib] = mask; - - /* XXX: have mesa set INTERP_CONSTANT in the fragment - * shader. - */ - if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR && - flatshade) - bld->interp[1 + attrib] = TGSI_INTERPOLATE_CONSTANT; - else - bld->interp[1 + attrib] = decl->Declaration.Interpolate; - } - - bld->num_attribs = MAX2(bld->num_attribs, 1 + last + 1); - } - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - case TGSI_TOKEN_TYPE_IMMEDIATE: - case TGSI_TOKEN_TYPE_PROPERTY: - break; + for (attrib = 0; attrib < num_inputs; ++attrib) { + bld->mask[1 + attrib] = inputs[attrib].usage_mask; + bld->interp[1 + attrib] = inputs[attrib].interp; + } + bld->num_attribs = 1 + num_inputs; - default: - assert( 0 ); + /* Ensure all masked out input channels have a valid value */ + for (attrib = 0; attrib < bld->num_attribs; ++attrib) { + for (chan = 0; chan < NUM_CHANNELS; ++chan) { + bld->attribs[attrib][chan] = bld->coeff_bld.undef; } } - tgsi_parse_free( &parse ); - - coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr); pos_init(bld, x0, y0); - attribs_init(bld); + coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr); + + attribs_update(bld, 0); } @@ -402,7 +384,5 @@ lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld, { assert(quad_index < 4); - pos_update(bld, quad_index); - attribs_update(bld, quad_index); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h index 99a432957ce..29055133011 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h @@ -46,25 +46,23 @@ #include "tgsi/tgsi_exec.h" - - -struct tgsi_token; +#include "lp_setup.h" struct lp_build_interp_soa_context { - struct lp_build_context base; + /* QUAD_SIZE x float */ + struct lp_build_context coeff_bld; unsigned num_attribs; unsigned mask[1 + PIPE_MAX_SHADER_INPUTS]; /**< TGSI_WRITE_MASK_x */ - unsigned interp[1 + PIPE_MAX_SHADER_INPUTS]; /**< TGSI_INTERPOLATE_x */ + enum lp_interp interp[1 + PIPE_MAX_SHADER_INPUTS]; - LLVMValueRef a0 [1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - LLVMValueRef dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - LLVMValueRef dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + LLVMValueRef x; + LLVMValueRef y; - /* Attribute values before perspective divide */ - LLVMValueRef attribs_pre[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + LLVMValueRef a [1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + LLVMValueRef dadq[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; LLVMValueRef attribs[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; @@ -78,15 +76,15 @@ struct lp_build_interp_soa_context void lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, - const struct tgsi_token *tokens, - boolean flatshade, + unsigned num_inputs, + const struct lp_shader_input *inputs, LLVMBuilderRef builder, struct lp_type type, LLVMValueRef a0_ptr, LLVMValueRef dadx_ptr, LLVMValueRef dady_ptr, - LLVMValueRef x0, - LLVMValueRef y0); + LLVMValueRef x, + LLVMValueRef y); void lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld, diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index de7fe7a1796..689265fa30d 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -37,6 +37,7 @@ #include "lp_tex_sample.h" #include "lp_jit.h" +#include "lp_setup.h" struct llvmpipe_vbuf_render; @@ -90,6 +91,10 @@ struct llvmpipe_context { /** Vertex format */ struct vertex_info vertex_info; + /** Fragment shader input interpolation info */ + unsigned num_inputs; + struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS]; + /** The tiling engine */ struct lp_setup_context *setup; diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 8dee0413019..8d06e65725f 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -169,36 +169,6 @@ typedef void uint32_t *counter); -/** cast wrapper to avoid compiler warnings */ -static INLINE lp_jit_frag_func -cast_voidptr_to_lp_jit_frag_func(void *v) -{ - union { - void *v; - lp_jit_frag_func f; - } u; - assert(sizeof(u.v) == sizeof(u.f)); - u.v = v; - return u.f; -} - - -/** cast wrapper */ -static INLINE void * -cast_lp_jit_frag_func_to_voidptr(lp_jit_frag_func f) -{ - union { - void *v; - lp_jit_frag_func f; - } u; - assert(sizeof(u.v) == sizeof(u.f)); - u.f = f; - return u.v; -} - - - - void lp_jit_screen_cleanup(struct llvmpipe_screen *screen); diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c index 080e169ea11..c4864422860 100644 --- a/src/gallium/drivers/llvmpipe/lp_query.c +++ b/src/gallium/drivers/llvmpipe/lp_query.c @@ -39,7 +39,6 @@ #include "lp_rast.h" #include "lp_rast_priv.h" #include "lp_state.h" -#include "lp_setup_context.h" static struct llvmpipe_query *llvmpipe_query( struct pipe_query *p ) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 2280c71c361..50e44dcb2b3 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -61,16 +61,12 @@ lp_rast_begin( struct lp_rasterizer *rast, for (i = 0; i < rast->state.nr_cbufs; i++) { struct pipe_surface *cbuf = scene->fb.cbufs[i]; - rast->cbuf[i].format = cbuf->texture->format; - rast->cbuf[i].tiles_per_row = align(cbuf->width, TILE_SIZE) / TILE_SIZE; - rast->cbuf[i].blocksize = - util_format_get_blocksize(cbuf->texture->format); - rast->cbuf[i].map = llvmpipe_resource_map(cbuf->texture, - cbuf->face, - cbuf->level, - cbuf->zslice, - LP_TEX_USAGE_READ_WRITE, - LP_TEX_LAYOUT_NONE); + llvmpipe_resource_map(cbuf->texture, + cbuf->face, + cbuf->level, + cbuf->zslice, + LP_TEX_USAGE_READ_WRITE, + LP_TEX_LAYOUT_NONE); } if (fb->zsbuf) { @@ -105,7 +101,6 @@ lp_rast_end( struct lp_rasterizer *rast ) cbuf->face, cbuf->level, cbuf->zslice); - rast->cbuf[i].map = NULL; } /* Unmap z/stencil buffer */ @@ -428,6 +423,7 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, struct lp_rasterizer *rast = task->rast; const struct lp_rast_state *state = task->current_state; const struct lp_rast_shader_inputs *inputs = arg.shade_tile; + struct lp_fragment_shader_variant *variant = state->variant; const unsigned tile_x = task->x, tile_y = task->y; unsigned x, y; @@ -449,7 +445,7 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, depth = lp_rast_get_depth_block_pointer(rast, tile_x + x, tile_y + y); /* run shader on 4x4 block */ - state->jit_function[RAST_WHOLE]( &state->jit_context, + variant->jit_function[RAST_WHOLE]( &state->jit_context, tile_x + x, tile_y + y, inputs->facing, inputs->a0, @@ -476,6 +472,7 @@ void lp_rast_shade_quads( struct lp_rasterizer_task *task, int32_t c1, int32_t c2, int32_t c3) { const struct lp_rast_state *state = task->current_state; + struct lp_fragment_shader_variant *variant = state->variant; struct lp_rasterizer *rast = task->rast; uint8_t *color[PIPE_MAX_COLOR_BUFS]; void *depth; @@ -507,7 +504,7 @@ void lp_rast_shade_quads( struct lp_rasterizer_task *task, assert(lp_check_alignment(inputs->step[2], 16)); /* run shader on 4x4 block */ - state->jit_function[RAST_EDGE_TEST]( &state->jit_context, + variant->jit_function[RAST_EDGE_TEST]( &state->jit_context, x, y, inputs->facing, inputs->a0, @@ -582,18 +579,20 @@ outline_subtiles(uint8_t *tile) static void lp_rast_tile_end(struct lp_rasterizer_task *task) { -#if DEBUG - struct lp_rasterizer *rast = task->rast; - unsigned buf; - - for (buf = 0; buf < rast->state.nr_cbufs; buf++) { - uint8_t *color = lp_rast_get_color_block_pointer(task, buf, - task->x, task->y); - - if (LP_DEBUG & DEBUG_SHOW_SUBTILES) - outline_subtiles(color); - else if (LP_DEBUG & DEBUG_SHOW_TILES) - outline_tile(color); +#ifdef DEBUG + if (LP_DEBUG & (DEBUG_SHOW_SUBTILES | DEBUG_SHOW_TILES)) { + struct lp_rasterizer *rast = task->rast; + unsigned buf; + + for (buf = 0; buf < rast->state.nr_cbufs; buf++) { + uint8_t *color = lp_rast_get_color_block_pointer(task, buf, + task->x, task->y); + + if (LP_DEBUG & DEBUG_SHOW_SUBTILES) + outline_subtiles(color); + else if (LP_DEBUG & DEBUG_SHOW_TILES) + outline_tile(color); + } } #else (void) outline_subtiles; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 757009d13f7..80ca68f5a2f 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -66,13 +66,9 @@ struct lp_rast_state { struct lp_jit_context jit_context; /* The shader itself. Probably we also need to pass a pointer to - * the tile color/z/stencil data somehow: - * jit_function[0] skips the triangle in/out test code - * jit_function[1] does triangle in/out testing + * the tile color/z/stencil data somehow */ - lp_jit_frag_func jit_function[2]; - - boolean opaque; + struct lp_fragment_shader_variant *variant; }; diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index efc013ff3f0..d33dd49f3a7 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -80,13 +80,6 @@ struct lp_rasterizer /* Framebuffer stuff */ struct { - void *map; - unsigned tiles_per_row; - unsigned blocksize; - enum pipe_format format; - } cbuf[PIPE_MAX_COLOR_BUFS]; - - struct { uint8_t *map; unsigned stride; unsigned blocksize; @@ -205,6 +198,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, { struct lp_rasterizer *rast = task->rast; const struct lp_rast_state *state = task->current_state; + struct lp_fragment_shader_variant *variant = state->variant; uint8_t *color[PIPE_MAX_COLOR_BUFS]; void *depth; unsigned i; @@ -216,7 +210,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, depth = lp_rast_get_depth_block_pointer(rast, x, y); /* run shader on 4x4 block */ - state->jit_function[RAST_WHOLE]( &state->jit_context, + variant->jit_function[RAST_WHOLE]( &state->jit_context, x, y, inputs->facing, inputs->a0, diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index e24b86d938a..2fc5d6ecff6 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -50,17 +50,17 @@ int LP_DEBUG = 0; static const struct debug_named_value lp_debug_flags[] = { - { "pipe", DEBUG_PIPE }, - { "tgsi", DEBUG_TGSI }, - { "tex", DEBUG_TEX }, - { "setup", DEBUG_SETUP }, - { "rast", DEBUG_RAST }, - { "query", DEBUG_QUERY }, - { "screen", DEBUG_SCREEN }, - { "show_tiles", DEBUG_SHOW_TILES }, - { "show_subtiles", DEBUG_SHOW_SUBTILES }, - { "counters", DEBUG_COUNTERS }, - {NULL, 0} + { "pipe", DEBUG_PIPE, NULL }, + { "tgsi", DEBUG_TGSI, NULL }, + { "tex", DEBUG_TEX, NULL }, + { "setup", DEBUG_SETUP, NULL }, + { "rast", DEBUG_RAST, NULL }, + { "query", DEBUG_QUERY, NULL }, + { "screen", DEBUG_SCREEN, NULL }, + { "show_tiles", DEBUG_SHOW_TILES, NULL }, + { "show_subtiles", DEBUG_SHOW_SUBTILES, NULL }, + { "counters", DEBUG_COUNTERS, NULL }, + DEBUG_NAMED_VALUE_END }; #endif @@ -220,7 +220,8 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, if (!format_desc) return FALSE; - assert(target == PIPE_TEXTURE_1D || + assert(target == PIPE_BUFFER || + target == PIPE_TEXTURE_1D || target == PIPE_TEXTURE_2D || target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE); diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 1970173af4c..8556dc87b44 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -472,18 +472,14 @@ lp_setup_set_fs_inputs( struct lp_setup_context *setup, } void -lp_setup_set_fs_functions( struct lp_setup_context *setup, - lp_jit_frag_func jit_function0, - lp_jit_frag_func jit_function1, - boolean opaque ) +lp_setup_set_fs_variant( struct lp_setup_context *setup, + struct lp_fragment_shader_variant *variant) { LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, - cast_lp_jit_frag_func_to_voidptr(jit_function0)); + variant); /* FIXME: reference count */ - setup->fs.current.jit_function[0] = jit_function0; - setup->fs.current.jit_function[1] = jit_function1; - setup->fs.current.opaque = opaque; + setup->fs.current.variant = variant; setup->dirty |= LP_SETUP_NEW_FS; } @@ -682,7 +678,7 @@ lp_setup_update_state( struct lp_setup_context *setup ) scene = lp_setup_get_current_scene(setup); - assert(setup->fs.current.jit_function); + assert(setup->fs.current.variant); /* Some of the 'draw' pipeline stages may have changed some driver state. * Make sure we've processed those state changes before anything else. diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 10db03b9c69..6a0dc551290 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -41,8 +41,11 @@ enum lp_interp { LP_INTERP_FACING }; -/* Describes how to generate all the fragment shader inputs from the - * the vertices passed into our triangle/line/point functions. + +/** + * Describes how to compute the interpolation coefficients (a0, dadx, dady) + * from the vertices passed into our triangle/line/point functions by the + * draw module. * * Vertices are treated as an array of float[4] values, indexed by * src_index. @@ -50,6 +53,7 @@ enum lp_interp { struct lp_shader_input { enum lp_interp interp; /* how to interpolate values */ unsigned src_index; /* where to find values in incoming vertices */ + unsigned usage_mask; /* bitmask of TGSI_WRITEMASK_x flags */ }; struct pipe_resource; @@ -58,7 +62,7 @@ struct pipe_surface; struct pipe_blend_color; struct pipe_screen; struct pipe_framebuffer_state; -struct lp_fragment_shader; +struct lp_fragment_shader_variant; struct lp_jit_context; struct llvmpipe_query; @@ -100,10 +104,8 @@ lp_setup_set_fs_inputs( struct lp_setup_context *setup, unsigned nr ); void -lp_setup_set_fs_functions( struct lp_setup_context *setup, - lp_jit_frag_func jit_function0, - lp_jit_frag_func jit_function1, - boolean opaque ); +lp_setup_set_fs_variant( struct lp_setup_context *setup, + struct lp_fragment_shader_variant *variant ); void lp_setup_set_fs_constants(struct lp_setup_context *setup, diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 306cb6e27d2..0557d35f8b1 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -34,6 +34,7 @@ #include "lp_perf.h" #include "lp_setup_context.h" #include "lp_rast.h" +#include "lp_state_fs.h" #define NUM_CHANNELS 4 @@ -146,20 +147,32 @@ setup_fragcoord_coef(struct lp_setup_context *setup, unsigned slot, const float (*v1)[4], const float (*v2)[4], - const float (*v3)[4]) + const float (*v3)[4], + unsigned usage_mask) { /*X*/ - tri->inputs.a0[slot][0] = 0.0; - tri->inputs.dadx[slot][0] = 1.0; - tri->inputs.dady[slot][0] = 0.0; + if (usage_mask & TGSI_WRITEMASK_X) { + tri->inputs.a0[slot][0] = 0.0; + tri->inputs.dadx[slot][0] = 1.0; + tri->inputs.dady[slot][0] = 0.0; + } + /*Y*/ - tri->inputs.a0[slot][1] = 0.0; - tri->inputs.dadx[slot][1] = 0.0; - tri->inputs.dady[slot][1] = 1.0; + if (usage_mask & TGSI_WRITEMASK_Y) { + tri->inputs.a0[slot][1] = 0.0; + tri->inputs.dadx[slot][1] = 0.0; + tri->inputs.dady[slot][1] = 1.0; + } + /*Z*/ - linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 2); + if (usage_mask & TGSI_WRITEMASK_Z) { + linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 2); + } + /*W*/ - linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 3); + if (usage_mask & TGSI_WRITEMASK_W) { + linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 3); + } } @@ -170,13 +183,21 @@ setup_fragcoord_coef(struct lp_setup_context *setup, static void setup_facing_coef( struct lp_setup_context *setup, struct lp_rast_triangle *tri, unsigned slot, - boolean frontface ) + boolean frontface, + unsigned usage_mask) { /* convert TRUE to 1.0 and FALSE to -1.0 */ - constant_coef( setup, tri, slot, 2.0f * frontface - 1.0f, 0 ); - constant_coef( setup, tri, slot, 0.0f, 1 ); /* wasted */ - constant_coef( setup, tri, slot, 0.0f, 2 ); /* wasted */ - constant_coef( setup, tri, slot, 0.0f, 3 ); /* wasted */ + if (usage_mask & TGSI_WRITEMASK_X) + constant_coef( setup, tri, slot, 2.0f * frontface - 1.0f, 0 ); + + if (usage_mask & TGSI_WRITEMASK_Y) + constant_coef( setup, tri, slot, 0.0f, 1 ); /* wasted */ + + if (usage_mask & TGSI_WRITEMASK_Z) + constant_coef( setup, tri, slot, 0.0f, 2 ); /* wasted */ + + if (usage_mask & TGSI_WRITEMASK_W) + constant_coef( setup, tri, slot, 0.0f, 3 ); /* wasted */ } @@ -191,54 +212,65 @@ static void setup_tri_coefficients( struct lp_setup_context *setup, const float (*v3)[4], boolean frontface) { + unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; unsigned slot; - /* The internal position input is in slot zero: - */ - setup_fragcoord_coef(setup, tri, oneoverarea, 0, v1, v2, v3); - /* setup interpolation for all the remaining attributes: */ for (slot = 0; slot < setup->fs.nr_inputs; slot++) { unsigned vert_attr = setup->fs.input[slot].src_index; + unsigned usage_mask = setup->fs.input[slot].usage_mask; unsigned i; switch (setup->fs.input[slot].interp) { case LP_INTERP_CONSTANT: if (setup->flatshade_first) { for (i = 0; i < NUM_CHANNELS; i++) - constant_coef(setup, tri, slot+1, v1[vert_attr][i], i); + if (usage_mask & (1 << i)) + constant_coef(setup, tri, slot+1, v1[vert_attr][i], i); } else { for (i = 0; i < NUM_CHANNELS; i++) - constant_coef(setup, tri, slot+1, v3[vert_attr][i], i); + if (usage_mask & (1 << i)) + constant_coef(setup, tri, slot+1, v3[vert_attr][i], i); } break; case LP_INTERP_LINEAR: for (i = 0; i < NUM_CHANNELS; i++) - linear_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); + if (usage_mask & (1 << i)) + linear_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); break; case LP_INTERP_PERSPECTIVE: for (i = 0; i < NUM_CHANNELS; i++) - perspective_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); + if (usage_mask & (1 << i)) + perspective_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); + fragcoord_usage_mask |= TGSI_WRITEMASK_W; break; case LP_INTERP_POSITION: - /* XXX: fix me - duplicates the values in slot zero. + /* + * The generated pixel interpolators will pick up the coeffs from + * slot 0, so all need to ensure that the usage mask is covers all + * usages. */ - setup_fragcoord_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3); + fragcoord_usage_mask |= usage_mask; break; case LP_INTERP_FACING: - setup_facing_coef(setup, tri, slot+1, frontface); + setup_facing_coef(setup, tri, slot+1, frontface, usage_mask); break; default: assert(0); } } + + /* The internal position input is in slot zero: + */ + setup_fragcoord_coef(setup, tri, oneoverarea, 0, v1, v2, v3, + fragcoord_usage_mask); } @@ -272,12 +304,14 @@ alloc_triangle(struct lp_scene *scene, unsigned nr_inputs, unsigned *tri_size) tri = lp_scene_alloc_aligned( scene, bytes, 16 ); - inputs = (char *) (tri + 1); - tri->inputs.a0 = (float (*)[4]) inputs; - tri->inputs.dadx = (float (*)[4]) (inputs + input_array_sz); - tri->inputs.dady = (float (*)[4]) (inputs + 2 * input_array_sz); + if (tri) { + inputs = (char *) (tri + 1); + tri->inputs.a0 = (float (*)[4]) inputs; + tri->inputs.dadx = (float (*)[4]) (inputs + input_array_sz); + tri->inputs.dady = (float (*)[4]) (inputs + 2 * input_array_sz); - *tri_size = bytes; + *tri_size = bytes; + } return tri; } @@ -341,6 +375,8 @@ do_triangle_ccw(struct lp_setup_context *setup, print_triangle(setup, v1, v2, v3); tri = alloc_triangle(scene, setup->fs.nr_inputs, &tri_bytes); + if (!tri) + return; #ifdef DEBUG tri->v[0][0] = v1[0][0]; @@ -575,7 +611,7 @@ do_triangle_ccw(struct lp_setup_context *setup, /* triangle covers the whole tile- shade whole tile */ LP_COUNT(nr_fully_covered_64); in = TRUE; - if(setup->fs.current.opaque) { + if (setup->fs.current.variant->opaque) { lp_scene_bin_reset( scene, x, y ); lp_scene_bin_command( scene, x, y, lp_rast_set_state, diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index bae5de0cb35..c268f966473 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -32,10 +32,9 @@ #define LP_STATE_H #include "pipe/p_state.h" -#include "tgsi/tgsi_scan.h" #include "lp_jit.h" +#include "lp_state_fs.h" #include "gallivm/lp_bld.h" -#include "gallivm/lp_bld_sample.h" /* for struct lp_sampler_static_state */ #define LP_NEW_VIEWPORT 0x1 @@ -60,57 +59,6 @@ struct vertex_info; struct pipe_context; struct llvmpipe_context; -struct lp_fragment_shader; - - -/** Indexes into jit_function[] array */ -#define RAST_WHOLE 0 -#define RAST_EDGE_TEST 1 - - -struct lp_fragment_shader_variant_key -{ - struct pipe_depth_state depth; - struct pipe_stencil_state stencil[2]; - struct pipe_alpha_state alpha; - struct pipe_blend_state blend; - enum pipe_format zsbuf_format; - unsigned nr_cbufs:8; - unsigned flatshade:1; - unsigned scissor:1; - unsigned occlusion_count:1; - - struct { - ubyte colormask; - } cbuf_blend[PIPE_MAX_COLOR_BUFS]; - - struct lp_sampler_static_state sampler[PIPE_MAX_SAMPLERS]; -}; - - -struct lp_fragment_shader_variant -{ - struct lp_fragment_shader_variant_key key; - - boolean opaque; - - LLVMValueRef function[2]; - - lp_jit_frag_func jit_function[2]; - - struct lp_fragment_shader_variant *next; -}; - - -/** Subclass of pipe_shader_state */ -struct lp_fragment_shader -{ - struct pipe_shader_state base; - - struct tgsi_shader_info info; - - struct lp_fragment_shader_variant *variants; -}; /** Subclass of pipe_shader_state */ diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 2edfcb28ce6..9e066f5c656 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -50,7 +50,7 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) { const struct lp_fragment_shader *lpfs = llvmpipe->fs; struct vertex_info *vinfo = &llvmpipe->vertex_info; - struct lp_shader_input inputs[1 + PIPE_MAX_SHADER_INPUTS]; + struct lp_shader_input *inputs = llvmpipe->inputs; unsigned vs_index; uint i; @@ -77,6 +77,23 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) /* This can be pre-computed, except for flatshade: */ + inputs[i].usage_mask = lpfs->info.input_usage_mask[i]; + + switch (lpfs->info.input_interpolate[i]) { + case TGSI_INTERPOLATE_CONSTANT: + inputs[i].interp = LP_INTERP_CONSTANT; + break; + case TGSI_INTERPOLATE_LINEAR: + inputs[i].interp = LP_INTERP_LINEAR; + break; + case TGSI_INTERPOLATE_PERSPECTIVE: + inputs[i].interp = LP_INTERP_PERSPECTIVE; + break; + default: + assert(0); + break; + } + switch (lpfs->info.input_semantic_name[i]) { case TGSI_SEMANTIC_FACE: inputs[i].interp = LP_INTERP_FACING; @@ -95,25 +112,10 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) */ if (llvmpipe->rasterizer->flatshade) inputs[i].interp = LP_INTERP_CONSTANT; - else - inputs[i].interp = LP_INTERP_LINEAR; break; default: - switch (lpfs->info.input_interpolate[i]) { - case TGSI_INTERPOLATE_CONSTANT: - inputs[i].interp = LP_INTERP_CONSTANT; - break; - case TGSI_INTERPOLATE_LINEAR: - inputs[i].interp = LP_INTERP_LINEAR; - break; - case TGSI_INTERPOLATE_PERSPECTIVE: - inputs[i].interp = LP_INTERP_PERSPECTIVE; - break; - default: - assert(0); - break; - } + break; } /* @@ -123,6 +125,7 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) inputs[i].src_index = vinfo->num_attribs; draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index); } + llvmpipe->num_inputs = lpfs->info.num_inputs; draw_compute_vertex_size(vinfo); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 9ef78e6badf..6896bedf189 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -64,8 +64,10 @@ #include "pipe/p_defines.h" #include "util/u_inlines.h" #include "util/u_memory.h" +#include "util/u_pointer.h" #include "util/u_format.h" #include "util/u_dump.h" +#include "util/u_string.h" #include "os/os_time.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" @@ -87,6 +89,7 @@ #include "lp_bld_depth.h" #include "lp_bld_interp.h" #include "lp_context.h" +#include "lp_debug.h" #include "lp_perf.h" #include "lp_screen.h" #include "lp_setup.h" @@ -97,43 +100,7 @@ #include <llvm-c/Analysis.h> -static const unsigned char quad_offset_x[4] = {0, 1, 0, 1}; -static const unsigned char quad_offset_y[4] = {0, 0, 1, 1}; - - -/* - * Derive from the quad's upper left scalar coordinates the coordinates for - * all other quad pixels - */ -static void -generate_pos0(LLVMBuilderRef builder, - LLVMValueRef x, - LLVMValueRef y, - LLVMValueRef *x0, - LLVMValueRef *y0) -{ - LLVMTypeRef int_elem_type = LLVMInt32Type(); - LLVMTypeRef int_vec_type = LLVMVectorType(int_elem_type, QUAD_SIZE); - LLVMTypeRef elem_type = LLVMFloatType(); - LLVMTypeRef vec_type = LLVMVectorType(elem_type, QUAD_SIZE); - LLVMValueRef x_offsets[QUAD_SIZE]; - LLVMValueRef y_offsets[QUAD_SIZE]; - unsigned i; - - x = lp_build_broadcast(builder, int_vec_type, x); - y = lp_build_broadcast(builder, int_vec_type, y); - - for(i = 0; i < QUAD_SIZE; ++i) { - x_offsets[i] = LLVMConstInt(int_elem_type, quad_offset_x[i], 0); - y_offsets[i] = LLVMConstInt(int_elem_type, quad_offset_y[i], 0); - } - - x = LLVMBuildAdd(builder, x, LLVMConstVector(x_offsets, QUAD_SIZE), ""); - y = LLVMBuildAdd(builder, y, LLVMConstVector(y_offsets, QUAD_SIZE), ""); - - *x0 = LLVMBuildSIToFP(builder, x, vec_type, ""); - *y0 = LLVMBuildSIToFP(builder, y, vec_type, ""); -} +static unsigned fs_no = 0; /** @@ -614,6 +581,7 @@ generate_fragment(struct llvmpipe_context *lp, { struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); const struct lp_fragment_shader_variant_key *key = &variant->key; + char func_name[256]; struct lp_type fs_type; struct lp_type blend_type; LLVMTypeRef fs_elem_type; @@ -633,8 +601,6 @@ generate_fragment(struct llvmpipe_context *lp, LLVMValueRef c0, c1, c2, step0_ptr, step1_ptr, step2_ptr, counter = NULL; LLVMBasicBlockRef block; LLVMBuilderRef builder; - LLVMValueRef x0; - LLVMValueRef y0; struct lp_build_sampler_soa *sampler; struct lp_build_interp_soa_context interp; LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH]; @@ -676,6 +642,9 @@ generate_fragment(struct llvmpipe_context *lp, blend_vec_type = lp_build_vec_type(blend_type); + util_snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s", + shader->no, variant->no, do_tri_test ? "edge" : "whole"); + arg_types[0] = screen->context_ptr_type; /* context */ arg_types[1] = LLVMInt32Type(); /* x */ arg_types[2] = LLVMInt32Type(); /* y */ @@ -698,7 +667,7 @@ generate_fragment(struct llvmpipe_context *lp, func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); - function = LLVMAddFunction(screen->module, "shader", func_type); + function = LLVMAddFunction(screen->module, func_name, func_type); LLVMSetFunctionCallConv(function, LLVMCCallConv); variant->function[do_tri_test] = function; @@ -755,14 +724,17 @@ generate_fragment(struct llvmpipe_context *lp, builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); - generate_pos0(builder, x, y, &x0, &y0); - + /* + * The shader input interpolation info is not explicitely baked in the + * shader key, but everything it derives from (TGSI, and flatshade) is + * already included in the shader key. + */ lp_build_interp_soa_init(&interp, - shader->base.tokens, - key->flatshade, + lp->num_inputs, + lp->inputs, builder, fs_type, a0_ptr, dadx_ptr, dady_ptr, - x0, y0); + x, y); /* code generated texture sampling */ sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); @@ -873,7 +845,7 @@ generate_fragment(struct llvmpipe_context *lp, { void *f = LLVMGetPointerToGlobal(screen->engine, function); - variant->jit_function[do_tri_test] = cast_voidptr_to_lp_jit_frag_func(f); + variant->jit_function[do_tri_test] = (lp_jit_frag_func)pointer_to_func(f); if (gallivm_debug & GALLIVM_DEBUG_ASM) { lp_disassemble(f); @@ -960,17 +932,21 @@ generate_variant(struct llvmpipe_context *lp, { struct lp_fragment_shader_variant *variant; - if (gallivm_debug & GALLIVM_DEBUG_IR) { - tgsi_dump(shader->base.tokens, 0); - dump_fs_variant_key(key); - } - variant = CALLOC_STRUCT(lp_fragment_shader_variant); if(!variant) return NULL; + variant->no = shader->variant_no++; + memcpy(&variant->key, key, sizeof *key); + if (gallivm_debug & GALLIVM_DEBUG_IR) { + debug_printf("llvmpipe: Creating fragment shader #%u variant #%u:\n", + shader->no, variant->no); + tgsi_dump(shader->base.tokens, 0); + dump_fs_variant_key(key); + } + generate_fragment(lp, shader, variant, RAST_WHOLE); generate_fragment(lp, shader, variant, RAST_EDGE_TEST); @@ -1004,15 +980,29 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, if (!shader) return NULL; + shader->no = fs_no++; + /* get/save the summary info for this shader */ tgsi_scan_shader(templ->tokens, &shader->info); /* we need to keep a local copy of the tokens */ shader->base.tokens = tgsi_dup_tokens(templ->tokens); - if (gallivm_debug & GALLIVM_DEBUG_TGSI) { - debug_printf("llvmpipe: Create fragment shader %p:\n", (void *) shader); + if (LP_DEBUG & DEBUG_TGSI) { + unsigned attrib; + debug_printf("llvmpipe: Create fragment shader #%u %p:\n", shader->no, (void *) shader); tgsi_dump(templ->tokens, 0); + debug_printf("usage masks:\n"); + for (attrib = 0; attrib < shader->info.num_inputs; ++attrib) { + unsigned usage_mask = shader->info.input_usage_mask[attrib]; + debug_printf(" IN[%u].%s%s%s%s\n", + attrib, + usage_mask & TGSI_WRITEMASK_X ? "x" : "", + usage_mask & TGSI_WRITEMASK_Y ? "y" : "", + usage_mask & TGSI_WRITEMASK_Z ? "z" : "", + usage_mask & TGSI_WRITEMASK_W ? "w" : ""); + } + debug_printf("\n"); } return shader; @@ -1260,10 +1250,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) LP_COUNT_ADD(nr_llvm_compiles, 2); /* emit vs. omit in/out test */ } - lp_setup_set_fs_functions(lp->setup, - variant->jit_function[RAST_WHOLE], - variant->jit_function[RAST_EDGE_TEST], - variant->opaque); + lp_setup_set_fs_variant(lp->setup, variant); } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h new file mode 100644 index 00000000000..64ead2a9973 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h @@ -0,0 +1,100 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +#ifndef LP_STATE_FS_H_ +#define LP_STATE_FS_H_ + + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" /* for tgsi_shader_info */ +#include "gallivm/lp_bld_sample.h" /* for struct lp_sampler_static_state */ + + +struct tgsi_token; +struct lp_fragment_shader; + + +/** Indexes into jit_function[] array */ +#define RAST_WHOLE 0 +#define RAST_EDGE_TEST 1 + + +struct lp_fragment_shader_variant_key +{ + struct pipe_depth_state depth; + struct pipe_stencil_state stencil[2]; + struct pipe_alpha_state alpha; + struct pipe_blend_state blend; + enum pipe_format zsbuf_format; + unsigned nr_cbufs:8; + unsigned flatshade:1; + unsigned scissor:1; + unsigned occlusion_count:1; + + struct { + ubyte colormask; + } cbuf_blend[PIPE_MAX_COLOR_BUFS]; + + struct lp_sampler_static_state sampler[PIPE_MAX_SAMPLERS]; +}; + + +struct lp_fragment_shader_variant +{ + struct lp_fragment_shader_variant_key key; + + boolean opaque; + + LLVMValueRef function[2]; + + lp_jit_frag_func jit_function[2]; + + struct lp_fragment_shader_variant *next; + + /* For debugging/profiling purposes */ + unsigned no; +}; + + +/** Subclass of pipe_shader_state */ +struct lp_fragment_shader +{ + struct pipe_shader_state base; + + struct tgsi_shader_info info; + + struct lp_fragment_shader_variant *variants; + + /* For debugging/profiling purposes */ + unsigned no; + unsigned variant_no; +}; + + +#endif /* LP_STATE_FS_H_ */ diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 072d699666b..557eb8e3e6f 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -805,7 +805,7 @@ test_all(unsigned verbose, FILE *fp) struct pipe_blend_state blend; enum vector_mode mode; const struct lp_type *type; - bool success = TRUE; + boolean success = TRUE; for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) { for(alpha_func = blend_funcs; alpha_func < &blend_funcs[num_funcs]; ++alpha_func) { @@ -859,7 +859,7 @@ test_some(unsigned verbose, FILE *fp, unsigned long n) enum vector_mode mode; const struct lp_type *type; unsigned long i; - bool success = TRUE; + boolean success = TRUE; for(i = 0; i < n; ++i) { rgb_func = &blend_funcs[rand() % num_funcs]; diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index 254f0daea3b..cb0d02ab32c 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -34,6 +34,7 @@ */ +#include "util/u_pointer.h" #include "gallivm/lp_bld_type.h" #include "gallivm/lp_bld_const.h" #include "gallivm/lp_bld_conv.h" @@ -43,17 +44,6 @@ typedef void (*conv_test_ptr_t)(const void *src, const void *dst); -/** cast wrapper */ -static conv_test_ptr_t -voidptr_to_conv_test_ptr_t(void *p) -{ - union { - void *v; - conv_test_ptr_t f; - } u; - u.v = p; - return u.f; -} void write_tsv_header(FILE *fp) @@ -234,7 +224,7 @@ test_one(unsigned verbose, LLVMDumpModule(module); code = LLVMGetPointerToGlobal(engine, func); - conv_test_ptr = voidptr_to_conv_test_ptr_t(code); + conv_test_ptr = (conv_test_ptr_t)pointer_to_func(code); if(verbose >= 2) lp_disassemble(code); diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index e5f8bf96330..7c0d7d2e656 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -37,6 +37,7 @@ #include <llvm-c/Transforms/Scalar.h> #include "util/u_memory.h" +#include "util/u_pointer.h" #include "util/u_format.h" #include "util/u_format_tests.h" #include "util/u_format_s3tc.h" @@ -73,19 +74,6 @@ typedef void (*fetch_ptr_t)(float *, const void *packed, unsigned i, unsigned j); -/** cast wrapper to avoid warnings */ -static fetch_ptr_t -void_to_fetch_ptr_t(void *p) -{ - union { - void *v; - fetch_ptr_t f; - } u; - u.v = p; - return u.f; -} - - static LLVMValueRef add_fetch_rgba_test(LLVMModuleRef lp_build_module, @@ -162,7 +150,7 @@ test_format(unsigned verbose, FILE *fp, (void)pass; #endif - fetch_ptr = void_to_fetch_ptr_t(LLVMGetPointerToGlobal(lp_build_engine, fetch)); + fetch_ptr = (fetch_ptr_t)pointer_to_func(LLVMGetPointerToGlobal(lp_build_engine, fetch)); for (i = 0; i < desc->block.height; ++i) { for (j = 0; j < desc->block.width; ++j) { diff --git a/src/gallium/drivers/llvmpipe/lp_test_printf.c b/src/gallium/drivers/llvmpipe/lp_test_printf.c index 13485c37748..d99ca816387 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_printf.c +++ b/src/gallium/drivers/llvmpipe/lp_test_printf.c @@ -29,6 +29,7 @@ #include <stdlib.h> #include <stdio.h> +#include "util/u_pointer.h" #include "gallivm/lp_bld.h" #include "gallivm/lp_bld_printf.h" @@ -58,18 +59,6 @@ write_tsv_header(FILE *fp) typedef void (*test_printf_t)(int i); -/** cast wrapper */ -static test_printf_t -voidptr_to_test_printf_t(void *p) -{ - union { - void *v; - test_printf_t f; - } u; - u.v = p; - return u.f; -} - static LLVMValueRef add_printf_test(LLVMModuleRef module) @@ -140,7 +129,7 @@ test_printf(unsigned verbose, FILE *fp, const struct printf_test_case *testcase) #endif code = LLVMGetPointerToGlobal(engine, test); - test_printf = voidptr_to_test_printf_t(code); + test_printf = (test_printf_t)pointer_to_func(code); memset(unpacked, 0, sizeof unpacked); packed = 0; diff --git a/src/gallium/drivers/llvmpipe/lp_test_sincos.c b/src/gallium/drivers/llvmpipe/lp_test_sincos.c index 386f852be0f..da16fea7bd5 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_sincos.c +++ b/src/gallium/drivers/llvmpipe/lp_test_sincos.c @@ -60,7 +60,7 @@ write_tsv_header(FILE *fp) typedef __m128 (*test_sincos_t)(__m128); static LLVMValueRef -add_sincos_test(LLVMModuleRef module, bool sin) +add_sincos_test(LLVMModuleRef module, boolean sin) { LLVMTypeRef v4sf = LLVMVectorType(LLVMFloatType(), 4); LLVMTypeRef args[1] = { v4sf }; @@ -185,7 +185,7 @@ test_sincos(unsigned verbose, FILE *fp) boolean test_all(unsigned verbose, FILE *fp) { - bool success = TRUE; + boolean success = TRUE; test_sincos(verbose, fp); diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 5696683f454..d86056ca34c 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -1045,6 +1045,7 @@ llvmpipe_get_texture_image(struct llvmpipe_resource *lpr, lpr->tiles_per_row[level]); } else { + assert(layout == LP_TEX_LAYOUT_LINEAR); lp_tiled_to_linear(other_data, target_data, x * TILE_SIZE, y * TILE_SIZE, TILE_SIZE, TILE_SIZE, @@ -1054,8 +1055,9 @@ llvmpipe_get_texture_image(struct llvmpipe_resource *lpr, } } - llvmpipe_set_texture_tile_layout(lpr, face_slice, level, x, y, - new_layout); + if (new_layout != cur_layout) + llvmpipe_set_texture_tile_layout(lpr, face_slice, level, x, y, + new_layout); } } } diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index d3cd6bef96e..6bb82e5ed08 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -21,6 +21,7 @@ C_SOURCES = \ r300_state_derived.c \ r300_state_invariant.c \ r300_vs.c \ + r300_vs_draw.c \ r300_texture.c \ r300_tgsi_to_rc.c \ r300_transfer.c diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript index 3921085d76a..eb3e1d365e9 100644 --- a/src/gallium/drivers/r300/SConscript +++ b/src/gallium/drivers/r300/SConscript @@ -31,6 +31,7 @@ r300 = env.ConvenienceLibrary( 'r300_state_derived.c', 'r300_state_invariant.c', 'r300_vs.c', + 'r300_vs_draw.c', 'r300_texture.c', 'r300_tgsi_to_rc.c', 'r300_transfer.c', diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 85e45d8647d..88ce1867982 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -236,6 +236,14 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, return NULL; } +boolean r300_check_cs(struct r300_context *r300, unsigned size) +{ + struct r300_cs_info cs_info; + + r300->rws->get_cs_info(r300->rws, &cs_info); + return size <= cs_info.free; +} + void r300_finish(struct r300_context *r300) { struct pipe_framebuffer_state *fb; diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index efc6df3fabf..48ec52d26cf 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -36,9 +36,9 @@ struct u_upload_mgr; struct r300_context; - struct r300_fragment_shader; struct r300_vertex_shader; +struct r300_stencilref_context; struct r300_atom { /* List pointers. */ @@ -86,7 +86,7 @@ struct r300_dsa_state { /* Whether a two-sided stencil is enabled. */ boolean two_sided; /* Whether a fallback should be used for a two-sided stencil ref value. */ - boolean stencil_ref_bf_fallback; + boolean two_sided_stencil_ref; }; struct r300_rs_state { @@ -272,6 +272,8 @@ struct r300_texture { /* Parent class */ struct u_resource b; + enum r300_buffer_domain domain; + /* Offsets into the buffer. */ unsigned offset[R300_MAX_TEXTURE_LEVELS]; @@ -331,21 +333,6 @@ struct r300_context { /* Parent class */ struct pipe_context context; - /* Emission of drawing packets. */ - void (*emit_draw_arrays_immediate)( - struct r300_context *r300, - unsigned mode, unsigned start, unsigned count); - - void (*emit_draw_arrays)( - struct r300_context *r300, - unsigned mode, unsigned count); - - void (*emit_draw_elements)( - struct r300_context *r300, struct pipe_resource* indexBuffer, - unsigned indexSize, unsigned minIndex, unsigned maxIndex, - unsigned mode, unsigned start, unsigned count); - - /* The interface to the windowing system, etc. */ struct r300_winsys_screen *rws; /* Screen. */ @@ -354,6 +341,8 @@ struct r300_context { struct draw_context* draw; /* Accelerated blit support. */ struct blitter_context* blitter; + /* Stencil two-sided reference value fallback. */ + struct r300_stencilref_context *stencilref_fallback; /* Vertex buffer for rendering. */ struct pipe_resource* vbo; @@ -441,9 +430,6 @@ struct r300_context { uint32_t zbuffer_bpp; /* Whether rendering is conditional and should be skipped. */ boolean skip_rendering; - /* Whether the two-sided stencil ref value is different for front and - * back faces, and fallback should be used for r3xx-r4xx. */ - boolean stencil_ref_bf_fallback; /* Point sprites texcoord index, 1 bit per texcoord */ int sprite_coord_enable; /* Whether two-sided color selection is enabled (AKA light_twoside). */ @@ -490,6 +476,7 @@ void r300_init_render_functions(struct r300_context *r300); void r300_init_state_functions(struct r300_context* r300); void r300_init_resource_functions(struct r300_context* r300); +boolean r300_check_cs(struct r300_context *r300, unsigned size); void r300_finish(struct r300_context *r300); void r500_dump_rs_block(struct r300_rs_block *rs); diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 996a4f491e7..9c8c273902b 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -37,11 +37,6 @@ #define VERY_VERBOSE_CS 1 #define VERY_VERBOSE_REGISTERS 1 -/* XXX stolen from radeon_drm.h */ -#define RADEON_GEM_DOMAIN_CPU 0x1 -#define RADEON_GEM_DOMAIN_GTT 0x2 -#define RADEON_GEM_DOMAIN_VRAM 0x4 - /* XXX stolen from radeon_reg.h */ #define RADEON_CP_PACKET0 0x0 @@ -54,7 +49,7 @@ int cs_count = 0; (void) cs_count; #define CHECK_CS(size) \ - assert(cs_winsys->check_cs(cs_winsys, (size))) + assert(r300_check_cs(cs_context_copy, (size))) #define BEGIN_CS(size) do { \ CHECK_CS(size); \ diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 024731a22ab..297791f3ffe 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -22,16 +22,11 @@ #include "r300_context.h" -#include <stdio.h> +#include "util/u_debug.h" -struct debug_option { - const char * name; - unsigned flag; - const char * description; -}; +#include <stdio.h> -static struct debug_option debug_options[] = { - { "help", DBG_HELP, "Helpful meta-information about the driver" }, +static const struct debug_named_value debug_options[] = { { "fp", DBG_FP, "Fragment program handling (for debugging)" }, { "vp", DBG_VP, "Vertex program handling (for debugging)" }, { "cs", DBG_CS, "Command submissions (for debugging)" }, @@ -46,57 +41,13 @@ static struct debug_option debug_options[] = { { "noimmd", DBG_NO_IMMD, "Disable immediate mode (for benchmarking)" }, { "stats", DBG_STATS, "Gather statistics (for lulz)" }, - { "all", ~0, "Convenience option that enables all debug flags" }, - /* must be last */ - { 0, 0, 0 } + DEBUG_NAMED_VALUE_END }; void r300_init_debug(struct r300_screen * screen) { - const char * options = debug_get_option("RADEON_DEBUG", 0); - boolean printhint = FALSE; - size_t length; - struct debug_option * opt; - - if (options) { - while(*options) { - if (*options == ' ' || *options == ',') { - options++; - continue; - } - - length = strcspn(options, " ,"); - - for(opt = debug_options; opt->name; ++opt) { - if (!strncmp(options, opt->name, length)) { - screen->debug |= opt->flag; - break; - } - } - - if (!opt->name) { - fprintf(stderr, "Unknown debug option: %s\n", options); - printhint = TRUE; - } - - options += length; - } - - if (!screen->debug) - printhint = TRUE; - } - - if (printhint || screen->debug & DBG_HELP) { - fprintf(stderr, "You can enable debug output by setting " - "the RADEON_DEBUG environment variable\n" - "to a comma-separated list of debug options. " - "Available options are:\n"); - - for(opt = debug_options; opt->name; ++opt) { - fprintf(stderr, " %s: %s\n", opt->name, opt->description); - } - } + screen->debug = debug_get_flags_option("RADEON_DEBUG", debug_options, 0); } void r500_dump_rs_block(struct r300_rs_block *rs) diff --git a/src/gallium/drivers/r300/r300_defines.h b/src/gallium/drivers/r300/r300_defines.h index 98ee3c1edec..83c9ec7e399 100644 --- a/src/gallium/drivers/r300/r300_defines.h +++ b/src/gallium/drivers/r300/r300_defines.h @@ -30,6 +30,8 @@ #define R300_RESOURCE_FLAG_TRANSFER PIPE_RESOURCE_FLAG_DRV_PRIV +#define R300_INVALID_FORMAT (~0) + /* XXX: this is just a bandaid on larger problems in * r300_screen_buffer.h which doesn't seem to be fully ported to * gallium-resources. @@ -43,4 +45,9 @@ enum r300_buffer_tiling { R300_BUFFER_SQUARETILED }; +enum r300_buffer_domain { /* bitfield */ + R300_DOMAIN_GTT = 1, + R300_DOMAIN_VRAM = 2 +}; + #endif diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 7f7f2929cc3..55c6ce50304 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -134,7 +134,6 @@ static const float * get_rc_constant_state( struct r300_context * r300, struct rc_constant * constant) { - struct r300_viewport_state* viewport = r300->viewport_state.state; struct r300_textures_state* texstate = r300->textures_state.state; static float vec[4] = { 0.0, 0.0, 0.0, 1.0 }; struct pipe_resource *tex; @@ -151,15 +150,15 @@ static const float * get_rc_constant_state( break; case RC_STATE_R300_VIEWPORT_SCALE: - vec[0] = viewport->xscale; - vec[1] = viewport->yscale; - vec[2] = viewport->zscale; + vec[0] = r300->viewport.scale[0]; + vec[1] = r300->viewport.scale[1]; + vec[2] = r300->viewport.scale[2]; break; case RC_STATE_R300_VIEWPORT_OFFSET: - vec[0] = viewport->xoffset; - vec[1] = viewport->yoffset; - vec[2] = viewport->zoffset; + vec[0] = r300->viewport.translate[0]; + vec[1] = r300->viewport.translate[1]; + vec[2] = r300->viewport.translate[2]; break; default: @@ -486,11 +485,11 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) assert(tex && tex->buffer && "cbuf is marked, but NULL!"); OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1); - OUT_CS_TEX_RELOC(tex, surf->offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_CS_TEX_RELOC(tex, surf->offset, 0, tex->domain, 0); OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); OUT_CS_TEX_RELOC(tex, tex->fb_state.colorpitch[surf->level], - 0, RADEON_GEM_DOMAIN_VRAM, 0); + 0, tex->domain, 0); OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), tex->fb_state.us_out_fmt); } @@ -505,13 +504,13 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); - OUT_CS_TEX_RELOC(tex, surf->offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_CS_TEX_RELOC(tex, surf->offset, 0, tex->domain, 0); OUT_CS_REG(R300_ZB_FORMAT, tex->fb_state.zb_format); OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); OUT_CS_TEX_RELOC(tex, tex->fb_state.depthpitch[surf->level], - 0, RADEON_GEM_DOMAIN_VRAM, 0); + 0, tex->domain, 0); } OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); @@ -571,13 +570,13 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, OUT_CS_REG(R300_SU_REG_DEST, 1 << 3); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 3), - 0, RADEON_GEM_DOMAIN_GTT, 0); + 0, r300_buffer(r300->oqbo)->domain, 0); case 3: /* pipe 2 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 2); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 2), - 0, RADEON_GEM_DOMAIN_GTT, 0); + 0, r300_buffer(r300->oqbo)->domain, 0); case 2: /* pipe 1 only */ /* As mentioned above, accomodate RV380 and older. */ @@ -585,13 +584,13 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, 1 << (caps->high_second_pipe ? 3 : 1)); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 1), - 0, RADEON_GEM_DOMAIN_GTT, 0); + 0, r300_buffer(r300->oqbo)->domain, 0); case 1: /* pipe 0 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 0), - 0, RADEON_GEM_DOMAIN_GTT, 0); + 0, r300_buffer(r300->oqbo)->domain, 0); break; default: fprintf(stderr, "r300: Implementation error: Chipset reports %d" @@ -612,7 +611,7 @@ static void rv530_emit_query_end_single_z(struct r300_context *r300, BEGIN_CS(8); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_BUF_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_CS_BUF_RELOC(r300->oqbo, query->offset, 0, r300_buffer(r300->oqbo)->domain, 0); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -625,10 +624,10 @@ static void rv530_emit_query_end_double_z(struct r300_context *r300, BEGIN_CS(14); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_BUF_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_CS_BUF_RELOC(r300->oqbo, query->offset, 0, r300_buffer(r300->oqbo)->domain, 0); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_BUF_RELOC(r300->oqbo, query->offset + sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_CS_BUF_RELOC(r300->oqbo, query->offset + sizeof(uint32_t), 0, r300_buffer(r300->oqbo)->domain, 0); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -787,6 +786,7 @@ void r300_emit_textures_state(struct r300_context *r300, { struct r300_textures_state *allstate = (struct r300_textures_state*)state; struct r300_texture_sampler_state *texstate; + struct r300_texture *tex; unsigned i; CS_LOCALS(r300); @@ -796,6 +796,7 @@ void r300_emit_textures_state(struct r300_context *r300, for (i = 0; i < allstate->count; i++) { if ((1 << i) & allstate->tx_enable) { texstate = &allstate->regs[i]; + tex = r300_texture(allstate->sampler_views[i]->base.texture); OUT_CS_REG(R300_TX_FILTER0_0 + (i * 4), texstate->filter0); OUT_CS_REG(R300_TX_FILTER1_0 + (i * 4), texstate->filter1); @@ -807,9 +808,8 @@ void r300_emit_textures_state(struct r300_context *r300, OUT_CS_REG(R300_TX_FORMAT2_0 + (i * 4), texstate->format.format2); OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (i * 4), 1); - OUT_CS_TEX_RELOC(r300_texture(allstate->sampler_views[i]->base.texture), - texstate->format.tile_config, - RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); + OUT_CS_TEX_RELOC(tex, texstate->format.tile_config, tex->domain, + 0, 0); } } END_CS; @@ -819,6 +819,7 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset, boolean indexed) { struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer; struct pipe_vertex_element *velem = r300->velems->velem; + struct r300_buffer *buf; int i; unsigned size1, size2, aos_count = r300->velems->count; unsigned packet_size = (aos_count * 3 + 1) / 2; @@ -858,13 +859,13 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset, boolean indexed) } for (i = 0; i < aos_count; i++) { - OUT_CS_BUF_RELOC_NO_OFFSET(vbuf[velem[i].vertex_buffer_index].buffer, - RADEON_GEM_DOMAIN_GTT, 0, 0); + buf = r300_buffer(vbuf[velem[i].vertex_buffer_index].buffer); + OUT_CS_BUF_RELOC_NO_OFFSET(&buf->b.b, buf->domain, 0, 0); } END_CS; } -void r300_emit_vertex_buffer(struct r300_context* r300) +void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed) { CS_LOCALS(r300); @@ -880,11 +881,11 @@ void r300_emit_vertex_buffer(struct r300_context* r300) */ BEGIN_CS(7); OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3); - OUT_CS(1); + OUT_CS(1 | (!indexed ? R300_VC_FORCE_PREFETCH : 0)); OUT_CS(r300->vertex_info.size | (r300->vertex_info.size << 8)); OUT_CS(r300->vbo_offset); - OUT_CS_BUF_RELOC(r300->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); + OUT_CS_BUF_RELOC(r300->vbo, 0, r300_buffer(r300->vbo)->domain, 0, 0); END_CS; } @@ -1057,7 +1058,7 @@ void r300_emit_buffer_validate(struct r300_context *r300, boolean invalid = FALSE; /* upload buffers first */ - if (r300->any_user_vbs) { + if (r300->screen->caps.has_tcl && r300->any_user_vbs) { r300_upload_user_buffers(r300); r300->any_user_vbs = false; } @@ -1070,8 +1071,7 @@ validate: for (i = 0; i < fb->nr_cbufs; i++) { tex = r300_texture(fb->cbufs[i]->texture); assert(tex && tex->buffer && "cbuf is marked, but NULL!"); - if (!r300_add_texture(r300->rws, tex, - 0, RADEON_GEM_DOMAIN_VRAM)) { + if (!r300_add_texture(r300->rws, tex, 0, tex->domain)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } @@ -1081,7 +1081,7 @@ validate: tex = r300_texture(fb->zsbuf->texture); assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); if (!r300_add_texture(r300->rws, tex, - 0, RADEON_GEM_DOMAIN_VRAM)) { + 0, tex->domain)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } @@ -1093,8 +1093,7 @@ validate: } tex = r300_texture(texstate->sampler_views[i]->base.texture); - if (!r300_add_texture(r300->rws, tex, - RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0)) { + if (!r300_add_texture(r300->rws, tex, tex->domain, 0)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } @@ -1103,7 +1102,7 @@ validate: if (r300->query_start.dirty || (r300->query_current && r300->query_current->begin_emitted)) { if (!r300_add_buffer(r300->rws, r300->oqbo, - 0, RADEON_GEM_DOMAIN_GTT)) { + 0, r300_buffer(r300->oqbo)->domain)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } @@ -1111,7 +1110,7 @@ validate: /* ...vertex buffer for SWTCL path... */ if (r300->vbo) { if (!r300_add_buffer(r300->rws, r300->vbo, - RADEON_GEM_DOMAIN_GTT, 0)) { + r300_buffer(r300->vbo)->domain, 0)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } @@ -1122,7 +1121,7 @@ validate: pbuf = vbuf[velem[i].vertex_buffer_index].buffer; if (!r300_add_buffer(r300->rws, pbuf, - RADEON_GEM_DOMAIN_GTT, 0)) { + r300_buffer(pbuf)->domain, 0)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } @@ -1131,7 +1130,7 @@ validate: /* ...and index buffer for HWTCL path. */ if (index_buffer) { if (!r300_add_buffer(r300->rws, index_buffer, - RADEON_GEM_DOMAIN_GTT, 0)) { + r300_buffer(index_buffer)->domain, 0)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } @@ -1168,7 +1167,6 @@ unsigned r300_get_num_dirty_dwords(struct r300_context *r300) /* Emit all dirty state. */ void r300_emit_dirty_state(struct r300_context* r300) { - struct r300_screen* r300screen = r300->screen; struct r300_atom* atom; foreach(atom, &r300->atom_list) { @@ -1181,10 +1179,5 @@ void r300_emit_dirty_state(struct r300_context* r300) } } - /* Emit the VBO for SWTCL. */ - if (!r300screen->caps.has_tcl) { - r300_emit_vertex_buffer(r300); - } - r300->dirty_hw++; } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 3c0edf6fdca..55e5898ea89 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -76,7 +76,7 @@ void r300_emit_scissor_state(struct r300_context* r300, void r300_emit_textures_state(struct r300_context *r300, unsigned size, void *state); -void r300_emit_vertex_buffer(struct r300_context* r300); +void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed); void r300_emit_vertex_stream_state(struct r300_context* r300, unsigned size, void* state); diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 9cda940c85d..360b19a0c1e 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -77,6 +77,7 @@ static void r300_flush(struct pipe_context* pipe, /* Create a new fence. */ if (rfence) { *rfence = CALLOC_STRUCT(r300_fence); + pipe_reference_init(&(*rfence)->reference, 1); (*rfence)->ctx = r300; } } diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 4ce7114d821..97081c4a00f 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -146,9 +146,13 @@ static boolean r300_get_query_result(struct pipe_context* pipe, /* Looks like our results aren't ready yet. */ if (wait) { fprintf(stderr, "r300: Despite waiting, OQ results haven't " - "come in yet.\n"); + "come in yet. This is a driver bug.\n" + "r300: Returning bogus results to avoid " + "a possible infinite loop...\n"); + temp = 987654321; + } else { + temp = ~0U; } - temp = ~0U; break; } temp += *map; diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 42716552c7f..83780037cf5 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -136,7 +136,8 @@ enum r300_prepare_flags { PREP_FIRST_DRAW = (1 << 0), PREP_VALIDATE_VBOS = (1 << 1), PREP_EMIT_AOS = (1 << 2), - PREP_INDEXED = (1 << 3) + PREP_EMIT_AOS_SWTCL = (1 << 3), + PREP_INDEXED = (1 << 4) }; /* Check if the requested number of dwords is available in the CS and @@ -147,16 +148,14 @@ static void r300_prepare_for_rendering(struct r300_context *r300, struct pipe_resource *index_buffer, unsigned cs_dwords, unsigned aos_offset, - int index_bias) + int index_bias, + unsigned *end_cs_dwords) { boolean flushed = FALSE; boolean first_draw = flags & PREP_FIRST_DRAW; boolean emit_aos = flags & PREP_EMIT_AOS; - - /* Stencil ref fallback. */ - if (r300->stencil_ref_bf_fallback) { - cs_dwords = cs_dwords * 2 + 10; - } + boolean emit_aos_swtcl = flags & PREP_EMIT_AOS_SWTCL; + unsigned end_dwords = 0; /* Add dirty state, index offset, and AOS. */ if (first_draw) { @@ -167,13 +166,18 @@ static void r300_prepare_for_rendering(struct r300_context *r300, if (emit_aos) cs_dwords += 55; /* emit_aos */ + + if (emit_aos_swtcl) + cs_dwords += 7; /* emit_aos_swtcl */ } /* Emitted in flush. */ - cs_dwords += 26; /* emit_query_end */ + end_dwords += 26; /* emit_query_end */ + + cs_dwords += end_dwords; /* Reserve requested CS space. */ - if (!r300->rws->check_cs(r300->rws, cs_dwords)) { + if (!r300_check_cs(r300, cs_dwords)) { r300->context.flush(&r300->context, 0, NULL); flushed = TRUE; } @@ -185,7 +189,12 @@ static void r300_prepare_for_rendering(struct r300_context *r300, r500_emit_index_offset(r300, index_bias); if (emit_aos) r300_emit_aos(r300, aos_offset, flags & PREP_INDEXED); + if (emit_aos_swtcl) + r300_emit_aos_swtcl(r300, flags & PREP_INDEXED); } + + if (end_cs_dwords) + *end_cs_dwords = end_dwords; } static boolean immd_is_good_idea(struct r300_context *r300, @@ -236,7 +245,7 @@ static boolean immd_is_good_idea(struct r300_context *r300, * after resolving fallback issues (e.g. stencil ref two-sided). * ****************************************************************************/ -static void r500_emit_draw_arrays_immediate(struct r300_context *r300, +static void r300_emit_draw_arrays_immediate(struct r300_context *r300, unsigned mode, unsigned start, unsigned count) @@ -287,7 +296,7 @@ static void r500_emit_draw_arrays_immediate(struct r300_context *r300, dwords = 9 + count * vertex_size; - r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0); + r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, NULL); BEGIN_CS(dwords); OUT_CS_REG(R300_GA_COLOR_CONTROL, @@ -326,7 +335,7 @@ static void r500_emit_draw_arrays_immediate(struct r300_context *r300, } } -static void r500_emit_draw_arrays(struct r300_context *r300, +static void r300_emit_draw_arrays(struct r300_context *r300, unsigned mode, unsigned count) { @@ -355,7 +364,7 @@ static void r500_emit_draw_arrays(struct r300_context *r300, END_CS; } -static void r500_emit_draw_elements(struct r300_context *r300, +static void r300_emit_draw_elements(struct r300_context *r300, struct pipe_resource* indexBuffer, unsigned indexSize, unsigned minIndex, @@ -413,109 +422,11 @@ static void r500_emit_draw_elements(struct r300_context *r300, (0 << R300_INDX_BUFFER_SKIP_SHIFT)); OUT_CS(offset_dwords << 2); OUT_CS_BUF_RELOC(indexBuffer, count_dwords, - RADEON_GEM_DOMAIN_GTT, 0, 0); - - END_CS; -} + r300_buffer(indexBuffer)->domain, 0, 0); -/***************************************************************************** - * The emission of draw packets for r300 which take care of the two-sided * - * stencil ref fallback and call r500's functions. * - ****************************************************************************/ - -/* Set drawing for front faces. */ -static void r300_begin_stencil_ref_fallback(struct r300_context *r300) -{ - struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state; - CS_LOCALS(r300); - - BEGIN_CS(2); - OUT_CS_REG(R300_SU_CULL_MODE, rs->cull_mode | R300_CULL_BACK); END_CS; } -/* Set drawing for back faces. */ -static void r300_switch_stencil_ref_side(struct r300_context *r300) -{ - struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state; - struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; - CS_LOCALS(r300); - - BEGIN_CS(4); - OUT_CS_REG(R300_SU_CULL_MODE, rs->cull_mode | R300_CULL_FRONT); - OUT_CS_REG(R300_ZB_STENCILREFMASK, - dsa->stencil_ref_bf | r300->stencil_ref.ref_value[1]); - END_CS; -} - -/* Restore the original state. */ -static void r300_end_stencil_ref_fallback(struct r300_context *r300) -{ - struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state; - struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; - CS_LOCALS(r300); - - BEGIN_CS(4); - OUT_CS_REG(R300_SU_CULL_MODE, rs->cull_mode); - OUT_CS_REG(R300_ZB_STENCILREFMASK, - dsa->stencil_ref_mask | r300->stencil_ref.ref_value[0]); - END_CS; -} - -static void r300_emit_draw_arrays_immediate(struct r300_context *r300, - unsigned mode, - unsigned start, - unsigned count) -{ - if (!r300->stencil_ref_bf_fallback) { - r500_emit_draw_arrays_immediate(r300, mode, start, count); - } else { - r300_begin_stencil_ref_fallback(r300); - r500_emit_draw_arrays_immediate(r300, mode, start, count); - r300_switch_stencil_ref_side(r300); - r500_emit_draw_arrays_immediate(r300, mode, start, count); - r300_end_stencil_ref_fallback(r300); - } -} - -static void r300_emit_draw_arrays(struct r300_context *r300, - unsigned mode, - unsigned count) -{ - if (!r300->stencil_ref_bf_fallback) { - r500_emit_draw_arrays(r300, mode, count); - } else { - r300_begin_stencil_ref_fallback(r300); - r500_emit_draw_arrays(r300, mode, count); - r300_switch_stencil_ref_side(r300); - r500_emit_draw_arrays(r300, mode, count); - r300_end_stencil_ref_fallback(r300); - } -} - -static void r300_emit_draw_elements(struct r300_context *r300, - struct pipe_resource* indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count) -{ - if (!r300->stencil_ref_bf_fallback) { - r500_emit_draw_elements(r300, indexBuffer, indexSize, - minIndex, maxIndex, mode, start, count); - } else { - r300_begin_stencil_ref_fallback(r300); - r500_emit_draw_elements(r300, indexBuffer, indexSize, - minIndex, maxIndex, mode, start, count); - r300_switch_stencil_ref_side(r300); - r500_emit_draw_elements(r300, indexBuffer, indexSize, - minIndex, maxIndex, mode, start, count); - r300_end_stencil_ref_fallback(r300); - } -} - static void r300_shorten_ubyte_elts(struct r300_context* r300, struct pipe_resource** elts, unsigned start, @@ -619,17 +530,17 @@ static void r300_draw_range_elements(struct pipe_context* pipe, /* 15 dwords for emit_draw_elements */ r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED, - indexBuffer, 15, 0, indexBias); + indexBuffer, 15, 0, indexBias, NULL); u_upload_flush(r300->upload_vb); u_upload_flush(r300->upload_ib); if (alt_num_verts || count <= 65535) { - r300->emit_draw_elements(r300, indexBuffer, indexSize, + r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex, mode, start, count); } else { do { short_count = MIN2(count, 65534); - r300->emit_draw_elements(r300, indexBuffer, indexSize, + r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex, mode, start, short_count); @@ -640,7 +551,7 @@ static void r300_draw_range_elements(struct pipe_context* pipe, if (count) { r300_prepare_for_rendering(r300, PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED, - indexBuffer, 15, 0, indexBias); + indexBuffer, 15, 0, indexBias, NULL); } } while (count); } @@ -683,18 +594,18 @@ static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, r300_update_derived_state(r300); if (immd_is_good_idea(r300, count)) { - r300->emit_draw_arrays_immediate(r300, mode, start, count); + r300_emit_draw_arrays_immediate(r300, mode, start, count); } else { /* 9 spare dwords for emit_draw_arrays. */ r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS, - NULL, 9, start, 0); + NULL, 9, start, 0, NULL); if (alt_num_verts || count <= 65535) { - r300->emit_draw_arrays(r300, mode, count); + r300_emit_draw_arrays(r300, mode, count); } else { do { short_count = MIN2(count, 65535); - r300->emit_draw_arrays(r300, mode, short_count); + r300_emit_draw_arrays(r300, mode, short_count); start += short_count; count -= short_count; @@ -703,7 +614,7 @@ static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, if (count) { r300_prepare_for_rendering(r300, PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9, - start, 0); + start, 0, NULL); } } while (count); } @@ -886,6 +797,8 @@ static void* r300_render_map_vertices(struct vbuf_render* render) { struct r300_render* r300render = r300_render(render); + assert(!r300render->vbo_transfer); + r300render->vbo_ptr = pipe_buffer_map(&r300render->r300->context, r300render->vbo, PIPE_TRANSFER_WRITE, @@ -901,9 +814,13 @@ static void r300_render_unmap_vertices(struct vbuf_render* render, struct r300_render* r300render = r300_render(render); struct pipe_context* context = &r300render->r300->context; + assert(r300render->vbo_transfer); + r300render->vbo_max_used = MAX2(r300render->vbo_max_used, r300render->vertex_size * (max + 1)); pipe_buffer_unmap(context, r300render->vbo, r300render->vbo_transfer); + + r300render->vbo_transfer = NULL; } static void r300_render_release_vertices(struct vbuf_render* render) @@ -925,7 +842,7 @@ static boolean r300_render_set_primitive(struct vbuf_render* render, return TRUE; } -static void r500_render_draw_arrays(struct vbuf_render* render, +static void r300_render_draw_arrays(struct vbuf_render* render, unsigned start, unsigned count) { @@ -939,7 +856,8 @@ static void r500_render_draw_arrays(struct vbuf_render* render, (void) i; (void) ptr; - r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0); + r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL, + NULL, dwords, 0, 0, NULL); DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count); @@ -970,68 +888,59 @@ static void r500_render_draw_arrays(struct vbuf_render* render, END_CS; } -static void r500_render_draw_elements(struct vbuf_render* render, +static void r300_render_draw_elements(struct vbuf_render* render, const ushort* indices, uint count) { struct r300_render* r300render = r300_render(render); struct r300_context* r300 = r300render->r300; int i; - unsigned dwords = 6 + (count+1)/2; + unsigned end_cs_dwords; unsigned max_index = (r300render->vbo_size - r300render->vbo_offset) / (r300render->r300->vertex_info.size * 4) - 1; + unsigned short_count; + struct r300_cs_info cs_info; CS_LOCALS(r300); - r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0); - - BEGIN_CS(dwords); - OUT_CS_REG(R300_GA_COLOR_CONTROL, - r300_provoking_vertex_fixes(r300, r300render->prim)); - OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, max_index); - OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count+1)/2); - OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | - r300render->hwprim); - for (i = 0; i < count-1; i += 2) { - OUT_CS(indices[i+1] << 16 | indices[i]); - } - if (count % 2) { - OUT_CS(indices[count-1]); - } - END_CS; -} - -static void r300_render_draw_arrays(struct vbuf_render* render, - unsigned start, - unsigned count) -{ - struct r300_context* r300 = r300_render(render)->r300; - - if (!r300->stencil_ref_bf_fallback) { - r500_render_draw_arrays(render, start, count); - } else { - r300_begin_stencil_ref_fallback(r300); - r500_render_draw_arrays(render, start, count); - r300_switch_stencil_ref_side(r300); - r500_render_draw_arrays(render, start, count); - r300_end_stencil_ref_fallback(r300); - } -} + /* Reserve at least 256 dwords. + * + * Below we manage the CS space manually because there may be more + * indices than it can fit in CS. */ + r300_prepare_for_rendering(r300, + PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL | PREP_INDEXED, + NULL, 256, 0, 0, &end_cs_dwords); + + while (count) { + r300->rws->get_cs_info(r300->rws, &cs_info); + + short_count = MIN2(count, (cs_info.free - end_cs_dwords - 6) * 2); + + BEGIN_CS(6 + (short_count+1)/2); + OUT_CS_REG(R300_GA_COLOR_CONTROL, + r300_provoking_vertex_fixes(r300, r300render->prim)); + OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, max_index); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (short_count+1)/2); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (short_count << 16) | + r300render->hwprim); + for (i = 0; i < short_count-1; i += 2) { + OUT_CS(indices[i+1] << 16 | indices[i]); + } + if (short_count % 2) { + OUT_CS(indices[short_count-1]); + } + END_CS; -static void r300_render_draw_elements(struct vbuf_render* render, - const ushort* indices, - uint count) -{ - struct r300_context* r300 = r300_render(render)->r300; + /* OK now subtract the emitted indices and see if we need to emit + * another draw packet. */ + indices += short_count; + count -= short_count; - if (!r300->stencil_ref_bf_fallback) { - r500_render_draw_elements(render, indices, count); - } else { - r300_begin_stencil_ref_fallback(r300); - r500_render_draw_elements(render, indices, count); - r300_switch_stencil_ref_side(r300); - r500_render_draw_elements(render, indices, count); - r300_end_stencil_ref_fallback(r300); + if (count) { + r300_prepare_for_rendering(r300, + PREP_EMIT_AOS_SWTCL | PREP_INDEXED, + NULL, 256, 0, 0, &end_cs_dwords); + } } } @@ -1055,13 +964,8 @@ static struct vbuf_render* r300_render_create(struct r300_context* r300) r300render->base.map_vertices = r300_render_map_vertices; r300render->base.unmap_vertices = r300_render_unmap_vertices; r300render->base.set_primitive = r300_render_set_primitive; - if (r300->screen->caps.is_r500) { - r300render->base.draw_elements = r500_render_draw_elements; - r300render->base.draw_arrays = r500_render_draw_arrays; - } else { - r300render->base.draw_elements = r300_render_draw_elements; - r300render->base.draw_arrays = r300_render_draw_arrays; - } + r300render->base.draw_elements = r300_render_draw_elements; + r300render->base.draw_arrays = r300_render_draw_arrays; r300render->base.release_vertices = r300_render_release_vertices; r300render->base.destroy = r300_render_destroy; @@ -1095,25 +999,150 @@ struct draw_stage* r300_draw_stage(struct r300_context* r300) return stage; } +/**************************************************************************** + * Two-sided stencil reference value fallback. It's designed to be as much + * separate from rest of the driver as possible. + ***************************************************************************/ + +struct r300_stencilref_context { + void (*draw_arrays)(struct pipe_context *pipe, + unsigned mode, unsigned start, unsigned count); + + void (*draw_range_elements)( + struct pipe_context *pipe, struct pipe_resource *indexBuffer, + unsigned indexSize, int indexBias, unsigned minIndex, unsigned maxIndex, + unsigned mode, unsigned start, unsigned count); + + uint32_t rs_cull_mode; + uint32_t zb_stencilrefmask; + ubyte ref_value_front; +}; + +static boolean r300_stencilref_needed(struct r300_context *r300) +{ + struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; + + return dsa->two_sided_stencil_ref || + (dsa->two_sided && + r300->stencil_ref.ref_value[0] != r300->stencil_ref.ref_value[1]); +} + +/* Set drawing for front faces. */ +static void r300_stencilref_begin(struct r300_context *r300) +{ + struct r300_stencilref_context *sr = r300->stencilref_fallback; + struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state; + struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; + + /* Save state. */ + sr->rs_cull_mode = rs->cull_mode; + sr->zb_stencilrefmask = dsa->stencil_ref_mask; + sr->ref_value_front = r300->stencil_ref.ref_value[0]; + + /* We *cull* pixels, therefore no need to mask out the bits. */ + rs->cull_mode |= R300_CULL_BACK; + + r300->rs_state.dirty = TRUE; +} + +/* Set drawing for back faces. */ +static void r300_stencilref_switch_side(struct r300_context *r300) +{ + struct r300_stencilref_context *sr = r300->stencilref_fallback; + struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state; + struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; + + rs->cull_mode = sr->rs_cull_mode | R300_CULL_FRONT; + dsa->stencil_ref_mask = dsa->stencil_ref_bf; + r300->stencil_ref.ref_value[0] = r300->stencil_ref.ref_value[1]; + + r300->rs_state.dirty = TRUE; + r300->dsa_state.dirty = TRUE; +} + +/* Restore the original state. */ +static void r300_stencilref_end(struct r300_context *r300) +{ + struct r300_stencilref_context *sr = r300->stencilref_fallback; + struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state; + struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; + + /* Restore state. */ + rs->cull_mode = sr->rs_cull_mode; + dsa->stencil_ref_mask = sr->zb_stencilrefmask; + r300->stencil_ref.ref_value[0] = sr->ref_value_front; + + r300->rs_state.dirty = TRUE; + r300->dsa_state.dirty = TRUE; +} + +static void r300_stencilref_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count) +{ + struct r300_context *r300 = r300_context(pipe); + struct r300_stencilref_context *sr = r300->stencilref_fallback; + + if (!r300_stencilref_needed(r300)) { + sr->draw_arrays(pipe, mode, start, count); + } else { + r300_stencilref_begin(r300); + sr->draw_arrays(pipe, mode, start, count); + r300_stencilref_switch_side(r300); + sr->draw_arrays(pipe, mode, start, count); + r300_stencilref_end(r300); + } +} + +static void r300_stencilref_draw_range_elements( + struct pipe_context *pipe, struct pipe_resource *indexBuffer, + unsigned indexSize, int indexBias, unsigned minIndex, unsigned maxIndex, + unsigned mode, unsigned start, unsigned count) +{ + struct r300_context *r300 = r300_context(pipe); + struct r300_stencilref_context *sr = r300->stencilref_fallback; + + if (!r300_stencilref_needed(r300)) { + sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, + minIndex, maxIndex, mode, start, count); + } else { + r300_stencilref_begin(r300); + sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, + minIndex, maxIndex, mode, start, count); + r300_stencilref_switch_side(r300); + sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, + minIndex, maxIndex, mode, start, count); + r300_stencilref_end(r300); + } +} + +static void r300_plug_in_stencil_ref_fallback(struct r300_context *r300) +{ + r300->stencilref_fallback = CALLOC_STRUCT(r300_stencilref_context); + + /* Save original draw functions. */ + r300->stencilref_fallback->draw_arrays = r300->context.draw_arrays; + r300->stencilref_fallback->draw_range_elements = r300->context.draw_range_elements; + + /* Override the draw functions. */ + r300->context.draw_arrays = r300_stencilref_draw_arrays; + r300->context.draw_range_elements = r300_stencilref_draw_range_elements; +} + void r300_init_render_functions(struct r300_context *r300) { + /* Set generic functions. */ + r300->context.draw_elements = r300_draw_elements; + + /* Set draw functions based on presence of HW TCL. */ if (r300->screen->caps.has_tcl) { r300->context.draw_arrays = r300_draw_arrays; - r300->context.draw_elements = r300_draw_elements; r300->context.draw_range_elements = r300_draw_range_elements; - - if (r300->screen->caps.is_r500) { - r300->emit_draw_arrays_immediate = r500_emit_draw_arrays_immediate; - r300->emit_draw_arrays = r500_emit_draw_arrays; - r300->emit_draw_elements = r500_emit_draw_elements; - } else { - r300->emit_draw_arrays_immediate = r300_emit_draw_arrays_immediate; - r300->emit_draw_arrays = r300_emit_draw_arrays; - r300->emit_draw_elements = r300_emit_draw_elements; - } } else { r300->context.draw_arrays = r300_swtcl_draw_arrays; - r300->context.draw_elements = r300_draw_elements; r300->context.draw_range_elements = r300_swtcl_draw_range_elements; } + + /* Plug in two-sided stencil reference value fallback if needed. */ + if (!r300->screen->caps.is_r500) + r300_plug_in_stencil_ref_fallback(r300); } diff --git a/src/gallium/drivers/r300/r300_resource.c b/src/gallium/drivers/r300/r300_resource.c index 7ed47eaa605..f6f33028dc6 100644 --- a/src/gallium/drivers/r300/r300_resource.c +++ b/src/gallium/drivers/r300/r300_resource.c @@ -23,19 +23,9 @@ * Authors: Dave Airlie */ -#include <stdio.h> - -#include "util/u_inlines.h" -#include "util/u_format.h" -#include "util/u_memory.h" -#include "util/u_upload_mgr.h" -#include "util/u_math.h" - #include "r300_context.h" #include "r300_texture.h" -#include "r300_screen.h" #include "r300_screen_buffer.h" -#include "r300_winsys.h" static struct pipe_resource * r300_resource_create(struct pipe_screen *screen, diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index a5ad6d9c0ef..e7db074be62 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -28,6 +28,7 @@ #include "r300_context.h" #include "r300_texture.h" #include "r300_screen_buffer.h" +#include "r300_state_inlines.h" #include "r300_winsys.h" /* Return the identifier behind whom the brave coders responsible for this @@ -251,6 +252,7 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, uint32_t retval = 0; boolean is_r500 = r300_screen(screen)->caps.is_r500; boolean is_r400 = r300_screen(screen)->caps.is_r400; + boolean is_rv350 = r300_screen(screen)->caps.is_rv350; boolean is_z24 = format == PIPE_FORMAT_X8Z24_UNORM || format == PIPE_FORMAT_S8_USCALED_Z24_UNORM; boolean is_color2101010 = format == PIPE_FORMAT_R10G10B10A2_UNORM || @@ -261,6 +263,10 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, format == PIPE_FORMAT_RGTC1_SNORM; boolean is_ati2n = format == PIPE_FORMAT_RGTC2_UNORM || format == PIPE_FORMAT_RGTC2_SNORM; + boolean is_half_float = format == PIPE_FORMAT_R16_FLOAT || + format == PIPE_FORMAT_R16G16_FLOAT || + format == PIPE_FORMAT_R16G16B16_FLOAT || + format == PIPE_FORMAT_R16G16B16A16_FLOAT; if (target >= PIPE_MAX_TEXTURE_TYPES) { fprintf(stderr, "r300: Implementation error: Received bogus texture " @@ -304,6 +310,14 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, retval |= PIPE_BIND_DEPTH_STENCIL; } + /* Check vertex buffer format support. */ + if (usage & PIPE_BIND_VERTEX_BUFFER && + /* Half float is supported on >= RV350. */ + (is_rv350 || !is_half_float) && + r300_translate_vertex_data_type(format) != R300_INVALID_FORMAT) { + retval |= PIPE_BIND_VERTEX_BUFFER; + } + return retval == usage; } diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index 3e2b5afe6f4..44179f19ed4 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -116,19 +116,6 @@ int r300_upload_user_buffers(struct r300_context *r300) return ret; } -static struct r300_winsys_buffer * -r300_winsys_buffer_create(struct r300_screen *r300screen, - unsigned alignment, - unsigned usage, - unsigned size) -{ - struct r300_winsys_screen *rws = r300screen->rws; - struct r300_winsys_buffer *buf; - - buf = rws->buffer_create(rws, alignment, usage, size); - return buf; -} - static void r300_winsys_buffer_destroy(struct r300_screen *r300screen, struct r300_buffer *rbuf) { @@ -180,9 +167,9 @@ r300_buffer_transfer_map( struct pipe_context *pipe, rws->buffer_reference(rws, &rbuf->buf, NULL); rbuf->num_ranges = 0; - rbuf->buf = r300_winsys_buffer_create(r300screen, - 16, - rbuf->b.b.bind, /* XXX */ + rbuf->buf = r300screen->rws->buffer_create(r300screen->rws, 16, + rbuf->b.b.bind, + rbuf->domain, rbuf->b.b.width0); break; } @@ -278,9 +265,12 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, if (rbuf->b.b.bind & R300_BIND_OQBO) alignment = 4096; - rbuf->buf = r300_winsys_buffer_create(r300screen, + rbuf->domain = R300_DOMAIN_GTT; + + rbuf->buf = r300screen->rws->buffer_create(r300screen->rws, alignment, rbuf->b.b.bind, + rbuf->domain, rbuf->b.b.width0); if (!rbuf->buf) @@ -315,6 +305,7 @@ struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen, rbuf->b.b.width0 = bytes; rbuf->b.b.height0 = 1; rbuf->b.b.depth0 = 1; + rbuf->domain = R300_DOMAIN_GTT; rbuf->user_buffer = ptr; return &rbuf->b.b; diff --git a/src/gallium/drivers/r300/r300_screen_buffer.h b/src/gallium/drivers/r300/r300_screen_buffer.h index 57f48229b2e..87b42b94122 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.h +++ b/src/gallium/drivers/r300/r300_screen_buffer.h @@ -52,6 +52,8 @@ struct r300_buffer struct r300_winsys_buffer *buf; + enum r300_buffer_domain domain; + void *user_buffer; struct r300_buffer_range ranges[R300_BUFFER_MAX_RANGES]; unsigned num_ranges; @@ -116,7 +118,9 @@ static INLINE boolean r300_add_texture(struct r300_winsys_screen *rws, static INLINE void r300_buffer_write_reloc(struct r300_winsys_screen *rws, struct r300_buffer *buf, - uint32_t rd, uint32_t wd, uint32_t flags) + enum r300_buffer_domain rd, + enum r300_buffer_domain wd, + uint32_t flags) { if (!buf->buf) return; @@ -126,7 +130,9 @@ static INLINE void r300_buffer_write_reloc(struct r300_winsys_screen *rws, static INLINE void r300_texture_write_reloc(struct r300_winsys_screen *rws, struct r300_texture *texture, - uint32_t rd, uint32_t wd, uint32_t flags) + enum r300_buffer_domain rd, + enum r300_buffer_domain wd, + uint32_t flags) { rws->write_cs_reloc(rws, texture->buffer, rd, wd, flags); } diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 67e09362d8e..cfec8ac2b7e 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -474,7 +474,7 @@ static void* if (caps->is_r500) { dsa->z_buffer_control |= R500_STENCIL_REFMASK_FRONT_BACK; } else { - dsa->stencil_ref_bf_fallback = + dsa->two_sided_stencil_ref = (state->stencil[0].valuemask != state->stencil[1].valuemask || state->stencil[0].writemask != state->stencil[1].writemask); } @@ -497,20 +497,6 @@ static void* return (void*)dsa; } -static void r300_update_stencil_ref_fallback_status(struct r300_context *r300) -{ - struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; - - if (r300->screen->caps.is_r500) { - return; - } - - r300->stencil_ref_bf_fallback = - dsa->stencil_ref_bf_fallback || - (dsa->two_sided && - r300->stencil_ref.ref_value[0] != r300->stencil_ref.ref_value[1]); -} - /* Bind DSA state. */ static void r300_bind_dsa_state(struct pipe_context* pipe, void* state) @@ -522,8 +508,6 @@ static void r300_bind_dsa_state(struct pipe_context* pipe, } UPDATE_STATE(state, r300->dsa_state); - - r300_update_stencil_ref_fallback_status(r300); } /* Free DSA state. */ @@ -540,8 +524,6 @@ static void r300_set_stencil_ref(struct pipe_context* pipe, r300->stencil_ref = *sr; r300->dsa_state.dirty = TRUE; - - r300_update_stencil_ref_fallback_status(r300); } /* This switcheroo is needed just because of goddamned MACRO_SWITCH. */ @@ -558,18 +540,18 @@ static void r300_fb_set_tiling_flags(struct r300_context *r300, level = new_state->cbufs[i]->level; r300->rws->buffer_set_tiling(r300->rws, tex->buffer, - tex->pitch[0], - tex->microtile, - tex->mip_macrotile[level]); + tex->pitch[0] * util_format_get_blocksize(tex->b.b.format), + tex->microtile, + tex->mip_macrotile[level]); } if (new_state->zsbuf) { tex = r300_texture(new_state->zsbuf->texture); level = new_state->zsbuf->level; r300->rws->buffer_set_tiling(r300->rws, tex->buffer, - tex->pitch[0], - tex->microtile, - tex->mip_macrotile[level]); + tex->pitch[0] * util_format_get_blocksize(tex->b.b.format), + tex->microtile, + tex->mip_macrotile[level]); } } @@ -1183,48 +1165,61 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, return; } - /* Check if the stride is aligned to the size of DWORD. */ - for (i = 0; i < count; i++) { - if (buffers[i].buffer) { - if (buffers[i].stride % 4 != 0) { - // XXX Shouldn't we align the buffer? - fprintf(stderr, "r300: set_vertex_buffers: " - "Unaligned buffer stride %i isn't supported.\n", - buffers[i].stride); - abort(); + if (r300->screen->caps.has_tcl) { + /* HW TCL. */ + /* Check if the stride is aligned to the size of DWORD. */ + for (i = 0; i < count; i++) { + if (buffers[i].buffer) { + if (buffers[i].stride % 4 != 0) { + // XXX Shouldn't we align the buffer? + fprintf(stderr, "r300: set_vertex_buffers: " + "Unaligned buffer stride %i isn't supported.\n", + buffers[i].stride); + abort(); + } } } - } - for (i = 0; i < count; i++) { - /* Why, yes, I AM casting away constness. How did you know? */ - vbo = (struct pipe_vertex_buffer*)&buffers[i]; + for (i = 0; i < count; i++) { + /* Why, yes, I AM casting away constness. How did you know? */ + vbo = (struct pipe_vertex_buffer*)&buffers[i]; - /* Reference our buffer. */ - pipe_resource_reference(&r300->vertex_buffer[i].buffer, vbo->buffer); + /* Skip NULL buffers */ + if (!buffers[i].buffer) { + continue; + } - /* Skip NULL buffers */ - if (!buffers[i].buffer) { - continue; - } + if (r300_buffer_is_user_buffer(vbo->buffer)) { + any_user_buffer = TRUE; + } - if (r300_buffer_is_user_buffer(vbo->buffer)) { - any_user_buffer = TRUE; - } + if (vbo->max_index == ~0) { + /* if no VBO stride then only one vertex value so max index is 1 */ + /* should think about converting to VS constants like svga does */ + if (!vbo->stride) + vbo->max_index = 1; + else + vbo->max_index = + (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride; + } - if (vbo->max_index == ~0) { - /* if no VBO stride then only one vertex value so max index is 1 */ - /* should think about converting to VS constants like svga does */ - if (!vbo->stride) - vbo->max_index = 1; - else - vbo->max_index = - (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride; + max_index = MIN2(vbo->max_index, max_index); } - max_index = MIN2(vbo->max_index, max_index); + r300->any_user_vbs = any_user_buffer; + r300->vertex_buffer_max_index = max_index; + + } else { + /* SW TCL. */ + draw_flush(r300->draw); + draw_set_vertex_buffers(r300->draw, count, buffers); } + /* Common code. */ + for (i = 0; i < count; i++) { + /* Reference our buffer. */ + pipe_resource_reference(&r300->vertex_buffer[i].buffer, buffers[i].buffer); + } for (; i < r300->vertex_buffer_count; i++) { /* Dereference any old buffers. */ pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL); @@ -1232,15 +1227,7 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, memcpy(r300->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); - r300->vertex_buffer_count = count; - r300->vertex_buffer_max_index = max_index; - r300->any_user_vbs = any_user_buffer; - - if (r300->draw) { - draw_flush(r300->draw); - draw_set_vertex_buffers(r300->draw, count, buffers); - } } /* Initialize the PSC tables. */ @@ -1263,8 +1250,15 @@ static void r300_vertex_psc(struct r300_vertex_element_state *velems) for (i = 0; i < velems->count; i++) { format = velems->velem[i].src_format; - type = r300_translate_vertex_data_type(format) | - (i << R300_DST_VEC_LOC_SHIFT); + type = r300_translate_vertex_data_type(format); + if (type == R300_INVALID_FORMAT) { + fprintf(stderr, "r300: Bad vertex format %s.\n", + util_format_short_name(format)); + assert(0); + abort(); + } + + type |= i << R300_DST_VEC_LOC_SHIFT; swizzle = r300_translate_vertex_data_swizzle(format); if (i & 1) { @@ -1425,12 +1419,11 @@ static void* r300_create_vs_state(struct pipe_context* pipe, vs->state = *shader; vs->state.tokens = tgsi_dup_tokens(shader->tokens); - r300_init_vs_outputs(vs); - if (r300->screen->caps.has_tcl) { + r300_init_vs_outputs(vs); r300_translate_vertex_shader(r300, vs); } else { - vs->draw_vs = draw_create_vertex_shader(r300->draw, shader); + r300_draw_init_vertex_shader(r300->draw, vs); } return vs; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 7583862a1a4..cc75fad3bb9 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -90,7 +90,13 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300) } } - /* XXX Back-face colors. */ + /* Back-face colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (vs_outputs->bcolor[i] != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR, + vs_outputs->bcolor[i]); + } + } /* Texture coordinates. */ /* Only 8 generic vertex attributes can be used. If there are more, @@ -110,6 +116,14 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300) vs_outputs->fog); gen_count++; } + + /* WPOS. */ + if (r300_fs(r300)->shader->inputs.wpos != ATTR_UNUSED && gen_count < 8) { + DBG(r300, DBG_DRAW, "draw_emit_attrib: WPOS, index: %i\n", + vs_outputs->wpos); + r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, + vs_outputs->wpos); + } } /* Update the PSC tables for SW TCL, using Draw. */ @@ -129,7 +143,7 @@ static void r300_swtcl_vertex_psc(struct r300_context *r300) attrib_count = vinfo->num_attribs; DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count); for (i = 0; i < attrib_count; i++) { - DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d," + DBG(r300, DBG_DRAW, "r300: attrib: index %d, interp %d, emit %d," " vs_output_tab %d\n", vinfo->attrib[i].src_index, vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit, vs_output_tab[i]); @@ -140,8 +154,15 @@ static void r300_swtcl_vertex_psc(struct r300_context *r300) format = draw_translate_vinfo_format(vinfo->attrib[i].emit); /* Obtain the type of data in this attribute. */ - type = r300_translate_vertex_data_type(format) | - vs_output_tab[i] << R300_DST_VEC_LOC_SHIFT; + type = r300_translate_vertex_data_type(format); + if (type == R300_INVALID_FORMAT) { + fprintf(stderr, "r300: Bad vertex format %s.\n", + util_format_short_name(format)); + assert(0); + abort(); + } + + type |= vs_output_tab[i] << R300_DST_VEC_LOC_SHIFT; /* Obtain the swizzle for this attribute. Note that the default * swizzle in the hardware is not XYZW! */ diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index 715601042fa..03ec127ff79 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -348,20 +348,17 @@ static INLINE uint32_t r300_translate_gb_pipes(int pipe_count) switch (pipe_count) { case 1: return R300_GB_TILE_PIPE_COUNT_RV300; - break; case 2: return R300_GB_TILE_PIPE_COUNT_R300; - break; case 3: return R300_GB_TILE_PIPE_COUNT_R420_3P; - break; case 4: return R300_GB_TILE_PIPE_COUNT_R420; - break; } return 0; } + /* Translate pipe_formats into PSC vertex types. */ static INLINE uint16_t r300_translate_vertex_data_type(enum pipe_format format) { @@ -371,10 +368,7 @@ r300_translate_vertex_data_type(enum pipe_format format) { desc = util_format_description(format); if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { - fprintf(stderr, "r300: Bad format %s in %s:%d\n", util_format_short_name(format), - __FUNCTION__, __LINE__); - assert(0); - abort(); + return R300_INVALID_FORMAT; } switch (desc->channel[0].type) { @@ -382,7 +376,7 @@ r300_translate_vertex_data_type(enum pipe_format format) { case UTIL_FORMAT_TYPE_FLOAT: switch (desc->channel[0].size) { case 16: - /* XXX Supported only on RV350 and later. */ + /* Supported only on RV350 and later. */ if (desc->nr_channels > 2) { result = R300_DATA_TYPE_FLT16_4; } else { @@ -393,10 +387,7 @@ r300_translate_vertex_data_type(enum pipe_format format) { result = R300_DATA_TYPE_FLOAT_1 + (desc->nr_channels - 1); break; default: - fprintf(stderr, "r300: Bad format %s in %s:%d\n", - util_format_short_name(format), __FUNCTION__, __LINE__); - assert(0); - abort(); + return R300_INVALID_FORMAT; } break; /* Unsigned ints */ @@ -415,19 +406,11 @@ r300_translate_vertex_data_type(enum pipe_format format) { } break; default: - fprintf(stderr, "r300: Bad format %s in %s:%d\n", - util_format_short_name(format), __FUNCTION__, __LINE__); - fprintf(stderr, "r300: desc->channel[0].size == %d\n", - desc->channel[0].size); - assert(0); - abort(); + return R300_INVALID_FORMAT; } break; default: - fprintf(stderr, "r300: Bad format %s in %s:%d\n", - util_format_short_name(format), __FUNCTION__, __LINE__); - assert(0); - abort(); + return R300_INVALID_FORMAT; } if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 4a5c932b7e8..4823755fb7a 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -696,7 +696,7 @@ static boolean r300_texture_macro_switch(struct r300_texture *tex, unsigned r300_texture_get_stride(struct r300_screen* screen, struct r300_texture* tex, unsigned level) { - unsigned tile_width, width; + unsigned tile_width, width, stride; if (tex->stride_override) return tex->stride_override; @@ -715,7 +715,19 @@ unsigned r300_texture_get_stride(struct r300_screen* screen, tex->mip_macrotile[level]); width = align(width, tile_width); - return util_format_get_stride(tex->b.b.format, width); + stride = util_format_get_stride(tex->b.b.format, width); + + /* Some IGPs need a minimum stride of 64 bytes, hmm... + * This doesn't seem to apply to tiled textures, according to r300c. */ + if (!tex->microtile && !tex->mip_macrotile[level] && + (screen->caps.family == CHIP_FAMILY_RS600 || + screen->caps.family == CHIP_FAMILY_RS690 || + screen->caps.family == CHIP_FAMILY_RS740)) { + return stride < 64 ? 64 : stride; + } + + /* The alignment to 32 bytes is sort of implied by the layout... */ + return stride; } else { return align(util_format_get_stride(tex->b.b.format, width), 32); } @@ -959,13 +971,16 @@ struct pipe_resource* r300_texture_create(struct pipe_screen* screen, base->width0, base->height0, base->depth0, base->last_level, util_format_short_name(base->format)); - tex->buffer = rws->buffer_create(rws, 2048, - base->bind, - tex->size); + tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ? R300_DOMAIN_GTT : + R300_DOMAIN_VRAM; + + tex->buffer = rws->buffer_create(rws, 2048, base->bind, tex->domain, + tex->size); + rws->buffer_set_tiling(rws, tex->buffer, - tex->pitch[0], - tex->microtile, - tex->macrotile); + tex->pitch[0] * util_format_get_blocksize(tex->b.b.format), + tex->microtile, + tex->macrotile); if (!tex->buffer) { FREE(tex); @@ -1048,6 +1063,7 @@ r300_texture_from_handle(struct pipe_screen* screen, tex->b.vtbl = &r300_texture_vtbl; pipe_reference_init(&tex->b.b.reference, 1); tex->b.b.screen = screen; + tex->domain = R300_DOMAIN_VRAM; tex->stride_override = stride; @@ -1093,9 +1109,9 @@ r300_texture_from_handle(struct pipe_screen* screen, if (override_zb_flags) { rws->buffer_set_tiling(rws, tex->buffer, - tex->pitch[0], - tex->microtile, - tex->macrotile); + tex->pitch[0] * util_format_get_blocksize(tex->b.b.format), + tex->microtile, + tex->macrotile); } return (struct pipe_resource*)tex; } diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 2d8f0e14393..ff640c56eed 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -26,6 +26,7 @@ #include "util/u_format.h" struct r300_texture; +struct r300_screen; unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, const unsigned char *swizzle_view); diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c index beb321cb238..0d88d745c07 100644 --- a/src/gallium/drivers/r300/r300_transfer.c +++ b/src/gallium/drivers/r300/r300_transfer.c @@ -21,12 +21,9 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "r300_context.h" #include "r300_transfer.h" #include "r300_texture.h" -#include "r300_screen.h" - -#include "r300_winsys.h" +#include "r300_screen_buffer.h" #include "util/u_memory.h" #include "util/u_format.h" @@ -40,9 +37,6 @@ struct r300_transfer { /* Detiled texture. */ struct r300_texture *detiled_texture; - - /* Transfer and format flags. */ - unsigned render_target_usage; }; /* Convenience cast wrapper. */ @@ -63,11 +57,22 @@ static void r300_copy_from_tiled_texture(struct pipe_context *ctx, subdst.face = 0; subdst.level = 0; + /* XXX if we don't flush before copying the texture and mapping it, + * we get wrong pixels, i.e. it's like latest draw calls didn't happen, + * including this blit. Tests: e.g. piglit/provoking-vertex + * + * Since the flush immediately before mapping is implicit (the buffer is + * always referenced in resource_copy_region), every read transfer costs + * 2 flushes. That sucks. */ + ctx->flush(ctx, 0, NULL); + ctx->resource_copy_region(ctx, &r300transfer->detiled_texture->b.b, subdst, 0, 0, 0, tex, transfer->sr, transfer->box.x, transfer->box.y, transfer->box.z, transfer->box.width, transfer->box.height); + + /* Flushing after the copy is implicit, issued by winsys. */ } /* Copy a detiled texture to a tiled one. */ @@ -81,11 +86,6 @@ static void r300_copy_into_tiled_texture(struct pipe_context *ctx, subsrc.face = 0; subsrc.level = 0; - /* XXX this flush prevents the following DRM error from occuring: - * [drm:radeon_cs_ioctl] *ERROR* Failed to parse relocation ! - * Reproducible with perf/copytex. */ - ctx->flush(ctx, 0, NULL); - ctx->resource_copy_region(ctx, tex, transfer->sr, transfer->box.x, transfer->box.y, transfer->box.z, &r300transfer->detiled_texture->b.b, subsrc, @@ -107,12 +107,16 @@ r300_texture_get_transfer(struct pipe_context *ctx, struct r300_screen *r300screen = r300_screen(ctx->screen); struct r300_transfer *trans; struct pipe_resource base; + boolean referenced_cs, referenced_hw; - /* XXX Why aren't flushes taken care of by winsys automatically? - * Winsys seems to sometimes return a cached buffer instead of - * a mapped hardware buffer if this flush is commented out. */ - if (ctx->is_resource_referenced(ctx, texture, sr.face, sr.level)) - ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL); + referenced_cs = r300screen->rws->is_buffer_referenced( + r300screen->rws, tex->buffer, R300_REF_CS); + if (referenced_cs) { + referenced_hw = TRUE; + } else { + referenced_hw = r300screen->rws->is_buffer_referenced( + r300screen->rws, tex->buffer, R300_REF_HW); + } trans = CALLOC_STRUCT(r300_transfer); if (trans) { @@ -123,13 +127,10 @@ r300_texture_get_transfer(struct pipe_context *ctx, trans->transfer.box = *box; /* If the texture is tiled, we must create a temporary detiled texture - * for this transfer. */ - if (tex->microtile || tex->macrotile) { - trans->render_target_usage = - util_format_is_depth_or_stencil(texture->format) ? - PIPE_BIND_DEPTH_STENCIL : - PIPE_BIND_RENDER_TARGET; - + * for this transfer. + * Also make write transfers pipelined. */ + if (tex->microtile || tex->macrotile || + (referenced_hw & !(usage & PIPE_TRANSFER_READ))) { base.target = PIPE_TEXTURE_2D; base.format = texture->format; base.width0 = box->width; @@ -144,7 +145,7 @@ r300_texture_get_transfer(struct pipe_context *ctx, /* For texture reading, the temporary (detiled) texture is used as * a render target when blitting from a tiled texture. */ if (usage & PIPE_TRANSFER_READ) { - base.bind |= trans->render_target_usage; + base.bind |= PIPE_BIND_RENDER_TARGET; } /* For texture writing, the temporary texture is used as a sampler * when blitting into a tiled texture. */ @@ -174,11 +175,17 @@ r300_texture_get_transfer(struct pipe_context *ctx, /* We cannot map a tiled texture directly because the data is * in a different order, therefore we do detiling using a blit. */ r300_copy_from_tiled_texture(ctx, trans); + + /* Always referenced in the blit. */ + ctx->flush(ctx, 0, NULL); } } else { trans->transfer.stride = r300_texture_get_stride(r300screen, tex, sr.level); trans->offset = r300_texture_get_offset(tex, sr.level, box->z, sr.face); + + if (referenced_cs && (usage & PIPE_TRANSFER_READ)) + ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL); } } return &trans->transfer; @@ -244,4 +251,3 @@ void r300_texture_transfer_unmap(struct pipe_context *ctx, rws->buffer_unmap(rws, tex->buffer); } } - diff --git a/src/gallium/drivers/r300/r300_transfer.h b/src/gallium/drivers/r300/r300_transfer.h index d72e54e5ed9..0d32a68d1fa 100644 --- a/src/gallium/drivers/r300/r300_transfer.h +++ b/src/gallium/drivers/r300/r300_transfer.h @@ -24,7 +24,7 @@ #ifndef R300_TRANSFER #define R300_TRANSFER -#include "pipe/p_screen.h" +#include "pipe/p_context.h" struct r300_context; diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h index 31890d78caf..170de6c79db 100644 --- a/src/gallium/drivers/r300/r300_vs.h +++ b/src/gallium/drivers/r300/r300_vs.h @@ -60,4 +60,8 @@ void r300_init_vs_outputs(struct r300_vertex_shader *vs); void r300_translate_vertex_shader(struct r300_context *r300, struct r300_vertex_shader *vs); + +void r300_draw_init_vertex_shader(struct draw_context *draw, + struct r300_vertex_shader *vs); + #endif /* R300_VS_H */ diff --git a/src/gallium/drivers/r300/r300_vs_draw.c b/src/gallium/drivers/r300/r300_vs_draw.c new file mode 100644 index 00000000000..d64040b8911 --- /dev/null +++ b/src/gallium/drivers/r300/r300_vs_draw.c @@ -0,0 +1,358 @@ +/************************************************************************** + * + * Copyright 2009 Marek Olšák <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* This file contains the vertex shader tranformations for SW TCL needed + * to overcome the limitations of the r300 rasterizer. + * + * Transformations: + * 1) If the secondary color output is present, the primary color must be + * inserted before it. + * 2) If any back-face color output is present, there must be all 4 color + * outputs and missing ones must be inserted. + * 3) Insert a trailing texcoord output containing a copy of POS, for WPOS. + * + * I know this code is cumbersome, but I don't know of any nicer way + * of transforming TGSI shaders. ~ M. + */ + +#include "r300_vs.h" + +#include <stdio.h> + +#include "tgsi/tgsi_transform.h" +#include "tgsi/tgsi_dump.h" + +#include "draw/draw_context.h" + +struct vs_transform_context { + struct tgsi_transform_context base; + + boolean color_used[2]; + boolean bcolor_used[2]; + boolean temp_used[128]; + + /* Index of the pos output, typically 0. */ + unsigned pos_output; + /* Index of the pos temp where all writes of pos are redirected to. */ + unsigned pos_temp; + /* The index of the last generic output, after which we insert a new + * output for WPOS. */ + int last_generic; + + unsigned num_outputs; + /* Used to shift output decl. indices when inserting new ones. */ + unsigned decl_shift; + /* Used to remap writes to output decls if their indices changed. */ + unsigned out_remap[32]; + + /* First instruction processed? */ + boolean first_instruction; + /* End instruction processed? */ + boolean end_instruction; +}; + +static void emit_temp(struct tgsi_transform_context *ctx, unsigned reg) +{ + struct tgsi_full_declaration decl; + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.Range.First = decl.Range.Last = reg; + ctx->emit_declaration(ctx, &decl); +} + +static void emit_output(struct tgsi_transform_context *ctx, + unsigned name, unsigned index, unsigned interp, + unsigned reg) +{ + struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx; + struct tgsi_full_declaration decl; + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Interpolate = interp; + decl.Declaration.Semantic = TRUE; + decl.Semantic.Name = name; + decl.Semantic.Index = index; + decl.Range.First = decl.Range.Last = reg; + ctx->emit_declaration(ctx, &decl); + ++vsctx->num_outputs; +} + +static void insert_output(struct tgsi_transform_context *ctx, + struct tgsi_full_declaration *before, + unsigned name, unsigned index, unsigned interp) +{ + struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx; + unsigned i; + + /* Make a place for the new output. */ + for (i = before->Range.First; i < Elements(vsctx->out_remap); i++) { + ++vsctx->out_remap[i]; + } + + /* Insert the new output. */ + emit_output(ctx, name, index, interp, before->Range.First); + + ++vsctx->decl_shift; +} + +static void insert_trailing_bcolor(struct tgsi_transform_context *ctx, + struct tgsi_full_declaration *before) +{ + struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx; + + /* If BCOLOR0 is used, make sure BCOLOR1 is present too. Otherwise + * the rasterizer doesn't do the color selection correctly. */ + if (vsctx->bcolor_used[0] && !vsctx->bcolor_used[1]) { + if (before) { + insert_output(ctx, before, TGSI_SEMANTIC_BCOLOR, 1, + TGSI_INTERPOLATE_LINEAR); + } else { + emit_output(ctx, TGSI_SEMANTIC_BCOLOR, 1, + TGSI_INTERPOLATE_LINEAR, vsctx->num_outputs); + } + vsctx->bcolor_used[1] = TRUE; + } +} + +static void transform_decl(struct tgsi_transform_context *ctx, + struct tgsi_full_declaration *decl) +{ + struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx; + unsigned i; + + if (decl->Declaration.File == TGSI_FILE_OUTPUT) { + switch (decl->Semantic.Name) { + case TGSI_SEMANTIC_POSITION: + vsctx->pos_output = decl->Range.First; + break; + + case TGSI_SEMANTIC_COLOR: + assert(decl->Semantic.Index < 2); + vsctx->color_used[decl->Semantic.Index] = TRUE; + + /* We must rasterize the first color if the second one is + * used, otherwise the rasterizer doesn't do the color + * selection correctly. Declare it, but don't write to it. */ + if (decl->Semantic.Index == 1 && !vsctx->color_used[0]) { + insert_output(ctx, decl, TGSI_SEMANTIC_COLOR, 0, + TGSI_INTERPOLATE_LINEAR); + vsctx->color_used[0] = TRUE; + } + break; + + case TGSI_SEMANTIC_BCOLOR: + assert(decl->Semantic.Index < 2); + vsctx->bcolor_used[decl->Semantic.Index] = TRUE; + + /* We must rasterize all 4 colors if back-face colors are + * used, otherwise the rasterizer doesn't do the color + * selection correctly. Declare it, but don't write to it. */ + if (!vsctx->color_used[0]) { + insert_output(ctx, decl, TGSI_SEMANTIC_COLOR, 0, + TGSI_INTERPOLATE_LINEAR); + vsctx->color_used[0] = TRUE; + } + if (!vsctx->color_used[1]) { + insert_output(ctx, decl, TGSI_SEMANTIC_COLOR, 1, + TGSI_INTERPOLATE_LINEAR); + vsctx->color_used[1] = TRUE; + } + if (decl->Semantic.Index == 1 && !vsctx->bcolor_used[0]) { + insert_output(ctx, decl, TGSI_SEMANTIC_BCOLOR, 0, + TGSI_INTERPOLATE_LINEAR); + vsctx->color_used[2] = TRUE; + } + /* One more case is handled in insert_trailing_bcolor. */ + break; + + case TGSI_SEMANTIC_GENERIC: + vsctx->last_generic = MAX2(vsctx->last_generic, decl->Semantic.Index); + break; + } + + if (decl->Semantic.Name != TGSI_SEMANTIC_BCOLOR) { + /* Insert it as soon as possible. */ + insert_trailing_bcolor(ctx, decl); + } + + /* Since we're inserting new outputs in between, the following outputs + * should be moved to the right so that they don't overlap with + * the newly added ones. */ + decl->Range.First += vsctx->decl_shift; + decl->Range.Last += vsctx->decl_shift; + + ++vsctx->num_outputs; + } else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { + for (i = decl->Range.First; i <= decl->Range.Last; i++) { + vsctx->temp_used[i] = TRUE; + } + } + + ctx->emit_declaration(ctx, decl); +} + +static void transform_inst(struct tgsi_transform_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct vs_transform_context *vsctx = (struct vs_transform_context *) ctx; + struct tgsi_full_instruction new_inst; + unsigned i; + + if (!vsctx->first_instruction) { + vsctx->first_instruction = TRUE; + + /* The trailing BCOLOR should be inserted before the code + * if it hasn't already been done so. */ + insert_trailing_bcolor(ctx, NULL); + + /* Insert the generic output for WPOS. */ + emit_output(ctx, TGSI_SEMANTIC_GENERIC, vsctx->last_generic + 1, + TGSI_INTERPOLATE_PERSPECTIVE, vsctx->num_outputs); + + /* Find a free temp for POSITION. */ + for (i = 0; i < Elements(vsctx->temp_used); i++) { + if (!vsctx->temp_used[i]) { + emit_temp(ctx, i); + vsctx->pos_temp = i; + break; + } + } + } + + if (inst->Instruction.Opcode == TGSI_OPCODE_END) { + /* MOV OUT[pos_output], TEMP[pos_temp]; */ + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; + new_inst.Instruction.NumDstRegs = 1; + new_inst.Dst[0].Register.File = TGSI_FILE_OUTPUT; + new_inst.Dst[0].Register.Index = vsctx->pos_output; + new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; + new_inst.Instruction.NumSrcRegs = 1; + new_inst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + new_inst.Src[0].Register.Index = vsctx->pos_temp; + ctx->emit_instruction(ctx, &new_inst); + + /* MOV OUT[n-1], TEMP[pos_temp]; */ + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; + new_inst.Instruction.NumDstRegs = 1; + new_inst.Dst[0].Register.File = TGSI_FILE_OUTPUT; + new_inst.Dst[0].Register.Index = vsctx->num_outputs - 1; + new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; + new_inst.Instruction.NumSrcRegs = 1; + new_inst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + new_inst.Src[0].Register.Index = vsctx->pos_temp; + ctx->emit_instruction(ctx, &new_inst); + + vsctx->end_instruction = TRUE; + } else { + /* Not an END instruction. */ + /* Fix writes to outputs. */ + for (i = 0; i < inst->Instruction.NumDstRegs; i++) { + struct tgsi_full_dst_register *dst = &inst->Dst[i]; + if (dst->Register.File == TGSI_FILE_OUTPUT) { + if (dst->Register.Index == vsctx->pos_output) { + /* Replace writes to OUT[pos_output] with TEMP[pos_temp]. */ + dst->Register.File = TGSI_FILE_TEMPORARY; + dst->Register.Index = vsctx->pos_temp; + } else { + /* Not a position, good... + * Since we were changing the indices of output decls, + * we must redirect writes into them too. */ + dst->Register.Index = vsctx->out_remap[dst->Register.Index]; + } + } + } + + /* Inserting 2 instructions before the END opcode moves all following + * labels by 2. Subroutines are always after the END opcode so + * they're always moved. */ + if (inst->Instruction.Opcode == TGSI_OPCODE_CAL) { + inst->Label.Label += 2; + } + /* The labels of the following opcodes are moved only after + * the END opcode. */ + if (vsctx->end_instruction && + (inst->Instruction.Opcode == TGSI_OPCODE_IF || + inst->Instruction.Opcode == TGSI_OPCODE_ELSE || + inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP || + inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP)) { + inst->Label.Label += 2; + } + } + + ctx->emit_instruction(ctx, inst); +} + +void r300_draw_init_vertex_shader(struct draw_context *draw, + struct r300_vertex_shader *vs) +{ + struct pipe_shader_state new_vs; + struct vs_transform_context transform; + const uint newLen = tgsi_num_tokens(vs->state.tokens) + 100 /* XXX */; + unsigned i; + + new_vs.tokens = tgsi_alloc_tokens(newLen); + if (new_vs.tokens == NULL) + return; + + memset(&transform, 0, sizeof(transform)); + for (i = 0; i < Elements(transform.out_remap); i++) { + transform.out_remap[i] = i; + } + transform.last_generic = -1; + transform.base.transform_instruction = transform_inst; + transform.base.transform_declaration = transform_decl; + + tgsi_transform_shader(vs->state.tokens, + (struct tgsi_token*)new_vs.tokens, + newLen, &transform.base); + +#if 0 + printf("----------------------------------------------\norig shader:\n"); + tgsi_dump(vs->state.tokens, 0); + printf("----------------------------------------------\nnew shader:\n"); + tgsi_dump(new_vs.tokens, 0); + printf("----------------------------------------------\n"); +#endif + + /* Free old tokens. */ + FREE((void*)vs->state.tokens); + + vs->draw_vs = draw_create_vertex_shader(draw, &new_vs); + + /* Instead of duplicating and freeing the tokens, copy the pointer directly. */ + vs->state.tokens = new_vs.tokens; + + /* Init the VS output table for the rasterizer. */ + r300_init_vs_outputs(vs); + + /* Make the last generic be WPOS. */ + vs->outputs.wpos = vs->outputs.generic[transform.last_generic + 1]; + vs->outputs.generic[transform.last_generic + 1] = ATTR_UNUSED; +} diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 3d0413f90af..6ce218923b1 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -47,6 +47,13 @@ enum r300_reference_domain { /* bitfield */ R300_REF_HW = 2 }; +struct r300_cs_info { + /* In DWORDs. */ + unsigned used; + unsigned free; + unsigned capacity; +}; + struct r300_winsys_screen { void (*destroy)(struct r300_winsys_screen *ws); @@ -67,8 +74,9 @@ struct r300_winsys_screen { struct r300_winsys_buffer *(*buffer_create)(struct r300_winsys_screen *ws, unsigned alignment, unsigned usage, + enum r300_buffer_domain domain, unsigned size); - + /** * Map the entire data store of a buffer object into the client's address. * flags is bitmask of R300_WINSYS_BUFFER_USAGE_CPU_READ/WRITE flags. @@ -93,16 +101,17 @@ struct r300_winsys_screen { /* Add a pipe_resource to the list of buffer objects to validate. */ boolean (*add_buffer)(struct r300_winsys_screen *winsys, struct r300_winsys_buffer *buf, - uint32_t rd, - uint32_t wd); + enum r300_buffer_domain rd, + enum r300_buffer_domain wd); /* Revalidate all currently setup pipe_buffers. * Returns TRUE if a flush is required. */ boolean (*validate)(struct r300_winsys_screen* winsys); - /* Check to see if there's room for commands. */ - boolean (*check_cs)(struct r300_winsys_screen* winsys, int size); + /* Return current CS info. */ + void (*get_cs_info)(struct r300_winsys_screen *winsys, + struct r300_cs_info *info); /* Start a command emit. */ void (*begin_cs)(struct r300_winsys_screen* winsys, @@ -121,8 +130,8 @@ struct r300_winsys_screen { /* Write a relocated dword to the command buffer. */ void (*write_cs_reloc)(struct r300_winsys_screen *winsys, struct r300_winsys_buffer *buf, - uint32_t rd, - uint32_t wd, + enum r300_buffer_domain rd, + enum r300_buffer_domain wd, uint32_t flags); /* Finish a command emit. */ diff --git a/src/gallium/drivers/r600/Makefile b/src/gallium/drivers/r600/Makefile new file mode 100644 index 00000000000..aae31a6a6eb --- /dev/null +++ b/src/gallium/drivers/r600/Makefile @@ -0,0 +1,27 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = r600 + +LIBRARY_INCLUDES = \ + $(shell pkg-config libdrm --cflags-only-I) + +C_SOURCES = \ + r600_buffer.c \ + r600_context.c \ + r600_draw.c \ + r600_blit.c \ + r600_helper.c \ + r600_query.c \ + r600_resource.c \ + r600_screen.c \ + r600_state.c \ + r600_texture.c \ + r600_shader.c \ + r600_compiler.c \ + r600_compiler_tgsi.c \ + r600_compiler_dump.c \ + r600_compiler_r600.c \ + r600_compiler_r700.c + +include ../../Makefile.template diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript new file mode 100644 index 00000000000..26e2f1941cc --- /dev/null +++ b/src/gallium/drivers/r600/SConscript @@ -0,0 +1,37 @@ +Import('*') + +env = env.Clone() + +try: + env.ParseConfig('pkg-config --cflags libdrm_radeon') +except OSError: + print 'warning: not building r600' + Return() + +env.Append(CPPPATH = [ + '#/include', + '#/src/mesa', +]) + +r600 = env.ConvenienceLibrary( + target = 'r600', + source = [ + 'r600_buffer.c', + 'r600_context.c', + 'r600_draw.c', + 'r600_blit.c', + 'r600_helper.c', + 'r600_query.c', + 'r600_resource.c', + 'r600_screen.c', + 'r600_state.c', + 'r600_texture.c', + 'r600_shader.c', + 'r600_compiler.c', + 'r600_compiler_tgsi.c', + 'r600_compiler_dump.c', + 'r600_compiler_r600.c', + 'r600_compiler_r700.c' + ]) + +Export('r600') diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c new file mode 100644 index 00000000000..413c8ec403d --- /dev/null +++ b/src/gallium/drivers/r600/r600_blit.c @@ -0,0 +1,82 @@ +/* + * Copyright 2009 Marek Olšák <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jerome Glisse + * Marek Olšák + */ +#include <pipe/p_screen.h> +#include <util/u_blitter.h> +#include <util/u_inlines.h> +#include <util/u_memory.h> +#include "r600_screen.h" +#include "r600_context.h" + +static void r600_blitter_save_states(struct pipe_context *ctx) +{ + struct r600_context *rctx = (struct r600_context*)ctx; + + util_blitter_save_blend(rctx->blitter, + rctx->draw->state[R600_BLEND]); + util_blitter_save_depth_stencil_alpha(rctx->blitter, + rctx->draw->state[R600_DSA]); + util_blitter_save_stencil_ref(rctx->blitter, &rctx->stencil_ref); + util_blitter_save_rasterizer(rctx->blitter, + rctx->draw->state[R600_RASTERIZER]); + util_blitter_save_fragment_shader(rctx->blitter, + rctx->ps_shader); + util_blitter_save_vertex_shader(rctx->blitter, + rctx->vs_shader); + util_blitter_save_vertex_elements(rctx->blitter, + rctx->vertex_elements); + util_blitter_save_viewport(rctx->blitter, + &rctx->viewport); +} + +void r600_clear(struct pipe_context *ctx, unsigned buffers, + const float *rgba, double depth, unsigned stencil) +{ + struct r600_context *rctx = (struct r600_context*)ctx; + struct pipe_framebuffer_state *fb = &rctx->fb_state; + + r600_blitter_save_states(ctx); + util_blitter_clear(rctx->blitter, fb->width, fb->height, + fb->nr_cbufs, buffers, rgba, depth, + stencil); +} + +void r600_surface_copy(struct pipe_context *ctx, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ +} + +void r600_surface_fill(struct pipe_context *ctx, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, + unsigned value) +{ +} diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c new file mode 100644 index 00000000000..272f4dd6730 --- /dev/null +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -0,0 +1,232 @@ +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jerome Glisse + * Corbin Simpson <[email protected]> + */ +#include <pipe/p_screen.h> +#include <util/u_format.h> +#include <util/u_math.h> +#include <util/u_inlines.h> +#include <util/u_memory.h> +#include "state_tracker/drm_api.h" +#include "r600_screen.h" +#include "r600_context.h" + +extern struct u_resource_vtbl r600_buffer_vtbl; + +static u32 r600_domain_from_usage(unsigned usage) +{ + u32 domain = RADEON_GEM_DOMAIN_GTT; + + if (usage & PIPE_BIND_RENDER_TARGET) { + domain |= RADEON_GEM_DOMAIN_VRAM; + } + if (usage & PIPE_BIND_DEPTH_STENCIL) { + domain |= RADEON_GEM_DOMAIN_VRAM; + } + if (usage & PIPE_BIND_SAMPLER_VIEW) { + domain |= RADEON_GEM_DOMAIN_VRAM; + } + /* also need BIND_BLIT_SOURCE/DESTINATION ? */ + if (usage & PIPE_BIND_VERTEX_BUFFER) { + domain |= RADEON_GEM_DOMAIN_GTT; + } + if (usage & PIPE_BIND_INDEX_BUFFER) { + domain |= RADEON_GEM_DOMAIN_GTT; + } + + return domain; +} + +struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, + const struct pipe_resource *templ) +{ + struct r600_screen *rscreen = r600_screen(screen); + struct r600_buffer *rbuffer; + struct radeon_bo *bo; + struct pb_desc desc; + /* XXX We probably want a different alignment for buffers and textures. */ + unsigned alignment = 4096; + + rbuffer = CALLOC_STRUCT(r600_buffer); + if (rbuffer == NULL) + return NULL; + + rbuffer->b.b = *templ; + pipe_reference_init(&rbuffer->b.b.reference, 1); + rbuffer->b.b.screen = screen; + rbuffer->b.vtbl = &r600_buffer_vtbl; + + if (rbuffer->b.b.bind & PIPE_BIND_CONSTANT_BUFFER) { + desc.alignment = alignment; + desc.usage = rbuffer->b.b.bind; + rbuffer->pb = pb_malloc_buffer_create(rbuffer->b.b.width0, + &desc); + if (rbuffer->pb == NULL) { + free(rbuffer); + return NULL; + } + return &rbuffer->b.b; + } + rbuffer->domain = r600_domain_from_usage(rbuffer->b.b.bind); + bo = radeon_bo(rscreen->rw, 0, rbuffer->b.b.width0, alignment, NULL); + if (bo == NULL) { + FREE(rbuffer); + return NULL; + } + rbuffer->bo = bo; + return &rbuffer->b.b; +} + +struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, + void *ptr, unsigned bytes, + unsigned bind) +{ + struct r600_buffer *rbuffer; + struct r600_screen *rscreen = r600_screen(screen); + struct pipe_resource templ; + + memset(&templ, 0, sizeof(struct pipe_resource)); + templ.target = PIPE_BUFFER; + templ.format = PIPE_FORMAT_R8_UNORM; + templ.usage = PIPE_USAGE_IMMUTABLE; + templ.bind = bind; + templ.width0 = bytes; + templ.height0 = 1; + templ.depth0 = 1; + + rbuffer = (struct r600_buffer*)r600_buffer_create(screen, &templ); + if (rbuffer == NULL) { + return NULL; + } + radeon_bo_map(rscreen->rw, rbuffer->bo); + memcpy(rbuffer->bo->data, ptr, bytes); + radeon_bo_unmap(rscreen->rw, rbuffer->bo); + return &rbuffer->b.b; +} + +static void r600_buffer_destroy(struct pipe_screen *screen, + struct pipe_resource *buf) +{ + struct r600_buffer *rbuffer = (struct r600_buffer*)buf; + struct r600_screen *rscreen = r600_screen(screen); + + if (rbuffer->pb) { + pipe_reference_init(&rbuffer->pb->base.reference, 0); + pb_destroy(rbuffer->pb); + rbuffer->pb = NULL; + } + if (rbuffer->bo) { + radeon_bo_decref(rscreen->rw, rbuffer->bo); + } + FREE(rbuffer); +} + +static void *r600_buffer_transfer_map(struct pipe_context *pipe, + struct pipe_transfer *transfer) +{ + struct r600_buffer *rbuffer = (struct r600_buffer*)transfer->resource; + struct r600_screen *rscreen = r600_screen(pipe->screen); + int write = 0; + + if (rbuffer->pb) { + return (uint8_t*)pb_map(rbuffer->pb, transfer->usage) + transfer->box.x; + } + if (transfer->usage & PIPE_TRANSFER_DONTBLOCK) { + /* FIXME */ + } + if (transfer->usage & PIPE_TRANSFER_WRITE) { + write = 1; + } + if (radeon_bo_map(rscreen->rw, rbuffer->bo)) { + return NULL; + } + return (uint8_t*)rbuffer->bo->data + transfer->box.x; +} + +static void r600_buffer_transfer_unmap(struct pipe_context *pipe, + struct pipe_transfer *transfer) +{ + struct r600_buffer *rbuffer = (struct r600_buffer*)transfer->resource; + struct r600_screen *rscreen = r600_screen(pipe->screen); + + if (rbuffer->pb) { + pb_unmap(rbuffer->pb); + } else { + radeon_bo_unmap(rscreen->rw, rbuffer->bo); + } +} + +static void r600_buffer_transfer_flush_region(struct pipe_context *pipe, + struct pipe_transfer *transfer, + const struct pipe_box *box) +{ +} + +unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, + struct pipe_resource *buf, + unsigned face, unsigned level) +{ + /* XXX */ + return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; +} + +struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, + struct winsys_handle *whandle) +{ + struct radeon *rw = (struct radeon*)screen->winsys; + struct r600_buffer *rbuffer; + struct radeon_bo *bo = NULL; + + bo = radeon_bo(rw, whandle->handle, 0, 0, NULL); + if (bo == NULL) { + return NULL; + } + + rbuffer = CALLOC_STRUCT(r600_buffer); + if (rbuffer == NULL) { + radeon_bo_decref(rw, bo); + return NULL; + } + + pipe_reference_init(&rbuffer->b.b.reference, 1); + rbuffer->b.b.target = PIPE_BUFFER; + rbuffer->b.b.screen = screen; + rbuffer->b.vtbl = &r600_buffer_vtbl; + rbuffer->bo = bo; + return &rbuffer->b.b; +} + +struct u_resource_vtbl r600_buffer_vtbl = +{ + u_default_resource_get_handle, /* get_handle */ + r600_buffer_destroy, /* resource_destroy */ + r600_buffer_is_referenced_by_cs, /* is_buffer_referenced */ + u_default_get_transfer, /* get_transfer */ + u_default_transfer_destroy, /* transfer_destroy */ + r600_buffer_transfer_map, /* transfer_map */ + r600_buffer_transfer_flush_region, /* transfer_flush_region */ + r600_buffer_transfer_unmap, /* transfer_unmap */ + u_default_transfer_inline_write /* transfer_inline_write */ +}; diff --git a/src/gallium/drivers/r600/r600_compiler.c b/src/gallium/drivers/r600/r600_compiler.c new file mode 100644 index 00000000000..f1be2bbdf4f --- /dev/null +++ b/src/gallium/drivers/r600/r600_compiler.c @@ -0,0 +1,446 @@ +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <stdio.h> +#include <errno.h> +#include "r600_compiler.h" + +struct c_vector *c_vector_new(void) +{ + struct c_vector *v = calloc(1, sizeof(struct c_vector)); + + if (v == NULL) { + return NULL; + } + c_list_init(v); + return v; +} + +static unsigned c_opcode_is_alu(unsigned opcode) +{ + switch (opcode) { + case C_OPCODE_MOV: + case C_OPCODE_MUL: + case C_OPCODE_MAD: + case C_OPCODE_ARL: + case C_OPCODE_LIT: + case C_OPCODE_RCP: + case C_OPCODE_RSQ: + case C_OPCODE_EXP: + case C_OPCODE_LOG: + case C_OPCODE_ADD: + case C_OPCODE_DP3: + case C_OPCODE_DP4: + case C_OPCODE_DST: + case C_OPCODE_MIN: + case C_OPCODE_MAX: + case C_OPCODE_SLT: + case C_OPCODE_SGE: + case C_OPCODE_SUB: + case C_OPCODE_LRP: + case C_OPCODE_CND: + case C_OPCODE_DP2A: + case C_OPCODE_FRC: + case C_OPCODE_CLAMP: + case C_OPCODE_FLR: + case C_OPCODE_ROUND: + case C_OPCODE_EX2: + case C_OPCODE_LG2: + case C_OPCODE_POW: + case C_OPCODE_XPD: + case C_OPCODE_ABS: + case C_OPCODE_RCC: + case C_OPCODE_DPH: + case C_OPCODE_COS: + case C_OPCODE_DDX: + case C_OPCODE_DDY: + case C_OPCODE_PK2H: + case C_OPCODE_PK2US: + case C_OPCODE_PK4B: + case C_OPCODE_PK4UB: + case C_OPCODE_RFL: + case C_OPCODE_SEQ: + case C_OPCODE_SFL: + case C_OPCODE_SGT: + case C_OPCODE_SIN: + case C_OPCODE_SLE: + case C_OPCODE_SNE: + case C_OPCODE_STR: + case C_OPCODE_UP2H: + case C_OPCODE_UP2US: + case C_OPCODE_UP4B: + case C_OPCODE_UP4UB: + case C_OPCODE_X2D: + case C_OPCODE_ARA: + case C_OPCODE_ARR: + case C_OPCODE_BRA: + case C_OPCODE_SSG: + case C_OPCODE_CMP: + case C_OPCODE_SCS: + case C_OPCODE_NRM: + case C_OPCODE_DIV: + case C_OPCODE_DP2: + case C_OPCODE_CEIL: + case C_OPCODE_I2F: + case C_OPCODE_NOT: + case C_OPCODE_TRUNC: + case C_OPCODE_SHL: + case C_OPCODE_AND: + case C_OPCODE_OR: + case C_OPCODE_MOD: + case C_OPCODE_XOR: + case C_OPCODE_SAD: + case C_OPCODE_NRM4: + case C_OPCODE_F2I: + case C_OPCODE_IDIV: + case C_OPCODE_IMAX: + case C_OPCODE_IMIN: + case C_OPCODE_INEG: + case C_OPCODE_ISGE: + case C_OPCODE_ISHR: + case C_OPCODE_ISLT: + case C_OPCODE_F2U: + case C_OPCODE_U2F: + case C_OPCODE_UADD: + case C_OPCODE_UDIV: + case C_OPCODE_UMAD: + case C_OPCODE_UMAX: + case C_OPCODE_UMIN: + case C_OPCODE_UMOD: + case C_OPCODE_UMUL: + case C_OPCODE_USEQ: + case C_OPCODE_USGE: + case C_OPCODE_USHR: + case C_OPCODE_USLT: + case C_OPCODE_USNE: + return 1; + case C_OPCODE_END: + case C_OPCODE_VFETCH: + case C_OPCODE_KILP: + case C_OPCODE_CAL: + case C_OPCODE_RET: + case C_OPCODE_TXB: + case C_OPCODE_TXL: + case C_OPCODE_BRK: + case C_OPCODE_IF: + case C_OPCODE_BGNFOR: + case C_OPCODE_REP: + case C_OPCODE_ELSE: + case C_OPCODE_ENDIF: + case C_OPCODE_ENDFOR: + case C_OPCODE_ENDREP: + case C_OPCODE_PUSHA: + case C_OPCODE_POPA: + case C_OPCODE_TXF: + case C_OPCODE_TXQ: + case C_OPCODE_CONT: + case C_OPCODE_EMIT: + case C_OPCODE_ENDPRIM: + case C_OPCODE_BGNLOOP: + case C_OPCODE_BGNSUB: + case C_OPCODE_ENDLOOP: + case C_OPCODE_ENDSUB: + case C_OPCODE_NOP: + case C_OPCODE_CALLNZ: + case C_OPCODE_IFC: + case C_OPCODE_BREAKC: + case C_OPCODE_KIL: + case C_OPCODE_TEX: + case C_OPCODE_TXD: + case C_OPCODE_TXP: + case C_OPCODE_SWITCH: + case C_OPCODE_CASE: + case C_OPCODE_DEFAULT: + case C_OPCODE_ENDSWITCH: + default: + return 0; + } +} + + +/* NEW */ +void c_node_init(struct c_node *node) +{ + memset(node, 0, sizeof(struct c_node)); + c_list_init(&node->predecessors); + c_list_init(&node->successors); + c_list_init(&node->childs); + c_list_init(&node->insts); + node->parent = NULL; +} + +static struct c_node_link *c_node_link_new(struct c_node *node) +{ + struct c_node_link *link; + + link = calloc(1, sizeof(struct c_node_link)); + if (link == NULL) + return NULL; + c_list_init(link); + link->node = node; + return link; +} + +int c_node_cfg_link(struct c_node *predecessor, struct c_node *successor) +{ + struct c_node_link *pedge, *sedge; + + pedge = c_node_link_new(successor); + sedge = c_node_link_new(predecessor); + if (sedge == NULL || pedge == NULL) { + free(sedge); + free(pedge); + return -ENOMEM; + } + c_list_add_tail(pedge, &predecessor->successors); + c_list_add_tail(sedge, &successor->predecessors); + return 0; +} + +int c_node_add_new_instruction_head(struct c_node *node, struct c_instruction *instruction) +{ + struct c_instruction *inst = calloc(1, sizeof(struct c_instruction)); + + if (inst == NULL) + return -ENOMEM; + memcpy(inst, instruction, sizeof(struct c_instruction)); + c_list_add(inst, &node->insts); + return 0; +} + +int c_node_add_new_instruction(struct c_node *node, struct c_instruction *instruction) +{ + struct c_instruction *inst = calloc(1, sizeof(struct c_instruction)); + + if (inst == NULL) + return -ENOMEM; + memcpy(inst, instruction, sizeof(struct c_instruction)); + c_list_add_tail(inst, &node->insts); + return 0; +} + +struct c_node *c_shader_cfg_new_node_after(struct c_shader *shader, struct c_node *predecessor) +{ + struct c_node *node = calloc(1, sizeof(struct c_node)); + + if (node == NULL) + return NULL; + c_node_init(node); + if (c_node_cfg_link(predecessor, node)) { + free(node); + return NULL; + } + c_list_add_tail(node, &shader->nodes); + return node; +} + +int c_shader_init(struct c_shader *shader, unsigned type) +{ + unsigned i; + int r; + + shader->type = type; + for (i = 0; i < C_FILE_COUNT; i++) { + shader->files[i].nvectors = 0; + c_list_init(&shader->files[i].vectors); + } + c_list_init(&shader->nodes); + c_node_init(&shader->entry); + c_node_init(&shader->end); + shader->entry.opcode = C_OPCODE_ENTRY; + shader->end.opcode = C_OPCODE_END; + r = c_node_cfg_link(&shader->entry, &shader->end); + if (r) + return r; + return 0; +} + +struct c_vector *c_shader_vector_new(struct c_shader *shader, unsigned file, unsigned name, int sid) +{ + struct c_vector *v = calloc(1, sizeof(struct c_vector)); + int i; + + if (v == NULL) { + return NULL; + } + for (i = 0; i < 4; i++) { + v->channel[i] = calloc(1, sizeof(struct c_channel)); + if (v->channel[i] == NULL) + goto out_err; + v->channel[i]->vindex = i; + v->channel[i]->vector = v; + } + v->file = file; + v->name = name; + v->sid = sid; + shader->files[v->file].nvectors++; + v->id = shader->nvectors++; + c_list_add_tail(v, &shader->files[v->file].vectors); + return v; +out_err: + for (i = 0; i < 4; i++) { + free(v->channel[i]); + } + free(v); + return NULL; +} + +static void c_node_remove_link(struct c_node_link *head, struct c_node *node) +{ + struct c_node_link *link, *tmp; + + c_list_for_each_safe(link, tmp, head) { + if (link->node == node) { + c_list_del(link); + free(link); + } + } +} + +static void c_node_destroy(struct c_node *node) +{ + struct c_instruction *i, *ni; + struct c_node_link *link, *tmp; + + c_list_for_each_safe(i, ni, &node->insts) { + c_list_del(i); + free(i); + } + if (node->parent) + c_node_remove_link(&node->parent->childs, node); + node->parent = NULL; + c_list_for_each_safe(link, tmp, &node->predecessors) { + c_node_remove_link(&link->node->successors, node); + c_list_del(link); + free(link); + } + c_list_for_each_safe(link, tmp, &node->successors) { + c_node_remove_link(&link->node->predecessors, node); + c_list_del(link); + free(link); + } + c_list_for_each_safe(link, tmp, &node->childs) { + link->node->parent = NULL; + c_list_del(link); + free(link); + } +} + +void c_shader_destroy(struct c_shader *shader) +{ + struct c_node *n, *nn; + struct c_vector *v, *nv; + unsigned i; + + for (i = 0; i < C_FILE_COUNT; i++) { + shader->files[i].nvectors = 0; + c_list_for_each_safe(v, nv, &shader->files[i].vectors) { + c_list_del(v); + free(v->channel[0]); + free(v->channel[1]); + free(v->channel[2]); + free(v->channel[3]); + free(v); + } + } + c_list_for_each_safe(n, nn, &shader->nodes) { + c_list_del(n); + c_node_destroy(n); + } + memset(shader, 0, sizeof(struct c_shader)); +} + +static void c_shader_dfs_without_rec(struct c_node *entry, struct c_node *node) +{ + struct c_node_link *link; + + if (entry == node || entry->visited) + return; + entry->visited = 1; + c_list_for_each(link, &entry->successors) { + c_shader_dfs_without_rec(link->node, node); + } +} + +static void c_shader_dfs_without(struct c_shader *shader, struct c_node *node) +{ + struct c_node *n; + + shader->entry.visited = 0; + shader->end.visited = 0; + c_list_for_each(n, &shader->nodes) { + n->visited = 0; + } + c_shader_dfs_without_rec(&shader->entry, node); +} + +static int c_shader_build_dominator_tree_rec(struct c_shader *shader, struct c_node *node) +{ + struct c_node_link *link, *nlink; + unsigned found = 0; + int r; + + if (node->done) + return 0; + node->done = 1; + c_list_for_each(link, &node->predecessors) { + /* if we remove this predecessor can we reach the current node ? */ + c_shader_dfs_without(shader, link->node); + if (node->visited == 0) { + /* we were unable to visit current node thus current + * predecessor is the immediate dominator of node, as + * their can be only one immediate dominator we break + */ + node->parent = link->node; + nlink = c_node_link_new(node); + if (nlink == NULL) + return -ENOMEM; + c_list_add_tail(nlink, &link->node->childs); + found = 1; + break; + } + } + /* this shouldn't happen there should at least be 1 denominator for each node */ + if (!found && node->opcode != C_OPCODE_ENTRY) { + fprintf(stderr, "invalid flow control graph node %p (%d) has no immediate dominator\n", + node, node->opcode); + return -EINVAL; + } + c_list_for_each(link, &node->predecessors) { + r = c_shader_build_dominator_tree_rec(shader, link->node); + if (r) + return r; + } + return 0; +} + +int c_shader_build_dominator_tree(struct c_shader *shader) +{ + struct c_node *node; + c_list_for_each(node, &shader->nodes) { + node->done = 0; + } + return c_shader_build_dominator_tree_rec(shader, &shader->end); +} diff --git a/src/gallium/drivers/r600/r600_compiler.h b/src/gallium/drivers/r600/r600_compiler.h new file mode 100644 index 00000000000..64dab5000be --- /dev/null +++ b/src/gallium/drivers/r600/r600_compiler.h @@ -0,0 +1,328 @@ +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef R600_COMPILER_H +#define R600_COMPILER_H + +struct c_vector; + +/* operand are the basic source/destination of each operation */ +struct c_channel { + struct c_channel *next; + struct c_channel *prev; + unsigned vindex; /**< index in vector X,Y,Z,W (0,1,2,3) */ + unsigned value; /**< immediate value 32bits */ + struct c_vector *vector; /**< vector to which it belongs */ +}; + +/* in GPU world most of the time operand are grouped into vector + * of 4 component this structure is mostly and handler to group + * operand into a same vector + */ +struct c_vector { + struct c_vector *next; + struct c_vector *prev; + unsigned id; /**< vector uniq id */ + unsigned name; /**< semantic name */ + unsigned file; /**< operand file C_FILE_* */ + int sid; /**< semantic id */ + struct c_channel *channel[4]; /**< operands */ +}; + +#define c_list_init(e) do { (e)->next = e; (e)->prev = e; } while(0) +#define c_list_add(e, h) do { (e)->next = (h)->next; (e)->prev = h; (h)->next = e; (e)->next->prev = e; } while(0) +#define c_list_add_tail(e, h) do { (e)->next = h; (e)->prev = (h)->prev; (h)->prev = e; (e)->prev->next = e; } while(0) +#define c_list_del(e) do { (e)->next->prev = (e)->prev; (e)->prev->next = (e)->next; c_list_init(e); } while(0) +#define c_list_for_each(p, h) for (p = (h)->next; p != (h); p = p->next) +#define c_list_for_each_from(p, s, h) for (p = s; p != (h); p = p->next) +#define c_list_for_each_safe(p, n, h) for (p = (h)->next, n = p->next; p != (h); p = n, n = p->next) +#define c_list_empty(h) ((h)->next == h) + + +#define C_PROGRAM_TYPE_VS 0 +#define C_PROGRAM_TYPE_FS 1 +#define C_PROGRAM_TYPE_COUNT 2 + +#define C_NODE_FLAG_ALU 1 +#define C_NODE_FLAG_FETCH 2 + +#define C_SWIZZLE_X 0 +#define C_SWIZZLE_Y 1 +#define C_SWIZZLE_Z 2 +#define C_SWIZZLE_W 3 +#define C_SWIZZLE_0 4 +#define C_SWIZZLE_1 5 +#define C_SWIZZLE_D 6 /**< discard */ + +#define C_FILE_NULL 0 +#define C_FILE_CONSTANT 1 +#define C_FILE_INPUT 2 +#define C_FILE_OUTPUT 3 +#define C_FILE_TEMPORARY 4 +#define C_FILE_SAMPLER 5 +#define C_FILE_ADDRESS 6 +#define C_FILE_IMMEDIATE 7 +#define C_FILE_LOOP 8 +#define C_FILE_PREDICATE 9 +#define C_FILE_SYSTEM_VALUE 10 +#define C_FILE_RESOURCE 11 +#define C_FILE_COUNT 12 + +#define C_SEMANTIC_POSITION 0 +#define C_SEMANTIC_COLOR 1 +#define C_SEMANTIC_BCOLOR 2 /**< back-face color */ +#define C_SEMANTIC_FOG 3 +#define C_SEMANTIC_PSIZE 4 +#define C_SEMANTIC_GENERIC 5 +#define C_SEMANTIC_NORMAL 6 +#define C_SEMANTIC_FACE 7 +#define C_SEMANTIC_EDGEFLAG 8 +#define C_SEMANTIC_PRIMID 9 +#define C_SEMANTIC_INSTANCEID 10 +#define C_SEMANTIC_VERTEXID 11 +#define C_SEMANTIC_COUNT 12 /**< number of semantic values */ + +#define C_OPCODE_NOP 0 +#define C_OPCODE_MOV 1 +#define C_OPCODE_LIT 2 +#define C_OPCODE_RCP 3 +#define C_OPCODE_RSQ 4 +#define C_OPCODE_EXP 5 +#define C_OPCODE_LOG 6 +#define C_OPCODE_MUL 7 +#define C_OPCODE_ADD 8 +#define C_OPCODE_DP3 9 +#define C_OPCODE_DP4 10 +#define C_OPCODE_DST 11 +#define C_OPCODE_MIN 12 +#define C_OPCODE_MAX 13 +#define C_OPCODE_SLT 14 +#define C_OPCODE_SGE 15 +#define C_OPCODE_MAD 16 +#define C_OPCODE_SUB 17 +#define C_OPCODE_LRP 18 +#define C_OPCODE_CND 19 +/* gap */ +#define C_OPCODE_DP2A 21 +/* gap */ +#define C_OPCODE_FRC 24 +#define C_OPCODE_CLAMP 25 +#define C_OPCODE_FLR 26 +#define C_OPCODE_ROUND 27 +#define C_OPCODE_EX2 28 +#define C_OPCODE_LG2 29 +#define C_OPCODE_POW 30 +#define C_OPCODE_XPD 31 +/* gap */ +#define C_OPCODE_ABS 33 +#define C_OPCODE_RCC 34 +#define C_OPCODE_DPH 35 +#define C_OPCODE_COS 36 +#define C_OPCODE_DDX 37 +#define C_OPCODE_DDY 38 +#define C_OPCODE_KILP 39 /* predicated kill */ +#define C_OPCODE_PK2H 40 +#define C_OPCODE_PK2US 41 +#define C_OPCODE_PK4B 42 +#define C_OPCODE_PK4UB 43 +#define C_OPCODE_RFL 44 +#define C_OPCODE_SEQ 45 +#define C_OPCODE_SFL 46 +#define C_OPCODE_SGT 47 +#define C_OPCODE_SIN 48 +#define C_OPCODE_SLE 49 +#define C_OPCODE_SNE 50 +#define C_OPCODE_STR 51 +#define C_OPCODE_TEX 52 +#define C_OPCODE_TXD 53 +#define C_OPCODE_TXP 54 +#define C_OPCODE_UP2H 55 +#define C_OPCODE_UP2US 56 +#define C_OPCODE_UP4B 57 +#define C_OPCODE_UP4UB 58 +#define C_OPCODE_X2D 59 +#define C_OPCODE_ARA 60 +#define C_OPCODE_ARR 61 +#define C_OPCODE_BRA 62 +#define C_OPCODE_CAL 63 +#define C_OPCODE_RET 64 +#define C_OPCODE_SSG 65 /* SGN */ +#define C_OPCODE_CMP 66 +#define C_OPCODE_SCS 67 +#define C_OPCODE_TXB 68 +#define C_OPCODE_NRM 69 +#define C_OPCODE_DIV 70 +#define C_OPCODE_DP2 71 +#define C_OPCODE_TXL 72 +#define C_OPCODE_BRK 73 +#define C_OPCODE_IF 74 +#define C_OPCODE_BGNFOR 75 +#define C_OPCODE_REP 76 +#define C_OPCODE_ELSE 77 +#define C_OPCODE_ENDIF 78 +#define C_OPCODE_ENDFOR 79 +#define C_OPCODE_ENDREP 80 +#define C_OPCODE_PUSHA 81 +#define C_OPCODE_POPA 82 +#define C_OPCODE_CEIL 83 +#define C_OPCODE_I2F 84 +#define C_OPCODE_NOT 85 +#define C_OPCODE_TRUNC 86 +#define C_OPCODE_SHL 87 +/* gap */ +#define C_OPCODE_AND 89 +#define C_OPCODE_OR 90 +#define C_OPCODE_MOD 91 +#define C_OPCODE_XOR 92 +#define C_OPCODE_SAD 93 +#define C_OPCODE_TXF 94 +#define C_OPCODE_TXQ 95 +#define C_OPCODE_CONT 96 +#define C_OPCODE_EMIT 97 +#define C_OPCODE_ENDPRIM 98 +#define C_OPCODE_BGNLOOP 99 +#define C_OPCODE_BGNSUB 100 +#define C_OPCODE_ENDLOOP 101 +#define C_OPCODE_ENDSUB 102 +/* gap */ +#define C_OPCODE_NRM4 112 +#define C_OPCODE_CALLNZ 113 +#define C_OPCODE_IFC 114 +#define C_OPCODE_BREAKC 115 +#define C_OPCODE_KIL 116 /* conditional kill */ +#define C_OPCODE_END 117 /* aka HALT */ +/* gap */ +#define C_OPCODE_F2I 119 +#define C_OPCODE_IDIV 120 +#define C_OPCODE_IMAX 121 +#define C_OPCODE_IMIN 122 +#define C_OPCODE_INEG 123 +#define C_OPCODE_ISGE 124 +#define C_OPCODE_ISHR 125 +#define C_OPCODE_ISLT 126 +#define C_OPCODE_F2U 127 +#define C_OPCODE_U2F 128 +#define C_OPCODE_UADD 129 +#define C_OPCODE_UDIV 130 +#define C_OPCODE_UMAD 131 +#define C_OPCODE_UMAX 132 +#define C_OPCODE_UMIN 133 +#define C_OPCODE_UMOD 134 +#define C_OPCODE_UMUL 135 +#define C_OPCODE_USEQ 136 +#define C_OPCODE_USGE 137 +#define C_OPCODE_USHR 138 +#define C_OPCODE_USLT 139 +#define C_OPCODE_USNE 140 +#define C_OPCODE_SWITCH 141 +#define C_OPCODE_CASE 142 +#define C_OPCODE_DEFAULT 143 +#define C_OPCODE_ENDSWITCH 144 +#define C_OPCODE_VFETCH 145 +#define C_OPCODE_ENTRY 146 +#define C_OPCODE_ARL 147 +#define C_OPCODE_LAST 148 + +#define C_OPERAND_FLAG_ABS (1 << 0) +#define C_OPERAND_FLAG_NEG (1 << 1) + +struct c_operand { + struct c_vector *vector; + unsigned swizzle[4]; + unsigned flag[4]; +}; + +struct c_instruction { + struct c_instruction *next, *prev; + unsigned opcode; + unsigned ninput; + struct c_operand input[3]; + struct c_operand output; + unsigned write_mask; + void *backend; +}; + +struct c_node; + +struct c_node_link { + struct c_node_link *next; + struct c_node_link *prev; + struct c_node *node; +}; + +/** + * struct c_node + * + * @next: all node are in a double linked list, this point to + * next node + * @next: all node are in a double linked list, this point to + * previous node + * @predecessors: list of all predecessor nodes in the flow graph + * @successors: list of all sucessor nodes in the flow graph + * @parent: parent node in the depth first walk tree + * @childs: child nodes in the depth first walk tree + */ +struct c_node { + struct c_node *next, *prev; + struct c_node_link predecessors; + struct c_node_link successors; + struct c_node *parent; + struct c_node_link childs; + struct c_instruction insts; + unsigned opcode; + unsigned visited; + unsigned done; + void *backend; +}; + +struct c_file { + unsigned nvectors; + struct c_vector vectors; +}; + +struct c_shader { + unsigned nvectors; + struct c_file files[C_FILE_COUNT]; + struct c_node nodes; + struct c_node entry; + struct c_node end; + unsigned type; +}; + +int c_shader_init(struct c_shader *shader, unsigned type); +void c_shader_destroy(struct c_shader *shader); +struct c_vector *c_shader_vector_new(struct c_shader *shader, unsigned file, unsigned name, int sid); +int c_shader_build_dominator_tree(struct c_shader *shader); +void c_shader_dump(struct c_shader *shader); + +void c_node_init(struct c_node *node); +int c_node_add_new_instruction(struct c_node *node, struct c_instruction *instruction); +int c_node_add_new_instruction_head(struct c_node *node, struct c_instruction *instruction); + +/* control flow graph functions */ +int c_node_cfg_link(struct c_node *predecessor, struct c_node *successor); +struct c_node *c_node_cfg_new_after(struct c_node *predecessor); +struct c_node *c_shader_cfg_new_node_after(struct c_shader *shader, struct c_node *predecessor); + +struct c_vector *c_vector_new(void); + +#endif diff --git a/src/gallium/drivers/r600/r600_compiler_dump.c b/src/gallium/drivers/r600/r600_compiler_dump.c new file mode 100644 index 00000000000..9df6a38598e --- /dev/null +++ b/src/gallium/drivers/r600/r600_compiler_dump.c @@ -0,0 +1,270 @@ +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <stdio.h> +#include "r600_compiler.h" + +static const char *c_file_swz[] = { + "x", + "y", + "z", + "w", + "0", + "1", + ".", +}; + +static const char *c_file_str[] = { + "NULL", + "CONSTANT", + "INPUT", + "OUTPUT", + "TEMPORARY", + "SAMPLER", + "ADDRESS", + "IMMEDIATE", + "LOOP", + "PREDICATE", + "SYSTEM_VALUE", +}; + +static const char *c_semantic_str[] = { + "POSITION", + "COLOR", + "BCOLOR", + "FOG", + "PSIZE", + "GENERIC", + "NORMAL", + "FACE", + "EDGEFLAG", + "PRIMID", + "INSTANCEID", +}; + +static const char *c_opcode_str[] = { + "ARL", + "MOV", + "LIT", + "RCP", + "RSQ", + "EXP", + "LOG", + "MUL", + "ADD", + "DP3", + "DP4", + "DST", + "MIN", + "MAX", + "SLT", + "SGE", + "MAD", + "SUB", + "LRP", + "CND", + "(INVALID)", + "DP2A", + "(INVALID)", + "(INVALID)", + "FRC", + "CLAMP", + "FLR", + "ROUND", + "EX2", + "LG2", + "POW", + "XPD", + "(INVALID)", + "ABS", + "RCC", + "DPH", + "COS", + "DDX", + "DDY", + "KILP", + "PK2H", + "PK2US", + "PK4B", + "PK4UB", + "RFL", + "SEQ", + "SFL", + "SGT", + "SIN", + "SLE", + "SNE", + "STR", + "TEX", + "TXD", + "TXP", + "UP2H", + "UP2US", + "UP4B", + "UP4UB", + "X2D", + "ARA", + "ARR", + "BRA", + "CAL", + "RET", + "SSG", + "CMP", + "SCS", + "TXB", + "NRM", + "DIV", + "DP2", + "TXL", + "BRK", + "IF", + "BGNFOR", + "REP", + "ELSE", + "ENDIF", + "ENDFOR", + "ENDREP", + "PUSHA", + "POPA", + "CEIL", + "I2F", + "NOT", + "TRUNC", + "SHL", + "(INVALID)", + "AND", + "OR", + "MOD", + "XOR", + "SAD", + "TXF", + "TXQ", + "CONT", + "EMIT", + "ENDPRIM", + "BGNLOOP", + "BGNSUB", + "ENDLOOP", + "ENDSUB", + "(INVALID)", + "(INVALID)", + "(INVALID)", + "(INVALID)", + "NOP", + "(INVALID)", + "(INVALID)", + "(INVALID)", + "(INVALID)", + "NRM4", + "CALLNZ", + "IFC", + "BREAKC", + "KIL", + "END", + "(INVALID)", + "F2I", + "IDIV", + "IMAX", + "IMIN", + "INEG", + "ISGE", + "ISHR", + "ISLT", + "F2U", + "U2F", + "UADD", + "UDIV", + "UMAD", + "UMAX", + "UMIN", + "UMOD", + "UMUL", + "USEQ", + "USGE", + "USHR", + "USLT", + "USNE", + "SWITCH", + "CASE", + "DEFAULT", + "ENDSWITCH", + "VFETCH", + "ENTRY", +}; + +static inline const char *c_get_name(const char *name[], unsigned i) +{ + return name[i]; +} + +static void pindent(unsigned indent) +{ + unsigned i; + for (i = 0; i < indent; i++) + fprintf(stderr, " "); +} + +static void c_node_dump(struct c_node *node, unsigned indent) +{ + struct c_instruction *i; + unsigned j; + + pindent(indent); fprintf(stderr, "# node %s\n", c_get_name(c_opcode_str, node->opcode)); + c_list_for_each(i, &node->insts) { + pindent(indent); fprintf(stderr, "%s", c_get_name(c_opcode_str, i->opcode)); + fprintf(stderr, " %s[%d][%s%s%s%s]", + c_get_name(c_file_str, i->output.vector->file), + i->output.vector->id, + c_get_name(c_file_swz, i->output.swizzle[0]), + c_get_name(c_file_swz, i->output.swizzle[1]), + c_get_name(c_file_swz, i->output.swizzle[2]), + c_get_name(c_file_swz, i->output.swizzle[3])); + for (j = 0; j < i->ninput; j++) { + fprintf(stderr, " %s[%d][%s%s%s%s]", + c_get_name(c_file_str, i->input[j].vector->file), + i->input[j].vector->id, + c_get_name(c_file_swz, i->input[j].swizzle[0]), + c_get_name(c_file_swz, i->input[j].swizzle[1]), + c_get_name(c_file_swz, i->input[j].swizzle[2]), + c_get_name(c_file_swz, i->input[j].swizzle[3])); + } + fprintf(stderr, ";\n"); + } +} + +static void c_shader_dump_rec(struct c_shader *shader, struct c_node *node, unsigned indent) +{ + struct c_node_link *link; + + c_node_dump(node, indent); + c_list_for_each(link, &node->childs) { + c_shader_dump_rec(shader, link->node, indent + 1); + } +} + +void c_shader_dump(struct c_shader *shader) +{ + c_shader_dump_rec(shader, &shader->entry, 0); +} diff --git a/src/gallium/drivers/r600/r600_compiler_r600.c b/src/gallium/drivers/r600/r600_compiler_r600.c new file mode 100644 index 00000000000..1be5bf2b4a0 --- /dev/null +++ b/src/gallium/drivers/r600/r600_compiler_r600.c @@ -0,0 +1,869 @@ +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <stdio.h> +#include <errno.h> +#include <util/u_format.h> +#include "r600_screen.h" +#include "r600_context.h" +#include "r600_sq.h" + + +struct r600_alu_instruction { + unsigned copcode; + enum r600_instruction instruction; +}; + +static int r600_shader_alu_translate(struct r600_shader *rshader, + struct r600_shader_node *node, + struct c_instruction *instruction); +struct r600_alu_instruction r600_alu_instruction[C_OPCODE_LAST]; +struct r600_instruction_info r600_instruction_info[]; + +int r600_shader_insert_fetch(struct c_shader *shader) +{ + struct c_vector *vi, *vr, *v, *nv; + struct c_instruction instruction; + int r; + + if (shader->type != C_PROGRAM_TYPE_VS) + return 0; + vi = c_shader_vector_new(shader, C_FILE_INPUT, C_SEMANTIC_VERTEXID, -1); + if (vi == NULL) + return -ENOMEM; + c_list_for_each_safe(v, nv, &shader->files[C_FILE_INPUT].vectors) { + if (v == vi) + continue; + vr = c_shader_vector_new(shader, C_FILE_RESOURCE, C_SEMANTIC_GENERIC, -1); + if (vr == NULL) + return -ENOMEM; + memset(&instruction, 0, sizeof(struct c_instruction)); + instruction.opcode = C_OPCODE_VFETCH; + instruction.write_mask = 0xF; + instruction.ninput = 2; + instruction.output.vector = v; + instruction.input[0].vector = vi; + instruction.input[1].vector = vr; + instruction.output.swizzle[0] = C_SWIZZLE_X; + instruction.output.swizzle[1] = C_SWIZZLE_Y; + instruction.output.swizzle[2] = C_SWIZZLE_Z; + instruction.output.swizzle[3] = C_SWIZZLE_W; + r = c_node_add_new_instruction_head(&shader->entry, &instruction); + if (r) + return r; + c_list_del(v); + shader->files[C_FILE_INPUT].nvectors--; + c_list_add_tail(v, &shader->files[C_FILE_TEMPORARY].vectors); + shader->files[C_FILE_TEMPORARY].nvectors++; + v->file = C_FILE_TEMPORARY; + } + return 0; +} + +void r600_shader_cleanup(struct r600_shader *rshader) +{ + struct r600_shader_node *n, *nn; + struct r600_shader_vfetch *vf, *nvf; + struct r600_shader_alu *alu, *nalu; + int i; + + if (rshader == NULL) + return; + if (rshader->gpr) { + for (i = 0; i < rshader->nvector; i++) { + free(rshader->gpr[i]); + } + free(rshader->gpr); + rshader->gpr = NULL; + } + c_list_for_each_safe(n, nn, &rshader->nodes) { + c_list_del(n); + c_list_for_each_safe(vf, nvf, &n->vfetch) { + c_list_del(vf); + free(vf); + } + c_list_for_each_safe(alu, nalu, &n->alu) { + c_list_del(alu); + free(alu); + } + free(n); + } + free(rshader->bcode); + return; +} + +int r600_shader_vfetch_bytecode(struct r600_shader *rshader, + struct r600_shader_node *rnode, + struct r600_shader_vfetch *vfetch, + unsigned *cid) +{ + unsigned id = *cid; + + vfetch->cf_addr = id; + rshader->bcode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vfetch->src[1].sel) | + S_SQ_VTX_WORD0_SRC_GPR(vfetch->src[0].sel) | + S_SQ_VTX_WORD0_SRC_SEL_X(vfetch->src[0].sel) | + S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(0x1F); + rshader->bcode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vfetch->dst[0].chan) | + S_SQ_VTX_WORD1_DST_SEL_Y(vfetch->dst[1].chan) | + S_SQ_VTX_WORD1_DST_SEL_Z(vfetch->dst[2].chan) | + S_SQ_VTX_WORD1_DST_SEL_W(vfetch->dst[3].chan) | + S_SQ_VTX_WORD1_USE_CONST_FIELDS(1) | + S_SQ_VTX_WORD1_GPR_DST_GPR(vfetch->dst[0].sel); + rshader->bcode[id++] = S_SQ_VTX_WORD2_MEGA_FETCH(1); + rshader->bcode[id++] = 0; + *cid = id; + return 0; +} + +int r600_shader_update(struct r600_shader *rshader, enum pipe_format *resource_format) +{ + struct r600_shader_node *rnode; + struct r600_shader_vfetch *vfetch; + unsigned i; + + memcpy(rshader->resource_format, resource_format, + rshader->nresource * sizeof(enum pipe_format)); + c_list_for_each(rnode, &rshader->nodes) { + c_list_for_each(vfetch, &rnode->vfetch) { + const struct util_format_description *desc; + i = vfetch->cf_addr + 1; + rshader->bcode[i] &= C_SQ_VTX_WORD1_DST_SEL_X; + rshader->bcode[i] &= C_SQ_VTX_WORD1_DST_SEL_Y; + rshader->bcode[i] &= C_SQ_VTX_WORD1_DST_SEL_Z; + rshader->bcode[i] &= C_SQ_VTX_WORD1_DST_SEL_W; + desc = util_format_description(resource_format[vfetch->src[1].sel]); + if (desc == NULL) { + fprintf(stderr, "%s unknown format %d\n", __func__, resource_format[vfetch->src[1].sel]); + continue; + } + /* WARNING so far TGSI swizzle match R600 ones */ + rshader->bcode[i] |= S_SQ_VTX_WORD1_DST_SEL_X(desc->swizzle[0]); + rshader->bcode[i] |= S_SQ_VTX_WORD1_DST_SEL_Y(desc->swizzle[1]); + rshader->bcode[i] |= S_SQ_VTX_WORD1_DST_SEL_Z(desc->swizzle[2]); + rshader->bcode[i] |= S_SQ_VTX_WORD1_DST_SEL_W(desc->swizzle[3]); + } + } + return 0; +} + +int r600_shader_register(struct r600_shader *rshader) +{ + struct c_vector *v, *nv; + unsigned tid, cid, rid, i; + + rshader->nvector = rshader->cshader.nvectors; + rshader->gpr = calloc(rshader->nvector, sizeof(void*)); + if (rshader->gpr == NULL) + return -ENOMEM; + tid = 0; + cid = 0; + rid = 0; + /* alloc input first */ + c_list_for_each(v, &rshader->cshader.files[C_FILE_INPUT].vectors) { + nv = c_vector_new(); + if (nv == NULL) { + return -ENOMEM; + } + memcpy(nv, v, sizeof(struct c_vector)); + nv->id = tid++; + rshader->gpr[v->id] = nv; + } + for (i = 0; i < C_FILE_COUNT; i++) { + if (i == C_FILE_INPUT || i == C_FILE_IMMEDIATE) + continue; + c_list_for_each(v, &rshader->cshader.files[i].vectors) { + switch (v->file) { + case C_FILE_OUTPUT: + case C_FILE_TEMPORARY: + nv = c_vector_new(); + if (nv == NULL) { + return -ENOMEM; + } + memcpy(nv, v, sizeof(struct c_vector)); + nv->id = tid++; + rshader->gpr[v->id] = nv; + break; + case C_FILE_CONSTANT: + nv = c_vector_new(); + if (nv == NULL) { + return -ENOMEM; + } + memcpy(nv, v, sizeof(struct c_vector)); + nv->id = (cid++) + 256; + rshader->gpr[v->id] = nv; + break; + case C_FILE_RESOURCE: + nv = c_vector_new(); + if (nv == NULL) { + return -ENOMEM; + } + memcpy(nv, v, sizeof(struct c_vector)); + nv->id = (rid++); + rshader->gpr[v->id] = nv; + break; + default: + fprintf(stderr, "%s:%d unsupported file %d\n", __func__, __LINE__, v->file); + return -EINVAL; + } + } + } + rshader->ngpr = tid; + rshader->nconstant = cid; + rshader->nresource = rid; + return 0; +} + +int r600_shader_find_gpr(struct r600_shader *rshader, struct c_vector *v, unsigned swizzle, + struct r600_shader_operand *operand) +{ + struct c_vector *tmp; + + /* Values [0,127] correspond to GPR[0..127]. + * Values [256,511] correspond to cfile constants c[0..255]. + * Other special values are shown in the list below. + * 248 SQ_ALU_SRC_0: special constant 0.0. + * 249 SQ_ALU_SRC_1: special constant 1.0 float. + * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. + * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. + * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. + * 253 SQ_ALU_SRC_LITERAL: literal constant. + * 254 SQ_ALU_SRC_PV: previous vector result. + * 255 SQ_ALU_SRC_PS: previous scalar result. + */ + operand->vector = v; + operand->sel = 248; + operand->chan = 0; + operand->neg = 0; + operand->abs = 0; + if (v == NULL) + return 0; + if (v->file == C_FILE_IMMEDIATE) { + operand->sel = 253; + } else { + tmp = rshader->gpr[v->id]; + if (tmp == NULL) { + fprintf(stderr, "%s %d unknown register\n", __FILE__, __LINE__); + return -EINVAL; + } + operand->sel = tmp->id; + } + operand->chan = swizzle; + switch (swizzle) { + case C_SWIZZLE_X: + case C_SWIZZLE_Y: + case C_SWIZZLE_Z: + case C_SWIZZLE_W: + break; + case C_SWIZZLE_0: + operand->sel = 248; + operand->chan = 0; + break; + case C_SWIZZLE_1: + operand->sel = 249; + operand->chan = 0; + break; + default: + fprintf(stderr, "%s %d invalid swizzle %d\n", __FILE__, __LINE__, swizzle); + return -EINVAL; + } + return 0; +} + +static struct r600_shader_node *r600_shader_new_node(struct r600_shader *rshader, struct c_node *node) +{ + struct r600_shader_node *rnode; + + rnode = CALLOC_STRUCT(r600_shader_node); + if (rnode == NULL) + return NULL; + rnode->node = node; + c_list_init(&rnode->vfetch); + c_list_init(&rnode->alu); + c_list_add_tail(rnode, &rshader->nodes); + return rnode; +} + +static int r600_shader_add_vfetch(struct r600_shader *rshader, + struct r600_shader_node *node, + struct c_instruction *instruction) +{ + struct r600_shader_vfetch *vfetch; + struct r600_shader_node *rnode; + int r; + + if (instruction == NULL) + return 0; + if (instruction->opcode != C_OPCODE_VFETCH) + return 0; + if (!c_list_empty(&node->alu)) { + rnode = r600_shader_new_node(rshader, node->node); + if (rnode == NULL) + return -ENOMEM; + node = rnode; + } + vfetch = calloc(1, sizeof(struct r600_shader_vfetch)); + if (vfetch == NULL) + return -ENOMEM; + r = r600_shader_find_gpr(rshader, instruction->output.vector, 0, &vfetch->dst[0]); + if (r) + return r; + r = r600_shader_find_gpr(rshader, instruction->input[0].vector, 0, &vfetch->src[0]); + if (r) + return r; + r = r600_shader_find_gpr(rshader, instruction->input[1].vector, 0, &vfetch->src[1]); + if (r) + return r; + vfetch->dst[0].chan = C_SWIZZLE_X; + vfetch->dst[1].chan = C_SWIZZLE_Y; + vfetch->dst[2].chan = C_SWIZZLE_Z; + vfetch->dst[3].chan = C_SWIZZLE_W; + c_list_add_tail(vfetch, &node->vfetch); + node->nslot += 2; + return 0; +} + +static int r600_node_translate(struct r600_shader *rshader, struct c_node *node) +{ + struct c_instruction *instruction; + struct r600_shader_node *rnode; + int r; + + rnode = r600_shader_new_node(rshader, node); + if (rnode == NULL) + return -ENOMEM; + c_list_for_each(instruction, &node->insts) { + switch (instruction->opcode) { + case C_OPCODE_VFETCH: + r = r600_shader_add_vfetch(rshader, rnode, instruction); + if (r) { + fprintf(stderr, "%s %d vfetch failed\n", __func__, __LINE__); + return r; + } + break; + default: + r = r600_shader_alu_translate(rshader, rnode, instruction); + if (r) { + fprintf(stderr, "%s %d alu failed\n", __func__, __LINE__); + return r; + } + break; + } + } + return 0; +} + +int r600_shader_translate_rec(struct r600_shader *rshader, struct c_node *node) +{ + struct c_node_link *link; + int r; + + if (node->opcode == C_OPCODE_END) + return 0; + r = r600_node_translate(rshader, node); + if (r) + return r; + c_list_for_each(link, &node->childs) { + r = r600_shader_translate_rec(rshader, link->node); + if (r) + return r; + } + return 0; +} + +static struct r600_shader_alu *r600_shader_insert_alu(struct r600_shader *rshader, struct r600_shader_node *node) +{ + struct r600_shader_alu *alu; + + alu = CALLOC_STRUCT(r600_shader_alu); + if (alu == NULL) + return NULL; + alu->alu[0].inst = INST_NOP; + alu->alu[1].inst = INST_NOP; + alu->alu[2].inst = INST_NOP; + alu->alu[3].inst = INST_NOP; + alu->alu[4].inst = INST_NOP; + alu->alu[0].opcode = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; + alu->alu[1].opcode = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; + alu->alu[2].opcode = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; + alu->alu[3].opcode = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; + alu->alu[4].opcode = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; + c_list_add_tail(alu, &node->alu); + return alu; +} + +static int r600_shader_node_add_alu(struct r600_shader *rshader, + struct r600_shader_node *node, + struct r600_shader_alu *alu, + unsigned instruction, + unsigned ninput, + struct r600_shader_operand *dst, + struct r600_shader_operand src[3]) +{ + struct r600_shader_alu *nalu; + unsigned nconstant = 0, nliteral = 0, slot, i; + + /* count number of constant & literal */ + for (i = 0; i < ninput; i++) { + if (src[i].vector->file == C_FILE_IMMEDIATE) { + nliteral++; + nconstant++; + } + } + + slot = dst->chan; + if (r600_instruction_info[instruction].is_trans) { + slot = 4; + } + + /* allocate new alu group if necessary */ + nalu = alu; + if (alu == NULL) { + nalu = r600_shader_insert_alu(rshader, node); + if (nalu == NULL) + return -ENOMEM; + alu = nalu; + } + if ((alu->alu[slot].inst != INST_NOP && + alu->alu[4].inst != INST_NOP) || + (alu->nconstant + nconstant) > 4 || + (alu->nliteral + nliteral) > 4) { + /* neither trans neither dst slot are free need new alu */ + nalu = r600_shader_insert_alu(rshader, node); + if (nalu == NULL) + return -ENOMEM; + alu = nalu; + } + if (alu->alu[slot].inst != INST_NOP) { + slot = 4; + } + + alu->alu[slot].dst = *dst; + alu->alu[slot].inst = instruction; + alu->alu[slot].opcode = r600_instruction_info[instruction].opcode; + alu->alu[slot].is_op3 = r600_instruction_info[instruction].is_op3; + for (i = 0; i < ninput; i++) { + alu->alu[slot].src[i] = src[i]; + if (src[i].vector->file == C_FILE_IMMEDIATE) { + alu->literal[alu->nliteral++] = src[i].vector->channel[0]->value; + alu->literal[alu->nliteral++] = src[i].vector->channel[1]->value; + alu->literal[alu->nliteral++] = src[i].vector->channel[2]->value; + alu->literal[alu->nliteral++] = src[i].vector->channel[3]->value; + } + } + return 0; +} + +static int r600_shader_alu_translate(struct r600_shader *rshader, + struct r600_shader_node *node, + struct c_instruction *instruction) +{ + struct r600_alu_instruction *ainfo = &r600_alu_instruction[instruction->opcode]; + struct r600_instruction_info *info; + struct r600_shader_alu *alu; + struct r600_shader_node *rnode; + int r, i, j; + struct r600_shader_operand dst; + struct r600_shader_operand src[3]; + + if (!c_list_empty(&node->vfetch)) { + rnode = r600_shader_new_node(rshader, node->node); + if (rnode == NULL) { + fprintf(stderr, "%s %d new node failed\n", __func__, __LINE__); + return -ENOMEM; + } + node = rnode; + } + alu = r600_shader_insert_alu(rshader, node); + if (alu == NULL) { + fprintf(stderr, "%s %d instert alu node failed\n", __func__, __LINE__); + return -ENOMEM; + } + for (i = 0; i < 4; i++) { + if (!(instruction->write_mask) || instruction->output.swizzle[i] == C_SWIZZLE_D) + continue; + if (ainfo->instruction == INST_NOP) { + fprintf(stderr, "%s:%d unsupported instruction %d\n", __FILE__, __LINE__, instruction->opcode); + continue; + } + info = &r600_instruction_info[ainfo->instruction]; + r = r600_shader_find_gpr(rshader, instruction->output.vector, + instruction->output.swizzle[i], &dst); + if (r) { + fprintf(stderr, "%s %d register failed\n", __FILE__, __LINE__); + return r; + } + for (j = 0; j < instruction->ninput; j++) { + r = r600_shader_find_gpr(rshader, instruction->input[j].vector, + instruction->input[j].swizzle[i], &src[j]); + if (r) { + fprintf(stderr, "%s %d register failed\n", __FILE__, __LINE__); + return r; + } + } + r = r600_shader_node_add_alu(rshader, node, alu, ainfo->instruction, + instruction->ninput, &dst, src); + if (r) { + fprintf(stderr, "%s %d failed\n", __FILE__, __LINE__); + return r; + } + } + return 0; +} + +void r600_shader_node_place(struct r600_shader *rshader) +{ + struct r600_shader_node *node, *nnode; + struct r600_shader_alu *alu, *nalu; + struct r600_shader_vfetch *vfetch, *nvfetch; + unsigned cf_id = 0, cf_addr = 0; + + rshader->ncf = 0; + rshader->nslot = 0; + c_list_for_each_safe(node, nnode, &rshader->nodes) { + c_list_for_each_safe(alu, nalu, &node->alu) { + node->nslot += alu->nalu; + node->nslot += alu->nliteral >> 1; + } + node->nfetch = 0; + c_list_for_each_safe(vfetch, nvfetch, &node->vfetch) { + node->nslot += 2; + node->nfetch += 1; + } + if (!c_list_empty(&node->vfetch)) { + /* fetch node need to be 16 bytes aligned*/ + cf_addr += 1; + cf_addr &= 0xFFFFFFFEUL; + } + node->cf_id = cf_id; + node->cf_addr = cf_addr; + cf_id += 2; + cf_addr += node->nslot * 2; + rshader->ncf++; + } + rshader->nslot = cf_addr; + c_list_for_each_safe(node, nnode, &rshader->nodes) { + node->cf_addr += cf_id * 2; + } + rshader->ncf += rshader->cshader.files[C_FILE_OUTPUT].nvectors; + rshader->ndw = rshader->ncf * 2 + rshader->nslot * 2; +} + +int r600_shader_legalize(struct r600_shader *rshader) +{ + struct r600_shader_node *node, *nnode; + struct r600_shader_alu *alu, *nalu; + + c_list_for_each_safe(node, nnode, &rshader->nodes) { + c_list_for_each_safe(alu, nalu, &node->alu) { + alu->nalu = 5; + alu->alu[4].last = 1; + } + } + return 0; +} + + + + + + + +struct r600_instruction_info r600_instruction_info[] = { + {INST_ADD, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, 0, 0}, + {INST_MUL, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, 0, 0}, + {INST_MUL_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE, 0, 0}, + {INST_MAX, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, 0, 0}, + {INST_MIN, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, 0, 0}, + {INST_MAX_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_DX10, 0, 0}, + {INST_MIN_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_DX10, 0, 0}, + {INST_SETE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, 0, 0}, + {INST_SETGT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, 0, 0}, + {INST_SETGE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, 0, 0}, + {INST_SETNE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, 0, 0}, + {INST_SETE_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_DX10, 0, 0}, + {INST_SETGT_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_DX10, 0, 0}, + {INST_SETGE_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_DX10, 0, 0}, + {INST_SETNE_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_DX10, 0, 0}, + {INST_FRACT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, 0, 0}, + {INST_TRUNC, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, 0, 0}, + {INST_CEIL, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, 0, 0}, + {INST_RNDNE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, 0, 0}, + {INST_FLOOR, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, 0, 0}, + {INST_MOVA, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA, 0, 0}, + {INST_MOVA_FLOOR, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR, 0, 0}, + {INST_MOVA_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, 0, 0}, + {INST_MOV, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, 0, 0}, + {INST_NOP, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 0, 0}, + {INST_PRED_SETGT_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT, 0, 0}, + {INST_PRED_SETGE_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT, 0, 0}, + {INST_PRED_SETE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE, 0, 0}, + {INST_PRED_SETGT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT, 0, 0}, + {INST_PRED_SETGE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE, 0, 0}, + {INST_PRED_SETNE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE, 0, 0}, + {INST_PRED_SET_INV, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV, 0, 0}, + {INST_PRED_SET_POP, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP, 0, 0}, + {INST_PRED_SET_CLR, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR, 0, 0}, + {INST_PRED_SET_RESTORE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE, 0, 0}, + {INST_PRED_SETE_PUSH, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH, 0, 0}, + {INST_PRED_SETGT_PUSH, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH, 0, 0}, + {INST_PRED_SETGE_PUSH, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH, 0, 0}, + {INST_PRED_SETNE_PUSH, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH, 0, 0}, + {INST_KILLE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE, 0, 0}, + {INST_KILLGT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, 0, 0}, + {INST_KILLGE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE, 0, 0}, + {INST_KILLNE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE, 0, 0}, + {INST_AND_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, 0, 0}, + {INST_OR_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, 0, 0}, + {INST_XOR_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, 0, 0}, + {INST_NOT_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, 0, 0}, + {INST_ADD_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, 0, 0}, + {INST_SUB_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, 0, 0}, + {INST_MAX_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, 0, 0}, + {INST_MIN_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, 0, 0}, + {INST_MAX_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, 0, 0}, + {INST_MIN_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, 0, 0}, + {INST_SETE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, 0, 0}, + {INST_SETGT_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, 0, 0}, + {INST_SETGE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, 0, 0}, + {INST_SETNE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, 0, 0}, + {INST_SETGT_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, 0, 0}, + {INST_SETGE_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, 0, 0}, + {INST_KILLGT_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT, 0, 0}, + {INST_KILLGE_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT, 0, 0}, + {INST_PRED_SETE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT, 0, 0}, + {INST_PRED_SETGT_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT, 0, 0}, + {INST_PRED_SETGE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT, 0, 0}, + {INST_PRED_SETNE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT, 0, 0}, + {INST_KILLE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT, 0, 0}, + {INST_KILLGT_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT, 0, 0}, + {INST_KILLGE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT, 0, 0}, + {INST_KILLNE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT, 0, 0}, + {INST_PRED_SETE_PUSH_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT, 0, 0}, + {INST_PRED_SETGT_PUSH_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT, 0, 0}, + {INST_PRED_SETGE_PUSH_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT, 0, 0}, + {INST_PRED_SETNE_PUSH_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT, 0, 0}, + {INST_PRED_SETLT_PUSH_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT, 0, 0}, + {INST_PRED_SETLE_PUSH_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT, 0, 0}, + {INST_DOT4, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, 0, 0}, + {INST_DOT4_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE, 0, 0}, + {INST_CUBE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE, 0, 0}, + {INST_MAX4, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4, 0, 0}, + {INST_MOVA_GPR_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT, 0, 0}, + {INST_EXP_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, 1, 0}, + {INST_LOG_CLAMPED, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED, 1, 0}, + {INST_LOG_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, 1, 0}, + {INST_RECIP_CLAMPED, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, 1, 0}, + {INST_RECIP_FF, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF, 1, 0}, + {INST_RECIP_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, 1, 0}, + {INST_RECIPSQRT_CLAMPED, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED, 1, 0}, + {INST_RECIPSQRT_FF, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF, 1, 0}, + {INST_RECIPSQRT_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, 1, 0}, + {INST_SQRT_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE, 1, 0}, + {INST_FLT_TO_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, 1, 0}, + {INST_INT_TO_FLT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, 1, 0}, + {INST_UINT_TO_FLT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, 1, 0}, + {INST_SIN, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, 1, 0}, + {INST_COS, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, 1, 0}, + {INST_ASHR_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, 1, 0}, + {INST_LSHR_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, 1, 0}, + {INST_LSHL_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, 1, 0}, + {INST_MULLO_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT, 1, 0}, + {INST_MULHI_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT, 1, 0}, + {INST_MULLO_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, 1, 0}, + {INST_MULHI_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT, 1, 0}, + {INST_RECIP_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT, 1, 0}, + {INST_RECIP_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT, 1, 0}, + {INST_FLT_TO_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, 1, 0}, + {INST_MUL_LIT, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT, 1, 1}, + {INST_MUL_LIT_M2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M2, 1, 1}, + {INST_MUL_LIT_M4, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M4, 1, 1}, + {INST_MUL_LIT_D2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_D2, 1, 1}, + {INST_MULADD, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, 0, 1}, + {INST_MULADD_M2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_M2, 0, 1}, + {INST_MULADD_M4, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_M4, 0, 1}, + {INST_MULADD_D2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_D2, 0, 1}, + {INST_MULADD_IEEE, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE, 0, 1}, + {INST_MULADD_IEEE_M2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE_M2, 0, 1}, + {INST_MULADD_IEEE_M4, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE_M4, 0, 1}, + {INST_MULADD_IEEE_D2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE_D2, 0, 1}, + {INST_CNDE, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE, 0, 1}, + {INST_CNDGT, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT, 0, 1}, + {INST_CNDGE, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE, 0, 1}, + {INST_CNDE_INT, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT, 0, 1}, + {INST_CNDGT_INT, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT_INT, 0, 1}, + {INST_CNDGE_INT, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT, 0, 1}, +}; + +struct r600_alu_instruction r600_alu_instruction[C_OPCODE_LAST] = { + {C_OPCODE_NOP, INST_NOP}, + {C_OPCODE_MOV, INST_MOV}, + {C_OPCODE_LIT, INST_NOP}, + {C_OPCODE_RCP, INST_RECIP_IEEE}, + {C_OPCODE_RSQ, INST_RECIPSQRT_IEEE}, + {C_OPCODE_EXP, INST_EXP_IEEE}, + {C_OPCODE_LOG, INST_LOG_IEEE}, + {C_OPCODE_MUL, INST_MUL}, + {C_OPCODE_ADD, INST_ADD}, + {C_OPCODE_DP3, INST_DOT4}, + {C_OPCODE_DP4, INST_DOT4}, + {C_OPCODE_DST, INST_NOP}, + {C_OPCODE_MIN, INST_MIN}, + {C_OPCODE_MAX, INST_MAX}, + {C_OPCODE_SLT, INST_NOP}, + {C_OPCODE_SGE, INST_NOP}, + {C_OPCODE_MAD, INST_MULADD}, + {C_OPCODE_SUB, INST_COUNT}, + {C_OPCODE_LRP, INST_NOP}, + {C_OPCODE_CND, INST_NOP}, + {20, INST_NOP}, + {C_OPCODE_DP2A, INST_NOP}, + {22, INST_NOP}, + {23, INST_NOP}, + {C_OPCODE_FRC, INST_NOP}, + {C_OPCODE_CLAMP, INST_NOP}, + {C_OPCODE_FLR, INST_NOP}, + {C_OPCODE_ROUND, INST_NOP}, + {C_OPCODE_EX2, INST_NOP}, + {C_OPCODE_LG2, INST_NOP}, + {C_OPCODE_POW, INST_NOP}, + {C_OPCODE_XPD, INST_NOP}, + {32, INST_NOP}, + {C_OPCODE_ABS, INST_COUNT}, + {C_OPCODE_RCC, INST_NOP}, + {C_OPCODE_DPH, INST_NOP}, + {C_OPCODE_COS, INST_COS}, + {C_OPCODE_DDX, INST_NOP}, + {C_OPCODE_DDY, INST_NOP}, + {C_OPCODE_KILP, INST_NOP}, + {C_OPCODE_PK2H, INST_NOP}, + {C_OPCODE_PK2US, INST_NOP}, + {C_OPCODE_PK4B, INST_NOP}, + {C_OPCODE_PK4UB, INST_NOP}, + {C_OPCODE_RFL, INST_NOP}, + {C_OPCODE_SEQ, INST_NOP}, + {C_OPCODE_SFL, INST_NOP}, + {C_OPCODE_SGT, INST_NOP}, + {C_OPCODE_SIN, INST_SIN}, + {C_OPCODE_SLE, INST_NOP}, + {C_OPCODE_SNE, INST_NOP}, + {C_OPCODE_STR, INST_NOP}, + {C_OPCODE_TEX, INST_NOP}, + {C_OPCODE_TXD, INST_NOP}, + {C_OPCODE_TXP, INST_NOP}, + {C_OPCODE_UP2H, INST_NOP}, + {C_OPCODE_UP2US, INST_NOP}, + {C_OPCODE_UP4B, INST_NOP}, + {C_OPCODE_UP4UB, INST_NOP}, + {C_OPCODE_X2D, INST_NOP}, + {C_OPCODE_ARA, INST_NOP}, + {C_OPCODE_ARR, INST_NOP}, + {C_OPCODE_BRA, INST_NOP}, + {C_OPCODE_CAL, INST_NOP}, + {C_OPCODE_RET, INST_NOP}, + {C_OPCODE_SSG, INST_NOP}, + {C_OPCODE_CMP, INST_NOP}, + {C_OPCODE_SCS, INST_NOP}, + {C_OPCODE_TXB, INST_NOP}, + {C_OPCODE_NRM, INST_NOP}, + {C_OPCODE_DIV, INST_NOP}, + {C_OPCODE_DP2, INST_NOP}, + {C_OPCODE_TXL, INST_NOP}, + {C_OPCODE_BRK, INST_NOP}, + {C_OPCODE_IF, INST_NOP}, + {C_OPCODE_BGNFOR, INST_NOP}, + {C_OPCODE_REP, INST_NOP}, + {C_OPCODE_ELSE, INST_NOP}, + {C_OPCODE_ENDIF, INST_NOP}, + {C_OPCODE_ENDFOR, INST_NOP}, + {C_OPCODE_ENDREP, INST_NOP}, + {C_OPCODE_PUSHA, INST_NOP}, + {C_OPCODE_POPA, INST_NOP}, + {C_OPCODE_CEIL, INST_NOP}, + {C_OPCODE_I2F, INST_NOP}, + {C_OPCODE_NOT, INST_NOP}, + {C_OPCODE_TRUNC, INST_NOP}, + {C_OPCODE_SHL, INST_NOP}, + {88, INST_NOP}, + {C_OPCODE_AND, INST_NOP}, + {C_OPCODE_OR, INST_NOP}, + {C_OPCODE_MOD, INST_NOP}, + {C_OPCODE_XOR, INST_NOP}, + {C_OPCODE_SAD, INST_NOP}, + {C_OPCODE_TXF, INST_NOP}, + {C_OPCODE_TXQ, INST_NOP}, + {C_OPCODE_CONT, INST_NOP}, + {C_OPCODE_EMIT, INST_NOP}, + {C_OPCODE_ENDPRIM, INST_NOP}, + {C_OPCODE_BGNLOOP, INST_NOP}, + {C_OPCODE_BGNSUB, INST_NOP}, + {C_OPCODE_ENDLOOP, INST_NOP}, + {C_OPCODE_ENDSUB, INST_NOP}, + {103, INST_NOP}, + {104, INST_NOP}, + {105, INST_NOP}, + {106, INST_NOP}, + {107, INST_NOP}, + {108, INST_NOP}, + {109, INST_NOP}, + {110, INST_NOP}, + {111, INST_NOP}, + {C_OPCODE_NRM4, INST_NOP}, + {C_OPCODE_CALLNZ, INST_NOP}, + {C_OPCODE_IFC, INST_NOP}, + {C_OPCODE_BREAKC, INST_NOP}, + {C_OPCODE_KIL, INST_NOP}, + {C_OPCODE_END, INST_NOP}, + {118, INST_NOP}, + {C_OPCODE_F2I, INST_NOP}, + {C_OPCODE_IDIV, INST_NOP}, + {C_OPCODE_IMAX, INST_NOP}, + {C_OPCODE_IMIN, INST_NOP}, + {C_OPCODE_INEG, INST_NOP}, + {C_OPCODE_ISGE, INST_NOP}, + {C_OPCODE_ISHR, INST_NOP}, + {C_OPCODE_ISLT, INST_NOP}, + {C_OPCODE_F2U, INST_NOP}, + {C_OPCODE_U2F, INST_NOP}, + {C_OPCODE_UADD, INST_NOP}, + {C_OPCODE_UDIV, INST_NOP}, + {C_OPCODE_UMAD, INST_NOP}, + {C_OPCODE_UMAX, INST_NOP}, + {C_OPCODE_UMIN, INST_NOP}, + {C_OPCODE_UMOD, INST_NOP}, + {C_OPCODE_UMUL, INST_NOP}, + {C_OPCODE_USEQ, INST_NOP}, + {C_OPCODE_USGE, INST_NOP}, + {C_OPCODE_USHR, INST_NOP}, + {C_OPCODE_USLT, INST_NOP}, + {C_OPCODE_USNE, INST_NOP}, + {C_OPCODE_SWITCH, INST_NOP}, + {C_OPCODE_CASE, INST_NOP}, + {C_OPCODE_DEFAULT, INST_NOP}, + {C_OPCODE_ENDSWITCH, INST_NOP}, + {C_OPCODE_VFETCH, INST_NOP}, + {C_OPCODE_ENTRY, INST_NOP}, + {C_OPCODE_ARL, INST_NOP}, +}; diff --git a/src/gallium/drivers/r600/r600_compiler_r700.c b/src/gallium/drivers/r600/r600_compiler_r700.c new file mode 100644 index 00000000000..809a57ae5c5 --- /dev/null +++ b/src/gallium/drivers/r600/r600_compiler_r700.c @@ -0,0 +1,214 @@ +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <stdio.h> +#include <errno.h> +#include "r600_context.h" +#include "r700_sq.h" + +static int r700_shader_cf_node_bytecode(struct r600_shader *rshader, + struct r600_shader_node *rnode, + unsigned *cid) +{ + unsigned id = *cid; + + if (rnode->nfetch) { + rshader->bcode[id++] = S_SQ_CF_WORD0_ADDR(rnode->cf_addr >> 1); + rshader->bcode[id++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COUNT(rnode->nfetch - 1); + } else { + rshader->bcode[id++] = S_SQ_CF_ALU_WORD0_ADDR(rnode->cf_addr >> 1); + rshader->bcode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) | + S_SQ_CF_ALU_WORD1_BARRIER(1) | + S_SQ_CF_ALU_WORD1_COUNT(rnode->nslot - 1); + } + *cid = id; + return 0; +} + +static int r700_shader_cf_output_bytecode(struct r600_shader *rshader, + struct c_vector *v, + unsigned *cid, + unsigned end) +{ + struct r600_shader_operand out; + unsigned id = *cid; + int r; + + r = r600_shader_find_gpr(rshader, v, 0, &out); + if (r) + return r; + rshader->bcode[id + 0] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(out.sel) | + S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(3); + rshader->bcode[id + 1] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(0) | + S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(1) | + S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(2) | + S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(3) | + S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(1) | + S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE) | + S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(end); + switch (v->name) { + case C_SEMANTIC_POSITION: + rshader->bcode[id + 0] |= S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(60) | + S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS); + break; + case C_SEMANTIC_COLOR: + if (rshader->cshader.type == C_PROGRAM_TYPE_VS) { + rshader->output[rshader->noutput].gpr = out.sel; + rshader->output[rshader->noutput].sid = v->sid; + rshader->output[rshader->noutput].name = v->name; + rshader->bcode[id + 0] |= S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(rshader->noutput++) | + S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM); + } else { + rshader->bcode[id + 0] |= S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(0) | + S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL); + } + break; + case C_SEMANTIC_GENERIC: + rshader->output[rshader->noutput].gpr = out.sel; + rshader->output[rshader->noutput].sid = v->sid; + rshader->output[rshader->noutput].name = v->name; + rshader->bcode[id + 0] |= S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(rshader->noutput++) | + S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM); + break; + default: + fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__); + return -EINVAL; + } + *cid = id + 2; + return 0; +} + +static int r700_shader_alu_bytecode(struct r600_shader *rshader, + struct r600_shader_node *rnode, + struct r600_shader_inst *alu, + unsigned *cid) +{ + unsigned id = *cid; + + /* don't replace gpr by pv or ps for destination register */ + if (alu->is_op3) { + rshader->bcode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | + S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | + S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | + S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | + S_SQ_ALU_WORD0_LAST(alu->last); + rshader->bcode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | + S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | + S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) | + S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) | + S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) | + S_SQ_ALU_WORD1_OP3_ALU_INST(alu->opcode) | + S_SQ_ALU_WORD1_BANK_SWIZZLE(0); + } else { + rshader->bcode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | + S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | + S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) | + S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | + S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | + S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) | + S_SQ_ALU_WORD0_LAST(alu->last); + rshader->bcode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | + S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | + S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | + S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | + S_SQ_ALU_WORD1_OP2_WRITE_MASK(1) | + S_SQ_ALU_WORD1_OP2_ALU_INST(alu->opcode) | + S_SQ_ALU_WORD1_BANK_SWIZZLE(0); + } + *cid = id; + return 0; +} + +int r700_shader_translate(struct r600_shader *rshader) +{ + struct c_shader *shader = &rshader->cshader; + struct r600_shader_node *rnode; + struct r600_shader_vfetch *vfetch; + struct r600_shader_alu *alu; + struct c_vector *v; + unsigned id, i, end; + int r; + + r = r600_shader_register(rshader); + if (r) { + fprintf(stderr, "%s %d register allocation failed\n", __FILE__, __LINE__); + return r; + } + r = r600_shader_translate_rec(rshader, &shader->entry); + if (r) { + fprintf(stderr, "%s %d translation failed\n", __FILE__, __LINE__); + return r; + } + r = r600_shader_legalize(rshader); + if (r) { + fprintf(stderr, "%s %d legalize failed\n", __FILE__, __LINE__); + return r; + } + r600_shader_node_place(rshader); + rshader->bcode = malloc(rshader->ndw * 4); + if (rshader->bcode == NULL) + return -ENOMEM; + c_list_for_each(rnode, &rshader->nodes) { + id = rnode->cf_addr; + c_list_for_each(vfetch, &rnode->vfetch) { + r = r600_shader_vfetch_bytecode(rshader, rnode, vfetch, &id); + if (r) + return r; + } + c_list_for_each(alu, &rnode->alu) { + for (i = 0; i < alu->nalu; i++) { + r = r700_shader_alu_bytecode(rshader, rnode, &alu->alu[i], &id); + if (r) + return r; + } + for (i = 0; i < alu->nliteral; i++) { + rshader->bcode[id++] = alu->literal[i]; + } + } + } + id = 0; + c_list_for_each(rnode, &rshader->nodes) { + r = r700_shader_cf_node_bytecode(rshader, rnode, &id); + if (r) + return r; + } + c_list_for_each(v, &rshader->cshader.files[C_FILE_OUTPUT].vectors) { + end = 0; + if (v->next == &rshader->cshader.files[C_FILE_OUTPUT].vectors) + end = 1; + r = r700_shader_cf_output_bytecode(rshader, v, &id, end); + if (r) + return r; + } + c_list_for_each(v, &rshader->cshader.files[C_FILE_INPUT].vectors) { + rshader->input[rshader->ninput].gpr = rshader->ninput; + rshader->input[rshader->ninput].sid = v->sid; + rshader->input[rshader->ninput].name = v->name; + rshader->ninput++; + } + return 0; +} diff --git a/src/gallium/drivers/r600/r600_compiler_tgsi.c b/src/gallium/drivers/r600/r600_compiler_tgsi.c new file mode 100644 index 00000000000..563ca063afe --- /dev/null +++ b/src/gallium/drivers/r600/r600_compiler_tgsi.c @@ -0,0 +1,703 @@ +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <stdio.h> +#include <errno.h> +#include <tgsi/tgsi_parse.h> +#include <tgsi/tgsi_scan.h> +#include "r600_shader.h" +#include "r600_context.h" + +struct tgsi_shader { + struct c_vector **v[TGSI_FILE_COUNT]; + struct tgsi_shader_info info; + struct tgsi_parse_context parser; + const struct tgsi_token *tokens; + struct c_shader *shader; + struct c_node *node; +}; + +static unsigned tgsi_file_to_c_file(unsigned file); +static unsigned tgsi_sname_to_c_sname(unsigned sname); +static int tgsi_opcode_to_c_opcode(unsigned opcode, unsigned *copcode); + +static int tgsi_shader_init(struct tgsi_shader *ts, + const struct tgsi_token *tokens, + struct c_shader *shader) +{ + int i; + + ts->shader = shader; + ts->tokens = tokens; + tgsi_scan_shader(ts->tokens, &ts->info); + tgsi_parse_init(&ts->parser, ts->tokens); + /* initialize to NULL in case of error */ + for (i = 0; i < C_FILE_COUNT; i++) { + ts->v[i] = NULL; + } + for (i = 0; i < TGSI_FILE_COUNT; i++) { + if (ts->info.file_count[i] > 0) { + ts->v[i] = calloc(ts->info.file_count[i], sizeof(void*)); + if (ts->v[i] == NULL) { + fprintf(stderr, "%s:%d unsupported %d %d\n", __func__, __LINE__, i, ts->info.file_count[i]); + return -ENOMEM; + } + } + } + return 0; +} + +static void tgsi_shader_destroy(struct tgsi_shader *ts) +{ + int i; + + for (i = 0; i < TGSI_FILE_COUNT; i++) { + free(ts->v[i]); + } + tgsi_parse_free(&ts->parser); +} + +static int ntransform_declaration(struct tgsi_shader *ts) +{ + struct tgsi_full_declaration *fd = &ts->parser.FullToken.FullDeclaration; + struct c_vector *v; + unsigned file; + unsigned name; + int sid; + int i; + + if (fd->Declaration.Dimension) { + fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__); + return -EINVAL; + } + for (i = fd->Range.First ; i <= fd->Range.Last; i++) { + sid = i; + name = C_SEMANTIC_GENERIC; + file = tgsi_file_to_c_file(fd->Declaration.File); + if (file == TGSI_FILE_NULL) { + fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__); + return -EINVAL; + } + if (fd->Declaration.Semantic) { + name = tgsi_sname_to_c_sname(fd->Semantic.Name); + sid = fd->Semantic.Index; + } + v = c_shader_vector_new(ts->shader, file, name, sid); + if (v == NULL) { + fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__); + return -ENOMEM; + } + ts->v[fd->Declaration.File][i] = v; + } + return 0; +} + +static int ntransform_immediate(struct tgsi_shader *ts) +{ + struct tgsi_full_immediate *fd = &ts->parser.FullToken.FullImmediate; + struct c_vector *v; + unsigned file; + unsigned name; + + if (fd->Immediate.DataType != TGSI_IMM_FLOAT32) { + fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__); + return -EINVAL; + } + name = C_SEMANTIC_GENERIC; + file = C_FILE_IMMEDIATE; + v = c_shader_vector_new(ts->shader, file, name, 0); + if (v == NULL) { + fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__); + return -ENOMEM; + } + v->channel[0]->value = fd->u[0].Uint; + v->channel[1]->value = fd->u[1].Uint; + v->channel[2]->value = fd->u[2].Uint; + v->channel[3]->value = fd->u[3].Uint; + ts->v[TGSI_FILE_IMMEDIATE][0] = v; + return 0; +} + +static int ntransform_instruction(struct tgsi_shader *ts) +{ + struct tgsi_full_instruction *fi = &ts->parser.FullToken.FullInstruction; + struct c_shader *shader = ts->shader; + struct c_instruction instruction; + unsigned opcode; + int i, r; + + if (fi->Instruction.NumDstRegs > 1) { + fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__); + return -EINVAL; + } + if (fi->Instruction.Saturate) { + fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__); + return -EINVAL; + } + if (fi->Instruction.Predicate) { + fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__); + return -EINVAL; + } + if (fi->Instruction.Label) { + fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__); + return -EINVAL; + } + if (fi->Instruction.Texture) { + fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__); + return -EINVAL; + } + for (i = 0; i < fi->Instruction.NumSrcRegs; i++) { + if (fi->Src[i].Register.Indirect || + fi->Src[i].Register.Dimension || + fi->Src[i].Register.Absolute) { + fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__); + return -EINVAL; + } + } + for (i = 0; i < fi->Instruction.NumDstRegs; i++) { + if (fi->Dst[i].Register.Indirect || fi->Dst[i].Register.Dimension) { + fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__); + return -EINVAL; + } + } + r = tgsi_opcode_to_c_opcode(fi->Instruction.Opcode, &opcode); + if (r) { + fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__); + return r; + } + if (opcode == C_OPCODE_END) { + return c_node_cfg_link(ts->node, &shader->end); + } + /* FIXME add flow instruction handling */ + memset(&instruction, 0, sizeof(struct c_instruction)); + instruction.opcode = opcode; + instruction.ninput = fi->Instruction.NumSrcRegs; + instruction.write_mask = fi->Dst[0].Register.WriteMask; + for (i = 0; i < fi->Instruction.NumSrcRegs; i++) { + instruction.input[i].vector = ts->v[fi->Src[i].Register.File][fi->Src[i].Register.Index]; + instruction.input[i].swizzle[0] = fi->Src[i].Register.SwizzleX; + instruction.input[i].swizzle[1] = fi->Src[i].Register.SwizzleY; + instruction.input[i].swizzle[2] = fi->Src[i].Register.SwizzleZ; + instruction.input[i].swizzle[3] = fi->Src[i].Register.SwizzleW; + } + instruction.output.vector = ts->v[fi->Dst[0].Register.File][fi->Dst[0].Register.Index]; + instruction.output.swizzle[0] = (fi->Dst[0].Register.WriteMask & 0x1) ? C_SWIZZLE_X : C_SWIZZLE_D; + instruction.output.swizzle[1] = (fi->Dst[0].Register.WriteMask & 0x2) ? C_SWIZZLE_Y : C_SWIZZLE_D; + instruction.output.swizzle[2] = (fi->Dst[0].Register.WriteMask & 0x4) ? C_SWIZZLE_Z : C_SWIZZLE_D; + instruction.output.swizzle[3] = (fi->Dst[0].Register.WriteMask & 0x8) ? C_SWIZZLE_W : C_SWIZZLE_D; + return c_node_add_new_instruction(ts->node, &instruction); +} + +int c_shader_from_tgsi(struct c_shader *shader, unsigned type, + const struct tgsi_token *tokens) +{ + struct tgsi_shader ts; + int r = 0; + + c_shader_init(shader, type); + r = tgsi_shader_init(&ts, tokens, shader); + if (r) + goto out_err; + ts.shader = shader; + ts.node = &shader->entry; + while (!tgsi_parse_end_of_tokens(&ts.parser)) { + tgsi_parse_token(&ts.parser); + switch (ts.parser.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + r = ntransform_immediate(&ts); + if (r) + goto out_err; + break; + case TGSI_TOKEN_TYPE_DECLARATION: + r = ntransform_declaration(&ts); + if (r) + goto out_err; + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + r = ntransform_instruction(&ts); + if (r) + goto out_err; + break; + default: + r = -EINVAL; + goto out_err; + } + } + tgsi_shader_destroy(&ts); + return 0; +out_err: + c_shader_destroy(shader); + tgsi_shader_destroy(&ts); + return r; +} + +static unsigned tgsi_file_to_c_file(unsigned file) +{ + switch (file) { + case TGSI_FILE_CONSTANT: + return C_FILE_CONSTANT; + case TGSI_FILE_INPUT: + return C_FILE_INPUT; + case TGSI_FILE_OUTPUT: + return C_FILE_OUTPUT; + case TGSI_FILE_TEMPORARY: + return C_FILE_TEMPORARY; + case TGSI_FILE_SAMPLER: + return C_FILE_SAMPLER; + case TGSI_FILE_ADDRESS: + return C_FILE_ADDRESS; + case TGSI_FILE_IMMEDIATE: + return C_FILE_IMMEDIATE; + case TGSI_FILE_PREDICATE: + return C_FILE_PREDICATE; + case TGSI_FILE_SYSTEM_VALUE: + return C_FILE_SYSTEM_VALUE; + case TGSI_FILE_NULL: + return C_FILE_NULL; + default: + fprintf(stderr, "%s:%d unsupported file %d\n", __func__, __LINE__, file); + return C_FILE_NULL; + } +} + +static unsigned tgsi_sname_to_c_sname(unsigned sname) +{ + switch (sname) { + case TGSI_SEMANTIC_POSITION: + return C_SEMANTIC_POSITION; + case TGSI_SEMANTIC_COLOR: + return C_SEMANTIC_COLOR; + case TGSI_SEMANTIC_BCOLOR: + return C_SEMANTIC_BCOLOR; + case TGSI_SEMANTIC_FOG: + return C_SEMANTIC_FOG; + case TGSI_SEMANTIC_PSIZE: + return C_SEMANTIC_PSIZE; + case TGSI_SEMANTIC_GENERIC: + return C_SEMANTIC_GENERIC; + case TGSI_SEMANTIC_NORMAL: + return C_SEMANTIC_NORMAL; + case TGSI_SEMANTIC_FACE: + return C_SEMANTIC_FACE; + case TGSI_SEMANTIC_EDGEFLAG: + return C_SEMANTIC_EDGEFLAG; + case TGSI_SEMANTIC_PRIMID: + return C_SEMANTIC_PRIMID; + case TGSI_SEMANTIC_INSTANCEID: + return C_SEMANTIC_INSTANCEID; + default: + return C_SEMANTIC_GENERIC; + } +} + +static int tgsi_opcode_to_c_opcode(unsigned opcode, unsigned *copcode) +{ + switch (opcode) { + case TGSI_OPCODE_MOV: + *copcode = C_OPCODE_MOV; + return 0; + case TGSI_OPCODE_MUL: + *copcode = C_OPCODE_MUL; + return 0; + case TGSI_OPCODE_MAD: + *copcode = C_OPCODE_MAD; + return 0; + case TGSI_OPCODE_END: + *copcode = C_OPCODE_END; + return 0; + case TGSI_OPCODE_ARL: + *copcode = C_OPCODE_ARL; + return 0; + case TGSI_OPCODE_LIT: + *copcode = C_OPCODE_LIT; + return 0; + case TGSI_OPCODE_RCP: + *copcode = C_OPCODE_RCP; + return 0; + case TGSI_OPCODE_RSQ: + *copcode = C_OPCODE_RSQ; + return 0; + case TGSI_OPCODE_EXP: + *copcode = C_OPCODE_EXP; + return 0; + case TGSI_OPCODE_LOG: + *copcode = C_OPCODE_LOG; + return 0; + case TGSI_OPCODE_ADD: + *copcode = C_OPCODE_ADD; + return 0; + case TGSI_OPCODE_DP3: + *copcode = C_OPCODE_DP3; + return 0; + case TGSI_OPCODE_DP4: + *copcode = C_OPCODE_DP4; + return 0; + case TGSI_OPCODE_DST: + *copcode = C_OPCODE_DST; + return 0; + case TGSI_OPCODE_MIN: + *copcode = C_OPCODE_MIN; + return 0; + case TGSI_OPCODE_MAX: + *copcode = C_OPCODE_MAX; + return 0; + case TGSI_OPCODE_SLT: + *copcode = C_OPCODE_SLT; + return 0; + case TGSI_OPCODE_SGE: + *copcode = C_OPCODE_SGE; + return 0; + case TGSI_OPCODE_SUB: + *copcode = C_OPCODE_SUB; + return 0; + case TGSI_OPCODE_LRP: + *copcode = C_OPCODE_LRP; + return 0; + case TGSI_OPCODE_CND: + *copcode = C_OPCODE_CND; + return 0; + case TGSI_OPCODE_DP2A: + *copcode = C_OPCODE_DP2A; + return 0; + case TGSI_OPCODE_FRC: + *copcode = C_OPCODE_FRC; + return 0; + case TGSI_OPCODE_CLAMP: + *copcode = C_OPCODE_CLAMP; + return 0; + case TGSI_OPCODE_FLR: + *copcode = C_OPCODE_FLR; + return 0; + case TGSI_OPCODE_ROUND: + *copcode = C_OPCODE_ROUND; + return 0; + case TGSI_OPCODE_EX2: + *copcode = C_OPCODE_EX2; + return 0; + case TGSI_OPCODE_LG2: + *copcode = C_OPCODE_LG2; + return 0; + case TGSI_OPCODE_POW: + *copcode = C_OPCODE_POW; + return 0; + case TGSI_OPCODE_XPD: + *copcode = C_OPCODE_XPD; + return 0; + case TGSI_OPCODE_ABS: + *copcode = C_OPCODE_ABS; + return 0; + case TGSI_OPCODE_RCC: + *copcode = C_OPCODE_RCC; + return 0; + case TGSI_OPCODE_DPH: + *copcode = C_OPCODE_DPH; + return 0; + case TGSI_OPCODE_COS: + *copcode = C_OPCODE_COS; + return 0; + case TGSI_OPCODE_DDX: + *copcode = C_OPCODE_DDX; + return 0; + case TGSI_OPCODE_DDY: + *copcode = C_OPCODE_DDY; + return 0; + case TGSI_OPCODE_KILP: + *copcode = C_OPCODE_KILP; + return 0; + case TGSI_OPCODE_PK2H: + *copcode = C_OPCODE_PK2H; + return 0; + case TGSI_OPCODE_PK2US: + *copcode = C_OPCODE_PK2US; + return 0; + case TGSI_OPCODE_PK4B: + *copcode = C_OPCODE_PK4B; + return 0; + case TGSI_OPCODE_PK4UB: + *copcode = C_OPCODE_PK4UB; + return 0; + case TGSI_OPCODE_RFL: + *copcode = C_OPCODE_RFL; + return 0; + case TGSI_OPCODE_SEQ: + *copcode = C_OPCODE_SEQ; + return 0; + case TGSI_OPCODE_SFL: + *copcode = C_OPCODE_SFL; + return 0; + case TGSI_OPCODE_SGT: + *copcode = C_OPCODE_SGT; + return 0; + case TGSI_OPCODE_SIN: + *copcode = C_OPCODE_SIN; + return 0; + case TGSI_OPCODE_SLE: + *copcode = C_OPCODE_SLE; + return 0; + case TGSI_OPCODE_SNE: + *copcode = C_OPCODE_SNE; + return 0; + case TGSI_OPCODE_STR: + *copcode = C_OPCODE_STR; + return 0; + case TGSI_OPCODE_TEX: + *copcode = C_OPCODE_TEX; + return 0; + case TGSI_OPCODE_TXD: + *copcode = C_OPCODE_TXD; + return 0; + case TGSI_OPCODE_TXP: + *copcode = C_OPCODE_TXP; + return 0; + case TGSI_OPCODE_UP2H: + *copcode = C_OPCODE_UP2H; + return 0; + case TGSI_OPCODE_UP2US: + *copcode = C_OPCODE_UP2US; + return 0; + case TGSI_OPCODE_UP4B: + *copcode = C_OPCODE_UP4B; + return 0; + case TGSI_OPCODE_UP4UB: + *copcode = C_OPCODE_UP4UB; + return 0; + case TGSI_OPCODE_X2D: + *copcode = C_OPCODE_X2D; + return 0; + case TGSI_OPCODE_ARA: + *copcode = C_OPCODE_ARA; + return 0; + case TGSI_OPCODE_ARR: + *copcode = C_OPCODE_ARR; + return 0; + case TGSI_OPCODE_BRA: + *copcode = C_OPCODE_BRA; + return 0; + case TGSI_OPCODE_CAL: + *copcode = C_OPCODE_CAL; + return 0; + case TGSI_OPCODE_RET: + *copcode = C_OPCODE_RET; + return 0; + case TGSI_OPCODE_SSG: + *copcode = C_OPCODE_SSG; + return 0; + case TGSI_OPCODE_CMP: + *copcode = C_OPCODE_CMP; + return 0; + case TGSI_OPCODE_SCS: + *copcode = C_OPCODE_SCS; + return 0; + case TGSI_OPCODE_TXB: + *copcode = C_OPCODE_TXB; + return 0; + case TGSI_OPCODE_NRM: + *copcode = C_OPCODE_NRM; + return 0; + case TGSI_OPCODE_DIV: + *copcode = C_OPCODE_DIV; + return 0; + case TGSI_OPCODE_DP2: + *copcode = C_OPCODE_DP2; + return 0; + case TGSI_OPCODE_TXL: + *copcode = C_OPCODE_TXL; + return 0; + case TGSI_OPCODE_BRK: + *copcode = C_OPCODE_BRK; + return 0; + case TGSI_OPCODE_IF: + *copcode = C_OPCODE_IF; + return 0; + case TGSI_OPCODE_ELSE: + *copcode = C_OPCODE_ELSE; + return 0; + case TGSI_OPCODE_ENDIF: + *copcode = C_OPCODE_ENDIF; + return 0; + case TGSI_OPCODE_PUSHA: + *copcode = C_OPCODE_PUSHA; + return 0; + case TGSI_OPCODE_POPA: + *copcode = C_OPCODE_POPA; + return 0; + case TGSI_OPCODE_CEIL: + *copcode = C_OPCODE_CEIL; + return 0; + case TGSI_OPCODE_I2F: + *copcode = C_OPCODE_I2F; + return 0; + case TGSI_OPCODE_NOT: + *copcode = C_OPCODE_NOT; + return 0; + case TGSI_OPCODE_TRUNC: + *copcode = C_OPCODE_TRUNC; + return 0; + case TGSI_OPCODE_SHL: + *copcode = C_OPCODE_SHL; + return 0; + case TGSI_OPCODE_AND: + *copcode = C_OPCODE_AND; + return 0; + case TGSI_OPCODE_OR: + *copcode = C_OPCODE_OR; + return 0; + case TGSI_OPCODE_MOD: + *copcode = C_OPCODE_MOD; + return 0; + case TGSI_OPCODE_XOR: + *copcode = C_OPCODE_XOR; + return 0; + case TGSI_OPCODE_SAD: + *copcode = C_OPCODE_SAD; + return 0; + case TGSI_OPCODE_TXF: + *copcode = C_OPCODE_TXF; + return 0; + case TGSI_OPCODE_TXQ: + *copcode = C_OPCODE_TXQ; + return 0; + case TGSI_OPCODE_CONT: + *copcode = C_OPCODE_CONT; + return 0; + case TGSI_OPCODE_EMIT: + *copcode = C_OPCODE_EMIT; + return 0; + case TGSI_OPCODE_ENDPRIM: + *copcode = C_OPCODE_ENDPRIM; + return 0; + case TGSI_OPCODE_BGNLOOP: + *copcode = C_OPCODE_BGNLOOP; + return 0; + case TGSI_OPCODE_BGNSUB: + *copcode = C_OPCODE_BGNSUB; + return 0; + case TGSI_OPCODE_ENDLOOP: + *copcode = C_OPCODE_ENDLOOP; + return 0; + case TGSI_OPCODE_ENDSUB: + *copcode = C_OPCODE_ENDSUB; + return 0; + case TGSI_OPCODE_NOP: + *copcode = C_OPCODE_NOP; + return 0; + case TGSI_OPCODE_NRM4: + *copcode = C_OPCODE_NRM4; + return 0; + case TGSI_OPCODE_CALLNZ: + *copcode = C_OPCODE_CALLNZ; + return 0; + case TGSI_OPCODE_IFC: + *copcode = C_OPCODE_IFC; + return 0; + case TGSI_OPCODE_BREAKC: + *copcode = C_OPCODE_BREAKC; + return 0; + case TGSI_OPCODE_KIL: + *copcode = C_OPCODE_KIL; + return 0; + case TGSI_OPCODE_F2I: + *copcode = C_OPCODE_F2I; + return 0; + case TGSI_OPCODE_IDIV: + *copcode = C_OPCODE_IDIV; + return 0; + case TGSI_OPCODE_IMAX: + *copcode = C_OPCODE_IMAX; + return 0; + case TGSI_OPCODE_IMIN: + *copcode = C_OPCODE_IMIN; + return 0; + case TGSI_OPCODE_INEG: + *copcode = C_OPCODE_INEG; + return 0; + case TGSI_OPCODE_ISGE: + *copcode = C_OPCODE_ISGE; + return 0; + case TGSI_OPCODE_ISHR: + *copcode = C_OPCODE_ISHR; + return 0; + case TGSI_OPCODE_ISLT: + *copcode = C_OPCODE_ISLT; + return 0; + case TGSI_OPCODE_F2U: + *copcode = C_OPCODE_F2U; + return 0; + case TGSI_OPCODE_U2F: + *copcode = C_OPCODE_U2F; + return 0; + case TGSI_OPCODE_UADD: + *copcode = C_OPCODE_UADD; + return 0; + case TGSI_OPCODE_UDIV: + *copcode = C_OPCODE_UDIV; + return 0; + case TGSI_OPCODE_UMAD: + *copcode = C_OPCODE_UMAD; + return 0; + case TGSI_OPCODE_UMAX: + *copcode = C_OPCODE_UMAX; + return 0; + case TGSI_OPCODE_UMIN: + *copcode = C_OPCODE_UMIN; + return 0; + case TGSI_OPCODE_UMOD: + *copcode = C_OPCODE_UMOD; + return 0; + case TGSI_OPCODE_UMUL: + *copcode = C_OPCODE_UMUL; + return 0; + case TGSI_OPCODE_USEQ: + *copcode = C_OPCODE_USEQ; + return 0; + case TGSI_OPCODE_USGE: + *copcode = C_OPCODE_USGE; + return 0; + case TGSI_OPCODE_USHR: + *copcode = C_OPCODE_USHR; + return 0; + case TGSI_OPCODE_USLT: + *copcode = C_OPCODE_USLT; + return 0; + case TGSI_OPCODE_USNE: + *copcode = C_OPCODE_USNE; + return 0; + case TGSI_OPCODE_SWITCH: + *copcode = C_OPCODE_SWITCH; + return 0; + case TGSI_OPCODE_CASE: + *copcode = C_OPCODE_CASE; + return 0; + case TGSI_OPCODE_DEFAULT: + *copcode = C_OPCODE_DEFAULT; + return 0; + case TGSI_OPCODE_ENDSWITCH: + *copcode = C_OPCODE_ENDSWITCH; + return 0; + default: + fprintf(stderr, "%s:%d unsupported opcode %d\n", __func__, __LINE__, opcode); + return -EINVAL; + } +} diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c new file mode 100644 index 00000000000..d819c141485 --- /dev/null +++ b/src/gallium/drivers/r600/r600_context.c @@ -0,0 +1,152 @@ +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jerome Glisse + * Corbin Simpson + */ +#include <stdio.h> +#include <util/u_inlines.h> +#include <util/u_format.h> +#include <util/u_memory.h> +#include <util/u_blitter.h> +#include "r600_resource.h" +#include "r600_screen.h" +#include "r600_context.h" + +static void r600_destroy_context(struct pipe_context *context) +{ + struct r600_context *rctx = (struct r600_context*)context; + + FREE(rctx); +} + +static void r600_flush(struct pipe_context *ctx, unsigned flags, + struct pipe_fence_handle **fence) +{ + struct r600_context *rctx = (struct r600_context*)ctx; + struct r600_screen *rscreen = (struct r600_screen*)ctx->screen; + static int dc = 0; + + if (radeon_ctx_pm4(rctx->ctx)) + return; + /* FIXME dumping should be removed once shader support instructions + * without throwing bad code + */ + if (!dc) + radeon_ctx_dump_bof(rctx->ctx, "gallium.bof"); +#if 0 + radeon_ctx_submit(rctx->ctx); +#endif + rctx->ctx = radeon_ctx_decref(rctx->ctx); + rctx->ctx = radeon_ctx(rscreen->rw); + dc++; +} + +struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) +{ + struct r600_context *rctx = CALLOC_STRUCT(r600_context); + struct r600_screen* rscreen = r600_screen(screen); + + if (rctx == NULL) + return NULL; + rctx->context.winsys = rscreen->screen.winsys; + rctx->context.screen = screen; + rctx->context.priv = priv; + rctx->context.destroy = r600_destroy_context; + rctx->context.clear = r600_clear; + rctx->context.draw_arrays = r600_draw_arrays; + rctx->context.draw_elements = r600_draw_elements; + rctx->context.draw_range_elements = r600_draw_range_elements; + rctx->context.flush = r600_flush; + r600_init_query_functions(rctx); + r600_init_state_functions(rctx); + r600_init_context_resource_functions(rctx); + + rctx->blitter = util_blitter_create(&rctx->context); + if (rctx->blitter == NULL) { + FREE(rctx); + return NULL; + } + + rctx->cb_cntl = radeon_state(rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL); + rctx->cb_cntl->states[R600_CB_CNTL__CB_SHADER_MASK] = 0x0000000F; + rctx->cb_cntl->states[R600_CB_CNTL__CB_TARGET_MASK] = 0x0000000F; + rctx->cb_cntl->states[R600_CB_CNTL__CB_COLOR_CONTROL] = 0x00CC0000; + rctx->cb_cntl->states[R600_CB_CNTL__PA_SC_AA_CONFIG] = 0x00000000; + rctx->cb_cntl->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_MCTX] = 0x00000000; + rctx->cb_cntl->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX] = 0x00000000; + rctx->cb_cntl->states[R600_CB_CNTL__CB_CLRCMP_CONTROL] = 0x01000000; + rctx->cb_cntl->states[R600_CB_CNTL__CB_CLRCMP_SRC] = 0x00000000; + rctx->cb_cntl->states[R600_CB_CNTL__CB_CLRCMP_DST] = 0x000000FF; + rctx->cb_cntl->states[R600_CB_CNTL__CB_CLRCMP_MSK] = 0xFFFFFFFF; + rctx->cb_cntl->states[R600_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF; + radeon_state_pm4(rctx->cb_cntl); + + rctx->config = radeon_state(rscreen->rw, R600_CONFIG_TYPE, R600_CONFIG); + rctx->config->states[R600_CONFIG__SQ_CONFIG] = 0xE400000C; + rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0x403800C0; + rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0x00000000; + rctx->config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] = 0x00003090; + rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0x00800080; + rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0x00000000; + rctx->config->states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00004000; + rctx->config->states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002; + rctx->config->states[R600_CONFIG__VC_ENHANCE] = 0x00000000; + rctx->config->states[R600_CONFIG__DB_DEBUG] = 0x00000000; + rctx->config->states[R600_CONFIG__DB_WATERMARKS] = 0x00420204; + rctx->config->states[R600_CONFIG__SX_MISC] = 0x00000000; + rctx->config->states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000001; + rctx->config->states[R600_CONFIG__CB_SHADER_CONTROL] = 0x00000003; + rctx->config->states[R600_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000; + rctx->config->states[R600_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000; + rctx->config->states[R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000; + rctx->config->states[R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000; + rctx->config->states[R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000; + rctx->config->states[R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000; + rctx->config->states[R600_CONFIG__SQ_FBUF_RING_ITEMSIZE] = 0x00000000; + rctx->config->states[R600_CONFIG__SQ_REDUC_RING_ITEMSIZE] = 0x00000000; + rctx->config->states[R600_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000; + rctx->config->states[R600_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000; + rctx->config->states[R600_CONFIG__VGT_HOS_CNTL] = 0x00000000; + rctx->config->states[R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000; + rctx->config->states[R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000; + rctx->config->states[R600_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000; + rctx->config->states[R600_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000; + rctx->config->states[R600_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000; + rctx->config->states[R600_CONFIG__VGT_GROUP_DECR] = 0x00000000; + rctx->config->states[R600_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000; + rctx->config->states[R600_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000; + rctx->config->states[R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000; + rctx->config->states[R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000; + rctx->config->states[R600_CONFIG__VGT_GS_MODE] = 0x00000000; + rctx->config->states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00514000; + rctx->config->states[R600_CONFIG__VGT_STRMOUT_EN] = 0x00000000; + rctx->config->states[R600_CONFIG__VGT_REUSE_OFF] = 0x00000001; + rctx->config->states[R600_CONFIG__VGT_VTX_CNT_EN] = 0x00000000; + rctx->config->states[R600_CONFIG__VGT_STRMOUT_BUFFER_EN] = 0x00000000; + radeon_state_pm4(rctx->config); + + rctx->ctx = radeon_ctx(rscreen->rw); + rctx->draw = radeon_draw(rscreen->rw); + return &rctx->context; +} diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h new file mode 100644 index 00000000000..8e996b7d204 --- /dev/null +++ b/src/gallium/drivers/r600/r600_context.h @@ -0,0 +1,90 @@ +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef R600_CONTEXT_H +#define R600_CONTEXT_H + +#include <pipe/p_state.h> +#include <pipe/p_context.h> +#include <tgsi/tgsi_scan.h> +#include <tgsi/tgsi_parse.h> +#include <tgsi/tgsi_util.h> +#include <util/u_blitter.h> +#include "radeon.h" +#include "r600_shader.h" + +/* XXX move this to a more appropriate place */ +struct r600_vertex_elements_state +{ + unsigned count; + struct pipe_vertex_element elements[32]; +}; + +struct r600_pipe_shader { + unsigned type; + struct r600_shader shader; + struct radeon_bo *bo; + struct radeon_state *state; +}; + +struct r600_context { + struct pipe_context context; + struct radeon_ctx *ctx; + struct radeon_state *cb_cntl; + struct radeon_state *db; + struct radeon_state *config; + struct r600_pipe_shader *ps_shader; + struct r600_pipe_shader *vs_shader; + unsigned flat_shade; + unsigned nvertex_buffer; + struct r600_vertex_elements_state *vertex_elements; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct blitter_context *blitter; + struct pipe_stencil_ref stencil_ref; + struct pipe_framebuffer_state fb_state; + struct radeon_draw *draw; + struct pipe_viewport_state viewport; +}; + +void r600_draw_arrays(struct pipe_context *ctx, unsigned mode, + unsigned start, unsigned count); +void r600_draw_elements(struct pipe_context *ctx, + struct pipe_resource *index_buffer, + unsigned index_size, int index_bias, unsigned mode, + unsigned start, unsigned count); +void r600_draw_range_elements(struct pipe_context *ctx, + struct pipe_resource *index_buffer, + unsigned index_size, int index_bias, unsigned min_index, + unsigned max_index, unsigned mode, + unsigned start, unsigned count); + +void r600_init_state_functions(struct r600_context *rctx); +void r600_init_query_functions(struct r600_context* rctx); +struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv); + +void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *rpshader); +struct r600_pipe_shader *r600_pipe_shader_create(struct pipe_context *ctx, + unsigned type, + const struct tgsi_token *tokens); +int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rpshader); + +#endif diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c new file mode 100644 index 00000000000..8b6a6d96aa8 --- /dev/null +++ b/src/gallium/drivers/r600/r600_draw.c @@ -0,0 +1,217 @@ +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jerome Glisse + * Corbin Simpson + */ +#include <stdio.h> +#include <errno.h> +#include <pipe/p_screen.h> +#include <util/u_format.h> +#include <util/u_math.h> +#include <util/u_inlines.h> +#include <util/u_memory.h> +#include "r600_screen.h" +#include "r600_context.h" +#include "r600d.h" + +struct r600_draw { + struct pipe_context *ctx; + struct radeon_state *draw; + struct radeon_state *vgt; + unsigned mode; + unsigned start; + unsigned count; + unsigned index_size; + struct pipe_resource *index_buffer; +}; + +static int r600_draw_common(struct r600_draw *draw) +{ + struct r600_context *rctx = (struct r600_context*)draw->ctx; + struct r600_screen *rscreen = (struct r600_screen*)draw->ctx->screen; + struct radeon_state *vs_resource; + struct r600_buffer *rbuffer; + unsigned i, j, offset, format, prim; + u32 vgt_dma_index_type, vgt_draw_initiator; + int r; + + switch (draw->index_size) { + case 2: + vgt_draw_initiator = 0; + vgt_dma_index_type = 0; + break; + case 4: + vgt_draw_initiator = 0; + vgt_dma_index_type = 1; + break; + case 0: + vgt_draw_initiator = 2; + vgt_dma_index_type = 0; + break; + default: + fprintf(stderr, "%s %d unsupported index size %d\n", __func__, __LINE__, draw->index_size); + return -EINVAL; + } + r = r600_conv_pipe_prim(draw->mode, &prim); + if (r) + return r; + /* rebuild vertex shader if input format changed */ + r = r600_pipe_shader_update(draw->ctx, rctx->vs_shader); + if (r) + return r; + r = r600_pipe_shader_update(draw->ctx, rctx->ps_shader); + if (r) + return r; + r = radeon_draw_set(rctx->draw, rctx->vs_shader->state); + if (r) + return r; + r = radeon_draw_set(rctx->draw, rctx->ps_shader->state); + if (r) + return r; + r = radeon_draw_set(rctx->draw, rctx->cb_cntl); + if (r) + return r; + r = radeon_draw_set(rctx->draw, rctx->db); + if (r) + return r; + r = radeon_draw_set(rctx->draw, rctx->config); + if (r) + return r; + + for (i = 0 ; i < rctx->vertex_elements->count; i++) { + j = rctx->vertex_elements->elements[i].vertex_buffer_index; + rbuffer = (struct r600_buffer*)rctx->vertex_buffer[j].buffer; + offset = rctx->vertex_elements->elements[i].src_offset + rctx->vertex_buffer[j].buffer_offset; + r = r600_conv_pipe_format(rctx->vertex_elements->elements[i].src_format, &format); + if (r) + return r; + vs_resource = radeon_state(rscreen->rw, R600_VS_RESOURCE_TYPE, R600_VS_RESOURCE + i); + if (vs_resource == NULL) + return -ENOMEM; + vs_resource->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + vs_resource->nbo = 1; + vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD0] = offset; + vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD1] = rbuffer->bo->size - offset; + vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = S_038008_STRIDE(rctx->vertex_buffer[j].stride) | + S_038008_DATA_FORMAT(format); + vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = 0x00000000; + vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD4] = 0x00000000; + vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD5] = 0x00000000; + vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD6] = 0xC0000000; + vs_resource->placement[0] = RADEON_GEM_DOMAIN_GTT; + vs_resource->placement[1] = RADEON_GEM_DOMAIN_GTT; + r = radeon_draw_set_new(rctx->draw, vs_resource); + if (r) + return r; + } + /* FIXME start need to change winsys */ + draw->draw = radeon_state(rscreen->rw, R600_DRAW_TYPE, R600_DRAW); + if (draw->draw == NULL) + return -ENOMEM; + draw->draw->states[R600_DRAW__VGT_NUM_INDICES] = draw->count; + draw->draw->states[R600_DRAW__VGT_DRAW_INITIATOR] = vgt_draw_initiator; + if (draw->index_buffer) { + rbuffer = (struct r600_buffer*)draw->index_buffer; + draw->draw->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + draw->draw->placement[0] = RADEON_GEM_DOMAIN_GTT; + draw->draw->placement[1] = RADEON_GEM_DOMAIN_GTT; + draw->draw->nbo = 1; + } + r = radeon_draw_set_new(rctx->draw, draw->draw); + if (r) + return r; + draw->vgt = radeon_state(rscreen->rw, R600_VGT_TYPE, R600_VGT); + if (draw->vgt == NULL) + return -ENOMEM; + draw->vgt->states[R600_VGT__VGT_PRIMITIVE_TYPE] = prim; + draw->vgt->states[R600_VGT__VGT_MAX_VTX_INDX] = 0x00FFFFFF; + draw->vgt->states[R600_VGT__VGT_MIN_VTX_INDX] = 0x00000000; + draw->vgt->states[R600_VGT__VGT_INDX_OFFSET] = draw->start; + draw->vgt->states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_INDX] = 0x00000000; + draw->vgt->states[R600_VGT__VGT_DMA_INDEX_TYPE] = vgt_dma_index_type; + draw->vgt->states[R600_VGT__VGT_PRIMITIVEID_EN] = 0x00000000; + draw->vgt->states[R600_VGT__VGT_DMA_NUM_INSTANCES] = 0x00000001; + draw->vgt->states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_EN] = 0x00000000; + draw->vgt->states[R600_VGT__VGT_INSTANCE_STEP_RATE_0] = 0x00000000; + draw->vgt->states[R600_VGT__VGT_INSTANCE_STEP_RATE_1] = 0x00000000; + r = radeon_draw_set_new(rctx->draw, draw->vgt); + if (r) + return r; + /* FIXME */ + r = radeon_ctx_set_draw_new(rctx->ctx, rctx->draw); + if (r) + return r; + rctx->draw = radeon_draw_duplicate(rctx->draw); + return 0; +} + +void r600_draw_range_elements(struct pipe_context *ctx, + struct pipe_resource *index_buffer, + unsigned index_size, int index_bias, unsigned min_index, + unsigned max_index, unsigned mode, + unsigned start, unsigned count) +{ + struct r600_draw draw; + assert(index_bias == 0); + + draw.ctx = ctx; + draw.mode = mode; + draw.start = start; + draw.count = count; + draw.index_size = index_size; + draw.index_buffer = index_buffer; +printf("index_size %d min %d max %d start %d count %d\n", index_size, min_index, max_index, start, count); + r600_draw_common(&draw); +} + +void r600_draw_elements(struct pipe_context *ctx, + struct pipe_resource *index_buffer, + unsigned index_size, int index_bias, unsigned mode, + unsigned start, unsigned count) +{ + struct r600_draw draw; + assert(index_bias == 0); + + draw.ctx = ctx; + draw.mode = mode; + draw.start = start; + draw.count = count; + draw.index_size = index_size; + draw.index_buffer = index_buffer; + r600_draw_common(&draw); +} + +void r600_draw_arrays(struct pipe_context *ctx, unsigned mode, + unsigned start, unsigned count) +{ + struct r600_draw draw; + + draw.ctx = ctx; + draw.mode = mode; + draw.start = start; + draw.count = count; + draw.index_size = 0; + draw.index_buffer = NULL; + r600_draw_common(&draw); +} diff --git a/src/gallium/drivers/r600/r600_helper.c b/src/gallium/drivers/r600/r600_helper.c new file mode 100644 index 00000000000..9370a5355ee --- /dev/null +++ b/src/gallium/drivers/r600/r600_helper.c @@ -0,0 +1,157 @@ +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jerome Glisse + */ +#include <stdio.h> +#include <errno.h> +#include <util/u_inlines.h> +#include <util/u_format.h> +#include "r600_screen.h" +#include "r600d.h" + +int r600_conv_pipe_format(unsigned pformat, unsigned *format) +{ + switch (pformat) { + case PIPE_FORMAT_R32G32B32_FLOAT: + *format = 0x30; + return 0; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + *format = V_0280A0_COLOR_32_32_32_32_FLOAT; + return 0; + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: + case PIPE_FORMAT_R8G8B8A8_USCALED: + case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8A8_SSCALED: + *format = V_0280A0_COLOR_8_8_8_8; + return 0; + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_L16_UNORM: + case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_Z32_UNORM: + case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_R64_FLOAT: + case PIPE_FORMAT_R64G64_FLOAT: + case PIPE_FORMAT_R64G64B64_FLOAT: + case PIPE_FORMAT_R64G64B64A64_FLOAT: + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R32_UNORM: + case PIPE_FORMAT_R32G32_UNORM: + case PIPE_FORMAT_R32G32B32_UNORM: + case PIPE_FORMAT_R32G32B32A32_UNORM: + case PIPE_FORMAT_R32_USCALED: + case PIPE_FORMAT_R32G32_USCALED: + case PIPE_FORMAT_R32G32B32_USCALED: + case PIPE_FORMAT_R32G32B32A32_USCALED: + case PIPE_FORMAT_R32_SNORM: + case PIPE_FORMAT_R32G32_SNORM: + case PIPE_FORMAT_R32G32B32_SNORM: + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32_SSCALED: + case PIPE_FORMAT_R32G32_SSCALED: + case PIPE_FORMAT_R32G32B32_SSCALED: + case PIPE_FORMAT_R32G32B32A32_SSCALED: + case PIPE_FORMAT_R16_UNORM: + case PIPE_FORMAT_R16G16_UNORM: + case PIPE_FORMAT_R16G16B16_UNORM: + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R16_USCALED: + case PIPE_FORMAT_R16G16_USCALED: + case PIPE_FORMAT_R16G16B16_USCALED: + case PIPE_FORMAT_R16G16B16A16_USCALED: + case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_R16G16_SNORM: + case PIPE_FORMAT_R16G16B16_SNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: + case PIPE_FORMAT_R16_SSCALED: + case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16G16B16_SSCALED: + case PIPE_FORMAT_R16G16B16A16_SSCALED: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R8G8B8_UNORM: + case PIPE_FORMAT_R8_USCALED: + case PIPE_FORMAT_R8G8_USCALED: + case PIPE_FORMAT_R8G8B8_USCALED: + case PIPE_FORMAT_R8_SNORM: + case PIPE_FORMAT_R8G8_SNORM: + case PIPE_FORMAT_R8G8B8_SNORM: + case PIPE_FORMAT_R8_SSCALED: + case PIPE_FORMAT_R8G8_SSCALED: + case PIPE_FORMAT_R8G8B8_SSCALED: + case PIPE_FORMAT_R32_FIXED: + case PIPE_FORMAT_R32G32_FIXED: + case PIPE_FORMAT_R32G32B32_FIXED: + case PIPE_FORMAT_R32G32B32A32_FIXED: + default: + fprintf(stderr, "%s:%d unsupported %d\n", __func__, __LINE__, pformat); + return -EINVAL; + } +} + +int r600_conv_pipe_prim(unsigned pprim, unsigned *prim) +{ + switch (pprim) { + case PIPE_PRIM_POINTS: + *prim = V_008958_DI_PT_POINTLIST; + return 0; + case PIPE_PRIM_LINES: + *prim = V_008958_DI_PT_LINELIST; + return 0; + case PIPE_PRIM_LINE_STRIP: + *prim = V_008958_DI_PT_LINESTRIP; + return 0; + case PIPE_PRIM_LINE_LOOP: + *prim = V_008958_DI_PT_LINELOOP; + return 0; + case PIPE_PRIM_TRIANGLES: + *prim = V_008958_DI_PT_TRILIST; + return 0; + case PIPE_PRIM_TRIANGLE_STRIP: + *prim = V_008958_DI_PT_TRISTRIP; + return 0; + case PIPE_PRIM_TRIANGLE_FAN: + *prim = V_008958_DI_PT_TRIFAN; + return 0; + case PIPE_PRIM_POLYGON: + *prim = V_008958_DI_PT_POLYGON; + return 0; + case PIPE_PRIM_QUADS: + *prim = V_008958_DI_PT_QUADLIST; + return 0; + case PIPE_PRIM_QUAD_STRIP: + *prim = V_008958_DI_PT_QUADSTRIP; + return 0; + default: + fprintf(stderr, "%s:%d unsupported %d\n", __func__, __LINE__, pprim); + return -EINVAL; + } +} diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c new file mode 100644 index 00000000000..24746e78771 --- /dev/null +++ b/src/gallium/drivers/r600/r600_query.c @@ -0,0 +1,65 @@ +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jerome Glisse + * Corbin Simpson + */ +#include <util/u_inlines.h> +#include <util/u_format.h> +#include <util/u_memory.h> +#include "r600_screen.h" +#include "r600_context.h" + +static struct pipe_query *r600_create_query(struct pipe_context *pipe, unsigned query_type) +{ + return NULL; +} + +static void r600_destroy_query(struct pipe_context *pipe, struct pipe_query *query) +{ + FREE(query); +} + +static void r600_begin_query(struct pipe_context *pipe, struct pipe_query *query) +{ +} + +static void r600_end_query(struct pipe_context *pipe, struct pipe_query *query) +{ +} + +static boolean r600_get_query_result(struct pipe_context *pipe, + struct pipe_query *query, + boolean wait, uint64_t *result) +{ + return TRUE; +} + +void r600_init_query_functions(struct r600_context* rctx) +{ + rctx->context.create_query = r600_create_query; + rctx->context.destroy_query = r600_destroy_query; + rctx->context.begin_query = r600_begin_query; + rctx->context.end_query = r600_end_query; + rctx->context.get_query_result = r600_get_query_result; +} diff --git a/src/gallium/drivers/r600/r600_resource.c b/src/gallium/drivers/r600/r600_resource.c new file mode 100644 index 00000000000..d9aa1df04f0 --- /dev/null +++ b/src/gallium/drivers/r600/r600_resource.c @@ -0,0 +1,68 @@ +/* + * Copyright 2010 Marek Olšák <[email protected] + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "r600_context.h" +#include "r600_resource.h" +#include "r600_screen.h" +#include "r600_texture.h" + +static struct pipe_resource * +r600_resource_create(struct pipe_screen *screen, + const struct pipe_resource *templ) +{ + if (templ->target == PIPE_BUFFER) + return r600_buffer_create(screen, templ); + else + return r600_texture_create(screen, templ); +} + +static struct pipe_resource * +r600_resource_from_handle(struct pipe_screen * screen, + const struct pipe_resource *templ, + struct winsys_handle *whandle) +{ + if (templ->target == PIPE_BUFFER) + return NULL; + else + return r600_texture_from_handle(screen, templ, whandle); +} + +void r600_init_context_resource_functions(struct r600_context *r600) +{ + r600->context.get_transfer = u_get_transfer_vtbl; + r600->context.transfer_map = u_transfer_map_vtbl; + r600->context.transfer_flush_region = u_transfer_flush_region_vtbl; + r600->context.transfer_unmap = u_transfer_unmap_vtbl; + r600->context.transfer_destroy = u_transfer_destroy_vtbl; + r600->context.transfer_inline_write = u_transfer_inline_write_vtbl; + r600->context.is_resource_referenced = u_is_resource_referenced_vtbl; +} + +void r600_init_screen_resource_functions(struct r600_screen *r600screen) +{ + r600screen->screen.resource_create = r600_resource_create; + r600screen->screen.resource_from_handle = r600_resource_from_handle; + r600screen->screen.resource_get_handle = u_resource_get_handle_vtbl; + r600screen->screen.resource_destroy = u_resource_destroy_vtbl; + r600screen->screen.user_buffer_create = r600_user_buffer_create; +} diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h new file mode 100644 index 00000000000..95084a371bd --- /dev/null +++ b/src/gallium/drivers/r600/r600_resource.h @@ -0,0 +1,33 @@ +/* + * Copyright 2010 Marek Olšák <[email protected] + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef R600_RESOURCE_H +#define R600_RESOURCE_H + +struct r600_context; +struct r600_screen; + +void r600_init_context_resource_functions(struct r600_context *r600); +void r600_init_screen_resource_functions(struct r600_screen *r600screen); + +#endif diff --git a/src/gallium/drivers/r600/r600_screen.c b/src/gallium/drivers/r600/r600_screen.c new file mode 100644 index 00000000000..7d39184d901 --- /dev/null +++ b/src/gallium/drivers/r600/r600_screen.c @@ -0,0 +1,261 @@ +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jerome Glisse + * Corbin Simpson + */ +#include <util/u_inlines.h> +#include <util/u_format.h> +#include <util/u_memory.h> +#include "r600_resource.h" +#include "r600_screen.h" +#include "r600_texture.h" +#include "r600_context.h" +#include <stdio.h> + +static const char* r600_get_vendor(struct pipe_screen* pscreen) +{ + return "X.Org"; +} + +static const char* r600_get_name(struct pipe_screen* pscreen) +{ + return "R600/R700 (HD2XXX,HD3XXX,HD4XXX)"; +} + +static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + case PIPE_CAP_MAX_COMBINED_SAMPLERS: + return 16; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 1; + case PIPE_CAP_DUAL_SOURCE_BLEND: + return 1; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 1; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + /* FIXME some r6xx are buggy and can only do 4 */ + return 8; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + /* FIXME not sure here */ + return 13; + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + return 1; + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + return 1; + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: + /* FIXME allow this once infrastructure is there */ + return 0; + case PIPE_CAP_TGSI_CONT_SUPPORTED: + return 0; + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + return 1; + case PIPE_CAP_SM3: + return 1; + case PIPE_CAP_INDEP_BLEND_ENABLE: + return 1; + case PIPE_CAP_INDEP_BLEND_FUNC: + /* FIXME allow this */ + return 0; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + return 0; + default: + debug_printf("r600: unknown param %d\n", param); + return 0; + } +} + +static float r600_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 8192.0f; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 16.0f; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0f; + default: + debug_printf("r600: unsupported paramf %d\n", param); + return 0.0f; + } +} + +static boolean r600_is_format_supported(struct pipe_screen* screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned bindings, + unsigned geom_flags) +{ + if (target >= PIPE_MAX_TEXTURE_TYPES) { + debug_printf("r600: unsupported texture type %d\n", target); + return FALSE; + } + switch (format) { + case PIPE_FORMAT_B4G4R4A4_UNORM: + case PIPE_FORMAT_B5G6R5_UNORM: + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8R8G8B8_SRGB: + case PIPE_FORMAT_R8G8B8A8_SRGB: + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + case PIPE_FORMAT_UYVY: + case PIPE_FORMAT_L8_SRGB: + case PIPE_FORMAT_L8A8_SRGB: + case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_A8B8G8R8_SRGB: + case PIPE_FORMAT_B8G8R8A8_SRGB: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + case PIPE_FORMAT_Z32_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + case PIPE_FORMAT_Z24X8_UNORM: + return TRUE; + default: + /* Unknown format... */ + break; + } + return FALSE; +} + +struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, + struct pipe_resource *texture, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box) +{ + struct r600_texture *rtex = (struct r600_texture*)texture; + struct r600_transfer *trans; + + trans = CALLOC_STRUCT(r600_transfer); + if (trans == NULL) + return NULL; + pipe_resource_reference(&trans->transfer.resource, texture); + trans->transfer.sr = sr; + trans->transfer.usage = usage; + trans->transfer.box = *box; + trans->transfer.stride = rtex->stride[sr.level]; + trans->offset = r600_texture_get_offset(rtex, sr.level, box->z, sr.face); + return &trans->transfer; +} + +void r600_texture_transfer_destroy(struct pipe_context *ctx, + struct pipe_transfer *trans) +{ + pipe_resource_reference(&trans->resource, NULL); + FREE(trans); +} + +void* r600_texture_transfer_map(struct pipe_context *ctx, + struct pipe_transfer* transfer) +{ + struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; + struct r600_texture *rtex = (struct r600_texture*)transfer->resource; + char *map; + enum pipe_format format = rtex->b.b.format; + + map = pipe_buffer_map(ctx, rtex->buffer, + transfer->usage, + &rtransfer->buffer_transfer); + + if (!map) { + return NULL; + } + + return map + rtransfer->offset + + transfer->box.y / util_format_get_blockheight(format) * transfer->stride + + transfer->box.x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); +} + +void r600_texture_transfer_unmap(struct pipe_context *ctx, + struct pipe_transfer* transfer) +{ + struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; + struct r600_texture *rtex = (struct r600_texture*)transfer->resource; + + pipe_buffer_unmap(ctx, rtex->buffer, rtransfer->buffer_transfer); +} + +static void r600_destroy_screen(struct pipe_screen* pscreen) +{ + struct r600_screen* rscreen = r600_screen(pscreen); + + if (rscreen == NULL) + return; + FREE(rscreen); +} + +struct pipe_screen *radeon_create_screen(struct radeon *rw) +{ + struct r600_screen* rscreen; + + rscreen = CALLOC_STRUCT(r600_screen); + if (rscreen == NULL) { + return NULL; + } + rscreen->rw = rw; + rscreen->screen.winsys = (struct pipe_winsys*)rw; + rscreen->screen.destroy = r600_destroy_screen; + rscreen->screen.get_name = r600_get_name; + rscreen->screen.get_vendor = r600_get_vendor; + rscreen->screen.get_param = r600_get_param; + rscreen->screen.get_paramf = r600_get_paramf; + rscreen->screen.is_format_supported = r600_is_format_supported; + rscreen->screen.context_create = r600_create_context; + r600_init_screen_texture_functions(&rscreen->screen); + r600_init_screen_resource_functions(rscreen); + return &rscreen->screen; +} diff --git a/src/gallium/drivers/r600/r600_screen.h b/src/gallium/drivers/r600/r600_screen.h new file mode 100644 index 00000000000..4748021d079 --- /dev/null +++ b/src/gallium/drivers/r600/r600_screen.h @@ -0,0 +1,119 @@ +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef R600_SCREEN_H +#define R600_SCREEN_H + +#include <pipe/p_state.h> +#include <pipe/p_screen.h> +#include <pipebuffer/pb_buffer.h> +#include <xf86drm.h> +#include <radeon_drm.h> +#include "radeon.h" +#include "util/u_transfer.h" + +#define r600_screen(s) ((struct r600_screen*)s) + +/* Texture transfer. */ +struct r600_transfer { + /* Base class. */ + struct pipe_transfer transfer; + /* Buffer transfer. */ + struct pipe_transfer *buffer_transfer; + unsigned offset; +}; + +struct r600_buffer { + struct u_resource b; + struct radeon_bo *bo; + u32 domain; + u32 flink; + struct pb_buffer *pb; +}; + +struct r600_screen { + struct pipe_screen screen; + struct radeon *rw; +}; + +/* Buffer functions. */ +struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, + const struct pipe_resource *templ); +struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, + void *ptr, unsigned bytes, + unsigned bind); +unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, + struct pipe_resource *buf, + unsigned face, unsigned level); +struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, + struct winsys_handle *whandle); + +/* Texture transfer functions. */ +struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, + struct pipe_resource *texture, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box); +void r600_texture_transfer_destroy(struct pipe_context *ctx, + struct pipe_transfer *trans); +void* r600_texture_transfer_map(struct pipe_context *ctx, + struct pipe_transfer* transfer); +void r600_texture_transfer_unmap(struct pipe_context *ctx, + struct pipe_transfer* transfer); + + +/* Blit functions. */ +void r600_clear(struct pipe_context *ctx, + unsigned buffers, + const float *rgba, + double depth, + unsigned stencil); +void r600_surface_copy(struct pipe_context *ctx, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, + unsigned width, unsigned height); +void r600_surface_fill(struct pipe_context *ctx, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, + unsigned value); + +/* helpers */ +int r600_conv_pipe_format(unsigned pformat, unsigned *format); +int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); + +union r600_float_to_u32_u { + u32 u; + float f; +}; + +static inline u32 r600_float_to_u32(float f) +{ + union r600_float_to_u32_u c; + + c.f = f; + return c.u; +} + +#endif diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c new file mode 100644 index 00000000000..6729fdd0e5c --- /dev/null +++ b/src/gallium/drivers/r600/r600_shader.c @@ -0,0 +1,217 @@ +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jerome Glisse + */ +#include <stdio.h> +#include <errno.h> +#include <util/u_inlines.h> +#include <util/u_format.h> +#include <util/u_memory.h> +#include <tgsi/tgsi_dump.h> +#include "r600_screen.h" +#include "r600_context.h" +#include "r600d.h" + +static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *rpshader) +{ + struct r600_screen *rscreen = (struct r600_screen*)ctx->screen; + struct r600_shader *rshader = &rpshader->shader; + struct radeon_state *state; + unsigned i, tmp; + + rpshader->state = radeon_state_decref(rpshader->state); + state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER); + if (state == NULL) + return -ENOMEM; + for (i = 0; i < rshader->noutput; i += 4) { + tmp = rshader->output[i].sid; + tmp |= rshader->output[i + 1].sid << 8; + tmp |= rshader->output[i + 2].sid << 16; + tmp |= rshader->output[i + 3].sid << 24; + state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] = tmp; + } + state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 1); + state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->ngpr); + rpshader->state = state; + rpshader->state->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); + rpshader->state->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo); + rpshader->state->nbo = 2; + rpshader->state->placement[0] = RADEON_GEM_DOMAIN_GTT; + return radeon_state_pm4(state); +} + +static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *rpshader) +{ + struct r600_screen *rscreen = (struct r600_screen*)ctx->screen; + struct r600_shader *rshader = &rpshader->shader; + struct radeon_state *state; + unsigned i, tmp; + + rpshader->state = radeon_state_decref(rpshader->state); + state = radeon_state(rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER); + if (state == NULL) + return -ENOMEM; + for (i = 0; i < rshader->ninput; i++) { + tmp = S_028644_SEMANTIC(rshader->input[i].sid); + tmp |= S_028644_SEL_CENTROID(1); + tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); + state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp; + } + state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) | + S_0286CC_PERSP_GRADIENT_ENA(1); + state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000; + state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->ngpr); + state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = 0x00000002; + rpshader->state = state; + rpshader->state->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); + rpshader->state->nbo = 1; + rpshader->state->placement[0] = RADEON_GEM_DOMAIN_GTT; + return radeon_state_pm4(state); +} + +static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *rpshader) +{ + struct r600_screen *rscreen = (struct r600_screen*)ctx->screen; + struct r600_context *rctx = (struct r600_context*)ctx; + struct r600_shader *rshader = &rpshader->shader; + int r; + + /* copy new shader */ + radeon_bo_decref(rscreen->rw, rpshader->bo); + rpshader->bo = NULL; + rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->ndw * 4, + 4096, NULL); + if (rpshader->bo == NULL) { + return -ENOMEM; + } + radeon_bo_map(rscreen->rw, rpshader->bo); + memcpy(rpshader->bo->data, rshader->bcode, rshader->ndw * 4); + radeon_bo_unmap(rscreen->rw, rpshader->bo); + /* build state */ + rshader->flat_shade = rctx->flat_shade; + switch (rpshader->type) { + case C_PROGRAM_TYPE_VS: + r = r600_pipe_shader_vs(ctx, rpshader); + break; + case C_PROGRAM_TYPE_FS: + r = r600_pipe_shader_ps(ctx, rpshader); + break; + default: + r = -EINVAL; + break; + } + return r; +} + +struct r600_pipe_shader *r600_pipe_shader_create(struct pipe_context *ctx, unsigned type, const struct tgsi_token *tokens) +{ + struct r600_pipe_shader *rpshader = CALLOC_STRUCT(r600_pipe_shader); + struct r600_shader *rshader = &rpshader->shader; + int r; + + if (rpshader == NULL) + return NULL; + rpshader->type = type; + c_list_init(&rshader->nodes); + fprintf(stderr, "<<\n"); + tgsi_dump(tokens, 0); + fprintf(stderr, "--------------------------------------------------------------\n"); + r = c_shader_from_tgsi(&rshader->cshader, type, tokens); + if (r) { + r600_pipe_shader_destroy(ctx, rpshader); + fprintf(stderr, "ERROR(%s %d)>>\n\n", __func__, __LINE__); + return NULL; + } + r = r600_shader_insert_fetch(&rshader->cshader); + if (r) { + r600_pipe_shader_destroy(ctx, rpshader); + fprintf(stderr, "ERROR(%s %d)>>\n\n", __func__, __LINE__); + return NULL; + } + r = c_shader_build_dominator_tree(&rshader->cshader); + if (r) { + r600_pipe_shader_destroy(ctx, rpshader); + fprintf(stderr, "ERROR(%s %d)>>\n\n", __func__, __LINE__); + return NULL; + } + c_shader_dump(&rshader->cshader); + r = r700_shader_translate(rshader); + if (r) { + r600_pipe_shader_destroy(ctx, rpshader); + fprintf(stderr, "ERROR(%s %d)>>\n\n", __func__, __LINE__); + return NULL; + } +#if 1 +#if 0 + fprintf(stderr, "--------------------------------------------------------------\n"); + for (int i = 0; i < rshader->ndw; i++) { + fprintf(stderr, "0x%08X\n", rshader->bcode[i]); + } +#endif + fprintf(stderr, ">>\n\n"); +#endif + return rpshader; +} + +void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *rpshader) +{ + struct r600_screen *rscreen = (struct r600_screen*)ctx->screen; + + if (rpshader == NULL) + return; + radeon_bo_decref(rscreen->rw, rpshader->bo); + rpshader->bo = NULL; + r600_shader_cleanup(&rpshader->shader); + FREE(rpshader); +} + +int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rpshader) +{ + struct r600_context *rctx = (struct r600_context*)ctx; + struct r600_shader *rshader; + enum pipe_format resource_format[160]; + unsigned i, nresources = 0; + int r; + + if (rpshader == NULL) + return -EINVAL; + rshader = &rpshader->shader; + switch (rpshader->type) { + case C_PROGRAM_TYPE_VS: + for (i = 0; i < rctx->vertex_elements->count; i++) { + resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format; + } + break; + default: + break; + } + /* there should be enough input */ + if (nresources < rshader->nresource) + return -EINVAL; + /* FIXME compare resources */ + r = r600_shader_update(rshader, resource_format); + if (r) + return r; + return r600_pipe_shader(ctx, rpshader); +} diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h new file mode 100644 index 00000000000..3b65b29553c --- /dev/null +++ b/src/gallium/drivers/r600/r600_shader.h @@ -0,0 +1,262 @@ +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef R600_SHADER_H +#define R600_SHADER_H + +#include "r600_compiler.h" +#include "radeon.h" + +struct r600_shader_operand { + struct c_vector *vector; + unsigned sel; + unsigned chan; + unsigned neg; + unsigned abs; +}; + +struct r600_shader_vfetch { + struct r600_shader_vfetch *next; + struct r600_shader_vfetch *prev; + unsigned cf_addr; + struct r600_shader_operand src[2]; + struct r600_shader_operand dst[4]; +}; + +struct r600_shader_inst { + unsigned is_op3; + unsigned opcode; + unsigned inst; + struct r600_shader_operand src[3]; + struct r600_shader_operand dst; + unsigned last; +}; + +struct r600_shader_alu { + struct r600_shader_alu *next; + struct r600_shader_alu *prev; + unsigned nalu; + unsigned nliteral; + unsigned nconstant; + struct r600_shader_inst alu[5]; + u32 literal[4]; +}; + +struct r600_shader_node { + struct r600_shader_node *next; + struct r600_shader_node *prev; + unsigned cf_id; /**< cf index (in dw) in byte code */ + unsigned cf_addr; /**< instructions index (in dw) in byte code */ + unsigned nslot; /**< number of slot (2 dw) needed by this node */ + unsigned nfetch; + struct c_node *node; /**< compiler node from which this node originate */ + struct r600_shader_vfetch vfetch; /**< list of vfetch instructions */ + struct r600_shader_alu alu; /**< list of alu instructions */ +}; + +struct r600_shader_io { + unsigned name; + unsigned gpr; + int sid; +}; + +struct r600_shader { + unsigned stack_size; /**< stack size needed by this shader */ + unsigned ngpr; /**< number of GPR needed by this shader */ + unsigned nconstant; /**< number of constants used by this shader */ + unsigned nresource; /**< number of resources used by this shader */ + unsigned noutput; + unsigned ninput; + unsigned nvector; + unsigned ncf; /**< total number of cf clauses */ + unsigned nslot; /**< total number of slots (2 dw) */ + unsigned flat_shade; /**< are we flat shading */ + struct r600_shader_node nodes; /**< list of node */ + struct r600_shader_io input[32]; + struct r600_shader_io output[32]; + /* TODO replace GPR by some better register allocator */ + struct c_vector **gpr; + unsigned ndw; /**< bytes code size in dw */ + u32 *bcode; /**< bytes code */ + enum pipe_format resource_format[160]; /**< format of resource */ + struct c_shader cshader; +}; + +void r600_shader_cleanup(struct r600_shader *rshader); +int r600_shader_register(struct r600_shader *rshader); +int r600_shader_node(struct r600_shader *shader); +void r600_shader_node_place(struct r600_shader *rshader); +int r600_shader_find_gpr(struct r600_shader *rshader, struct c_vector *v, unsigned swizzle, + struct r600_shader_operand *operand); +int r600_shader_vfetch_bytecode(struct r600_shader *rshader, + struct r600_shader_node *rnode, + struct r600_shader_vfetch *vfetch, + unsigned *cid); +int r600_shader_update(struct r600_shader *rshader, + enum pipe_format *resource_format); +int r600_shader_legalize(struct r600_shader *rshader); + +int r700_shader_translate(struct r600_shader *rshader); + +int c_shader_from_tgsi(struct c_shader *shader, unsigned type, + const struct tgsi_token *tokens); +int r600_shader_register(struct r600_shader *rshader); +int r600_shader_translate_rec(struct r600_shader *rshader, struct c_node *node); +int r700_shader_translate(struct r600_shader *rshader); +int r600_shader_insert_fetch(struct c_shader *shader); + +enum r600_instruction { + INST_ADD = 0, + INST_MUL = 1, + INST_MUL_IEEE = 2, + INST_MAX = 3, + INST_MIN = 4, + INST_MAX_DX10 = 5, + INST_MIN_DX10 = 6, + INST_SETE = 7, + INST_SETGT = 8, + INST_SETGE = 9, + INST_SETNE = 10, + INST_SETE_DX10 = 11, + INST_SETGT_DX10 = 12, + INST_SETGE_DX10 = 13, + INST_SETNE_DX10 = 14, + INST_FRACT = 15, + INST_TRUNC = 16, + INST_CEIL = 17, + INST_RNDNE = 18, + INST_FLOOR = 19, + INST_MOVA = 20, + INST_MOVA_FLOOR = 21, + INST_MOVA_INT = 22, + INST_MOV = 23, + INST_NOP = 24, + INST_PRED_SETGT_UINT = 25, + INST_PRED_SETGE_UINT = 26, + INST_PRED_SETE = 27, + INST_PRED_SETGT = 28, + INST_PRED_SETGE = 29, + INST_PRED_SETNE = 30, + INST_PRED_SET_INV = 31, + INST_PRED_SET_POP = 32, + INST_PRED_SET_CLR = 33, + INST_PRED_SET_RESTORE = 34, + INST_PRED_SETE_PUSH = 35, + INST_PRED_SETGT_PUSH = 36, + INST_PRED_SETGE_PUSH = 37, + INST_PRED_SETNE_PUSH = 38, + INST_KILLE = 39, + INST_KILLGT = 40, + INST_KILLGE = 41, + INST_KILLNE = 42, + INST_AND_INT = 43, + INST_OR_INT = 44, + INST_XOR_INT = 45, + INST_NOT_INT = 46, + INST_ADD_INT = 47, + INST_SUB_INT = 48, + INST_MAX_INT = 49, + INST_MIN_INT = 50, + INST_MAX_UINT = 51, + INST_MIN_UINT = 52, + INST_SETE_INT = 53, + INST_SETGT_INT = 54, + INST_SETGE_INT = 55, + INST_SETNE_INT = 56, + INST_SETGT_UINT = 57, + INST_SETGE_UINT = 58, + INST_KILLGT_UINT = 59, + INST_KILLGE_UINT = 60, + INST_PRED_SETE_INT = 61, + INST_PRED_SETGT_INT = 62, + INST_PRED_SETGE_INT = 63, + INST_PRED_SETNE_INT = 64, + INST_KILLE_INT = 65, + INST_KILLGT_INT = 66, + INST_KILLGE_INT = 67, + INST_KILLNE_INT = 68, + INST_PRED_SETE_PUSH_INT = 69, + INST_PRED_SETGT_PUSH_INT = 70, + INST_PRED_SETGE_PUSH_INT = 71, + INST_PRED_SETNE_PUSH_INT = 72, + INST_PRED_SETLT_PUSH_INT = 73, + INST_PRED_SETLE_PUSH_INT = 74, + INST_DOT4 = 75, + INST_DOT4_IEEE = 76, + INST_CUBE = 77, + INST_MAX4 = 78, + INST_MOVA_GPR_INT = 79, + INST_EXP_IEEE = 80, + INST_LOG_CLAMPED = 81, + INST_LOG_IEEE = 82, + INST_RECIP_CLAMPED = 83, + INST_RECIP_FF = 84, + INST_RECIP_IEEE = 85, + INST_RECIPSQRT_CLAMPED = 86, + INST_RECIPSQRT_FF = 87, + INST_RECIPSQRT_IEEE = 88, + INST_SQRT_IEEE = 89, + INST_FLT_TO_INT = 90, + INST_INT_TO_FLT = 91, + INST_UINT_TO_FLT = 92, + INST_SIN = 93, + INST_COS = 94, + INST_ASHR_INT = 95, + INST_LSHR_INT = 96, + INST_LSHL_INT = 97, + INST_MULLO_INT = 98, + INST_MULHI_INT = 99, + INST_MULLO_UINT = 100, + INST_MULHI_UINT = 101, + INST_RECIP_INT = 102, + INST_RECIP_UINT = 103, + INST_FLT_TO_UINT = 104, + INST_MUL_LIT = 105, + INST_MUL_LIT_M2 = 106, + INST_MUL_LIT_M4 = 107, + INST_MUL_LIT_D2 = 108, + INST_MULADD = 109, + INST_MULADD_M2 = 110, + INST_MULADD_M4 = 111, + INST_MULADD_D2 = 112, + INST_MULADD_IEEE = 113, + INST_MULADD_IEEE_M2 = 114, + INST_MULADD_IEEE_M4 = 115, + INST_MULADD_IEEE_D2 = 116, + INST_CNDE = 117, + INST_CNDGT = 118, + INST_CNDGE = 119, + INST_CNDE_INT = 120, + INST_CNDGT_INT = 121, + INST_CNDGE_INT = 122, + INST_COUNT +}; + +struct r600_instruction_info { + enum r600_instruction instruction; + unsigned opcode; + unsigned is_trans; + unsigned is_op3; +}; + + +#endif diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h new file mode 100644 index 00000000000..71aa09719e0 --- /dev/null +++ b/src/gallium/drivers/r600/r600_sq.h @@ -0,0 +1,606 @@ +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jerome Glisse + */ +#ifndef R600_SQ_H +#define R600_SQ_H + +#define P_SQ_CF_WORD0 +#define S_SQ_CF_WORD0_ADDR(x) (((x) & 0xFFFFFFFF) << 0) +#define G_SQ_CF_WORD0_ADDR(x) (((x) >> 0) & 0xFFFFFFFF) +#define C_SQ_CF_WORD0_ADDR 0x00000000 +#define P_SQ_CF_WORD1 +#define S_SQ_CF_WORD1_POP_COUNT(x) (((x) & 0x7) << 0) +#define G_SQ_CF_WORD1_POP_COUNT(x) (((x) >> 0) & 0x7) +#define C_SQ_CF_WORD1_POP_COUNT 0xFFFFFFF8 +#define S_SQ_CF_WORD1_CF_CONST(x) (((x) & 0x1F) << 3) +#define G_SQ_CF_WORD1_CF_CONST(x) (((x) >> 3) & 0x1F) +#define C_SQ_CF_WORD1_CF_CONST 0xFFFFFF07 +#define S_SQ_CF_WORD1_COND(x) (((x) & 0x3) << 8) +#define G_SQ_CF_WORD1_COND(x) (((x) >> 8) & 0x3) +#define C_SQ_CF_WORD1_COND 0xFFFFFCFF +#define S_SQ_CF_WORD1_COUNT(x) (((x) & 0x7) << 10) +#define G_SQ_CF_WORD1_COUNT(x) (((x) >> 10) & 0x7) +#define C_SQ_CF_WORD1_COUNT 0xFFFFE3FF +#define S_SQ_CF_WORD1_CALL_COUNT(x) (((x) & 0x3F) << 13) +#define G_SQ_CF_WORD1_CALL_COUNT(x) (((x) >> 13) & 0x3F) +#define C_SQ_CF_WORD1_CALL_COUNT 0xFFF81FFF +#define S_SQ_CF_WORD1_END_OF_PROGRAM(x) (((x) & 0x1) << 21) +#define G_SQ_CF_WORD1_END_OF_PROGRAM(x) (((x) >> 21) & 0x1) +#define C_SQ_CF_WORD1_END_OF_PROGRAM 0xFFDFFFFF +#define S_SQ_CF_WORD1_VALID_PIXEL_MODE(x) (((x) & 0x1) << 22) +#define G_SQ_CF_WORD1_VALID_PIXEL_MODE(x) (((x) >> 22) & 0x1) +#define C_SQ_CF_WORD1_VALID_PIXEL_MODE 0xFFBFFFFF +#define S_SQ_CF_WORD1_CF_INST(x) (((x) & 0x7F) << 23) +#define G_SQ_CF_WORD1_CF_INST(x) (((x) >> 23) & 0x7F) +#define C_SQ_CF_WORD1_CF_INST 0xC07FFFFF +#define V_SQ_CF_WORD1_SQ_CF_INST_NOP 0x00000000 +#define V_SQ_CF_WORD1_SQ_CF_INST_TEX 0x00000001 +#define V_SQ_CF_WORD1_SQ_CF_INST_VTX 0x00000002 +#define V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC 0x00000003 +#define V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START 0x00000004 +#define V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END 0x00000005 +#define V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10 0x00000006 +#define V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL 0x00000007 +#define V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE 0x00000008 +#define V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK 0x00000009 +#define V_SQ_CF_WORD1_SQ_CF_INST_JUMP 0x0000000A +#define V_SQ_CF_WORD1_SQ_CF_INST_PUSH 0x0000000B +#define V_SQ_CF_WORD1_SQ_CF_INST_PUSH_ELSE 0x0000000C +#define V_SQ_CF_WORD1_SQ_CF_INST_ELSE 0x0000000D +#define V_SQ_CF_WORD1_SQ_CF_INST_POP 0x0000000E +#define V_SQ_CF_WORD1_SQ_CF_INST_POP_JUMP 0x0000000F +#define V_SQ_CF_WORD1_SQ_CF_INST_POP_PUSH 0x00000010 +#define V_SQ_CF_WORD1_SQ_CF_INST_POP_PUSH_ELSE 0x00000011 +#define V_SQ_CF_WORD1_SQ_CF_INST_CALL 0x00000012 +#define V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS 0x00000013 +#define V_SQ_CF_WORD1_SQ_CF_INST_RETURN 0x00000014 +#define V_SQ_CF_WORD1_SQ_CF_INST_EMIT_VERTEX 0x00000015 +#define V_SQ_CF_WORD1_SQ_CF_INST_EMIT_CUT_VERTEX 0x00000016 +#define V_SQ_CF_WORD1_SQ_CF_INST_CUT_VERTEX 0x00000017 +#define V_SQ_CF_WORD1_SQ_CF_INST_KILL 0x00000018 +#define S_SQ_CF_WORD1_WHOLE_QUAD_MODE(x) (((x) & 0x1) << 30) +#define G_SQ_CF_WORD1_WHOLE_QUAD_MODE(x) (((x) >> 30) & 0x1) +#define C_SQ_CF_WORD1_WHOLE_QUAD_MODE 0xBFFFFFFF +#define S_SQ_CF_WORD1_BARRIER(x) (((x) & 0x1) << 31) +#define G_SQ_CF_WORD1_BARRIER(x) (((x) >> 31) & 0x1) +#define C_SQ_CF_WORD1_BARRIER 0x7FFFFFFF +#define P_SQ_CF_ALU_WORD0 +#define S_SQ_CF_ALU_WORD0_ALU_ADDR(x) (((x) & 0x3FFFFF) << 0) +#define G_SQ_CF_ALU_WORD0_ALU_ADDR(x) (((x) >> 0) & 0x3FFFFF) +#define C_SQ_CF_ALU_WORD0_ALU_ADDR 0xFFC00000 +#define S_SQ_CF_ALU_WORD0_KCACHE_BANK0(x) (((x) & 0xF) << 22) +#define G_SQ_CF_ALU_WORD0_KCACHE_BANK0(x) (((x) >> 22) & 0xF) +#define C_SQ_CF_ALU_WORD0_KCACHE_BANK0 0xFC3FFFFF +#define S_SQ_CF_ALU_WORD0_KCACHE_BANK1(x) (((x) & 0xF) << 26) +#define G_SQ_CF_ALU_WORD0_KCACHE_BANK1(x) (((x) >> 26) & 0xF) +#define C_SQ_CF_ALU_WORD0_KCACHE_BANK1 0xC3FFFFFF +#define S_SQ_CF_ALU_WORD0_KCACHE_MODE0(x) (((x) & 0x3) << 30) +#define G_SQ_CF_ALU_WORD0_KCACHE_MODE0(x) (((x) >> 30) & 0x3) +#define C_SQ_CF_ALU_WORD0_KCACHE_MODE0 0x3FFFFFFF +#define P_SQ_CF_ALU_WORD1 +#define S_SQ_CF_ALU_WORD1_KCACHE_MODE1(x) (((x) & 0x3) << 0) +#define G_SQ_CF_ALU_WORD1_KCACHE_MODE1(x) (((x) >> 0) & 0x3) +#define C_SQ_CF_ALU_WORD1_KCACHE_MODE1 0xFFFFFFFC +#define S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(x) (((x) & 0xFF) << 2) +#define G_SQ_CF_ALU_WORD1_KCACHE_ADDR0(x) (((x) >> 2) & 0xFF) +#define C_SQ_CF_ALU_WORD1_KCACHE_ADDR0 0xFFFFFC03 +#define S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(x) (((x) & 0xFF) << 10) +#define G_SQ_CF_ALU_WORD1_KCACHE_ADDR1(x) (((x) >> 10) & 0xFF) +#define C_SQ_CF_ALU_WORD1_KCACHE_ADDR1 0xFFFC03FF +#define S_SQ_CF_ALU_WORD1_ALU_COUNT(x) (((x) & 0x7F) << 18) +#define G_SQ_CF_ALU_WORD1_ALU_COUNT(x) (((x) >> 18) & 0x7F) +#define C_SQ_CF_ALU_WORD1_ALU_COUNT 0xFE03FFFF +#define S_SQ_CF_ALU_WORD1_USES_WATERFALL(x) (((x) & 0x1) << 25) +#define G_SQ_CF_ALU_WORD1_USES_WATERFALL(x) (((x) >> 25) & 0x1) +#define C_SQ_CF_ALU_WORD1_USES_WATERFALL 0xFDFFFFFF +#define S_SQ_CF_ALU_WORD1_CF_ALU_INST(x) (((x) & 0xF) << 26) +#define G_SQ_CF_ALU_WORD1_CF_ALU_INST(x) (((x) >> 26) & 0xF) +#define C_SQ_CF_ALU_WORD1_CF_ALU_INST 0xC3FFFFFF +#define V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU 0x00000008 +#define V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE 0x00000009 +#define V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER 0x0000000A +#define V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER 0x0000000B +#define V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_CONTINUE 0x0000000D +#define V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_BREAK 0x0000000E +#define V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_ELSE_AFTER 0x0000000F +#define S_SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE(x) (((x) & 0x1) << 30) +#define G_SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE(x) (((x) >> 30) & 0x1) +#define C_SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE 0xBFFFFFFF +#define S_SQ_CF_ALU_WORD1_BARRIER(x) (((x) & 0x1) << 31) +#define G_SQ_CF_ALU_WORD1_BARRIER(x) (((x) >> 31) & 0x1) +#define C_SQ_CF_ALU_WORD1_BARRIER 0x7FFFFFFF +#define P_SQ_CF_ALLOC_EXPORT_WORD0 +#define S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(x) (((x) & 0x1FFF) << 0) +#define G_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(x) (((x) >> 0) & 0x1FFF) +#define C_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE 0xFFFFE000 +#define S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(x) (((x) & 0x3) << 13) +#define G_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(x) (((x) >> 13) & 0x3) +#define C_SQ_CF_ALLOC_EXPORT_WORD0_TYPE 0xFFFF9FFF +#define V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL 0x00000000 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS 0x00000001 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM 0x00000002 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_SX 0x00000003 +#define S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(x) (((x) & 0x7F) << 15) +#define G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(x) (((x) >> 15) & 0x7F) +#define C_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR 0xFFC07FFF +#define S_SQ_CF_ALLOC_EXPORT_WORD0_RW_REL(x) (((x) & 0x1) << 22) +#define G_SQ_CF_ALLOC_EXPORT_WORD0_RW_REL(x) (((x) >> 22) & 0x1) +#define C_SQ_CF_ALLOC_EXPORT_WORD0_RW_REL 0xFFBFFFFF +#define S_SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR(x) (((x) & 0x7F) << 23) +#define G_SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR(x) (((x) >> 23) & 0x7F) +#define C_SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR 0xC07FFFFF +#define S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(x) (((x) & 0x3) << 30) +#define G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(x) (((x) >> 30) & 0x3) +#define C_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE 0x3FFFFFFF +#define P_SQ_CF_ALLOC_EXPORT_WORD1 +#define S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(x) (((x) & 0xF) << 17) +#define G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(x) (((x) >> 17) & 0xF) +#define C_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT 0xFFE1FFFF +#define S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(x) (((x) & 0x1) << 21) +#define G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(x) (((x) >> 21) & 0x1) +#define C_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM 0xFFDFFFFF +#define S_SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE(x) (((x) & 0x1) << 22) +#define G_SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE(x) (((x) >> 22) & 0x1) +#define C_SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE 0xFFBFFFFF +#define S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(x) (((x) & 0x7F) << 23) +#define G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(x) (((x) >> 23) & 0x7F) +#define C_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST 0xC07FFFFF +#define V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0 0x00000020 +#define V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1 0x00000021 +#define V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2 0x00000022 +#define V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3 0x00000023 +#define V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_SCRATCH 0x00000024 +#define V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_REDUCTION 0x00000025 +#define V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_RING 0x00000026 +#define V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT 0x00000027 +#define V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE 0x00000028 +#define S_SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE(x) (((x) & 0x1) << 30) +#define G_SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE(x) (((x) >> 30) & 0x1) +#define C_SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE 0xBFFFFFFF +#define S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(x) (((x) & 0x1) << 31) +#define G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(x) (((x) >> 31) & 0x1) +#define C_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER 0x7FFFFFFF +#define P_SQ_CF_ALLOC_EXPORT_WORD1_BUF +#define S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(x) (((x) & 0xFFF) << 0) +#define G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(x) (((x) >> 0) & 0xFFF) +#define C_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE 0xFFFFF000 +#define S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(x) (((x) & 0xF) << 12) +#define G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(x) (((x) >> 12) & 0xF) +#define C_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK 0xFFFF0FFF +#define P_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ +#define S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(x) (((x) & 0x7) << 0) +#define G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(x) (((x) >> 0) & 0x7) +#define C_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X 0xFFFFFFF8 +#define S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(x) (((x) & 0x7) << 3) +#define G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(x) (((x) >> 3) & 0x7) +#define C_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y 0xFFFFFFC7 +#define S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(x) (((x) & 0x7) << 6) +#define G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(x) (((x) >> 6) & 0x7) +#define C_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z 0xFFFFFE3F +#define S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(x) (((x) & 0x7) << 9) +#define G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(x) (((x) >> 9) & 0x7) +#define C_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W 0xFFFFF1FF +#define P_SQ_ALU_WORD0 +#define S_SQ_ALU_WORD0_SRC0_SEL(x) (((x) & 0x1FF) << 0) +#define G_SQ_ALU_WORD0_SRC0_SEL(x) (((x) >> 0) & 0x1FF) +#define C_SQ_ALU_WORD0_SRC0_SEL 0xFFFFFE00 +#define S_SQ_ALU_WORD0_SRC0_REL(x) (((x) & 0x1) << 9) +#define G_SQ_ALU_WORD0_SRC0_REL(x) (((x) >> 9) & 0x1) +#define C_SQ_ALU_WORD0_SRC0_REL 0xFFFFFDFF +#define S_SQ_ALU_WORD0_SRC0_CHAN(x) (((x) & 0x3) << 10) +#define G_SQ_ALU_WORD0_SRC0_CHAN(x) (((x) >> 10) & 0x3) +#define C_SQ_ALU_WORD0_SRC0_CHAN 0xFFFFF3FF +#define S_SQ_ALU_WORD0_SRC0_NEG(x) (((x) & 0x1) << 12) +#define G_SQ_ALU_WORD0_SRC0_NEG(x) (((x) >> 12) & 0x1) +#define C_SQ_ALU_WORD0_SRC0_NEG 0xFFFFEFFF +#define S_SQ_ALU_WORD0_SRC1_SEL(x) (((x) & 0x1FF) << 13) +#define G_SQ_ALU_WORD0_SRC1_SEL(x) (((x) >> 13) & 0x1FF) +#define C_SQ_ALU_WORD0_SRC1_SEL 0xFFC01FFF +#define S_SQ_ALU_WORD0_SRC1_REL(x) (((x) & 0x1) << 22) +#define G_SQ_ALU_WORD0_SRC1_REL(x) (((x) >> 22) & 0x1) +#define C_SQ_ALU_WORD0_SRC1_REL 0xFFBFFFFF +#define S_SQ_ALU_WORD0_SRC1_CHAN(x) (((x) & 0x3) << 23) +#define G_SQ_ALU_WORD0_SRC1_CHAN(x) (((x) >> 23) & 0x3) +#define C_SQ_ALU_WORD0_SRC1_CHAN 0xFE7FFFFF +#define S_SQ_ALU_WORD0_SRC1_NEG(x) (((x) & 0x1) << 25) +#define G_SQ_ALU_WORD0_SRC1_NEG(x) (((x) >> 25) & 0x1) +#define C_SQ_ALU_WORD0_SRC1_NEG 0xFDFFFFFF +#define S_SQ_ALU_WORD0_INDEX_MODE(x) (((x) & 0x7) << 26) +#define G_SQ_ALU_WORD0_INDEX_MODE(x) (((x) >> 26) & 0x7) +#define C_SQ_ALU_WORD0_INDEX_MODE 0xE3FFFFFF +#define S_SQ_ALU_WORD0_PRED_SEL(x) (((x) & 0x3) << 29) +#define G_SQ_ALU_WORD0_PRED_SEL(x) (((x) >> 29) & 0x3) +#define C_SQ_ALU_WORD0_PRED_SEL 0x9FFFFFFF +#define S_SQ_ALU_WORD0_LAST(x) (((x) & 0x1) << 31) +#define G_SQ_ALU_WORD0_LAST(x) (((x) >> 31) & 0x1) +#define C_SQ_ALU_WORD0_LAST 0x7FFFFFFF +#define P_SQ_ALU_WORD1 +#define S_SQ_ALU_WORD1_ENCODING(x) (((x) & 0x7) << 15) +#define G_SQ_ALU_WORD1_ENCODING(x) (((x) >> 15) & 0x7) +#define C_SQ_ALU_WORD1_ENCODING 0xFFFC7FFF +#define S_SQ_ALU_WORD1_BANK_SWIZZLE(x) (((x) & 0x7) << 18) +#define G_SQ_ALU_WORD1_BANK_SWIZZLE(x) (((x) >> 18) & 0x7) +#define C_SQ_ALU_WORD1_BANK_SWIZZLE 0xFFE3FFFF +#define S_SQ_ALU_WORD1_DST_GPR(x) (((x) & 0x7F) << 21) +#define G_SQ_ALU_WORD1_DST_GPR(x) (((x) >> 21) & 0x7F) +#define C_SQ_ALU_WORD1_DST_GPR 0xF01FFFFF +#define S_SQ_ALU_WORD1_DST_REL(x) (((x) & 0x1) << 28) +#define G_SQ_ALU_WORD1_DST_REL(x) (((x) >> 28) & 0x1) +#define C_SQ_ALU_WORD1_DST_REL 0xEFFFFFFF +#define S_SQ_ALU_WORD1_DST_CHAN(x) (((x) & 0x3) << 29) +#define G_SQ_ALU_WORD1_DST_CHAN(x) (((x) >> 29) & 0x3) +#define C_SQ_ALU_WORD1_DST_CHAN 0x9FFFFFFF +#define S_SQ_ALU_WORD1_CLAMP(x) (((x) & 0x1) << 31) +#define G_SQ_ALU_WORD1_CLAMP(x) (((x) >> 31) & 0x1) +#define C_SQ_ALU_WORD1_CLAMP 0x7FFFFFFF +#define P_SQ_ALU_WORD1_OP2 +#define S_SQ_ALU_WORD1_OP2_SRC0_ABS(x) (((x) & 0x1) << 0) +#define G_SQ_ALU_WORD1_OP2_SRC0_ABS(x) (((x) >> 0) & 0x1) +#define C_SQ_ALU_WORD1_OP2_SRC0_ABS 0xFFFFFFFE +#define S_SQ_ALU_WORD1_OP2_SRC1_ABS(x) (((x) & 0x1) << 1) +#define G_SQ_ALU_WORD1_OP2_SRC1_ABS(x) (((x) >> 1) & 0x1) +#define C_SQ_ALU_WORD1_OP2_SRC1_ABS 0xFFFFFFFD +#define S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(x) (((x) & 0x1) << 2) +#define G_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(x) (((x) >> 2) & 0x1) +#define C_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK 0xFFFFFFFB +#define S_SQ_ALU_WORD1_OP2_UPDATE_PRED(x) (((x) & 0x1) << 3) +#define G_SQ_ALU_WORD1_OP2_UPDATE_PRED(x) (((x) >> 3) & 0x1) +#define C_SQ_ALU_WORD1_OP2_UPDATE_PRED 0xFFFFFFF7 +#define S_SQ_ALU_WORD1_OP2_WRITE_MASK(x) (((x) & 0x1) << 4) +#define G_SQ_ALU_WORD1_OP2_WRITE_MASK(x) (((x) >> 4) & 0x1) +#define C_SQ_ALU_WORD1_OP2_WRITE_MASK 0xFFFFFFEF +#define S_SQ_ALU_WORD1_OP2_FOG_MERGE(x) (((x) & 0x1) << 5) +#define G_SQ_ALU_WORD1_OP2_FOG_MERGE(x) (((x) >> 5) & 0x1) +#define C_SQ_ALU_WORD1_OP2_FOG_MERGE 0xFFFFFFDF +#define S_SQ_ALU_WORD1_OP2_OMOD(x) (((x) & 0x3) << 6) +#define G_SQ_ALU_WORD1_OP2_OMOD(x) (((x) >> 6) & 0x3) +#define C_SQ_ALU_WORD1_OP2_OMOD 0xFFFFFF3F +#define S_SQ_ALU_WORD1_OP2_ALU_INST(x) (((x) & 0x3FF) << 8) +#define G_SQ_ALU_WORD1_OP2_ALU_INST(x) (((x) >> 8) & 0x3FF) +#define C_SQ_ALU_WORD1_OP2_ALU_INST 0xFFFC00FF +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD 0x00000000 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL 0x00000001 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE 0x00000002 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX 0x00000003 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN 0x00000004 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_DX10 0x00000005 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_DX10 0x00000006 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE 0x00000008 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT 0x00000009 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE 0x0000000A +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE 0x0000000B +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_DX10 0x0000000C +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_DX10 0x0000000D +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_DX10 0x0000000E +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_DX10 0x0000000F +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT 0x00000010 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC 0x00000011 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL 0x00000012 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE 0x00000013 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR 0x00000014 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA 0x00000015 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR 0x00000016 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT 0x00000018 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV 0x00000019 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP 0x0000001A +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT 0x0000001E +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT 0x0000001F +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE 0x00000020 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT 0x00000021 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE 0x00000022 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE 0x00000023 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV 0x00000024 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP 0x00000025 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR 0x00000026 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE 0x00000027 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH 0x00000028 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH 0x00000029 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH 0x0000002A +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH 0x0000002B +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE 0x0000002C +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT 0x0000002D +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE 0x0000002E +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE 0x0000002F +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT 0x00000030 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT 0x00000031 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT 0x00000032 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT 0x00000033 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT 0x00000034 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT 0x00000035 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT 0x00000036 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT 0x00000037 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT 0x00000038 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT 0x00000039 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT 0x0000003A +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT 0x0000003B +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT 0x0000003C +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT 0x0000003D +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT 0x0000003E +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT 0x0000003F +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT 0x00000040 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT 0x00000041 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT 0x00000042 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT 0x00000043 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT 0x00000044 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT 0x00000045 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT 0x00000046 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT 0x00000047 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT 0x00000048 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT 0x00000049 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT 0x0000004A +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT 0x0000004B +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT 0x0000004C +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT 0x0000004D +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT 0x0000004E +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT 0x0000004F +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 0x00000050 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE 0x00000051 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE 0x00000052 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4 0x00000053 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT 0x00000060 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE 0x00000061 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED 0x00000062 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE 0x00000063 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED 0x00000064 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF 0x00000065 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE 0x00000066 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED 0x00000067 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF 0x00000068 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE 0x00000069 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE 0x0000006A +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT 0x0000006B +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT 0x0000006C +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT 0x0000006D +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN 0x0000006E +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS 0x0000006F +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT 0x00000070 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT 0x00000071 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT 0x00000072 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT 0x00000073 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT 0x00000074 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT 0x00000075 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT 0x00000076 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT 0x00000077 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT 0x00000078 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT 0x00000079 +#define P_SQ_ALU_WORD1_OP3 +#define S_SQ_ALU_WORD1_OP3_SRC2_SEL(x) (((x) & 0x1FF) << 0) +#define G_SQ_ALU_WORD1_OP3_SRC2_SEL(x) (((x) >> 0) & 0x1FF) +#define C_SQ_ALU_WORD1_OP3_SRC2_SEL 0xFFFFFE00 +#define S_SQ_ALU_WORD1_OP3_SRC2_REL(x) (((x) & 0x1) << 9) +#define G_SQ_ALU_WORD1_OP3_SRC2_REL(x) (((x) >> 9) & 0x1) +#define C_SQ_ALU_WORD1_OP3_SRC2_REL 0xFFFFFDFF +#define S_SQ_ALU_WORD1_OP3_SRC2_CHAN(x) (((x) & 0x3) << 10) +#define G_SQ_ALU_WORD1_OP3_SRC2_CHAN(x) (((x) >> 10) & 0x3) +#define C_SQ_ALU_WORD1_OP3_SRC2_CHAN 0xFFFFF3FF +#define S_SQ_ALU_WORD1_OP3_SRC2_NEG(x) (((x) & 0x1) << 12) +#define G_SQ_ALU_WORD1_OP3_SRC2_NEG(x) (((x) >> 12) & 0x1) +#define C_SQ_ALU_WORD1_OP3_SRC2_NEG 0xFFFFEFFF +#define S_SQ_ALU_WORD1_OP3_ALU_INST(x) (((x) & 0x1F) << 13) +#define G_SQ_ALU_WORD1_OP3_ALU_INST(x) (((x) >> 13) & 0x1F) +#define C_SQ_ALU_WORD1_OP3_ALU_INST 0xFFFC1FFF +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT 0x0000000C +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M2 0x0000000D +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M4 0x0000000E +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_D2 0x0000000F +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD 0x00000010 +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_M2 0x00000011 +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_M4 0x00000012 +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_D2 0x00000013 +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE 0x00000014 +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE_M2 0x00000015 +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE_M4 0x00000016 +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE_D2 0x00000017 +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE 0x00000018 +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT 0x00000019 +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE 0x0000001A +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT 0x0000001C +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT_INT 0x0000001D +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT 0x0000001E +#define P_SQ_VTX_WORD0 +#define S_SQ_VTX_WORD0_VTX_INST(x) (((x) & 0x1F) << 0) +#define G_SQ_VTX_WORD0_VTX_INST(x) (((x) >> 0) & 0x1F) +#define C_SQ_VTX_WORD0_VTX_INST 0xFFFFFFE0 +#define S_SQ_VTX_WORD0_FETCH_TYPE(x) (((x) & 0x3) << 5) +#define G_SQ_VTX_WORD0_FETCH_TYPE(x) (((x) >> 5) & 0x3) +#define C_SQ_VTX_WORD0_FETCH_TYPE 0xFFFFFF9F +#define S_SQ_VTX_WORD0_FETCH_WHOLE_QUAD(x) (((x) & 0x1) << 7) +#define G_SQ_VTX_WORD0_FETCH_WHOLE_QUAD(x) (((x) >> 7) & 0x1) +#define C_SQ_VTX_WORD0_FETCH_WHOLE_QUAD 0xFFFFFF7F +#define S_SQ_VTX_WORD0_BUFFER_ID(x) (((x) & 0xFF) << 8) +#define G_SQ_VTX_WORD0_BUFFER_ID(x) (((x) >> 8) & 0xFF) +#define C_SQ_VTX_WORD0_BUFFER_ID 0xFFFF00FF +#define S_SQ_VTX_WORD0_SRC_GPR(x) (((x) & 0x7F) << 16) +#define G_SQ_VTX_WORD0_SRC_GPR(x) (((x) >> 16) & 0x7F) +#define C_SQ_VTX_WORD0_SRC_GPR 0xFF80FFFF +#define S_SQ_VTX_WORD0_SRC_REL(x) (((x) & 0x1) << 23) +#define G_SQ_VTX_WORD0_SRC_REL(x) (((x) >> 23) & 0x1) +#define C_SQ_VTX_WORD0_SRC_REL 0xFF7FFFFF +#define S_SQ_VTX_WORD0_SRC_SEL_X(x) (((x) & 0x3) << 24) +#define G_SQ_VTX_WORD0_SRC_SEL_X(x) (((x) >> 24) & 0x3) +#define C_SQ_VTX_WORD0_SRC_SEL_X 0xFCFFFFFF +#define S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(x) (((x) & 0x3F) << 26) +#define G_SQ_VTX_WORD0_MEGA_FETCH_COUNT(x) (((x) >> 26) & 0x3F) +#define C_SQ_VTX_WORD0_MEGA_FETCH_COUNT 0x03FFFFFF +#define P_SQ_VTX_WORD1 +#define S_SQ_VTX_WORD1_DST_SEL_X(x) (((x) & 0x7) << 9) +#define G_SQ_VTX_WORD1_DST_SEL_X(x) (((x) >> 9) & 0x7) +#define C_SQ_VTX_WORD1_DST_SEL_X 0xFFFFF1FF +#define S_SQ_VTX_WORD1_DST_SEL_Y(x) (((x) & 0x7) << 12) +#define G_SQ_VTX_WORD1_DST_SEL_Y(x) (((x) >> 12) & 0x7) +#define C_SQ_VTX_WORD1_DST_SEL_Y 0xFFFF8FFF +#define S_SQ_VTX_WORD1_DST_SEL_Z(x) (((x) & 0x7) << 15) +#define G_SQ_VTX_WORD1_DST_SEL_Z(x) (((x) >> 15) & 0x7) +#define C_SQ_VTX_WORD1_DST_SEL_Z 0xFFFC7FFF +#define S_SQ_VTX_WORD1_DST_SEL_W(x) (((x) & 0x7) << 18) +#define G_SQ_VTX_WORD1_DST_SEL_W(x) (((x) >> 18) & 0x7) +#define C_SQ_VTX_WORD1_DST_SEL_W 0xFFE3FFFF +#define S_SQ_VTX_WORD1_USE_CONST_FIELDS(x) (((x) & 0x1) << 21) +#define G_SQ_VTX_WORD1_USE_CONST_FIELDS(x) (((x) >> 21) & 0x1) +#define C_SQ_VTX_WORD1_USE_CONST_FIELDS 0xFFDFFFFF +#define S_SQ_VTX_WORD1_DATA_FORMAT(x) (((x) & 0x3F) << 22) +#define G_SQ_VTX_WORD1_DATA_FORMAT(x) (((x) >> 22) & 0x3F) +#define C_SQ_VTX_WORD1_DATA_FORMAT 0xF03FFFFF +#define S_SQ_VTX_WORD1_NUM_FORMAT_ALL(x) (((x) & 0x3) << 28) +#define G_SQ_VTX_WORD1_NUM_FORMAT_ALL(x) (((x) >> 28) & 0x3) +#define C_SQ_VTX_WORD1_NUM_FORMAT_ALL 0xCFFFFFFF +#define S_SQ_VTX_WORD1_FORMAT_COMP_ALL(x) (((x) & 0x1) << 30) +#define G_SQ_VTX_WORD1_FORMAT_COMP_ALL(x) (((x) >> 30) & 0x1) +#define C_SQ_VTX_WORD1_FORMAT_COMP_ALL 0xBFFFFFFF +#define S_SQ_VTX_WORD1_SRF_MODE_ALL(x) (((x) & 0x1) << 31) +#define G_SQ_VTX_WORD1_SRF_MODE_ALL(x) (((x) >> 31) & 0x1) +#define C_SQ_VTX_WORD1_SRF_MODE_ALL 0x7FFFFFFF +#define P_SQ_VTX_WORD1_GPR +#define S_SQ_VTX_WORD1_GPR_DST_GPR(x) (((x) & 0x7F) << 0) +#define G_SQ_VTX_WORD1_GPR_DST_GPR(x) (((x) >> 0) & 0x7F) +#define C_SQ_VTX_WORD1_GPR_DST_GPR 0xFFFFFF80 +#define S_SQ_VTX_WORD1_GPR_DST_REL(x) (((x) & 0x1) << 7) +#define G_SQ_VTX_WORD1_GPR_DST_REL(x) (((x) >> 7) & 0x1) +#define C_SQ_VTX_WORD1_GPR_DST_REL 0xFFFFFF7F +#define P_SQ_VTX_WORD1_SEM +#define S_SQ_VTX_WORD1_SEM_SEMANTIC_ID(x) (((x) & 0xFF) << 0) +#define G_SQ_VTX_WORD1_SEM_SEMANTIC_ID(x) (((x) >> 0) & 0xFF) +#define C_SQ_VTX_WORD1_SEM_SEMANTIC_ID 0xFFFFFF00 +#define P_SQ_VTX_WORD2 +#define S_SQ_VTX_WORD2_OFFSET(x) (((x) & 0xFFFF) << 0) +#define G_SQ_VTX_WORD2_OFFSET(x) (((x) >> 0) & 0xFFFF) +#define C_SQ_VTX_WORD2_OFFSET 0xFFFF0000 +#define S_SQ_VTX_WORD2_ENDIAN_SWAP(x) (((x) & 0x3) << 16) +#define G_SQ_VTX_WORD2_ENDIAN_SWAP(x) (((x) >> 16) & 0x3) +#define C_SQ_VTX_WORD2_ENDIAN_SWAP 0xFFFCFFFF +#define S_SQ_VTX_WORD2_CONST_BUF_NO_STRIDE(x) (((x) & 0x1) << 18) +#define G_SQ_VTX_WORD2_CONST_BUF_NO_STRIDE(x) (((x) >> 18) & 0x1) +#define C_SQ_VTX_WORD2_CONST_BUF_NO_STRIDE 0xFFFBFFFF +#define S_SQ_VTX_WORD2_MEGA_FETCH(x) (((x) & 0x1) << 19) +#define G_SQ_VTX_WORD2_MEGA_FETCH(x) (((x) >> 19) & 0x1) +#define C_SQ_VTX_WORD2_MEGA_FETCH 0xFFF7FFFF +#define S_SQ_VTX_WORD2_ALT_CONST(x) (((x) & 0x1) << 20) +#define G_SQ_VTX_WORD2_ALT_CONST(x) (((x) >> 20) & 0x1) +#define C_SQ_VTX_WORD2_ALT_CONST 0xFFEFFFFF +#define P_SQ_TEX_WORD0 +#define S_SQ_TEX_WORD0_TEX_INST(x) (((x) & 0x1F) << 0) +#define G_SQ_TEX_WORD0_TEX_INST(x) (((x) >> 0) & 0x1F) +#define C_SQ_TEX_WORD0_TEX_INST 0xFFFFFFE0 +#define S_SQ_TEX_WORD0_BC_FRAC_MODE(x) (((x) & 0x1) << 5) +#define G_SQ_TEX_WORD0_BC_FRAC_MODE(x) (((x) >> 5) & 0x1) +#define C_SQ_TEX_WORD0_BC_FRAC_MODE 0xFFFFFFDF +#define S_SQ_TEX_WORD0_FETCH_WHOLE_QUAD(x) (((x) & 0x1) << 7) +#define G_SQ_TEX_WORD0_FETCH_WHOLE_QUAD(x) (((x) >> 7) & 0x1) +#define C_SQ_TEX_WORD0_FETCH_WHOLE_QUAD 0xFFFFFF7F +#define S_SQ_TEX_WORD0_RESOURCE_ID(x) (((x) & 0xFF) << 8) +#define G_SQ_TEX_WORD0_RESOURCE_ID(x) (((x) >> 8) & 0xFF) +#define C_SQ_TEX_WORD0_RESOURCE_ID 0xFFFF00FF +#define S_SQ_TEX_WORD0_SRC_GPR(x) (((x) & 0x7F) << 16) +#define G_SQ_TEX_WORD0_SRC_GPR(x) (((x) >> 16) & 0x7F) +#define C_SQ_TEX_WORD0_SRC_GPR 0xFF80FFFF +#define S_SQ_TEX_WORD0_SRC_REL(x) (((x) & 0x1) << 23) +#define G_SQ_TEX_WORD0_SRC_REL(x) (((x) >> 23) & 0x1) +#define C_SQ_TEX_WORD0_SRC_REL 0xFF7FFFFF +#define S_SQ_TEX_WORD0_ALT_CONST(x) (((x) & 0x1) << 24) +#define G_SQ_TEX_WORD0_ALT_CONST(x) (((x) >> 24) & 0x1) +#define C_SQ_TEX_WORD0_ALT_CONST 0xFEFFFFFF +#define P_SQ_TEX_WORD1 +#define S_SQ_TEX_WORD1_DST_GPR(x) (((x) & 0x7F) << 0) +#define G_SQ_TEX_WORD1_DST_GPR(x) (((x) >> 0) & 0x7F) +#define C_SQ_TEX_WORD1_DST_GPR 0xFFFFFF80 +#define S_SQ_TEX_WORD1_DST_REL(x) (((x) & 0x1) << 7) +#define G_SQ_TEX_WORD1_DST_REL(x) (((x) >> 7) & 0x1) +#define C_SQ_TEX_WORD1_DST_REL 0xFFFFFF7F +#define S_SQ_TEX_WORD1_DST_SEL_X(x) (((x) & 0x7) << 9) +#define G_SQ_TEX_WORD1_DST_SEL_X(x) (((x) >> 9) & 0x7) +#define C_SQ_TEX_WORD1_DST_SEL_X 0xFFFFF1FF +#define S_SQ_TEX_WORD1_DST_SEL_Y(x) (((x) & 0x7) << 12) +#define G_SQ_TEX_WORD1_DST_SEL_Y(x) (((x) >> 12) & 0x7) +#define C_SQ_TEX_WORD1_DST_SEL_Y 0xFFFF8FFF +#define S_SQ_TEX_WORD1_DST_SEL_Z(x) (((x) & 0x7) << 15) +#define G_SQ_TEX_WORD1_DST_SEL_Z(x) (((x) >> 15) & 0x7) +#define C_SQ_TEX_WORD1_DST_SEL_Z 0xFFFC7FFF +#define S_SQ_TEX_WORD1_DST_SEL_W(x) (((x) & 0x7) << 18) +#define G_SQ_TEX_WORD1_DST_SEL_W(x) (((x) >> 18) & 0x7) +#define C_SQ_TEX_WORD1_DST_SEL_W 0xFFE3FFFF +#define S_SQ_TEX_WORD1_LOD_BIAS(x) (((x) & 0x7F) << 21) +#define G_SQ_TEX_WORD1_LOD_BIAS(x) (((x) >> 21) & 0x7F) +#define C_SQ_TEX_WORD1_LOD_BIAS 0xF01FFFFF +#define S_SQ_TEX_WORD1_COORD_TYPE_X(x) (((x) & 0x1) << 28) +#define G_SQ_TEX_WORD1_COORD_TYPE_X(x) (((x) >> 28) & 0x1) +#define C_SQ_TEX_WORD1_COORD_TYPE_X 0xEFFFFFFF +#define S_SQ_TEX_WORD1_COORD_TYPE_Y(x) (((x) & 0x1) << 29) +#define G_SQ_TEX_WORD1_COORD_TYPE_Y(x) (((x) >> 29) & 0x1) +#define C_SQ_TEX_WORD1_COORD_TYPE_Y 0xDFFFFFFF +#define S_SQ_TEX_WORD1_COORD_TYPE_Z(x) (((x) & 0x1) << 30) +#define G_SQ_TEX_WORD1_COORD_TYPE_Z(x) (((x) >> 30) & 0x1) +#define C_SQ_TEX_WORD1_COORD_TYPE_Z 0xBFFFFFFF +#define S_SQ_TEX_WORD1_COORD_TYPE_W(x) (((x) & 0x1) << 31) +#define G_SQ_TEX_WORD1_COORD_TYPE_W(x) (((x) >> 31) & 0x1) +#define C_SQ_TEX_WORD1_COORD_TYPE_W 0x7FFFFFFF +#define P_SQ_TEX_WORD2 +#define S_SQ_TEX_WORD2_OFFSET_X(x) (((x) & 0x1F) << 0) +#define G_SQ_TEX_WORD2_OFFSET_X(x) (((x) >> 0) & 0x1F) +#define C_SQ_TEX_WORD2_OFFSET_X 0xFFFFFFE0 +#define S_SQ_TEX_WORD2_OFFSET_Y(x) (((x) & 0x1F) << 5) +#define G_SQ_TEX_WORD2_OFFSET_Y(x) (((x) >> 5) & 0x1F) +#define C_SQ_TEX_WORD2_OFFSET_Y 0xFFFFFC1F +#define S_SQ_TEX_WORD2_OFFSET_Z(x) (((x) & 0x1F) << 10) +#define G_SQ_TEX_WORD2_OFFSET_Z(x) (((x) >> 10) & 0x1F) +#define C_SQ_TEX_WORD2_OFFSET_Z 0xFFFF83FF +#define S_SQ_TEX_WORD2_SAMPLER_ID(x) (((x) & 0x1F) << 15) +#define G_SQ_TEX_WORD2_SAMPLER_ID(x) (((x) >> 15) & 0x1F) +#define C_SQ_TEX_WORD2_SAMPLER_ID 0xFFF07FFF +#define S_SQ_TEX_WORD2_SRC_SEL_X(x) (((x) & 0x7) << 20) +#define G_SQ_TEX_WORD2_SRC_SEL_X(x) (((x) >> 20) & 0x7) +#define C_SQ_TEX_WORD2_SRC_SEL_X 0xFF8FFFFF +#define S_SQ_TEX_WORD2_SRC_SEL_Y(x) (((x) & 0x7) << 23) +#define G_SQ_TEX_WORD2_SRC_SEL_Y(x) (((x) >> 23) & 0x7) +#define C_SQ_TEX_WORD2_SRC_SEL_Y 0xFC7FFFFF +#define S_SQ_TEX_WORD2_SRC_SEL_Z(x) (((x) & 0x7) << 26) +#define G_SQ_TEX_WORD2_SRC_SEL_Z(x) (((x) >> 26) & 0x7) +#define C_SQ_TEX_WORD2_SRC_SEL_Z 0xE3FFFFFF +#define S_SQ_TEX_WORD2_SRC_SEL_W(x) (((x) & 0x7) << 29) +#define G_SQ_TEX_WORD2_SRC_SEL_W(x) (((x) >> 29) & 0x7) +#define C_SQ_TEX_WORD2_SRC_SEL_W 0x1FFFFFFF +#define P_SQ_ALU_WORD1_OP2_V2 +#define S_SQ_ALU_WORD1_OP2_V2_SRC0_ABS(x) (((x) & 0x1) << 0) +#define G_SQ_ALU_WORD1_OP2_V2_SRC0_ABS(x) (((x) >> 0) & 0x1) +#define C_SQ_ALU_WORD1_OP2_V2_SRC0_ABS 0xFFFFFFFE +#define S_SQ_ALU_WORD1_OP2_V2_SRC1_ABS(x) (((x) & 0x1) << 1) +#define G_SQ_ALU_WORD1_OP2_V2_SRC1_ABS(x) (((x) >> 1) & 0x1) +#define C_SQ_ALU_WORD1_OP2_V2_SRC1_ABS 0xFFFFFFFD +#define S_SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK(x) (((x) & 0x1) << 2) +#define G_SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK(x) (((x) >> 2) & 0x1) +#define C_SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK 0xFFFFFFFB +#define S_SQ_ALU_WORD1_OP2_V2_UPDATE_PRED(x) (((x) & 0x1) << 3) +#define G_SQ_ALU_WORD1_OP2_V2_UPDATE_PRED(x) (((x) >> 3) & 0x1) +#define C_SQ_ALU_WORD1_OP2_V2_UPDATE_PRED 0xFFFFFFF7 +#define S_SQ_ALU_WORD1_OP2_V2_WRITE_MASK(x) (((x) & 0x1) << 4) +#define G_SQ_ALU_WORD1_OP2_V2_WRITE_MASK(x) (((x) >> 4) & 0x1) +#define C_SQ_ALU_WORD1_OP2_V2_WRITE_MASK 0xFFFFFFEF +#define S_SQ_ALU_WORD1_OP2_V2_OMOD(x) (((x) & 0x3) << 5) +#define G_SQ_ALU_WORD1_OP2_V2_OMOD(x) (((x) >> 5) & 0x3) +#define C_SQ_ALU_WORD1_OP2_V2_OMOD 0xFFFFFF9F +#define S_SQ_ALU_WORD1_OP2_V2_ALU_INST(x) (((x) & 0x7FF) << 7) +#define G_SQ_ALU_WORD1_OP2_V2_ALU_INST(x) (((x) >> 7) & 0x7FF) +#define C_SQ_ALU_WORD1_OP2_V2_ALU_INST 0xFFFC007F + +#endif diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c new file mode 100644 index 00000000000..ff574b82855 --- /dev/null +++ b/src/gallium/drivers/r600/r600_state.c @@ -0,0 +1,484 @@ +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jerome Glisse + */ +#include <stdio.h> +#include <util/u_inlines.h> +#include <util/u_format.h> +#include <util/u_memory.h> +#include "r600_screen.h" +#include "r600_texture.h" +#include "r600_context.h" +#include "r600d.h" + + +static void r600_delete_state(struct pipe_context *ctx, void *state) +{ + struct radeon_state *rstate = state; + + radeon_state_decref(rstate); +} + +static void *r600_create_blend_state(struct pipe_context *ctx, + const struct pipe_blend_state *state) +{ + struct r600_screen *rscreen = (struct r600_screen*)ctx->screen; + struct radeon_state *rstate; + + rstate = radeon_state(rscreen->rw, R600_BLEND_TYPE, R600_BLEND); + if (rstate == NULL) + return NULL; + rstate->states[R600_BLEND__CB_BLEND_RED] = 0x00000000; + rstate->states[R600_BLEND__CB_BLEND_GREEN] = 0x00000000; + rstate->states[R600_BLEND__CB_BLEND_BLUE] = 0x00000000; + rstate->states[R600_BLEND__CB_BLEND_ALPHA] = 0x00000000; + rstate->states[R600_BLEND__CB_BLEND0_CONTROL] = 0x00010001; + rstate->states[R600_BLEND__CB_BLEND1_CONTROL] = 0x00000000; + rstate->states[R600_BLEND__CB_BLEND2_CONTROL] = 0x00000000; + rstate->states[R600_BLEND__CB_BLEND3_CONTROL] = 0x00000000; + rstate->states[R600_BLEND__CB_BLEND4_CONTROL] = 0x00000000; + rstate->states[R600_BLEND__CB_BLEND5_CONTROL] = 0x00000000; + rstate->states[R600_BLEND__CB_BLEND6_CONTROL] = 0x00000000; + rstate->states[R600_BLEND__CB_BLEND7_CONTROL] = 0x00000000; + rstate->states[R600_BLEND__CB_BLEND_CONTROL] = 0x00000000; + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; +} + +static void r600_bind_blend_state(struct pipe_context *ctx, void *state) +{ + struct r600_context *rctx = (struct r600_context*)ctx; + radeon_draw_set(rctx->draw, state); +} + +static void r600_set_blend_color(struct pipe_context *ctx, + const struct pipe_blend_color *color) +{ +} + +static void r600_set_clip_state(struct pipe_context *ctx, + const struct pipe_clip_state *state) +{ +} + +static void r600_set_framebuffer_state(struct pipe_context *ctx, + const struct pipe_framebuffer_state *state) +{ + struct r600_screen *rscreen = (struct r600_screen*)ctx->screen; + struct r600_context *rctx = (struct r600_context*)ctx; + struct r600_texture *rtex; + struct r600_buffer *rbuffer; + struct radeon_state *rstate; + unsigned level = state->cbufs[0]->level; + unsigned pitch, slice; + + rstate = radeon_state(rscreen->rw, R600_CB0_TYPE, R600_CB0); + if (rstate == NULL) + return; + rtex = (struct r600_texture*)state->cbufs[0]->texture; + rbuffer = (struct r600_buffer*)rtex->buffer; + rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + rstate->bo[1] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + rstate->bo[2] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; + rstate->placement[2] = RADEON_GEM_DOMAIN_GTT; + rstate->placement[4] = RADEON_GEM_DOMAIN_GTT; + rstate->nbo = 3; + pitch = rtex->pitch[level] / 8 - 1; + slice = rtex->pitch[level] * state->cbufs[0]->height / 64 - 1; + rstate->states[R600_CB0__CB_COLOR0_BASE] = 0x00000000; + rstate->states[R600_CB0__CB_COLOR0_INFO] = 0x08110068; + rstate->states[R600_CB0__CB_COLOR0_SIZE] = S_028060_PITCH_TILE_MAX(pitch) | + S_028060_SLICE_TILE_MAX(slice); + rstate->states[R600_CB0__CB_COLOR0_VIEW] = 0x00000000; + rstate->states[R600_CB0__CB_COLOR0_FRAG] = 0x00000000; + rstate->states[R600_CB0__CB_COLOR0_TILE] = 0x00000000; + rstate->states[R600_CB0__CB_COLOR0_MASK] = 0x00000000; + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return; + } + radeon_draw_set_new(rctx->draw, rstate); + rctx->db = radeon_state_decref(rctx->db); + rctx->db = radeon_state(rscreen->rw, R600_DB_TYPE, R600_DB); + rctx->db->bo[0] = radeon_bo_incref(rscreen->rw, rstate->bo[0]); + rctx->db->nbo = 1; + rctx->db->placement[0] = RADEON_GEM_DOMAIN_GTT; + rctx->fb_state = *state; +} + +static void *r600_create_fs_state(struct pipe_context *ctx, + const struct pipe_shader_state *shader) +{ + return r600_pipe_shader_create(ctx, C_PROGRAM_TYPE_FS, shader->tokens); +} + +static void r600_bind_fs_state(struct pipe_context *ctx, void *state) +{ + struct r600_context *rctx = (struct r600_context*)ctx; + + rctx->ps_shader = state; +} + +static void *r600_create_vs_state(struct pipe_context *ctx, + const struct pipe_shader_state *shader) +{ + return r600_pipe_shader_create(ctx, C_PROGRAM_TYPE_VS, shader->tokens); +} + +static void r600_bind_vs_state(struct pipe_context *ctx, void *state) +{ + struct r600_context *rctx = (struct r600_context*)ctx; + + rctx->vs_shader = state; +} + +static void r600_set_polygon_stipple(struct pipe_context *ctx, + const struct pipe_poly_stipple *state) +{ +} + +static void *r600_create_rs_state(struct pipe_context *ctx, + const struct pipe_rasterizer_state *state) +{ + struct r600_screen *rscreen = (struct r600_screen*)ctx->screen; + struct r600_context *rctx = (struct r600_context*)ctx; + struct radeon_state *rstate; + + rctx->flat_shade = state->flatshade; + rstate = radeon_state(rscreen->rw, R600_RASTERIZER_TYPE, R600_RASTERIZER); + if (rstate == NULL) + return NULL; + rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] = 0x00000001; + rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = 0x00000000; + rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = 0x00080000; + rstate->states[R600_RASTERIZER__PA_CL_VS_OUT_CNTL] = 0x00000000; + rstate->states[R600_RASTERIZER__PA_CL_NANINF_CNTL] = 0x00000000; + rstate->states[R600_RASTERIZER__PA_SU_POINT_SIZE] = 0x00080008; + rstate->states[R600_RASTERIZER__PA_SU_POINT_MINMAX] = 0x00000000; + rstate->states[R600_RASTERIZER__PA_SU_LINE_CNTL] = 0x00000008; + rstate->states[R600_RASTERIZER__PA_SC_LINE_STIPPLE] = 0x00000005; + rstate->states[R600_RASTERIZER__PA_SC_MPASS_PS_CNTL] = 0x00000000; + rstate->states[R600_RASTERIZER__PA_SC_LINE_CNTL] = 0x00000400; + rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_CLIP_ADJ] = 0x3F800000; + rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_DISC_ADJ] = 0x3F800000; + rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_CLIP_ADJ] = 0x3F800000; + rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_DISC_ADJ] = 0x3F800000; + rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_DB_FMT_CNTL] = 0x00000000; + rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_CLAMP] = 0x00000000; + rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_SCALE] = 0x00000000; + rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_OFFSET] = 0x00000000; + rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_SCALE] = 0x00000000; + rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_OFFSET] = 0x00000000; + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; +} + +static void r600_bind_rs_state(struct pipe_context *ctx, void *state) +{ + struct r600_context *rctx = (struct r600_context*)ctx; + radeon_draw_set(rctx->draw, state); +} + +static void *r600_create_sampler_state(struct pipe_context *ctx, + const struct pipe_sampler_state *state) +{ + return NULL; +} + +static void r600_bind_sampler_states(struct pipe_context *ctx, + unsigned count, void **states) +{ +} + +static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *ctx, + struct pipe_resource *texture, + const struct pipe_sampler_view *templ) +{ + struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view); + + *view = *templ; + return view; +} + +static void r600_sampler_view_destroy(struct pipe_context *ctx, + struct pipe_sampler_view *view) +{ + FREE(view); +} + +static void r600_set_fragment_sampler_views(struct pipe_context *ctx, + unsigned count, + struct pipe_sampler_view **views) +{ +} + +static void r600_set_vertex_sampler_views(struct pipe_context *ctx, + unsigned count, + struct pipe_sampler_view **views) +{ +} + +static void r600_set_scissor_state(struct pipe_context *ctx, + const struct pipe_scissor_state *state) +{ + struct r600_screen *rscreen = (struct r600_screen*)ctx->screen; + struct r600_context *rctx = (struct r600_context*)ctx; + struct radeon_state *rstate; + u32 tl, br; + + tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) | S_028240_WINDOW_OFFSET_DISABLE(1); + br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); + rstate = radeon_state(rscreen->rw, R600_SCISSOR_TYPE, R600_SCISSOR); + if (rstate == NULL) + return; + rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_TL] = tl; + rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_BR] = br; + rstate->states[R600_SCISSOR__PA_SC_WINDOW_OFFSET] = 0x00000000; + rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_TL] = tl; + rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_BR] = br; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_RULE] = 0x0000FFFF; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_TL] = tl; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_BR] = br; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_TL] = tl; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_BR] = br; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_2_TL] = tl; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_2_BR] = br; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_3_TL] = tl; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_3_BR] = br; + rstate->states[R600_SCISSOR__PA_SC_EDGERULE] = 0xAAAAAAAA; + rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_TL] = tl; + rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_BR] = br; + rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL] = tl; + rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR] = br; + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return; + } + radeon_draw_set_new(rctx->draw, rstate); +} + +static void r600_set_viewport_state(struct pipe_context *ctx, + const struct pipe_viewport_state *state) +{ + struct r600_screen *rscreen = (struct r600_screen*)ctx->screen; + struct r600_context *rctx = (struct r600_context*)ctx; + struct radeon_state *rstate; + + rstate = radeon_state(rscreen->rw, R600_VIEWPORT_TYPE, R600_VIEWPORT); + if (rstate == NULL) + return; + rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMIN_0] = 0x00000000; + rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000; + rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = r600_float_to_u32(state->scale[0]); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_YSCALE_0] = r600_float_to_u32(state->scale[1]); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = r600_float_to_u32(state->scale[2]); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = r600_float_to_u32(state->translate[0]); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = r600_float_to_u32(state->translate[1]); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = r600_float_to_u32(state->translate[2]); + rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F; + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return; + } + radeon_draw_set_new(rctx->draw, rstate); + rctx->viewport = *state; +} + +static void r600_set_vertex_buffers(struct pipe_context *ctx, + unsigned count, + const struct pipe_vertex_buffer *buffers) +{ + struct r600_context *rctx = (struct r600_context*)ctx; + + memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); + rctx->nvertex_buffer = count; +} + + +static void *r600_create_vertex_elements_state(struct pipe_context *ctx, + unsigned count, + const struct pipe_vertex_element *elements) +{ + struct r600_vertex_elements_state *v = CALLOC_STRUCT(r600_vertex_elements_state); + + assert(count < 32); + v->count = count; + memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element)); + return v; +} + +static void r600_bind_vertex_elements_state(struct pipe_context *ctx, void *state) +{ + struct r600_context *rctx = (struct r600_context*)ctx; + struct r600_vertex_elements_state *v = (struct r600_vertex_elements_state*)state; + + rctx->vertex_elements = v; +} + +static void r600_delete_vertex_elements_state(struct pipe_context *ctx, void *state) +{ + FREE(state); +} + +static void *r600_create_dsa_state(struct pipe_context *ctx, + const struct pipe_depth_stencil_alpha_state *state) +{ + struct r600_screen *rscreen = (struct r600_screen*)ctx->screen; + struct radeon_state *rstate; + + rstate = radeon_state(rscreen->rw, R600_DSA_TYPE, R600_DSA); + if (rstate == NULL) + return NULL; + rstate->states[R600_DSA__DB_STENCIL_CLEAR] = 0x00000000; + rstate->states[R600_DSA__DB_DEPTH_CLEAR] = 0x3F800000; + rstate->states[R600_DSA__SX_ALPHA_TEST_CONTROL] = 0x00000000; + rstate->states[R600_DSA__DB_STENCILREFMASK] = 0xFFFFFF00; + rstate->states[R600_DSA__DB_STENCILREFMASK_BF] = 0xFFFFFF00; + rstate->states[R600_DSA__SX_ALPHA_REF] = 0x00000000; + rstate->states[R600_DSA__SPI_FOG_FUNC_SCALE] = 0x00000000; + rstate->states[R600_DSA__SPI_FOG_FUNC_BIAS] = 0x00000000; + rstate->states[R600_DSA__SPI_FOG_CNTL] = 0x00000000; + rstate->states[R600_DSA__DB_DEPTH_CONTROL] = 0x00700700; + rstate->states[R600_DSA__DB_SHADER_CONTROL] = 0x00000210; + rstate->states[R600_DSA__DB_RENDER_CONTROL] = 0x00000060; + rstate->states[R600_DSA__DB_RENDER_OVERRIDE] = 0x0000002A; + rstate->states[R600_DSA__DB_SRESULTS_COMPARE_STATE1] = 0x00000000; + rstate->states[R600_DSA__DB_PRELOAD_CONTROL] = 0x00000000; + rstate->states[R600_DSA__DB_ALPHA_TO_MASK] = 0x0000AA00; + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; +} + +static void r600_bind_dsa_state(struct pipe_context *ctx, void *state) +{ + struct r600_context *rctx = (struct r600_context*)ctx; + radeon_draw_set(rctx->draw, state); +} + +static void r600_set_constant_buffer(struct pipe_context *ctx, + uint shader, uint index, + struct pipe_resource *buffer) +{ + struct r600_screen *rscreen = (struct r600_screen*)ctx->screen; + struct r600_context *rctx = (struct r600_context*)ctx; + unsigned nconstant = 0, i, type, id; + struct radeon_state *rstate; + struct pipe_transfer *transfer; + u32 *ptr; + + switch (shader) { + case PIPE_SHADER_VERTEX: + id = R600_VS_CONSTANT; + type = R600_VS_CONSTANT_TYPE; + break; + case PIPE_SHADER_FRAGMENT: + id = R600_PS_CONSTANT; + type = R600_PS_CONSTANT_TYPE; + break; + default: + fprintf(stderr, "%s:%d unsupported %d\n", __func__, __LINE__, shader); + return; + } + if (buffer && buffer->width0 > 0) { + nconstant = buffer->width0 / 16; + ptr = pipe_buffer_map(ctx, buffer, PIPE_TRANSFER_READ, &transfer); + if (ptr == NULL) + return; + for (i = 0; i < nconstant; i++) { + rstate = radeon_state(rscreen->rw, type, id + i); + if (rstate == NULL) + return; + rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT0_0] = ptr[i * 4 + 0]; + rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT1_0] = ptr[i * 4 + 1]; + rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT2_0] = ptr[i * 4 + 2]; + rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT3_0] = ptr[i * 4 + 3]; + if (radeon_state_pm4(rstate)) + return; + if (radeon_draw_set_new(rctx->draw, rstate)) + return; + } + pipe_buffer_unmap(ctx, buffer, transfer); + } +} + +static void r600_set_stencil_ref(struct pipe_context *ctx, + const struct pipe_stencil_ref *sr) +{ + struct r600_context *rctx = (struct r600_context*)ctx; + rctx->stencil_ref = *sr; +} + +static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) +{ +} + +void r600_init_state_functions(struct r600_context *rctx) +{ + rctx->context.set_sample_mask = r600_set_sample_mask; + rctx->context.create_blend_state = r600_create_blend_state; + rctx->context.bind_blend_state = r600_bind_blend_state; + rctx->context.delete_blend_state = r600_delete_state; + rctx->context.set_blend_color = r600_set_blend_color; + rctx->context.set_clip_state = r600_set_clip_state; + rctx->context.set_constant_buffer = r600_set_constant_buffer; + rctx->context.create_depth_stencil_alpha_state = r600_create_dsa_state; + rctx->context.bind_depth_stencil_alpha_state = r600_bind_dsa_state; + rctx->context.delete_depth_stencil_alpha_state = r600_delete_state; + rctx->context.set_framebuffer_state = r600_set_framebuffer_state; + rctx->context.create_fs_state = r600_create_fs_state; + rctx->context.bind_fs_state = r600_bind_fs_state; + rctx->context.delete_fs_state = r600_delete_state; + rctx->context.set_polygon_stipple = r600_set_polygon_stipple; + rctx->context.create_rasterizer_state = r600_create_rs_state; + rctx->context.bind_rasterizer_state = r600_bind_rs_state; + rctx->context.delete_rasterizer_state = r600_delete_state; + rctx->context.create_sampler_state = r600_create_sampler_state; + rctx->context.bind_fragment_sampler_states = r600_bind_sampler_states; + rctx->context.bind_vertex_sampler_states = r600_bind_sampler_states; + rctx->context.delete_sampler_state = r600_delete_state; + rctx->context.create_sampler_view = r600_create_sampler_view; + rctx->context.sampler_view_destroy = r600_sampler_view_destroy; + rctx->context.set_fragment_sampler_views = r600_set_fragment_sampler_views; + rctx->context.set_vertex_sampler_views = r600_set_vertex_sampler_views; + rctx->context.set_scissor_state = r600_set_scissor_state; + rctx->context.set_viewport_state = r600_set_viewport_state; + rctx->context.set_vertex_buffers = r600_set_vertex_buffers; + rctx->context.create_vertex_elements_state = r600_create_vertex_elements_state; + rctx->context.bind_vertex_elements_state = r600_bind_vertex_elements_state; + rctx->context.delete_vertex_elements_state = r600_delete_vertex_elements_state; + rctx->context.create_vs_state = r600_create_vs_state; + rctx->context.bind_vs_state = r600_bind_vs_state; + rctx->context.delete_vs_state = r600_delete_state; + rctx->context.set_stencil_ref = r600_set_stencil_ref; +} diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c new file mode 100644 index 00000000000..7d94bbe510f --- /dev/null +++ b/src/gallium/drivers/r600/r600_texture.c @@ -0,0 +1,231 @@ +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jerome Glisse + * Corbin Simpson + */ +#include <pipe/p_screen.h> +#include <util/u_format.h> +#include <util/u_math.h> +#include <util/u_inlines.h> +#include <util/u_memory.h> +#include "state_tracker/drm_api.h" +#include "r600_screen.h" +#include "r600_texture.h" + +extern struct u_resource_vtbl r600_texture_vtbl; + +unsigned long r600_texture_get_offset(struct r600_texture *rtex, unsigned level, unsigned zslice, unsigned face) +{ + unsigned long offset = rtex->offset[level]; + + switch (rtex->b.b.target) { + case PIPE_TEXTURE_3D: + assert(face == 0); + return offset + zslice * rtex->layer_size[level]; + case PIPE_TEXTURE_CUBE: + assert(zslice == 0); + return offset + face * rtex->layer_size[level]; + default: + assert(zslice == 0 && face == 0); + return offset; + } +} + +static void r600_setup_miptree(struct r600_screen *rscreen, struct r600_texture *rtex) +{ + struct pipe_resource *ptex = &rtex->b.b; + unsigned long w, h, stride, size, layer_size, i, offset; + + for (i = 0, offset = 0; i <= ptex->last_level; i++) { + w = u_minify(ptex->width0, i); + h = u_minify(ptex->height0, i); + stride = align(util_format_get_stride(ptex->format, w), 32); + layer_size = stride * h; + if (ptex->target == PIPE_TEXTURE_CUBE) + size = layer_size * 6; + else + size = layer_size * u_minify(ptex->depth0, i); + rtex->offset[i] = offset; + rtex->layer_size[i] = layer_size; + rtex->pitch[i] = stride / util_format_get_blocksize(ptex->format); + rtex->stride[i] = stride; + offset += align(size, 32); + } + rtex->size = offset; +} + +struct pipe_resource *r600_texture_create(struct pipe_screen *screen, + const struct pipe_resource *templ) +{ + struct r600_texture *rtex = CALLOC_STRUCT(r600_texture); + struct r600_screen *rscreen = r600_screen(screen); + struct pipe_resource templ_buf; + + if (!rtex) { + return NULL; + } + rtex->b.b = *templ; + rtex->b.vtbl = &r600_texture_vtbl; + pipe_reference_init(&rtex->b.b.reference, 1); + rtex->b.b.screen = screen; + r600_setup_miptree(rscreen, rtex); + + memset(&templ_buf, 0, sizeof(struct pipe_resource)); + templ_buf.target = PIPE_BUFFER; + templ_buf.format = PIPE_FORMAT_R8_UNORM; + templ_buf.usage = templ->usage; + templ_buf.bind = templ->bind; + templ_buf.width0 = rtex->size; + templ_buf.height0 = 1; + templ_buf.depth0 = 1; + + rtex->buffer = screen->resource_create(screen, &templ_buf); + if (!rtex->buffer) { + FREE(rtex); + return NULL; + } + return &rtex->b.b; +} + +static void r600_texture_destroy(struct pipe_screen *screen, + struct pipe_resource *ptex) +{ + struct r600_texture *rtex = (struct r600_texture*)ptex; + + FREE(rtex); +} + +static struct pipe_surface *r600_get_tex_surface(struct pipe_screen *screen, + struct pipe_resource *texture, + unsigned face, unsigned level, + unsigned zslice, unsigned flags) +{ + struct r600_texture *rtex = (struct r600_texture*)texture; + struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface); + unsigned long offset; + + if (surface == NULL) + return NULL; + offset = r600_texture_get_offset(rtex, level, zslice, face); + pipe_reference_init(&surface->reference, 1); + pipe_resource_reference(&surface->texture, texture); + surface->format = texture->format; + surface->width = u_minify(texture->width0, level); + surface->height = u_minify(texture->height0, level); + surface->offset = offset; + surface->usage = flags; + surface->zslice = zslice; + surface->texture = texture; + surface->face = face; + surface->level = level; + return surface; +} + +static void r600_tex_surface_destroy(struct pipe_surface *surface) +{ + pipe_resource_reference(&surface->texture, NULL); + FREE(surface); +} + +struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, + const struct pipe_resource *base, + struct winsys_handle *whandle) +{ + struct pipe_resource *buffer; + struct r600_texture *rtex; + + buffer = r600_buffer_from_handle(screen, whandle); + if (buffer == NULL) { + return NULL; + } + + /* Support only 2D textures without mipmaps */ + if (base->target != PIPE_TEXTURE_2D || base->depth0 != 1 || base->last_level != 0) + return NULL; + + rtex = CALLOC_STRUCT(r600_texture); + if (rtex == NULL) + return NULL; + + /* one ref already taken */ + rtex->buffer = buffer; + + rtex->b.b = *base; + rtex->b.vtbl = &r600_texture_vtbl; + pipe_reference_init(&rtex->b.b.reference, 1); + rtex->b.b.screen = screen; + rtex->stride_override = whandle->stride; + rtex->pitch[0] = whandle->stride / util_format_get_blocksize(base->format); + rtex->stride[0] = whandle->stride; + rtex->offset[0] = 0; + rtex->size = align(rtex->stride[0] * base->height0, 32); + + return &rtex->b.b; +} + +static boolean r600_texture_get_handle(struct pipe_screen* screen, + struct pipe_resource *texture, + struct winsys_handle *whandle) +{ + struct r600_screen *rscreen = r600_screen(screen); + struct r600_texture* rtex = (struct r600_texture*)texture; + + if (!rtex) { + return FALSE; + } + + whandle->stride = rtex->stride[0]; + + r600_buffer_get_handle(rscreen->rw, rtex->buffer, whandle); + + return TRUE; +} + +static unsigned int r600_texture_is_referenced(struct pipe_context *context, + struct pipe_resource *texture, + unsigned face, unsigned level) +{ + struct r600_texture *rtex = (struct r600_texture*)texture; + + return r600_buffer_is_referenced_by_cs(context, rtex->buffer, face, level); +} + +struct u_resource_vtbl r600_texture_vtbl = +{ + r600_texture_get_handle, /* get_handle */ + r600_texture_destroy, /* resource_destroy */ + r600_texture_is_referenced, /* is_resource_referenced */ + r600_texture_get_transfer, /* get_transfer */ + r600_texture_transfer_destroy, /* transfer_destroy */ + r600_texture_transfer_map, /* transfer_map */ + u_default_transfer_flush_region,/* transfer_flush_region */ + r600_texture_transfer_unmap, /* transfer_unmap */ + u_default_transfer_inline_write /* transfer_inline_write */ +}; + +void r600_init_screen_texture_functions(struct pipe_screen *screen) +{ + screen->get_tex_surface = r600_get_tex_surface; + screen->tex_surface_destroy = r600_tex_surface_destroy; +} diff --git a/src/gallium/drivers/r600/r600_texture.h b/src/gallium/drivers/r600/r600_texture.h new file mode 100644 index 00000000000..9bc08d6b040 --- /dev/null +++ b/src/gallium/drivers/r600/r600_texture.h @@ -0,0 +1,53 @@ +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef R600_TEXTURE_H +#define R600_TEXTURE_H + +#include <pipe/p_state.h> + +struct r600_texture { + struct u_resource b; + unsigned long offset[PIPE_MAX_TEXTURE_LEVELS]; + unsigned long pitch[PIPE_MAX_TEXTURE_LEVELS]; + unsigned long stride[PIPE_MAX_TEXTURE_LEVELS]; + unsigned long layer_size[PIPE_MAX_TEXTURE_LEVELS]; + unsigned long stride_override; + unsigned long size; + struct pipe_resource *buffer; +}; + +struct pipe_resource *r600_texture_create(struct pipe_screen *screen, + const struct pipe_resource *templ); +unsigned long r600_texture_get_offset(struct r600_texture *rtex, unsigned level, unsigned zslice, unsigned face); +struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, + const struct pipe_resource *base, + struct winsys_handle *whandle); +void r600_init_screen_texture_functions(struct pipe_screen *screen); + +/* This should be implemented by winsys. */ +boolean r600_buffer_get_handle(struct radeon *rw, + struct pipe_resource *buf, + struct winsys_handle *whandle); + + +#endif diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h new file mode 100644 index 00000000000..d2c7248ff2b --- /dev/null +++ b/src/gallium/drivers/r600/r600d.h @@ -0,0 +1,677 @@ +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jerome Glisse + */ +#ifndef R600D_H +#define R600D_H + +#define PKT3_NOP 0x10 +#define PKT3_INDIRECT_BUFFER_END 0x17 +#define PKT3_SET_PREDICATION 0x20 +#define PKT3_REG_RMW 0x21 +#define PKT3_COND_EXEC 0x22 +#define PKT3_PRED_EXEC 0x23 +#define PKT3_START_3D_CMDBUF 0x24 +#define PKT3_DRAW_INDEX_2 0x27 +#define PKT3_CONTEXT_CONTROL 0x28 +#define PKT3_DRAW_INDEX_IMMD_BE 0x29 +#define PKT3_INDEX_TYPE 0x2A +#define PKT3_DRAW_INDEX 0x2B +#define PKT3_DRAW_INDEX_AUTO 0x2D +#define PKT3_DRAW_INDEX_IMMD 0x2E +#define PKT3_NUM_INSTANCES 0x2F +#define PKT3_STRMOUT_BUFFER_UPDATE 0x34 +#define PKT3_INDIRECT_BUFFER_MP 0x38 +#define PKT3_MEM_SEMAPHORE 0x39 +#define PKT3_MPEG_INDEX 0x3A +#define PKT3_WAIT_REG_MEM 0x3C +#define PKT3_MEM_WRITE 0x3D +#define PKT3_INDIRECT_BUFFER 0x32 +#define PKT3_CP_INTERRUPT 0x40 +#define PKT3_SURFACE_SYNC 0x43 +#define PKT3_ME_INITIALIZE 0x44 +#define PKT3_COND_WRITE 0x45 +#define PKT3_EVENT_WRITE 0x46 +#define PKT3_EVENT_WRITE_EOP 0x47 +#define PKT3_ONE_REG_WRITE 0x57 +#define PKT3_SET_CONFIG_REG 0x68 +#define PKT3_SET_CONTEXT_REG 0x69 +#define PKT3_SET_ALU_CONST 0x6A +#define PKT3_SET_BOOL_CONST 0x6B +#define PKT3_SET_LOOP_CONST 0x6C +#define PKT3_SET_RESOURCE 0x6D +#define PKT3_SET_SAMPLER 0x6E +#define PKT3_SET_CTL_CONST 0x6F +#define PKT3_SURFACE_BASE_UPDATE 0x73 + +#define PKT_TYPE_S(x) (((x) & 0x3) << 30) +#define PKT_TYPE_G(x) (((x) >> 30) & 0x3) +#define PKT_TYPE_C 0x3FFFFFFF +#define PKT_COUNT_S(x) (((x) & 0x3FFF) << 16) +#define PKT_COUNT_G(x) (((x) >> 16) & 0x3FFF) +#define PKT_COUNT_C 0xC000FFFF +#define PKT0_BASE_INDEX_S(x) (((x) & 0xFFFF) << 0) +#define PKT0_BASE_INDEX_G(x) (((x) >> 0) & 0xFFFF) +#define PKT0_BASE_INDEX_C 0xFFFF0000 +#define PKT3_IT_OPCODE_S(x) (((x) & 0xFF) << 8) +#define PKT3_IT_OPCODE_G(x) (((x) >> 8) & 0xFF) +#define PKT3_IT_OPCODE_C 0xFFFF00FF +#define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count)) +#define PKT3(op, count) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count)) + +/* Registers */ +#define R_0280A0_CB_COLOR0_INFO 0x0280A0 +#define S_0280A0_ENDIAN(x) (((x) & 0x3) << 0) +#define G_0280A0_ENDIAN(x) (((x) >> 0) & 0x3) +#define C_0280A0_ENDIAN 0xFFFFFFFC +#define S_0280A0_FORMAT(x) (((x) & 0x3F) << 2) +#define G_0280A0_FORMAT(x) (((x) >> 2) & 0x3F) +#define C_0280A0_FORMAT 0xFFFFFF03 +#define V_0280A0_COLOR_INVALID 0x00000000 +#define V_0280A0_COLOR_8 0x00000001 +#define V_0280A0_COLOR_4_4 0x00000002 +#define V_0280A0_COLOR_3_3_2 0x00000003 +#define V_0280A0_COLOR_16 0x00000005 +#define V_0280A0_COLOR_16_FLOAT 0x00000006 +#define V_0280A0_COLOR_8_8 0x00000007 +#define V_0280A0_COLOR_5_6_5 0x00000008 +#define V_0280A0_COLOR_6_5_5 0x00000009 +#define V_0280A0_COLOR_1_5_5_5 0x0000000A +#define V_0280A0_COLOR_4_4_4_4 0x0000000B +#define V_0280A0_COLOR_5_5_5_1 0x0000000C +#define V_0280A0_COLOR_32 0x0000000D +#define V_0280A0_COLOR_32_FLOAT 0x0000000E +#define V_0280A0_COLOR_16_16 0x0000000F +#define V_0280A0_COLOR_16_16_FLOAT 0x00000010 +#define V_0280A0_COLOR_8_24 0x00000011 +#define V_0280A0_COLOR_8_24_FLOAT 0x00000012 +#define V_0280A0_COLOR_24_8 0x00000013 +#define V_0280A0_COLOR_24_8_FLOAT 0x00000014 +#define V_0280A0_COLOR_10_11_11 0x00000015 +#define V_0280A0_COLOR_10_11_11_FLOAT 0x00000016 +#define V_0280A0_COLOR_11_11_10 0x00000017 +#define V_0280A0_COLOR_11_11_10_FLOAT 0x00000018 +#define V_0280A0_COLOR_2_10_10_10 0x00000019 +#define V_0280A0_COLOR_8_8_8_8 0x0000001A +#define V_0280A0_COLOR_10_10_10_2 0x0000001B +#define V_0280A0_COLOR_X24_8_32_FLOAT 0x0000001C +#define V_0280A0_COLOR_32_32 0x0000001D +#define V_0280A0_COLOR_32_32_FLOAT 0x0000001E +#define V_0280A0_COLOR_16_16_16_16 0x0000001F +#define V_0280A0_COLOR_16_16_16_16_FLOAT 0x00000020 +#define V_0280A0_COLOR_32_32_32_32 0x00000022 +#define V_0280A0_COLOR_32_32_32_32_FLOAT 0x00000023 +#define S_0280A0_ARRAY_MODE(x) (((x) & 0xF) << 8) +#define G_0280A0_ARRAY_MODE(x) (((x) >> 8) & 0xF) +#define C_0280A0_ARRAY_MODE 0xFFFFF0FF +#define V_0280A0_ARRAY_LINEAR_GENERAL 0x00000000 +#define V_0280A0_ARRAY_LINEAR_ALIGNED 0x00000001 +#define V_0280A0_ARRAY_1D_TILED_THIN1 0x00000002 +#define V_0280A0_ARRAY_2D_TILED_THIN1 0x00000004 +#define S_0280A0_NUMBER_TYPE(x) (((x) & 0x7) << 12) +#define G_0280A0_NUMBER_TYPE(x) (((x) >> 12) & 0x7) +#define C_0280A0_NUMBER_TYPE 0xFFFF8FFF +#define S_0280A0_READ_SIZE(x) (((x) & 0x1) << 15) +#define G_0280A0_READ_SIZE(x) (((x) >> 15) & 0x1) +#define C_0280A0_READ_SIZE 0xFFFF7FFF +#define S_0280A0_COMP_SWAP(x) (((x) & 0x3) << 16) +#define G_0280A0_COMP_SWAP(x) (((x) >> 16) & 0x3) +#define C_0280A0_COMP_SWAP 0xFFFCFFFF +#define S_0280A0_TILE_MODE(x) (((x) & 0x3) << 18) +#define G_0280A0_TILE_MODE(x) (((x) >> 18) & 0x3) +#define C_0280A0_TILE_MODE 0xFFF3FFFF +#define S_0280A0_BLEND_CLAMP(x) (((x) & 0x1) << 20) +#define G_0280A0_BLEND_CLAMP(x) (((x) >> 20) & 0x1) +#define C_0280A0_BLEND_CLAMP 0xFFEFFFFF +#define S_0280A0_CLEAR_COLOR(x) (((x) & 0x1) << 21) +#define G_0280A0_CLEAR_COLOR(x) (((x) >> 21) & 0x1) +#define C_0280A0_CLEAR_COLOR 0xFFDFFFFF +#define S_0280A0_BLEND_BYPASS(x) (((x) & 0x1) << 22) +#define G_0280A0_BLEND_BYPASS(x) (((x) >> 22) & 0x1) +#define C_0280A0_BLEND_BYPASS 0xFFBFFFFF +#define S_0280A0_BLEND_FLOAT32(x) (((x) & 0x1) << 23) +#define G_0280A0_BLEND_FLOAT32(x) (((x) >> 23) & 0x1) +#define C_0280A0_BLEND_FLOAT32 0xFF7FFFFF +#define S_0280A0_SIMPLE_FLOAT(x) (((x) & 0x1) << 24) +#define G_0280A0_SIMPLE_FLOAT(x) (((x) >> 24) & 0x1) +#define C_0280A0_SIMPLE_FLOAT 0xFEFFFFFF +#define S_0280A0_ROUND_MODE(x) (((x) & 0x1) << 25) +#define G_0280A0_ROUND_MODE(x) (((x) >> 25) & 0x1) +#define C_0280A0_ROUND_MODE 0xFDFFFFFF +#define S_0280A0_TILE_COMPACT(x) (((x) & 0x1) << 26) +#define G_0280A0_TILE_COMPACT(x) (((x) >> 26) & 0x1) +#define C_0280A0_TILE_COMPACT 0xFBFFFFFF +#define S_0280A0_SOURCE_FORMAT(x) (((x) & 0x1) << 27) +#define G_0280A0_SOURCE_FORMAT(x) (((x) >> 27) & 0x1) +#define C_0280A0_SOURCE_FORMAT 0xF7FFFFFF +#define R_028060_CB_COLOR0_SIZE 0x028060 +#define S_028060_PITCH_TILE_MAX(x) (((x) & 0x3FF) << 0) +#define G_028060_PITCH_TILE_MAX(x) (((x) >> 0) & 0x3FF) +#define C_028060_PITCH_TILE_MAX 0xFFFFFC00 +#define S_028060_SLICE_TILE_MAX(x) (((x) & 0xFFFFF) << 10) +#define G_028060_SLICE_TILE_MAX(x) (((x) >> 10) & 0xFFFFF) +#define C_028060_SLICE_TILE_MAX 0xC00003FF +#define R_028800_DB_DEPTH_CONTROL 0x028800 +#define S_028800_STENCIL_ENABLE(x) (((x) & 0x1) << 0) +#define G_028800_STENCIL_ENABLE(x) (((x) >> 0) & 0x1) +#define C_028800_STENCIL_ENABLE 0xFFFFFFFE +#define S_028800_Z_ENABLE(x) (((x) & 0x1) << 1) +#define G_028800_Z_ENABLE(x) (((x) >> 1) & 0x1) +#define C_028800_Z_ENABLE 0xFFFFFFFD +#define S_028800_Z_WRITE_ENABLE(x) (((x) & 0x1) << 2) +#define G_028800_Z_WRITE_ENABLE(x) (((x) >> 2) & 0x1) +#define C_028800_Z_WRITE_ENABLE 0xFFFFFFFB +#define S_028800_ZFUNC(x) (((x) & 0x7) << 4) +#define G_028800_ZFUNC(x) (((x) >> 4) & 0x7) +#define C_028800_ZFUNC 0xFFFFFF8F +#define S_028800_BACKFACE_ENABLE(x) (((x) & 0x1) << 7) +#define G_028800_BACKFACE_ENABLE(x) (((x) >> 7) & 0x1) +#define C_028800_BACKFACE_ENABLE 0xFFFFFF7F +#define S_028800_STENCILFUNC(x) (((x) & 0x7) << 8) +#define G_028800_STENCILFUNC(x) (((x) >> 8) & 0x7) +#define C_028800_STENCILFUNC 0xFFFFF8FF +#define S_028800_STENCILFAIL(x) (((x) & 0x7) << 11) +#define G_028800_STENCILFAIL(x) (((x) >> 11) & 0x7) +#define C_028800_STENCILFAIL 0xFFFFC7FF +#define S_028800_STENCILZPASS(x) (((x) & 0x7) << 14) +#define G_028800_STENCILZPASS(x) (((x) >> 14) & 0x7) +#define C_028800_STENCILZPASS 0xFFFE3FFF +#define S_028800_STENCILZFAIL(x) (((x) & 0x7) << 17) +#define G_028800_STENCILZFAIL(x) (((x) >> 17) & 0x7) +#define C_028800_STENCILZFAIL 0xFFF1FFFF +#define S_028800_STENCILFUNC_BF(x) (((x) & 0x7) << 20) +#define G_028800_STENCILFUNC_BF(x) (((x) >> 20) & 0x7) +#define C_028800_STENCILFUNC_BF 0xFF8FFFFF +#define S_028800_STENCILFAIL_BF(x) (((x) & 0x7) << 23) +#define G_028800_STENCILFAIL_BF(x) (((x) >> 23) & 0x7) +#define C_028800_STENCILFAIL_BF 0xFC7FFFFF +#define S_028800_STENCILZPASS_BF(x) (((x) & 0x7) << 26) +#define G_028800_STENCILZPASS_BF(x) (((x) >> 26) & 0x7) +#define C_028800_STENCILZPASS_BF 0xE3FFFFFF +#define S_028800_STENCILZFAIL_BF(x) (((x) & 0x7) << 29) +#define G_028800_STENCILZFAIL_BF(x) (((x) >> 29) & 0x7) +#define C_028800_STENCILZFAIL_BF 0x1FFFFFFF +#define R_028010_DB_DEPTH_INFO 0x028010 +#define S_028010_FORMAT(x) (((x) & 0x7) << 0) +#define G_028010_FORMAT(x) (((x) >> 0) & 0x7) +#define C_028010_FORMAT 0xFFFFFFF8 +#define V_028010_DEPTH_INVALID 0x00000000 +#define V_028010_DEPTH_16 0x00000001 +#define V_028010_DEPTH_X8_24 0x00000002 +#define V_028010_DEPTH_8_24 0x00000003 +#define V_028010_DEPTH_X8_24_FLOAT 0x00000004 +#define V_028010_DEPTH_8_24_FLOAT 0x00000005 +#define V_028010_DEPTH_32_FLOAT 0x00000006 +#define V_028010_DEPTH_X24_8_32_FLOAT 0x00000007 +#define S_028010_READ_SIZE(x) (((x) & 0x1) << 3) +#define G_028010_READ_SIZE(x) (((x) >> 3) & 0x1) +#define C_028010_READ_SIZE 0xFFFFFFF7 +#define S_028010_ARRAY_MODE(x) (((x) & 0xF) << 15) +#define G_028010_ARRAY_MODE(x) (((x) >> 15) & 0xF) +#define C_028010_ARRAY_MODE 0xFFF87FFF +#define S_028010_TILE_SURFACE_ENABLE(x) (((x) & 0x1) << 25) +#define G_028010_TILE_SURFACE_ENABLE(x) (((x) >> 25) & 0x1) +#define C_028010_TILE_SURFACE_ENABLE 0xFDFFFFFF +#define S_028010_TILE_COMPACT(x) (((x) & 0x1) << 26) +#define G_028010_TILE_COMPACT(x) (((x) >> 26) & 0x1) +#define C_028010_TILE_COMPACT 0xFBFFFFFF +#define S_028010_ZRANGE_PRECISION(x) (((x) & 0x1) << 31) +#define G_028010_ZRANGE_PRECISION(x) (((x) >> 31) & 0x1) +#define C_028010_ZRANGE_PRECISION 0x7FFFFFFF +#define R_028000_DB_DEPTH_SIZE 0x028000 +#define S_028000_PITCH_TILE_MAX(x) (((x) & 0x3FF) << 0) +#define G_028000_PITCH_TILE_MAX(x) (((x) >> 0) & 0x3FF) +#define C_028000_PITCH_TILE_MAX 0xFFFFFC00 +#define S_028000_SLICE_TILE_MAX(x) (((x) & 0xFFFFF) << 10) +#define G_028000_SLICE_TILE_MAX(x) (((x) >> 10) & 0xFFFFF) +#define C_028000_SLICE_TILE_MAX 0xC00003FF +#define R_028004_DB_DEPTH_VIEW 0x028004 +#define S_028004_SLICE_START(x) (((x) & 0x7FF) << 0) +#define G_028004_SLICE_START(x) (((x) >> 0) & 0x7FF) +#define C_028004_SLICE_START 0xFFFFF800 +#define S_028004_SLICE_MAX(x) (((x) & 0x7FF) << 13) +#define G_028004_SLICE_MAX(x) (((x) >> 13) & 0x7FF) +#define C_028004_SLICE_MAX 0xFF001FFF +#define R_028D24_DB_HTILE_SURFACE 0x028D24 +#define S_028D24_HTILE_WIDTH(x) (((x) & 0x1) << 0) +#define G_028D24_HTILE_WIDTH(x) (((x) >> 0) & 0x1) +#define C_028D24_HTILE_WIDTH 0xFFFFFFFE +#define S_028D24_HTILE_HEIGHT(x) (((x) & 0x1) << 1) +#define G_028D24_HTILE_HEIGHT(x) (((x) >> 1) & 0x1) +#define C_028D24_HTILE_HEIGHT 0xFFFFFFFD +#define S_028D24_LINEAR(x) (((x) & 0x1) << 2) +#define G_028D24_LINEAR(x) (((x) >> 2) & 0x1) +#define C_028D24_LINEAR 0xFFFFFFFB +#define S_028D24_FULL_CACHE(x) (((x) & 0x1) << 3) +#define G_028D24_FULL_CACHE(x) (((x) >> 3) & 0x1) +#define C_028D24_FULL_CACHE 0xFFFFFFF7 +#define S_028D24_HTILE_USES_PRELOAD_WIN(x) (((x) & 0x1) << 4) +#define G_028D24_HTILE_USES_PRELOAD_WIN(x) (((x) >> 4) & 0x1) +#define C_028D24_HTILE_USES_PRELOAD_WIN 0xFFFFFFEF +#define S_028D24_PRELOAD(x) (((x) & 0x1) << 5) +#define G_028D24_PRELOAD(x) (((x) >> 5) & 0x1) +#define C_028D24_PRELOAD 0xFFFFFFDF +#define S_028D24_PREFETCH_WIDTH(x) (((x) & 0x3F) << 6) +#define G_028D24_PREFETCH_WIDTH(x) (((x) >> 6) & 0x3F) +#define C_028D24_PREFETCH_WIDTH 0xFFFFF03F +#define S_028D24_PREFETCH_HEIGHT(x) (((x) & 0x3F) << 12) +#define G_028D24_PREFETCH_HEIGHT(x) (((x) >> 12) & 0x3F) +#define C_028D24_PREFETCH_HEIGHT 0xFFFC0FFF +#define R_028D34_DB_PREFETCH_LIMIT 0x028D34 +#define S_028D34_DEPTH_HEIGHT_TILE_MAX(x) (((x) & 0x3FF) << 0) +#define G_028D34_DEPTH_HEIGHT_TILE_MAX(x) (((x) >> 0) & 0x3FF) +#define C_028D34_DEPTH_HEIGHT_TILE_MAX 0xFFFFFC00 +#define R_028D10_DB_RENDER_OVERRIDE 0x028D10 +#define S_028D10_FORCE_HIZ_ENABLE(x) (((x) & 0x3) << 0) +#define G_028D10_FORCE_HIZ_ENABLE(x) (((x) >> 0) & 0x3) +#define C_028D10_FORCE_HIZ_ENABLE 0xFFFFFFFC +#define S_028D10_FORCE_HIS_ENABLE0(x) (((x) & 0x3) << 2) +#define G_028D10_FORCE_HIS_ENABLE0(x) (((x) >> 2) & 0x3) +#define C_028D10_FORCE_HIS_ENABLE0 0xFFFFFFF3 +#define S_028D10_FORCE_HIS_ENABLE1(x) (((x) & 0x3) << 4) +#define G_028D10_FORCE_HIS_ENABLE1(x) (((x) >> 4) & 0x3) +#define C_028D10_FORCE_HIS_ENABLE1 0xFFFFFFCF +#define S_028D10_FORCE_SHADER_Z_ORDER(x) (((x) & 0x1) << 6) +#define G_028D10_FORCE_SHADER_Z_ORDER(x) (((x) >> 6) & 0x1) +#define C_028D10_FORCE_SHADER_Z_ORDER 0xFFFFFFBF +#define S_028D10_FAST_Z_DISABLE(x) (((x) & 0x1) << 7) +#define G_028D10_FAST_Z_DISABLE(x) (((x) >> 7) & 0x1) +#define C_028D10_FAST_Z_DISABLE 0xFFFFFF7F +#define S_028D10_FAST_STENCIL_DISABLE(x) (((x) & 0x1) << 8) +#define G_028D10_FAST_STENCIL_DISABLE(x) (((x) >> 8) & 0x1) +#define C_028D10_FAST_STENCIL_DISABLE 0xFFFFFEFF +#define S_028D10_NOOP_CULL_DISABLE(x) (((x) & 0x1) << 9) +#define G_028D10_NOOP_CULL_DISABLE(x) (((x) >> 9) & 0x1) +#define C_028D10_NOOP_CULL_DISABLE 0xFFFFFDFF +#define S_028D10_FORCE_COLOR_KILL(x) (((x) & 0x1) << 10) +#define G_028D10_FORCE_COLOR_KILL(x) (((x) >> 10) & 0x1) +#define C_028D10_FORCE_COLOR_KILL 0xFFFFFBFF +#define S_028D10_FORCE_Z_READ(x) (((x) & 0x1) << 11) +#define G_028D10_FORCE_Z_READ(x) (((x) >> 11) & 0x1) +#define C_028D10_FORCE_Z_READ 0xFFFFF7FF +#define S_028D10_FORCE_STENCIL_READ(x) (((x) & 0x1) << 12) +#define G_028D10_FORCE_STENCIL_READ(x) (((x) >> 12) & 0x1) +#define C_028D10_FORCE_STENCIL_READ 0xFFFFEFFF +#define S_028D10_FORCE_FULL_Z_RANGE(x) (((x) & 0x3) << 13) +#define G_028D10_FORCE_FULL_Z_RANGE(x) (((x) >> 13) & 0x3) +#define C_028D10_FORCE_FULL_Z_RANGE 0xFFFF9FFF +#define S_028D10_FORCE_QC_SMASK_CONFLICT(x) (((x) & 0x1) << 15) +#define G_028D10_FORCE_QC_SMASK_CONFLICT(x) (((x) >> 15) & 0x1) +#define C_028D10_FORCE_QC_SMASK_CONFLICT 0xFFFF7FFF +#define S_028D10_DISABLE_VIEWPORT_CLAMP(x) (((x) & 0x1) << 16) +#define G_028D10_DISABLE_VIEWPORT_CLAMP(x) (((x) >> 16) & 0x1) +#define C_028D10_DISABLE_VIEWPORT_CLAMP 0xFFFEFFFF +#define S_028D10_IGNORE_SC_ZRANGE(x) (((x) & 0x1) << 17) +#define G_028D10_IGNORE_SC_ZRANGE(x) (((x) >> 17) & 0x1) +#define C_028D10_IGNORE_SC_ZRANGE 0xFFFDFFFF +#define R_028A40_VGT_GS_MODE 0x028A40 +#define S_028A40_MODE(x) (((x) & 0x3) << 0) +#define G_028A40_MODE(x) (((x) >> 0) & 0x3) +#define C_028A40_MODE 0xFFFFFFFC +#define S_028A40_ES_PASSTHRU(x) (((x) & 0x1) << 2) +#define G_028A40_ES_PASSTHRU(x) (((x) >> 2) & 0x1) +#define C_028A40_ES_PASSTHRU 0xFFFFFFFB +#define S_028A40_CUT_MODE(x) (((x) & 0x3) << 3) +#define G_028A40_CUT_MODE(x) (((x) >> 3) & 0x3) +#define C_028A40_CUT_MODE 0xFFFFFFE7 +#define R_008040_WAIT_UNTIL 0x008040 +#define S_008040_WAIT_CP_DMA_IDLE(x) (((x) & 0x1) << 8) +#define G_008040_WAIT_CP_DMA_IDLE(x) (((x) >> 8) & 0x1) +#define C_008040_WAIT_CP_DMA_IDLE 0xFFFFFEFF +#define S_008040_WAIT_CMDFIFO(x) (((x) & 0x1) << 10) +#define G_008040_WAIT_CMDFIFO(x) (((x) >> 10) & 0x1) +#define C_008040_WAIT_CMDFIFO 0xFFFFFBFF +#define S_008040_WAIT_2D_IDLE(x) (((x) & 0x1) << 14) +#define G_008040_WAIT_2D_IDLE(x) (((x) >> 14) & 0x1) +#define C_008040_WAIT_2D_IDLE 0xFFFFBFFF +#define S_008040_WAIT_3D_IDLE(x) (((x) & 0x1) << 15) +#define G_008040_WAIT_3D_IDLE(x) (((x) >> 15) & 0x1) +#define C_008040_WAIT_3D_IDLE 0xFFFF7FFF +#define S_008040_WAIT_2D_IDLECLEAN(x) (((x) & 0x1) << 16) +#define G_008040_WAIT_2D_IDLECLEAN(x) (((x) >> 16) & 0x1) +#define C_008040_WAIT_2D_IDLECLEAN 0xFFFEFFFF +#define S_008040_WAIT_3D_IDLECLEAN(x) (((x) & 0x1) << 17) +#define G_008040_WAIT_3D_IDLECLEAN(x) (((x) >> 17) & 0x1) +#define C_008040_WAIT_3D_IDLECLEAN 0xFFFDFFFF +#define S_008040_WAIT_EXTERN_SIG(x) (((x) & 0x1) << 19) +#define G_008040_WAIT_EXTERN_SIG(x) (((x) >> 19) & 0x1) +#define C_008040_WAIT_EXTERN_SIG 0xFFF7FFFF +#define S_008040_CMDFIFO_ENTRIES(x) (((x) & 0x1F) << 20) +#define G_008040_CMDFIFO_ENTRIES(x) (((x) >> 20) & 0x1F) +#define C_008040_CMDFIFO_ENTRIES 0xFE0FFFFF +#define R_0286CC_SPI_PS_IN_CONTROL_0 0x0286CC +#define S_0286CC_NUM_INTERP(x) (((x) & 0x3F) << 0) +#define G_0286CC_NUM_INTERP(x) (((x) >> 0) & 0x3F) +#define C_0286CC_NUM_INTERP 0xFFFFFFC0 +#define S_0286CC_POSITION_ENA(x) (((x) & 0x1) << 8) +#define G_0286CC_POSITION_ENA(x) (((x) >> 8) & 0x1) +#define C_0286CC_POSITION_ENA 0xFFFFFEFF +#define S_0286CC_POSITION_CENTROID(x) (((x) & 0x1) << 9) +#define G_0286CC_POSITION_CENTROID(x) (((x) >> 9) & 0x1) +#define C_0286CC_POSITION_CENTROID 0xFFFFFDFF +#define S_0286CC_POSITION_ADDR(x) (((x) & 0x1F) << 10) +#define G_0286CC_POSITION_ADDR(x) (((x) >> 10) & 0x1F) +#define C_0286CC_POSITION_ADDR 0xFFFF83FF +#define S_0286CC_PARAM_GEN(x) (((x) & 0xF) << 15) +#define G_0286CC_PARAM_GEN(x) (((x) >> 15) & 0xF) +#define C_0286CC_PARAM_GEN 0xFFF87FFF +#define S_0286CC_PARAM_GEN_ADDR(x) (((x) & 0x7F) << 19) +#define G_0286CC_PARAM_GEN_ADDR(x) (((x) >> 19) & 0x7F) +#define C_0286CC_PARAM_GEN_ADDR 0xFC07FFFF +#define S_0286CC_BARYC_SAMPLE_CNTL(x) (((x) & 0x3) << 26) +#define G_0286CC_BARYC_SAMPLE_CNTL(x) (((x) >> 26) & 0x3) +#define C_0286CC_BARYC_SAMPLE_CNTL 0xF3FFFFFF +#define S_0286CC_PERSP_GRADIENT_ENA(x) (((x) & 0x1) << 28) +#define G_0286CC_PERSP_GRADIENT_ENA(x) (((x) >> 28) & 0x1) +#define C_0286CC_PERSP_GRADIENT_ENA 0xEFFFFFFF +#define S_0286CC_LINEAR_GRADIENT_ENA(x) (((x) & 0x1) << 29) +#define G_0286CC_LINEAR_GRADIENT_ENA(x) (((x) >> 29) & 0x1) +#define C_0286CC_LINEAR_GRADIENT_ENA 0xDFFFFFFF +#define S_0286CC_POSITION_SAMPLE(x) (((x) & 0x1) << 30) +#define G_0286CC_POSITION_SAMPLE(x) (((x) >> 30) & 0x1) +#define C_0286CC_POSITION_SAMPLE 0xBFFFFFFF +#define S_0286CC_BARYC_AT_SAMPLE_ENA(x) (((x) & 0x1) << 31) +#define G_0286CC_BARYC_AT_SAMPLE_ENA(x) (((x) >> 31) & 0x1) +#define C_0286CC_BARYC_AT_SAMPLE_ENA 0x7FFFFFFF +#define R_0286D0_SPI_PS_IN_CONTROL_1 0x0286D0 +#define S_0286D0_GEN_INDEX_PIX(x) (((x) & 0x1) << 0) +#define G_0286D0_GEN_INDEX_PIX(x) (((x) >> 0) & 0x1) +#define C_0286D0_GEN_INDEX_PIX 0xFFFFFFFE +#define S_0286D0_GEN_INDEX_PIX_ADDR(x) (((x) & 0x7F) << 1) +#define G_0286D0_GEN_INDEX_PIX_ADDR(x) (((x) >> 1) & 0x7F) +#define C_0286D0_GEN_INDEX_PIX_ADDR 0xFFFFFF01 +#define S_0286D0_FRONT_FACE_ENA(x) (((x) & 0x1) << 8) +#define G_0286D0_FRONT_FACE_ENA(x) (((x) >> 8) & 0x1) +#define C_0286D0_FRONT_FACE_ENA 0xFFFFFEFF +#define S_0286D0_FRONT_FACE_CHAN(x) (((x) & 0x3) << 9) +#define G_0286D0_FRONT_FACE_CHAN(x) (((x) >> 9) & 0x3) +#define C_0286D0_FRONT_FACE_CHAN 0xFFFFF9FF +#define S_0286D0_FRONT_FACE_ALL_BITS(x) (((x) & 0x1) << 11) +#define G_0286D0_FRONT_FACE_ALL_BITS(x) (((x) >> 11) & 0x1) +#define C_0286D0_FRONT_FACE_ALL_BITS 0xFFFFF7FF +#define S_0286D0_FRONT_FACE_ADDR(x) (((x) & 0x1F) << 12) +#define G_0286D0_FRONT_FACE_ADDR(x) (((x) >> 12) & 0x1F) +#define C_0286D0_FRONT_FACE_ADDR 0xFFFE0FFF +#define S_0286D0_FOG_ADDR(x) (((x) & 0x7F) << 17) +#define G_0286D0_FOG_ADDR(x) (((x) >> 17) & 0x7F) +#define C_0286D0_FOG_ADDR 0xFF01FFFF +#define S_0286D0_FIXED_PT_POSITION_ENA(x) (((x) & 0x1) << 24) +#define G_0286D0_FIXED_PT_POSITION_ENA(x) (((x) >> 24) & 0x1) +#define C_0286D0_FIXED_PT_POSITION_ENA 0xFEFFFFFF +#define S_0286D0_FIXED_PT_POSITION_ADDR(x) (((x) & 0x1F) << 25) +#define G_0286D0_FIXED_PT_POSITION_ADDR(x) (((x) >> 25) & 0x1F) +#define C_0286D0_FIXED_PT_POSITION_ADDR 0xC1FFFFFF +#define R_0286C4_SPI_VS_OUT_CONFIG 0x0286C4 +#define S_0286C4_VS_PER_COMPONENT(x) (((x) & 0x1) << 0) +#define G_0286C4_VS_PER_COMPONENT(x) (((x) >> 0) & 0x1) +#define C_0286C4_VS_PER_COMPONENT 0xFFFFFFFE +#define S_0286C4_VS_EXPORT_COUNT(x) (((x) & 0x1F) << 1) +#define G_0286C4_VS_EXPORT_COUNT(x) (((x) >> 1) & 0x1F) +#define C_0286C4_VS_EXPORT_COUNT 0xFFFFFFC1 +#define S_0286C4_VS_EXPORTS_FOG(x) (((x) & 0x1) << 8) +#define G_0286C4_VS_EXPORTS_FOG(x) (((x) >> 8) & 0x1) +#define C_0286C4_VS_EXPORTS_FOG 0xFFFFFEFF +#define S_0286C4_VS_OUT_FOG_VEC_ADDR(x) (((x) & 0x1F) << 9) +#define G_0286C4_VS_OUT_FOG_VEC_ADDR(x) (((x) >> 9) & 0x1F) +#define C_0286C4_VS_OUT_FOG_VEC_ADDR 0xFFFFC1FF +#define R_028240_PA_SC_GENERIC_SCISSOR_TL 0x028240 +#define S_028240_TL_X(x) (((x) & 0x3FFF) << 0) +#define G_028240_TL_X(x) (((x) >> 0) & 0x3FFF) +#define C_028240_TL_X 0xFFFFC000 +#define S_028240_TL_Y(x) (((x) & 0x3FFF) << 16) +#define G_028240_TL_Y(x) (((x) >> 16) & 0x3FFF) +#define C_028240_TL_Y 0xC000FFFF +#define S_028240_WINDOW_OFFSET_DISABLE(x) (((x) & 0x1) << 31) +#define G_028240_WINDOW_OFFSET_DISABLE(x) (((x) >> 31) & 0x1) +#define C_028240_WINDOW_OFFSET_DISABLE 0x7FFFFFFF +#define R_028244_PA_SC_GENERIC_SCISSOR_BR 0x028244 +#define S_028244_BR_X(x) (((x) & 0x3FFF) << 0) +#define G_028244_BR_X(x) (((x) >> 0) & 0x3FFF) +#define C_028244_BR_X 0xFFFFC000 +#define S_028244_BR_Y(x) (((x) & 0x3FFF) << 16) +#define G_028244_BR_Y(x) (((x) >> 16) & 0x3FFF) +#define C_028244_BR_Y 0xC000FFFF +#define R_028030_PA_SC_SCREEN_SCISSOR_TL 0x028030 +#define S_028030_TL_X(x) (((x) & 0x7FFF) << 0) +#define G_028030_TL_X(x) (((x) >> 0) & 0x7FFF) +#define C_028030_TL_X 0xFFFF8000 +#define S_028030_TL_Y(x) (((x) & 0x7FFF) << 16) +#define G_028030_TL_Y(x) (((x) >> 16) & 0x7FFF) +#define C_028030_TL_Y 0x8000FFFF +#define R_028034_PA_SC_SCREEN_SCISSOR_BR 0x028034 +#define S_028034_BR_X(x) (((x) & 0x7FFF) << 0) +#define G_028034_BR_X(x) (((x) >> 0) & 0x7FFF) +#define C_028034_BR_X 0xFFFF8000 +#define S_028034_BR_Y(x) (((x) & 0x7FFF) << 16) +#define G_028034_BR_Y(x) (((x) >> 16) & 0x7FFF) +#define C_028034_BR_Y 0x8000FFFF +#define R_028204_PA_SC_WINDOW_SCISSOR_TL 0x028204 +#define S_028204_TL_X(x) (((x) & 0x3FFF) << 0) +#define G_028204_TL_X(x) (((x) >> 0) & 0x3FFF) +#define C_028204_TL_X 0xFFFFC000 +#define S_028204_TL_Y(x) (((x) & 0x3FFF) << 16) +#define G_028204_TL_Y(x) (((x) >> 16) & 0x3FFF) +#define C_028204_TL_Y 0xC000FFFF +#define S_028204_WINDOW_OFFSET_DISABLE(x) (((x) & 0x1) << 31) +#define G_028204_WINDOW_OFFSET_DISABLE(x) (((x) >> 31) & 0x1) +#define C_028204_WINDOW_OFFSET_DISABLE 0x7FFFFFFF +#define R_028208_PA_SC_WINDOW_SCISSOR_BR 0x028208 +#define S_028208_BR_X(x) (((x) & 0x3FFF) << 0) +#define G_028208_BR_X(x) (((x) >> 0) & 0x3FFF) +#define C_028208_BR_X 0xFFFFC000 +#define S_028208_BR_Y(x) (((x) & 0x3FFF) << 16) +#define G_028208_BR_Y(x) (((x) >> 16) & 0x3FFF) +#define C_028208_BR_Y 0xC000FFFF +#define R_0287F0_VGT_DRAW_INITIATOR 0x0287F0 +#define S_0287F0_SOURCE_SELECT(x) (((x) & 0x3) << 0) +#define G_0287F0_SOURCE_SELECT(x) (((x) >> 0) & 0x3) +#define C_0287F0_SOURCE_SELECT 0xFFFFFFFC +#define S_0287F0_MAJOR_MODE(x) (((x) & 0x3) << 2) +#define G_0287F0_MAJOR_MODE(x) (((x) >> 2) & 0x3) +#define C_0287F0_MAJOR_MODE 0xFFFFFFF3 +#define S_0287F0_SPRITE_EN(x) (((x) & 0x1) << 4) +#define G_0287F0_SPRITE_EN(x) (((x) >> 4) & 0x1) +#define C_0287F0_SPRITE_EN 0xFFFFFFEF +#define S_0287F0_NOT_EOP(x) (((x) & 0x1) << 5) +#define G_0287F0_NOT_EOP(x) (((x) >> 5) & 0x1) +#define C_0287F0_NOT_EOP 0xFFFFFFDF +#define S_0287F0_USE_OPAQUE(x) (((x) & 0x1) << 6) +#define G_0287F0_USE_OPAQUE(x) (((x) >> 6) & 0x1) +#define C_0287F0_USE_OPAQUE 0xFFFFFFBF +#define R_038008_SQ_VTX_CONSTANT_WORD2_0 0x038008 +#define S_038008_BASE_ADDRESS_HI(x) (((x) & 0xFF) << 0) +#define G_038008_BASE_ADDRESS_HI(x) (((x) >> 0) & 0xFF) +#define C_038008_BASE_ADDRESS_HI 0xFFFFFF00 +#define S_038008_STRIDE(x) (((x) & 0x7FF) << 8) +#define G_038008_STRIDE(x) (((x) >> 8) & 0x7FF) +#define C_038008_STRIDE 0xFFF800FF +#define S_038008_CLAMP_X(x) (((x) & 0x1) << 19) +#define G_038008_CLAMP_X(x) (((x) >> 19) & 0x1) +#define C_038008_CLAMP_X 0xFFF7FFFF +#define S_038008_DATA_FORMAT(x) (((x) & 0x3F) << 20) +#define G_038008_DATA_FORMAT(x) (((x) >> 20) & 0x3F) +#define C_038008_DATA_FORMAT 0xFC0FFFFF +#define V_038008_COLOR_INVALID 0x00000000 +#define V_038008_COLOR_8 0x00000001 +#define V_038008_COLOR_4_4 0x00000002 +#define V_038008_COLOR_3_3_2 0x00000003 +#define V_038008_COLOR_16 0x00000005 +#define V_038008_COLOR_16_FLOAT 0x00000006 +#define V_038008_COLOR_8_8 0x00000007 +#define V_038008_COLOR_5_6_5 0x00000008 +#define V_038008_COLOR_6_5_5 0x00000009 +#define V_038008_COLOR_1_5_5_5 0x0000000A +#define V_038008_COLOR_4_4_4_4 0x0000000B +#define V_038008_COLOR_5_5_5_1 0x0000000C +#define V_038008_COLOR_32 0x0000000D +#define V_038008_COLOR_32_FLOAT 0x0000000E +#define V_038008_COLOR_16_16 0x0000000F +#define V_038008_COLOR_16_16_FLOAT 0x00000010 +#define V_038008_COLOR_8_24 0x00000011 +#define V_038008_COLOR_8_24_FLOAT 0x00000012 +#define V_038008_COLOR_24_8 0x00000013 +#define V_038008_COLOR_24_8_FLOAT 0x00000014 +#define V_038008_COLOR_10_11_11 0x00000015 +#define V_038008_COLOR_10_11_11_FLOAT 0x00000016 +#define V_038008_COLOR_11_11_10 0x00000017 +#define V_038008_COLOR_11_11_10_FLOAT 0x00000018 +#define V_038008_COLOR_2_10_10_10 0x00000019 +#define V_038008_COLOR_8_8_8_8 0x0000001A +#define V_038008_COLOR_10_10_10_2 0x0000001B +#define V_038008_COLOR_X24_8_32_FLOAT 0x0000001C +#define V_038008_COLOR_32_32 0x0000001D +#define V_038008_COLOR_32_32_FLOAT 0x0000001E +#define V_038008_COLOR_16_16_16_16 0x0000001F +#define V_038008_COLOR_16_16_16_16_FLOAT 0x00000020 +#define V_038008_COLOR_32_32_32_32 0x00000022 +#define V_038008_COLOR_32_32_32_32_FLOAT 0x00000023 +#define S_038008_NUM_FORMAT_ALL(x) (((x) & 0x3) << 26) +#define G_038008_NUM_FORMAT_ALL(x) (((x) >> 26) & 0x3) +#define C_038008_NUM_FORMAT_ALL 0xF3FFFFFF +#define S_038008_FORMAT_COMP_ALL(x) (((x) & 0x1) << 28) +#define G_038008_FORMAT_COMP_ALL(x) (((x) >> 28) & 0x1) +#define C_038008_FORMAT_COMP_ALL 0xEFFFFFFF +#define S_038008_SRF_MODE_ALL(x) (((x) & 0x1) << 29) +#define G_038008_SRF_MODE_ALL(x) (((x) >> 29) & 0x1) +#define C_038008_SRF_MODE_ALL 0xDFFFFFFF +#define S_038008_ENDIAN_SWAP(x) (((x) & 0x3) << 30) +#define G_038008_ENDIAN_SWAP(x) (((x) >> 30) & 0x3) +#define C_038008_ENDIAN_SWAP 0x3FFFFFFF +#define R_008958_VGT_PRIMITIVE_TYPE 0x008958 +#define S_008958_PRIM_TYPE(x) (((x) & 0x3F) << 0) +#define G_008958_PRIM_TYPE(x) (((x) >> 0) & 0x3F) +#define C_008958_PRIM_TYPE 0xFFFFFFC0 +#define V_008958_DI_PT_NONE 0x00000000 +#define V_008958_DI_PT_POINTLIST 0x00000001 +#define V_008958_DI_PT_LINELIST 0x00000002 +#define V_008958_DI_PT_LINESTRIP 0x00000003 +#define V_008958_DI_PT_TRILIST 0x00000004 +#define V_008958_DI_PT_TRIFAN 0x00000005 +#define V_008958_DI_PT_TRISTRIP 0x00000006 +#define V_008958_DI_PT_UNUSED_0 0x00000007 +#define V_008958_DI_PT_UNUSED_1 0x00000008 +#define V_008958_DI_PT_UNUSED_2 0x00000009 +#define V_008958_DI_PT_LINELIST_ADJ 0x0000000A +#define V_008958_DI_PT_LINESTRIP_ADJ 0x0000000B +#define V_008958_DI_PT_TRILIST_ADJ 0x0000000C +#define V_008958_DI_PT_TRISTRIP_ADJ 0x0000000D +#define V_008958_DI_PT_UNUSED_3 0x0000000E +#define V_008958_DI_PT_UNUSED_4 0x0000000F +#define V_008958_DI_PT_TRI_WITH_WFLAGS 0x00000010 +#define V_008958_DI_PT_RECTLIST 0x00000011 +#define V_008958_DI_PT_LINELOOP 0x00000012 +#define V_008958_DI_PT_QUADLIST 0x00000013 +#define V_008958_DI_PT_QUADSTRIP 0x00000014 +#define V_008958_DI_PT_POLYGON 0x00000015 +#define V_008958_DI_PT_2D_COPY_RECT_LIST_V0 0x00000016 +#define V_008958_DI_PT_2D_COPY_RECT_LIST_V1 0x00000017 +#define V_008958_DI_PT_2D_COPY_RECT_LIST_V2 0x00000018 +#define V_008958_DI_PT_2D_COPY_RECT_LIST_V3 0x00000019 +#define V_008958_DI_PT_2D_FILL_RECT_LIST 0x0000001A +#define V_008958_DI_PT_2D_LINE_STRIP 0x0000001B +#define V_008958_DI_PT_2D_TRI_STRIP 0x0000001C +#define R_028868_SQ_PGM_RESOURCES_VS 0x028868 +#define S_028868_NUM_GPRS(x) (((x) & 0xFF) << 0) +#define G_028868_NUM_GPRS(x) (((x) >> 0) & 0xFF) +#define C_028868_NUM_GPRS 0xFFFFFF00 +#define S_028868_STACK_SIZE(x) (((x) & 0xFF) << 8) +#define G_028868_STACK_SIZE(x) (((x) >> 8) & 0xFF) +#define C_028868_STACK_SIZE 0xFFFF00FF +#define S_028868_DX10_CLAMP(x) (((x) & 0x1) << 21) +#define G_028868_DX10_CLAMP(x) (((x) >> 21) & 0x1) +#define C_028868_DX10_CLAMP 0xFFDFFFFF +#define S_028868_FETCH_CACHE_LINES(x) (((x) & 0x7) << 24) +#define G_028868_FETCH_CACHE_LINES(x) (((x) >> 24) & 0x7) +#define C_028868_FETCH_CACHE_LINES 0xF8FFFFFF +#define S_028868_UNCACHED_FIRST_INST(x) (((x) & 0x1) << 28) +#define G_028868_UNCACHED_FIRST_INST(x) (((x) >> 28) & 0x1) +#define C_028868_UNCACHED_FIRST_INST 0xEFFFFFFF +#define R_028850_SQ_PGM_RESOURCES_PS 0x028850 +#define S_028850_NUM_GPRS(x) (((x) & 0xFF) << 0) +#define G_028850_NUM_GPRS(x) (((x) >> 0) & 0xFF) +#define C_028850_NUM_GPRS 0xFFFFFF00 +#define S_028850_STACK_SIZE(x) (((x) & 0xFF) << 8) +#define G_028850_STACK_SIZE(x) (((x) >> 8) & 0xFF) +#define C_028850_STACK_SIZE 0xFFFF00FF +#define S_028850_DX10_CLAMP(x) (((x) & 0x1) << 21) +#define G_028850_DX10_CLAMP(x) (((x) >> 21) & 0x1) +#define C_028850_DX10_CLAMP 0xFFDFFFFF +#define S_028850_FETCH_CACHE_LINES(x) (((x) & 0x7) << 24) +#define G_028850_FETCH_CACHE_LINES(x) (((x) >> 24) & 0x7) +#define C_028850_FETCH_CACHE_LINES 0xF8FFFFFF +#define S_028850_UNCACHED_FIRST_INST(x) (((x) & 0x1) << 28) +#define G_028850_UNCACHED_FIRST_INST(x) (((x) >> 28) & 0x1) +#define C_028850_UNCACHED_FIRST_INST 0xEFFFFFFF +#define S_028850_CLAMP_CONSTS(x) (((x) & 0x1) << 31) +#define G_028850_CLAMP_CONSTS(x) (((x) >> 31) & 0x1) +#define C_028850_CLAMP_CONSTS 0x7FFFFFFF +#define R_028644_SPI_PS_INPUT_CNTL_0 0x028644 +#define S_028644_SEMANTIC(x) (((x) & 0xFF) << 0) +#define G_028644_SEMANTIC(x) (((x) >> 0) & 0xFF) +#define C_028644_SEMANTIC 0xFFFFFF00 +#define S_028644_DEFAULT_VAL(x) (((x) & 0x3) << 8) +#define G_028644_DEFAULT_VAL(x) (((x) >> 8) & 0x3) +#define C_028644_DEFAULT_VAL 0xFFFFFCFF +#define S_028644_FLAT_SHADE(x) (((x) & 0x1) << 10) +#define G_028644_FLAT_SHADE(x) (((x) >> 10) & 0x1) +#define C_028644_FLAT_SHADE 0xFFFFFBFF +#define S_028644_SEL_CENTROID(x) (((x) & 0x1) << 11) +#define G_028644_SEL_CENTROID(x) (((x) >> 11) & 0x1) +#define C_028644_SEL_CENTROID 0xFFFFF7FF +#define S_028644_SEL_LINEAR(x) (((x) & 0x1) << 12) +#define G_028644_SEL_LINEAR(x) (((x) >> 12) & 0x1) +#define C_028644_SEL_LINEAR 0xFFFFEFFF +#define S_028644_CYL_WRAP(x) (((x) & 0xF) << 13) +#define G_028644_CYL_WRAP(x) (((x) >> 13) & 0xF) +#define C_028644_CYL_WRAP 0xFFFE1FFF +#define S_028644_PT_SPRITE_TEX(x) (((x) & 0x1) << 17) +#define G_028644_PT_SPRITE_TEX(x) (((x) >> 17) & 0x1) +#define C_028644_PT_SPRITE_TEX 0xFFFDFFFF +#define S_028644_SEL_SAMPLE(x) (((x) & 0x1) << 18) +#define G_028644_SEL_SAMPLE(x) (((x) >> 18) & 0x1) +#define C_028644_SEL_SAMPLE 0xFFFBFFFF +#define R_0286D4_SPI_INTERP_CONTROL_0 0x0286D4 +#define S_0286D4_FLAT_SHADE_ENA(x) (((x) & 0x1) << 0) +#define G_0286D4_FLAT_SHADE_ENA(x) (((x) >> 0) & 0x1) +#define C_0286D4_FLAT_SHADE_ENA 0xFFFFFFFE +#define S_0286D4_PNT_SPRITE_ENA(x) (((x) & 0x1) << 1) +#define G_0286D4_PNT_SPRITE_ENA(x) (((x) >> 1) & 0x1) +#define C_0286D4_PNT_SPRITE_ENA 0xFFFFFFFD +#define S_0286D4_PNT_SPRITE_OVRD_X(x) (((x) & 0x7) << 2) +#define G_0286D4_PNT_SPRITE_OVRD_X(x) (((x) >> 2) & 0x7) +#define C_0286D4_PNT_SPRITE_OVRD_X 0xFFFFFFE3 +#define S_0286D4_PNT_SPRITE_OVRD_Y(x) (((x) & 0x7) << 5) +#define G_0286D4_PNT_SPRITE_OVRD_Y(x) (((x) >> 5) & 0x7) +#define C_0286D4_PNT_SPRITE_OVRD_Y 0xFFFFFF1F +#define S_0286D4_PNT_SPRITE_OVRD_Z(x) (((x) & 0x7) << 8) +#define G_0286D4_PNT_SPRITE_OVRD_Z(x) (((x) >> 8) & 0x7) +#define C_0286D4_PNT_SPRITE_OVRD_Z 0xFFFFF8FF +#define S_0286D4_PNT_SPRITE_OVRD_W(x) (((x) & 0x7) << 11) +#define G_0286D4_PNT_SPRITE_OVRD_W(x) (((x) >> 11) & 0x7) +#define C_0286D4_PNT_SPRITE_OVRD_W 0xFFFFC7FF +#define S_0286D4_PNT_SPRITE_TOP_1(x) (((x) & 0x1) << 14) +#define G_0286D4_PNT_SPRITE_TOP_1(x) (((x) >> 14) & 0x1) +#define C_0286D4_PNT_SPRITE_TOP_1 0xFFFFBFFF + +#endif diff --git a/src/gallium/drivers/r600/r700_sq.h b/src/gallium/drivers/r600/r700_sq.h new file mode 100644 index 00000000000..8266af6d1fc --- /dev/null +++ b/src/gallium/drivers/r600/r700_sq.h @@ -0,0 +1,609 @@ +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jerome Glisse + */ +#ifndef R700_SQ_H +#define R700_SQ_H + +#define P_SQ_CF_WORD0 +#define S_SQ_CF_WORD0_ADDR(x) (((x) & 0xFFFFFFFF) << 0) +#define G_SQ_CF_WORD0_ADDR(x) (((x) >> 0) & 0xFFFFFFFF) +#define C_SQ_CF_WORD0_ADDR 0x00000000 +#define P_SQ_CF_WORD1 +#define S_SQ_CF_WORD1_POP_COUNT(x) (((x) & 0x7) << 0) +#define G_SQ_CF_WORD1_POP_COUNT(x) (((x) >> 0) & 0x7) +#define C_SQ_CF_WORD1_POP_COUNT 0xFFFFFFF8 +#define S_SQ_CF_WORD1_CF_CONST(x) (((x) & 0x1F) << 3) +#define G_SQ_CF_WORD1_CF_CONST(x) (((x) >> 3) & 0x1F) +#define C_SQ_CF_WORD1_CF_CONST 0xFFFFFF07 +#define S_SQ_CF_WORD1_COND(x) (((x) & 0x3) << 8) +#define G_SQ_CF_WORD1_COND(x) (((x) >> 8) & 0x3) +#define C_SQ_CF_WORD1_COND 0xFFFFFCFF +#define S_SQ_CF_WORD1_COUNT(x) (((x) & 0x7) << 10) +#define G_SQ_CF_WORD1_COUNT(x) (((x) >> 10) & 0x7) +#define C_SQ_CF_WORD1_COUNT 0xFFFFE3FF +#define S_SQ_CF_WORD1_CALL_COUNT(x) (((x) & 0x3F) << 13) +#define G_SQ_CF_WORD1_CALL_COUNT(x) (((x) >> 13) & 0x3F) +#define C_SQ_CF_WORD1_CALL_COUNT 0xFFF81FFF +#define S_SQ_CF_WORD1_END_OF_PROGRAM(x) (((x) & 0x1) << 21) +#define G_SQ_CF_WORD1_END_OF_PROGRAM(x) (((x) >> 21) & 0x1) +#define C_SQ_CF_WORD1_END_OF_PROGRAM 0xFFDFFFFF +#define S_SQ_CF_WORD1_VALID_PIXEL_MODE(x) (((x) & 0x1) << 22) +#define G_SQ_CF_WORD1_VALID_PIXEL_MODE(x) (((x) >> 22) & 0x1) +#define C_SQ_CF_WORD1_VALID_PIXEL_MODE 0xFFBFFFFF +#define S_SQ_CF_WORD1_CF_INST(x) (((x) & 0x7F) << 23) +#define G_SQ_CF_WORD1_CF_INST(x) (((x) >> 23) & 0x7F) +#define C_SQ_CF_WORD1_CF_INST 0xC07FFFFF +#define V_SQ_CF_WORD1_SQ_CF_INST_NOP 0x00000000 +#define V_SQ_CF_WORD1_SQ_CF_INST_TEX 0x00000001 +#define V_SQ_CF_WORD1_SQ_CF_INST_VTX 0x00000002 +#define V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC 0x00000003 +#define V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START 0x00000004 +#define V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END 0x00000005 +#define V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10 0x00000006 +#define V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL 0x00000007 +#define V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE 0x00000008 +#define V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK 0x00000009 +#define V_SQ_CF_WORD1_SQ_CF_INST_JUMP 0x0000000A +#define V_SQ_CF_WORD1_SQ_CF_INST_PUSH 0x0000000B +#define V_SQ_CF_WORD1_SQ_CF_INST_PUSH_ELSE 0x0000000C +#define V_SQ_CF_WORD1_SQ_CF_INST_ELSE 0x0000000D +#define V_SQ_CF_WORD1_SQ_CF_INST_POP 0x0000000E +#define V_SQ_CF_WORD1_SQ_CF_INST_POP_JUMP 0x0000000F +#define V_SQ_CF_WORD1_SQ_CF_INST_POP_PUSH 0x00000010 +#define V_SQ_CF_WORD1_SQ_CF_INST_POP_PUSH_ELSE 0x00000011 +#define V_SQ_CF_WORD1_SQ_CF_INST_CALL 0x00000012 +#define V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS 0x00000013 +#define V_SQ_CF_WORD1_SQ_CF_INST_RETURN 0x00000014 +#define V_SQ_CF_WORD1_SQ_CF_INST_EMIT_VERTEX 0x00000015 +#define V_SQ_CF_WORD1_SQ_CF_INST_EMIT_CUT_VERTEX 0x00000016 +#define V_SQ_CF_WORD1_SQ_CF_INST_CUT_VERTEX 0x00000017 +#define V_SQ_CF_WORD1_SQ_CF_INST_KILL 0x00000018 +#define S_SQ_CF_WORD1_WHOLE_QUAD_MODE(x) (((x) & 0x1) << 30) +#define G_SQ_CF_WORD1_WHOLE_QUAD_MODE(x) (((x) >> 30) & 0x1) +#define C_SQ_CF_WORD1_WHOLE_QUAD_MODE 0xBFFFFFFF +#define S_SQ_CF_WORD1_BARRIER(x) (((x) & 0x1) << 31) +#define G_SQ_CF_WORD1_BARRIER(x) (((x) >> 31) & 0x1) +#define C_SQ_CF_WORD1_BARRIER 0x7FFFFFFF +#define S_SQ_CF_WORD1_COUNT_3(x) (((x) & 0x1) << 19) +#define G_SQ_CF_WORD1_COUNT_3(x) (((x) >> 19) & 0x1) +#define C_SQ_CF_WORD1_COUNT_3 0xFFF7FFFF +#define P_SQ_CF_ALU_WORD0 +#define S_SQ_CF_ALU_WORD0_ADDR(x) (((x) & 0x3FFFFF) << 0) +#define G_SQ_CF_ALU_WORD0_ADDR(x) (((x) >> 0) & 0x3FFFFF) +#define C_SQ_CF_ALU_WORD0_ADDR 0xFFC00000 +#define S_SQ_CF_ALU_WORD0_KCACHE_BANK0(x) (((x) & 0xF) << 22) +#define G_SQ_CF_ALU_WORD0_KCACHE_BANK0(x) (((x) >> 22) & 0xF) +#define C_SQ_CF_ALU_WORD0_KCACHE_BANK0 0xFC3FFFFF +#define S_SQ_CF_ALU_WORD0_KCACHE_BANK1(x) (((x) & 0xF) << 26) +#define G_SQ_CF_ALU_WORD0_KCACHE_BANK1(x) (((x) >> 26) & 0xF) +#define C_SQ_CF_ALU_WORD0_KCACHE_BANK1 0xC3FFFFFF +#define S_SQ_CF_ALU_WORD0_KCACHE_MODE0(x) (((x) & 0x3) << 30) +#define G_SQ_CF_ALU_WORD0_KCACHE_MODE0(x) (((x) >> 30) & 0x3) +#define C_SQ_CF_ALU_WORD0_KCACHE_MODE0 0x3FFFFFFF +#define P_SQ_CF_ALU_WORD1 +#define S_SQ_CF_ALU_WORD1_KCACHE_MODE1(x) (((x) & 0x3) << 0) +#define G_SQ_CF_ALU_WORD1_KCACHE_MODE1(x) (((x) >> 0) & 0x3) +#define C_SQ_CF_ALU_WORD1_KCACHE_MODE1 0xFFFFFFFC +#define S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(x) (((x) & 0xFF) << 2) +#define G_SQ_CF_ALU_WORD1_KCACHE_ADDR0(x) (((x) >> 2) & 0xFF) +#define C_SQ_CF_ALU_WORD1_KCACHE_ADDR0 0xFFFFFC03 +#define S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(x) (((x) & 0xFF) << 10) +#define G_SQ_CF_ALU_WORD1_KCACHE_ADDR1(x) (((x) >> 10) & 0xFF) +#define C_SQ_CF_ALU_WORD1_KCACHE_ADDR1 0xFFFC03FF +#define S_SQ_CF_ALU_WORD1_COUNT(x) (((x) & 0x7F) << 18) +#define G_SQ_CF_ALU_WORD1_COUNT(x) (((x) >> 18) & 0x7F) +#define C_SQ_CF_ALU_WORD1_COUNT 0xFE03FFFF +#define S_SQ_CF_ALU_WORD1_USES_WATERFALL(x) (((x) & 0x1) << 25) +#define G_SQ_CF_ALU_WORD1_USES_WATERFALL(x) (((x) >> 25) & 0x1) +#define C_SQ_CF_ALU_WORD1_USES_WATERFALL 0xFDFFFFFF +#define S_SQ_CF_ALU_WORD1_CF_INST(x) (((x) & 0xF) << 26) +#define G_SQ_CF_ALU_WORD1_CF_INST(x) (((x) >> 26) & 0xF) +#define C_SQ_CF_ALU_WORD1_CF_INST 0xC3FFFFFF +#define V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU 0x00000008 +#define V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE 0x00000009 +#define V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER 0x0000000A +#define V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER 0x0000000B +#define V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_CONTINUE 0x0000000D +#define V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_BREAK 0x0000000E +#define V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_ELSE_AFTER 0x0000000F +#define S_SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE(x) (((x) & 0x1) << 30) +#define G_SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE(x) (((x) >> 30) & 0x1) +#define C_SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE 0xBFFFFFFF +#define S_SQ_CF_ALU_WORD1_BARRIER(x) (((x) & 0x1) << 31) +#define G_SQ_CF_ALU_WORD1_BARRIER(x) (((x) >> 31) & 0x1) +#define C_SQ_CF_ALU_WORD1_BARRIER 0x7FFFFFFF +#define S_SQ_CF_ALU_WORD1_ALT_CONST(x) (((x) & 0x1) << 25) +#define G_SQ_CF_ALU_WORD1_ALT_CONST(x) (((x) >> 25) & 0x1) +#define C_SQ_CF_ALU_WORD1_ALT_CONST 0xFDFFFFFF +#define P_SQ_CF_ALLOC_EXPORT_WORD0 +#define S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(x) (((x) & 0x1FFF) << 0) +#define G_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(x) (((x) >> 0) & 0x1FFF) +#define C_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE 0xFFFFE000 +#define S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(x) (((x) & 0x3) << 13) +#define G_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(x) (((x) >> 13) & 0x3) +#define C_SQ_CF_ALLOC_EXPORT_WORD0_TYPE 0xFFFF9FFF +#define V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL 0x00000000 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS 0x00000001 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM 0x00000002 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_SX 0x00000003 +#define S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(x) (((x) & 0x7F) << 15) +#define G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(x) (((x) >> 15) & 0x7F) +#define C_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR 0xFFC07FFF +#define S_SQ_CF_ALLOC_EXPORT_WORD0_RW_REL(x) (((x) & 0x1) << 22) +#define G_SQ_CF_ALLOC_EXPORT_WORD0_RW_REL(x) (((x) >> 22) & 0x1) +#define C_SQ_CF_ALLOC_EXPORT_WORD0_RW_REL 0xFFBFFFFF +#define S_SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR(x) (((x) & 0x7F) << 23) +#define G_SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR(x) (((x) >> 23) & 0x7F) +#define C_SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR 0xC07FFFFF +#define S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(x) (((x) & 0x3) << 30) +#define G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(x) (((x) >> 30) & 0x3) +#define C_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE 0x3FFFFFFF +#define P_SQ_CF_ALLOC_EXPORT_WORD1 +#define S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(x) (((x) & 0xF) << 17) +#define G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(x) (((x) >> 17) & 0xF) +#define C_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT 0xFFE1FFFF +#define S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(x) (((x) & 0x1) << 21) +#define G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(x) (((x) >> 21) & 0x1) +#define C_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM 0xFFDFFFFF +#define S_SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE(x) (((x) & 0x1) << 22) +#define G_SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE(x) (((x) >> 22) & 0x1) +#define C_SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE 0xFFBFFFFF +#define S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(x) (((x) & 0x7F) << 23) +#define G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(x) (((x) >> 23) & 0x7F) +#define C_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST 0xC07FFFFF +#define V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0 0x00000020 +#define V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1 0x00000021 +#define V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2 0x00000022 +#define V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3 0x00000023 +#define V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_SCRATCH 0x00000024 +#define V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_REDUCTION 0x00000025 +#define V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_RING 0x00000026 +#define V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT 0x00000027 +#define V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE 0x00000028 +#define S_SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE(x) (((x) & 0x1) << 30) +#define G_SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE(x) (((x) >> 30) & 0x1) +#define C_SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE 0xBFFFFFFF +#define S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(x) (((x) & 0x1) << 31) +#define G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(x) (((x) >> 31) & 0x1) +#define C_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER 0x7FFFFFFF +#define P_SQ_CF_ALLOC_EXPORT_WORD1_BUF +#define S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(x) (((x) & 0xFFF) << 0) +#define G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(x) (((x) >> 0) & 0xFFF) +#define C_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE 0xFFFFF000 +#define S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(x) (((x) & 0xF) << 12) +#define G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(x) (((x) >> 12) & 0xF) +#define C_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK 0xFFFF0FFF +#define P_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ +#define S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(x) (((x) & 0x7) << 0) +#define G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(x) (((x) >> 0) & 0x7) +#define C_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X 0xFFFFFFF8 +#define S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(x) (((x) & 0x7) << 3) +#define G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(x) (((x) >> 3) & 0x7) +#define C_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y 0xFFFFFFC7 +#define S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(x) (((x) & 0x7) << 6) +#define G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(x) (((x) >> 6) & 0x7) +#define C_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z 0xFFFFFE3F +#define S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(x) (((x) & 0x7) << 9) +#define G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(x) (((x) >> 9) & 0x7) +#define C_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W 0xFFFFF1FF +#define P_SQ_ALU_WORD0 +#define S_SQ_ALU_WORD0_SRC0_SEL(x) (((x) & 0x1FF) << 0) +#define G_SQ_ALU_WORD0_SRC0_SEL(x) (((x) >> 0) & 0x1FF) +#define C_SQ_ALU_WORD0_SRC0_SEL 0xFFFFFE00 +#define S_SQ_ALU_WORD0_SRC0_REL(x) (((x) & 0x1) << 9) +#define G_SQ_ALU_WORD0_SRC0_REL(x) (((x) >> 9) & 0x1) +#define C_SQ_ALU_WORD0_SRC0_REL 0xFFFFFDFF +#define S_SQ_ALU_WORD0_SRC0_CHAN(x) (((x) & 0x3) << 10) +#define G_SQ_ALU_WORD0_SRC0_CHAN(x) (((x) >> 10) & 0x3) +#define C_SQ_ALU_WORD0_SRC0_CHAN 0xFFFFF3FF +#define S_SQ_ALU_WORD0_SRC0_NEG(x) (((x) & 0x1) << 12) +#define G_SQ_ALU_WORD0_SRC0_NEG(x) (((x) >> 12) & 0x1) +#define C_SQ_ALU_WORD0_SRC0_NEG 0xFFFFEFFF +#define S_SQ_ALU_WORD0_SRC1_SEL(x) (((x) & 0x1FF) << 13) +#define G_SQ_ALU_WORD0_SRC1_SEL(x) (((x) >> 13) & 0x1FF) +#define C_SQ_ALU_WORD0_SRC1_SEL 0xFFC01FFF +#define S_SQ_ALU_WORD0_SRC1_REL(x) (((x) & 0x1) << 22) +#define G_SQ_ALU_WORD0_SRC1_REL(x) (((x) >> 22) & 0x1) +#define C_SQ_ALU_WORD0_SRC1_REL 0xFFBFFFFF +#define S_SQ_ALU_WORD0_SRC1_CHAN(x) (((x) & 0x3) << 23) +#define G_SQ_ALU_WORD0_SRC1_CHAN(x) (((x) >> 23) & 0x3) +#define C_SQ_ALU_WORD0_SRC1_CHAN 0xFE7FFFFF +#define S_SQ_ALU_WORD0_SRC1_NEG(x) (((x) & 0x1) << 25) +#define G_SQ_ALU_WORD0_SRC1_NEG(x) (((x) >> 25) & 0x1) +#define C_SQ_ALU_WORD0_SRC1_NEG 0xFDFFFFFF +#define S_SQ_ALU_WORD0_INDEX_MODE(x) (((x) & 0x7) << 26) +#define G_SQ_ALU_WORD0_INDEX_MODE(x) (((x) >> 26) & 0x7) +#define C_SQ_ALU_WORD0_INDEX_MODE 0xE3FFFFFF +#define S_SQ_ALU_WORD0_PRED_SEL(x) (((x) & 0x3) << 29) +#define G_SQ_ALU_WORD0_PRED_SEL(x) (((x) >> 29) & 0x3) +#define C_SQ_ALU_WORD0_PRED_SEL 0x9FFFFFFF +#define S_SQ_ALU_WORD0_LAST(x) (((x) & 0x1) << 31) +#define G_SQ_ALU_WORD0_LAST(x) (((x) >> 31) & 0x1) +#define C_SQ_ALU_WORD0_LAST 0x7FFFFFFF +#define P_SQ_ALU_WORD1 +#define S_SQ_ALU_WORD1_ENCODING(x) (((x) & 0x7) << 15) +#define G_SQ_ALU_WORD1_ENCODING(x) (((x) >> 15) & 0x7) +#define C_SQ_ALU_WORD1_ENCODING 0xFFFC7FFF +#define S_SQ_ALU_WORD1_BANK_SWIZZLE(x) (((x) & 0x7) << 18) +#define G_SQ_ALU_WORD1_BANK_SWIZZLE(x) (((x) >> 18) & 0x7) +#define C_SQ_ALU_WORD1_BANK_SWIZZLE 0xFFE3FFFF +#define S_SQ_ALU_WORD1_DST_GPR(x) (((x) & 0x7F) << 21) +#define G_SQ_ALU_WORD1_DST_GPR(x) (((x) >> 21) & 0x7F) +#define C_SQ_ALU_WORD1_DST_GPR 0xF01FFFFF +#define S_SQ_ALU_WORD1_DST_REL(x) (((x) & 0x1) << 28) +#define G_SQ_ALU_WORD1_DST_REL(x) (((x) >> 28) & 0x1) +#define C_SQ_ALU_WORD1_DST_REL 0xEFFFFFFF +#define S_SQ_ALU_WORD1_DST_CHAN(x) (((x) & 0x3) << 29) +#define G_SQ_ALU_WORD1_DST_CHAN(x) (((x) >> 29) & 0x3) +#define C_SQ_ALU_WORD1_DST_CHAN 0x9FFFFFFF +#define S_SQ_ALU_WORD1_CLAMP(x) (((x) & 0x1) << 31) +#define G_SQ_ALU_WORD1_CLAMP(x) (((x) >> 31) & 0x1) +#define C_SQ_ALU_WORD1_CLAMP 0x7FFFFFFF +#define P_SQ_ALU_WORD1_OP2 +#define S_SQ_ALU_WORD1_OP2_SRC0_ABS(x) (((x) & 0x1) << 0) +#define G_SQ_ALU_WORD1_OP2_SRC0_ABS(x) (((x) >> 0) & 0x1) +#define C_SQ_ALU_WORD1_OP2_SRC0_ABS 0xFFFFFFFE +#define S_SQ_ALU_WORD1_OP2_SRC1_ABS(x) (((x) & 0x1) << 1) +#define G_SQ_ALU_WORD1_OP2_SRC1_ABS(x) (((x) >> 1) & 0x1) +#define C_SQ_ALU_WORD1_OP2_SRC1_ABS 0xFFFFFFFD +#define S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(x) (((x) & 0x1) << 2) +#define G_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(x) (((x) >> 2) & 0x1) +#define C_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK 0xFFFFFFFB +#define S_SQ_ALU_WORD1_OP2_UPDATE_PRED(x) (((x) & 0x1) << 3) +#define G_SQ_ALU_WORD1_OP2_UPDATE_PRED(x) (((x) >> 3) & 0x1) +#define C_SQ_ALU_WORD1_OP2_UPDATE_PRED 0xFFFFFFF7 +#define S_SQ_ALU_WORD1_OP2_WRITE_MASK(x) (((x) & 0x1) << 4) +#define G_SQ_ALU_WORD1_OP2_WRITE_MASK(x) (((x) >> 4) & 0x1) +#define C_SQ_ALU_WORD1_OP2_WRITE_MASK 0xFFFFFFEF +#define S_SQ_ALU_WORD1_OP2_OMOD(x) (((x) & 0x3) << 5) +#define G_SQ_ALU_WORD1_OP2_OMOD(x) (((x) >> 5) & 0x3) +#define C_SQ_ALU_WORD1_OP2_OMOD 0xFFFFFF9F +#define S_SQ_ALU_WORD1_OP2_ALU_INST(x) (((x) & 0x7FF) << 7) +#define G_SQ_ALU_WORD1_OP2_ALU_INST(x) (((x) >> 7) & 0x7FF) +#define C_SQ_ALU_WORD1_OP2_ALU_INST 0xFFFC007F +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD 0x00000000 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL 0x00000001 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE 0x00000002 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX 0x00000003 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN 0x00000004 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_DX10 0x00000005 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_DX10 0x00000006 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE 0x00000008 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT 0x00000009 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE 0x0000000A +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE 0x0000000B +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_DX10 0x0000000C +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_DX10 0x0000000D +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_DX10 0x0000000E +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_DX10 0x0000000F +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT 0x00000010 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC 0x00000011 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL 0x00000012 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE 0x00000013 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR 0x00000014 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA 0x00000015 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR 0x00000016 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT 0x00000018 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV 0x00000019 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP 0x0000001A +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT 0x0000001E +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT 0x0000001F +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE 0x00000020 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT 0x00000021 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE 0x00000022 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE 0x00000023 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV 0x00000024 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP 0x00000025 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR 0x00000026 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE 0x00000027 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH 0x00000028 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH 0x00000029 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH 0x0000002A +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH 0x0000002B +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE 0x0000002C +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT 0x0000002D +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE 0x0000002E +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE 0x0000002F +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT 0x00000030 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT 0x00000031 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT 0x00000032 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT 0x00000033 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT 0x00000034 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT 0x00000035 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT 0x00000036 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT 0x00000037 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT 0x00000038 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT 0x00000039 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT 0x0000003A +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT 0x0000003B +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT 0x0000003C +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT 0x0000003D +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT 0x0000003E +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT 0x0000003F +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT 0x00000040 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT 0x00000041 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT 0x00000042 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT 0x00000043 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT 0x00000044 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT 0x00000045 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT 0x00000046 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT 0x00000047 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT 0x00000048 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT 0x00000049 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT 0x0000004A +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT 0x0000004B +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT 0x0000004C +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT 0x0000004D +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT 0x0000004E +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT 0x0000004F +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 0x00000050 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE 0x00000051 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE 0x00000052 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4 0x00000053 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT 0x00000060 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE 0x00000061 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED 0x00000062 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE 0x00000063 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED 0x00000064 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF 0x00000065 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE 0x00000066 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED 0x00000067 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF 0x00000068 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE 0x00000069 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE 0x0000006A +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT 0x0000006B +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT 0x0000006C +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT 0x0000006D +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN 0x0000006E +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS 0x0000006F +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT 0x00000070 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT 0x00000071 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT 0x00000072 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT 0x00000073 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT 0x00000074 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT 0x00000075 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT 0x00000076 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT 0x00000077 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT 0x00000078 +#define V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT 0x00000079 +#define P_SQ_ALU_WORD1_OP3 +#define S_SQ_ALU_WORD1_OP3_SRC2_SEL(x) (((x) & 0x1FF) << 0) +#define G_SQ_ALU_WORD1_OP3_SRC2_SEL(x) (((x) >> 0) & 0x1FF) +#define C_SQ_ALU_WORD1_OP3_SRC2_SEL 0xFFFFFE00 +#define S_SQ_ALU_WORD1_OP3_SRC2_REL(x) (((x) & 0x1) << 9) +#define G_SQ_ALU_WORD1_OP3_SRC2_REL(x) (((x) >> 9) & 0x1) +#define C_SQ_ALU_WORD1_OP3_SRC2_REL 0xFFFFFDFF +#define S_SQ_ALU_WORD1_OP3_SRC2_CHAN(x) (((x) & 0x3) << 10) +#define G_SQ_ALU_WORD1_OP3_SRC2_CHAN(x) (((x) >> 10) & 0x3) +#define C_SQ_ALU_WORD1_OP3_SRC2_CHAN 0xFFFFF3FF +#define S_SQ_ALU_WORD1_OP3_SRC2_NEG(x) (((x) & 0x1) << 12) +#define G_SQ_ALU_WORD1_OP3_SRC2_NEG(x) (((x) >> 12) & 0x1) +#define C_SQ_ALU_WORD1_OP3_SRC2_NEG 0xFFFFEFFF +#define S_SQ_ALU_WORD1_OP3_ALU_INST(x) (((x) & 0x1F) << 13) +#define G_SQ_ALU_WORD1_OP3_ALU_INST(x) (((x) >> 13) & 0x1F) +#define C_SQ_ALU_WORD1_OP3_ALU_INST 0xFFFC1FFF +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT 0x0000000C +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M2 0x0000000D +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M4 0x0000000E +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_D2 0x0000000F +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD 0x00000010 +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_M2 0x00000011 +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_M4 0x00000012 +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_D2 0x00000013 +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE 0x00000014 +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE_M2 0x00000015 +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE_M4 0x00000016 +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE_D2 0x00000017 +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE 0x00000018 +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT 0x00000019 +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE 0x0000001A +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT 0x0000001C +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT_INT 0x0000001D +#define V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT 0x0000001E +#define P_SQ_VTX_WORD0 +#define S_SQ_VTX_WORD0_VTX_INST(x) (((x) & 0x1F) << 0) +#define G_SQ_VTX_WORD0_VTX_INST(x) (((x) >> 0) & 0x1F) +#define C_SQ_VTX_WORD0_VTX_INST 0xFFFFFFE0 +#define S_SQ_VTX_WORD0_FETCH_TYPE(x) (((x) & 0x3) << 5) +#define G_SQ_VTX_WORD0_FETCH_TYPE(x) (((x) >> 5) & 0x3) +#define C_SQ_VTX_WORD0_FETCH_TYPE 0xFFFFFF9F +#define S_SQ_VTX_WORD0_FETCH_WHOLE_QUAD(x) (((x) & 0x1) << 7) +#define G_SQ_VTX_WORD0_FETCH_WHOLE_QUAD(x) (((x) >> 7) & 0x1) +#define C_SQ_VTX_WORD0_FETCH_WHOLE_QUAD 0xFFFFFF7F +#define S_SQ_VTX_WORD0_BUFFER_ID(x) (((x) & 0xFF) << 8) +#define G_SQ_VTX_WORD0_BUFFER_ID(x) (((x) >> 8) & 0xFF) +#define C_SQ_VTX_WORD0_BUFFER_ID 0xFFFF00FF +#define S_SQ_VTX_WORD0_SRC_GPR(x) (((x) & 0x7F) << 16) +#define G_SQ_VTX_WORD0_SRC_GPR(x) (((x) >> 16) & 0x7F) +#define C_SQ_VTX_WORD0_SRC_GPR 0xFF80FFFF +#define S_SQ_VTX_WORD0_SRC_REL(x) (((x) & 0x1) << 23) +#define G_SQ_VTX_WORD0_SRC_REL(x) (((x) >> 23) & 0x1) +#define C_SQ_VTX_WORD0_SRC_REL 0xFF7FFFFF +#define S_SQ_VTX_WORD0_SRC_SEL_X(x) (((x) & 0x3) << 24) +#define G_SQ_VTX_WORD0_SRC_SEL_X(x) (((x) >> 24) & 0x3) +#define C_SQ_VTX_WORD0_SRC_SEL_X 0xFCFFFFFF +#define S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(x) (((x) & 0x3F) << 26) +#define G_SQ_VTX_WORD0_MEGA_FETCH_COUNT(x) (((x) >> 26) & 0x3F) +#define C_SQ_VTX_WORD0_MEGA_FETCH_COUNT 0x03FFFFFF +#define P_SQ_VTX_WORD1 +#define S_SQ_VTX_WORD1_DST_SEL_X(x) (((x) & 0x7) << 9) +#define G_SQ_VTX_WORD1_DST_SEL_X(x) (((x) >> 9) & 0x7) +#define C_SQ_VTX_WORD1_DST_SEL_X 0xFFFFF1FF +#define S_SQ_VTX_WORD1_DST_SEL_Y(x) (((x) & 0x7) << 12) +#define G_SQ_VTX_WORD1_DST_SEL_Y(x) (((x) >> 12) & 0x7) +#define C_SQ_VTX_WORD1_DST_SEL_Y 0xFFFF8FFF +#define S_SQ_VTX_WORD1_DST_SEL_Z(x) (((x) & 0x7) << 15) +#define G_SQ_VTX_WORD1_DST_SEL_Z(x) (((x) >> 15) & 0x7) +#define C_SQ_VTX_WORD1_DST_SEL_Z 0xFFFC7FFF +#define S_SQ_VTX_WORD1_DST_SEL_W(x) (((x) & 0x7) << 18) +#define G_SQ_VTX_WORD1_DST_SEL_W(x) (((x) >> 18) & 0x7) +#define C_SQ_VTX_WORD1_DST_SEL_W 0xFFE3FFFF +#define S_SQ_VTX_WORD1_USE_CONST_FIELDS(x) (((x) & 0x1) << 21) +#define G_SQ_VTX_WORD1_USE_CONST_FIELDS(x) (((x) >> 21) & 0x1) +#define C_SQ_VTX_WORD1_USE_CONST_FIELDS 0xFFDFFFFF +#define S_SQ_VTX_WORD1_DATA_FORMAT(x) (((x) & 0x3F) << 22) +#define G_SQ_VTX_WORD1_DATA_FORMAT(x) (((x) >> 22) & 0x3F) +#define C_SQ_VTX_WORD1_DATA_FORMAT 0xF03FFFFF +#define S_SQ_VTX_WORD1_NUM_FORMAT_ALL(x) (((x) & 0x3) << 28) +#define G_SQ_VTX_WORD1_NUM_FORMAT_ALL(x) (((x) >> 28) & 0x3) +#define C_SQ_VTX_WORD1_NUM_FORMAT_ALL 0xCFFFFFFF +#define S_SQ_VTX_WORD1_FORMAT_COMP_ALL(x) (((x) & 0x1) << 30) +#define G_SQ_VTX_WORD1_FORMAT_COMP_ALL(x) (((x) >> 30) & 0x1) +#define C_SQ_VTX_WORD1_FORMAT_COMP_ALL 0xBFFFFFFF +#define S_SQ_VTX_WORD1_SRF_MODE_ALL(x) (((x) & 0x1) << 31) +#define G_SQ_VTX_WORD1_SRF_MODE_ALL(x) (((x) >> 31) & 0x1) +#define C_SQ_VTX_WORD1_SRF_MODE_ALL 0x7FFFFFFF +#define P_SQ_VTX_WORD1_GPR +#define S_SQ_VTX_WORD1_GPR_DST_GPR(x) (((x) & 0x7F) << 0) +#define G_SQ_VTX_WORD1_GPR_DST_GPR(x) (((x) >> 0) & 0x7F) +#define C_SQ_VTX_WORD1_GPR_DST_GPR 0xFFFFFF80 +#define S_SQ_VTX_WORD1_GPR_DST_REL(x) (((x) & 0x1) << 7) +#define G_SQ_VTX_WORD1_GPR_DST_REL(x) (((x) >> 7) & 0x1) +#define C_SQ_VTX_WORD1_GPR_DST_REL 0xFFFFFF7F +#define P_SQ_VTX_WORD1_SEM +#define S_SQ_VTX_WORD1_SEM_SEMANTIC_ID(x) (((x) & 0xFF) << 0) +#define G_SQ_VTX_WORD1_SEM_SEMANTIC_ID(x) (((x) >> 0) & 0xFF) +#define C_SQ_VTX_WORD1_SEM_SEMANTIC_ID 0xFFFFFF00 +#define P_SQ_VTX_WORD2 +#define S_SQ_VTX_WORD2_OFFSET(x) (((x) & 0xFFFF) << 0) +#define G_SQ_VTX_WORD2_OFFSET(x) (((x) >> 0) & 0xFFFF) +#define C_SQ_VTX_WORD2_OFFSET 0xFFFF0000 +#define S_SQ_VTX_WORD2_ENDIAN_SWAP(x) (((x) & 0x3) << 16) +#define G_SQ_VTX_WORD2_ENDIAN_SWAP(x) (((x) >> 16) & 0x3) +#define C_SQ_VTX_WORD2_ENDIAN_SWAP 0xFFFCFFFF +#define S_SQ_VTX_WORD2_CONST_BUF_NO_STRIDE(x) (((x) & 0x1) << 18) +#define G_SQ_VTX_WORD2_CONST_BUF_NO_STRIDE(x) (((x) >> 18) & 0x1) +#define C_SQ_VTX_WORD2_CONST_BUF_NO_STRIDE 0xFFFBFFFF +#define S_SQ_VTX_WORD2_MEGA_FETCH(x) (((x) & 0x1) << 19) +#define G_SQ_VTX_WORD2_MEGA_FETCH(x) (((x) >> 19) & 0x1) +#define C_SQ_VTX_WORD2_MEGA_FETCH 0xFFF7FFFF +#define S_SQ_VTX_WORD2_ALT_CONST(x) (((x) & 0x1) << 20) +#define G_SQ_VTX_WORD2_ALT_CONST(x) (((x) >> 20) & 0x1) +#define C_SQ_VTX_WORD2_ALT_CONST 0xFFEFFFFF +#define P_SQ_TEX_WORD0 +#define S_SQ_TEX_WORD0_TEX_INST(x) (((x) & 0x1F) << 0) +#define G_SQ_TEX_WORD0_TEX_INST(x) (((x) >> 0) & 0x1F) +#define C_SQ_TEX_WORD0_TEX_INST 0xFFFFFFE0 +#define S_SQ_TEX_WORD0_BC_FRAC_MODE(x) (((x) & 0x1) << 5) +#define G_SQ_TEX_WORD0_BC_FRAC_MODE(x) (((x) >> 5) & 0x1) +#define C_SQ_TEX_WORD0_BC_FRAC_MODE 0xFFFFFFDF +#define S_SQ_TEX_WORD0_FETCH_WHOLE_QUAD(x) (((x) & 0x1) << 7) +#define G_SQ_TEX_WORD0_FETCH_WHOLE_QUAD(x) (((x) >> 7) & 0x1) +#define C_SQ_TEX_WORD0_FETCH_WHOLE_QUAD 0xFFFFFF7F +#define S_SQ_TEX_WORD0_RESOURCE_ID(x) (((x) & 0xFF) << 8) +#define G_SQ_TEX_WORD0_RESOURCE_ID(x) (((x) >> 8) & 0xFF) +#define C_SQ_TEX_WORD0_RESOURCE_ID 0xFFFF00FF +#define S_SQ_TEX_WORD0_SRC_GPR(x) (((x) & 0x7F) << 16) +#define G_SQ_TEX_WORD0_SRC_GPR(x) (((x) >> 16) & 0x7F) +#define C_SQ_TEX_WORD0_SRC_GPR 0xFF80FFFF +#define S_SQ_TEX_WORD0_SRC_REL(x) (((x) & 0x1) << 23) +#define G_SQ_TEX_WORD0_SRC_REL(x) (((x) >> 23) & 0x1) +#define C_SQ_TEX_WORD0_SRC_REL 0xFF7FFFFF +#define S_SQ_TEX_WORD0_ALT_CONST(x) (((x) & 0x1) << 24) +#define G_SQ_TEX_WORD0_ALT_CONST(x) (((x) >> 24) & 0x1) +#define C_SQ_TEX_WORD0_ALT_CONST 0xFEFFFFFF +#define P_SQ_TEX_WORD1 +#define S_SQ_TEX_WORD1_DST_GPR(x) (((x) & 0x7F) << 0) +#define G_SQ_TEX_WORD1_DST_GPR(x) (((x) >> 0) & 0x7F) +#define C_SQ_TEX_WORD1_DST_GPR 0xFFFFFF80 +#define S_SQ_TEX_WORD1_DST_REL(x) (((x) & 0x1) << 7) +#define G_SQ_TEX_WORD1_DST_REL(x) (((x) >> 7) & 0x1) +#define C_SQ_TEX_WORD1_DST_REL 0xFFFFFF7F +#define S_SQ_TEX_WORD1_DST_SEL_X(x) (((x) & 0x7) << 9) +#define G_SQ_TEX_WORD1_DST_SEL_X(x) (((x) >> 9) & 0x7) +#define C_SQ_TEX_WORD1_DST_SEL_X 0xFFFFF1FF +#define S_SQ_TEX_WORD1_DST_SEL_Y(x) (((x) & 0x7) << 12) +#define G_SQ_TEX_WORD1_DST_SEL_Y(x) (((x) >> 12) & 0x7) +#define C_SQ_TEX_WORD1_DST_SEL_Y 0xFFFF8FFF +#define S_SQ_TEX_WORD1_DST_SEL_Z(x) (((x) & 0x7) << 15) +#define G_SQ_TEX_WORD1_DST_SEL_Z(x) (((x) >> 15) & 0x7) +#define C_SQ_TEX_WORD1_DST_SEL_Z 0xFFFC7FFF +#define S_SQ_TEX_WORD1_DST_SEL_W(x) (((x) & 0x7) << 18) +#define G_SQ_TEX_WORD1_DST_SEL_W(x) (((x) >> 18) & 0x7) +#define C_SQ_TEX_WORD1_DST_SEL_W 0xFFE3FFFF +#define S_SQ_TEX_WORD1_LOD_BIAS(x) (((x) & 0x7F) << 21) +#define G_SQ_TEX_WORD1_LOD_BIAS(x) (((x) >> 21) & 0x7F) +#define C_SQ_TEX_WORD1_LOD_BIAS 0xF01FFFFF +#define S_SQ_TEX_WORD1_COORD_TYPE_X(x) (((x) & 0x1) << 28) +#define G_SQ_TEX_WORD1_COORD_TYPE_X(x) (((x) >> 28) & 0x1) +#define C_SQ_TEX_WORD1_COORD_TYPE_X 0xEFFFFFFF +#define S_SQ_TEX_WORD1_COORD_TYPE_Y(x) (((x) & 0x1) << 29) +#define G_SQ_TEX_WORD1_COORD_TYPE_Y(x) (((x) >> 29) & 0x1) +#define C_SQ_TEX_WORD1_COORD_TYPE_Y 0xDFFFFFFF +#define S_SQ_TEX_WORD1_COORD_TYPE_Z(x) (((x) & 0x1) << 30) +#define G_SQ_TEX_WORD1_COORD_TYPE_Z(x) (((x) >> 30) & 0x1) +#define C_SQ_TEX_WORD1_COORD_TYPE_Z 0xBFFFFFFF +#define S_SQ_TEX_WORD1_COORD_TYPE_W(x) (((x) & 0x1) << 31) +#define G_SQ_TEX_WORD1_COORD_TYPE_W(x) (((x) >> 31) & 0x1) +#define C_SQ_TEX_WORD1_COORD_TYPE_W 0x7FFFFFFF +#define P_SQ_TEX_WORD2 +#define S_SQ_TEX_WORD2_OFFSET_X(x) (((x) & 0x1F) << 0) +#define G_SQ_TEX_WORD2_OFFSET_X(x) (((x) >> 0) & 0x1F) +#define C_SQ_TEX_WORD2_OFFSET_X 0xFFFFFFE0 +#define S_SQ_TEX_WORD2_OFFSET_Y(x) (((x) & 0x1F) << 5) +#define G_SQ_TEX_WORD2_OFFSET_Y(x) (((x) >> 5) & 0x1F) +#define C_SQ_TEX_WORD2_OFFSET_Y 0xFFFFFC1F +#define S_SQ_TEX_WORD2_OFFSET_Z(x) (((x) & 0x1F) << 10) +#define G_SQ_TEX_WORD2_OFFSET_Z(x) (((x) >> 10) & 0x1F) +#define C_SQ_TEX_WORD2_OFFSET_Z 0xFFFF83FF +#define S_SQ_TEX_WORD2_SAMPLER_ID(x) (((x) & 0x1F) << 15) +#define G_SQ_TEX_WORD2_SAMPLER_ID(x) (((x) >> 15) & 0x1F) +#define C_SQ_TEX_WORD2_SAMPLER_ID 0xFFF07FFF +#define S_SQ_TEX_WORD2_SRC_SEL_X(x) (((x) & 0x7) << 20) +#define G_SQ_TEX_WORD2_SRC_SEL_X(x) (((x) >> 20) & 0x7) +#define C_SQ_TEX_WORD2_SRC_SEL_X 0xFF8FFFFF +#define S_SQ_TEX_WORD2_SRC_SEL_Y(x) (((x) & 0x7) << 23) +#define G_SQ_TEX_WORD2_SRC_SEL_Y(x) (((x) >> 23) & 0x7) +#define C_SQ_TEX_WORD2_SRC_SEL_Y 0xFC7FFFFF +#define S_SQ_TEX_WORD2_SRC_SEL_Z(x) (((x) & 0x7) << 26) +#define G_SQ_TEX_WORD2_SRC_SEL_Z(x) (((x) >> 26) & 0x7) +#define C_SQ_TEX_WORD2_SRC_SEL_Z 0xE3FFFFFF +#define S_SQ_TEX_WORD2_SRC_SEL_W(x) (((x) & 0x7) << 29) +#define G_SQ_TEX_WORD2_SRC_SEL_W(x) (((x) >> 29) & 0x7) +#define C_SQ_TEX_WORD2_SRC_SEL_W 0x1FFFFFFF +#define P_SQ_ALU_WORD1_OP2_V2 +#define S_SQ_ALU_WORD1_OP2_V2_SRC0_ABS(x) (((x) & 0x1) << 0) +#define G_SQ_ALU_WORD1_OP2_V2_SRC0_ABS(x) (((x) >> 0) & 0x1) +#define C_SQ_ALU_WORD1_OP2_V2_SRC0_ABS 0xFFFFFFFE +#define S_SQ_ALU_WORD1_OP2_V2_SRC1_ABS(x) (((x) & 0x1) << 1) +#define G_SQ_ALU_WORD1_OP2_V2_SRC1_ABS(x) (((x) >> 1) & 0x1) +#define C_SQ_ALU_WORD1_OP2_V2_SRC1_ABS 0xFFFFFFFD +#define S_SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK(x) (((x) & 0x1) << 2) +#define G_SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK(x) (((x) >> 2) & 0x1) +#define C_SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK 0xFFFFFFFB +#define S_SQ_ALU_WORD1_OP2_V2_UPDATE_PRED(x) (((x) & 0x1) << 3) +#define G_SQ_ALU_WORD1_OP2_V2_UPDATE_PRED(x) (((x) >> 3) & 0x1) +#define C_SQ_ALU_WORD1_OP2_V2_UPDATE_PRED 0xFFFFFFF7 +#define S_SQ_ALU_WORD1_OP2_V2_WRITE_MASK(x) (((x) & 0x1) << 4) +#define G_SQ_ALU_WORD1_OP2_V2_WRITE_MASK(x) (((x) >> 4) & 0x1) +#define C_SQ_ALU_WORD1_OP2_V2_WRITE_MASK 0xFFFFFFEF +#define S_SQ_ALU_WORD1_OP2_V2_OMOD(x) (((x) & 0x3) << 5) +#define G_SQ_ALU_WORD1_OP2_V2_OMOD(x) (((x) >> 5) & 0x3) +#define C_SQ_ALU_WORD1_OP2_V2_OMOD 0xFFFFFF9F +#define S_SQ_ALU_WORD1_OP2_V2_ALU_INST(x) (((x) & 0x7FF) << 7) +#define G_SQ_ALU_WORD1_OP2_V2_ALU_INST(x) (((x) >> 7) & 0x7FF) +#define C_SQ_ALU_WORD1_OP2_V2_ALU_INST 0xFFFC007F + +#endif diff --git a/src/gallium/drivers/r600/radeon.h b/src/gallium/drivers/r600/radeon.h new file mode 100644 index 00000000000..ec94b112d69 --- /dev/null +++ b/src/gallium/drivers/r600/radeon.h @@ -0,0 +1,602 @@ +/* + * Copyright © 2009 Jerome Glisse <[email protected]> + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#ifndef RADEON_H +#define RADEON_H + +#define RADEON_CTX_MAX_PM4 (64 * 1024 / 4) + +#include <stdint.h> + +typedef uint64_t u64; +typedef uint32_t u32; +typedef uint16_t u16; +typedef uint8_t u8; + +struct radeon; + +struct pipe_screen *radeon_create_screen(struct radeon *rw); + +enum radeon_family { + CHIP_UNKNOWN, + CHIP_R100, + CHIP_RV100, + CHIP_RS100, + CHIP_RV200, + CHIP_RS200, + CHIP_R200, + CHIP_RV250, + CHIP_RS300, + CHIP_RV280, + CHIP_R300, + CHIP_R350, + CHIP_RV350, + CHIP_RV380, + CHIP_R420, + CHIP_R423, + CHIP_RV410, + CHIP_RS400, + CHIP_RS480, + CHIP_RS600, + CHIP_RS690, + CHIP_RS740, + CHIP_RV515, + CHIP_R520, + CHIP_RV530, + CHIP_RV560, + CHIP_RV570, + CHIP_R580, + CHIP_R600, + CHIP_RV610, + CHIP_RV630, + CHIP_RV670, + CHIP_RV620, + CHIP_RV635, + CHIP_RS780, + CHIP_RS880, + CHIP_RV770, + CHIP_RV730, + CHIP_RV710, + CHIP_RV740, + CHIP_CEDAR, + CHIP_REDWOOD, + CHIP_JUNIPER, + CHIP_CYPRESS, + CHIP_HEMLOCK, + CHIP_LAST, +}; + +/* + * radeon object functions + */ +struct radeon_bo { + unsigned refcount; + unsigned handle; + unsigned size; + unsigned alignment; + unsigned map_count; + void *data; +}; +struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, + unsigned size, unsigned alignment, void *ptr); +int radeon_bo_map(struct radeon *radeon, struct radeon_bo *bo); +void radeon_bo_unmap(struct radeon *radeon, struct radeon_bo *bo); +struct radeon_bo *radeon_bo_incref(struct radeon *radeon, struct radeon_bo *bo); +struct radeon_bo *radeon_bo_decref(struct radeon *radeon, struct radeon_bo *bo); +int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo); + +/* + * states functions + */ +struct radeon_state { + struct radeon *radeon; + unsigned refcount; + unsigned type; + unsigned id; + unsigned nstates; + u32 *states; + unsigned npm4; + unsigned cpm4; + u32 pm4_crc; + u32 *pm4; + u32 nimmd; + u32 *immd; + unsigned nbo; + struct radeon_bo *bo[4]; + unsigned nreloc; + unsigned reloc_pm4_id[8]; + unsigned reloc_bo_id[8]; + u32 placement[8]; + unsigned bo_dirty[4]; +}; + +struct radeon_state *radeon_state(struct radeon *radeon, u32 type, u32 id); +struct radeon_state *radeon_state_incref(struct radeon_state *state); +struct radeon_state *radeon_state_decref(struct radeon_state *state); +int radeon_state_pm4(struct radeon_state *state); + +/* + * draw functions + */ +struct radeon_draw { + unsigned refcount; + struct radeon *radeon; + unsigned nstate; + struct radeon_state **state; + unsigned cpm4; +}; + +struct radeon_draw *radeon_draw(struct radeon *radeon); +struct radeon_draw *radeon_draw_duplicate(struct radeon_draw *draw); +struct radeon_draw *radeon_draw_incref(struct radeon_draw *draw); +struct radeon_draw *radeon_draw_decref(struct radeon_draw *draw); +int radeon_draw_set(struct radeon_draw *draw, struct radeon_state *state); +int radeon_draw_set_new(struct radeon_draw *draw, struct radeon_state *state); +int radeon_draw_check(struct radeon_draw *draw); + +struct radeon_ctx *radeon_ctx(struct radeon *radeon); +struct radeon_ctx *radeon_ctx_decref(struct radeon_ctx *ctx); +struct radeon_ctx *radeon_ctx_incref(struct radeon_ctx *ctx); +int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw); +int radeon_ctx_set_draw_new(struct radeon_ctx *ctx, struct radeon_draw *draw); +int radeon_ctx_pm4(struct radeon_ctx *ctx); +int radeon_ctx_submit(struct radeon_ctx *ctx); +void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file); + +/* + * R600/R700 + */ + +#define R600_NSTATE 1273 +#define R600_NTYPE 25 + +#define R600_CONFIG 0 +#define R600_CONFIG_TYPE 0 +#define R600_CB_CNTL 1 +#define R600_CB_CNTL_TYPE 1 +#define R600_RASTERIZER 2 +#define R600_RASTERIZER_TYPE 2 +#define R600_VIEWPORT 3 +#define R600_VIEWPORT_TYPE 3 +#define R600_SCISSOR 4 +#define R600_SCISSOR_TYPE 4 +#define R600_BLEND 5 +#define R600_BLEND_TYPE 5 +#define R600_DSA 6 +#define R600_DSA_TYPE 6 +#define R600_VS_SHADER 7 +#define R600_VS_SHADER_TYPE 7 +#define R600_PS_SHADER 8 +#define R600_PS_SHADER_TYPE 8 +#define R600_PS_CONSTANT 9 +#define R600_PS_CONSTANT_TYPE 9 +#define R600_VS_CONSTANT 265 +#define R600_VS_CONSTANT_TYPE 10 +#define R600_PS_RESOURCE 521 +#define R600_PS_RESOURCE_TYPE 11 +#define R600_VS_RESOURCE 681 +#define R600_VS_RESOURCE_TYPE 12 +#define R600_FS_RESOURCE 841 +#define R600_FS_RESOURCE_TYPE 13 +#define R600_GS_RESOURCE 1001 +#define R600_GS_RESOURCE_TYPE 14 +#define R600_PS_SAMPLER 1161 +#define R600_PS_SAMPLER_TYPE 15 +#define R600_VS_SAMPLER 1179 +#define R600_VS_SAMPLER_TYPE 16 +#define R600_GS_SAMPLER 1197 +#define R600_GS_SAMPLER_TYPE 17 +#define R600_PS_SAMPLER_BORDER 1215 +#define R600_PS_SAMPLER_BORDER_TYPE 18 +#define R600_VS_SAMPLER_BORDER 1233 +#define R600_VS_SAMPLER_BORDER_TYPE 19 +#define R600_GS_SAMPLER_BORDER 1251 +#define R600_GS_SAMPLER_BORDER_TYPE 20 +#define R600_CB0 1269 +#define R600_CB0_TYPE 21 +#define R600_DB 1270 +#define R600_DB_TYPE 22 +#define R600_VGT 1271 +#define R600_VGT_TYPE 23 +#define R600_DRAW 1272 +#define R600_DRAW_TYPE 24 +/* R600_CONFIG */ +#define R600_CONFIG__SQ_CONFIG 0 +#define R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1 1 +#define R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2 2 +#define R600_CONFIG__SQ_THREAD_RESOURCE_MGMT 3 +#define R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1 4 +#define R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2 5 +#define R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ 6 +#define R600_CONFIG__TA_CNTL_AUX 7 +#define R600_CONFIG__VC_ENHANCE 8 +#define R600_CONFIG__DB_DEBUG 9 +#define R600_CONFIG__DB_WATERMARKS 10 +#define R600_CONFIG__SX_MISC 11 +#define R600_CONFIG__SPI_THREAD_GROUPING 12 +#define R600_CONFIG__CB_SHADER_CONTROL 13 +#define R600_CONFIG__SQ_ESGS_RING_ITEMSIZE 14 +#define R600_CONFIG__SQ_GSVS_RING_ITEMSIZE 15 +#define R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE 16 +#define R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE 17 +#define R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE 18 +#define R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE 19 +#define R600_CONFIG__SQ_FBUF_RING_ITEMSIZE 20 +#define R600_CONFIG__SQ_REDUC_RING_ITEMSIZE 21 +#define R600_CONFIG__SQ_GS_VERT_ITEMSIZE 22 +#define R600_CONFIG__VGT_OUTPUT_PATH_CNTL 23 +#define R600_CONFIG__VGT_HOS_CNTL 24 +#define R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL 25 +#define R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL 26 +#define R600_CONFIG__VGT_HOS_REUSE_DEPTH 27 +#define R600_CONFIG__VGT_GROUP_PRIM_TYPE 28 +#define R600_CONFIG__VGT_GROUP_FIRST_DECR 29 +#define R600_CONFIG__VGT_GROUP_DECR 30 +#define R600_CONFIG__VGT_GROUP_VECT_0_CNTL 31 +#define R600_CONFIG__VGT_GROUP_VECT_1_CNTL 32 +#define R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL 33 +#define R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL 34 +#define R600_CONFIG__VGT_GS_MODE 35 +#define R600_CONFIG__PA_SC_MODE_CNTL 36 +#define R600_CONFIG__VGT_STRMOUT_EN 37 +#define R600_CONFIG__VGT_REUSE_OFF 38 +#define R600_CONFIG__VGT_VTX_CNT_EN 39 +#define R600_CONFIG__VGT_STRMOUT_BUFFER_EN 40 +#define R600_CONFIG_SIZE 41 +#define R600_CONFIG_PM4 128 +/* R600_CB_CNTL */ +#define R600_CB_CNTL__CB_CLEAR_RED 0 +#define R600_CB_CNTL__CB_CLEAR_GREEN 1 +#define R600_CB_CNTL__CB_CLEAR_BLUE 2 +#define R600_CB_CNTL__CB_CLEAR_ALPHA 3 +#define R600_CB_CNTL__CB_SHADER_MASK 4 +#define R600_CB_CNTL__CB_TARGET_MASK 5 +#define R600_CB_CNTL__CB_FOG_RED 6 +#define R600_CB_CNTL__CB_FOG_GREEN 7 +#define R600_CB_CNTL__CB_FOG_BLUE 8 +#define R600_CB_CNTL__CB_COLOR_CONTROL 9 +#define R600_CB_CNTL__PA_SC_AA_CONFIG 10 +#define R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_MCTX 11 +#define R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX 12 +#define R600_CB_CNTL__CB_CLRCMP_CONTROL 13 +#define R600_CB_CNTL__CB_CLRCMP_SRC 14 +#define R600_CB_CNTL__CB_CLRCMP_DST 15 +#define R600_CB_CNTL__CB_CLRCMP_MSK 16 +#define R600_CB_CNTL__PA_SC_AA_MASK 17 +#define R600_CB_CNTL_SIZE 18 +#define R600_CB_CNTL_PM4 128 +/* R600_RASTERIZER */ +#define R600_RASTERIZER__SPI_INTERP_CONTROL_0 0 +#define R600_RASTERIZER__PA_CL_CLIP_CNTL 1 +#define R600_RASTERIZER__PA_SU_SC_MODE_CNTL 2 +#define R600_RASTERIZER__PA_CL_VS_OUT_CNTL 3 +#define R600_RASTERIZER__PA_CL_NANINF_CNTL 4 +#define R600_RASTERIZER__PA_SU_POINT_SIZE 5 +#define R600_RASTERIZER__PA_SU_POINT_MINMAX 6 +#define R600_RASTERIZER__PA_SU_LINE_CNTL 7 +#define R600_RASTERIZER__PA_SC_LINE_STIPPLE 8 +#define R600_RASTERIZER__PA_SC_MPASS_PS_CNTL 9 +#define R600_RASTERIZER__PA_SC_LINE_CNTL 10 +#define R600_RASTERIZER__PA_CL_GB_VERT_CLIP_ADJ 11 +#define R600_RASTERIZER__PA_CL_GB_VERT_DISC_ADJ 12 +#define R600_RASTERIZER__PA_CL_GB_HORZ_CLIP_ADJ 13 +#define R600_RASTERIZER__PA_CL_GB_HORZ_DISC_ADJ 14 +#define R600_RASTERIZER__PA_SU_POLY_OFFSET_DB_FMT_CNTL 15 +#define R600_RASTERIZER__PA_SU_POLY_OFFSET_CLAMP 16 +#define R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_SCALE 17 +#define R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_OFFSET 18 +#define R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_SCALE 19 +#define R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_OFFSET 20 +#define R600_RASTERIZER_SIZE 21 +#define R600_RASTERIZER_PM4 128 +/* R600_VIEWPORT */ +#define R600_VIEWPORT__PA_SC_VPORT_ZMIN_0 0 +#define R600_VIEWPORT__PA_SC_VPORT_ZMAX_0 1 +#define R600_VIEWPORT__PA_CL_VPORT_XSCALE_0 2 +#define R600_VIEWPORT__PA_CL_VPORT_YSCALE_0 3 +#define R600_VIEWPORT__PA_CL_VPORT_ZSCALE_0 4 +#define R600_VIEWPORT__PA_CL_VPORT_XOFFSET_0 5 +#define R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0 6 +#define R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0 7 +#define R600_VIEWPORT__PA_CL_VTE_CNTL 8 +#define R600_VIEWPORT_SIZE 9 +#define R600_VIEWPORT_PM4 128 +/* R600_SCISSOR */ +#define R600_SCISSOR__PA_SC_SCREEN_SCISSOR_TL 0 +#define R600_SCISSOR__PA_SC_SCREEN_SCISSOR_BR 1 +#define R600_SCISSOR__PA_SC_WINDOW_OFFSET 2 +#define R600_SCISSOR__PA_SC_WINDOW_SCISSOR_TL 3 +#define R600_SCISSOR__PA_SC_WINDOW_SCISSOR_BR 4 +#define R600_SCISSOR__PA_SC_CLIPRECT_RULE 5 +#define R600_SCISSOR__PA_SC_CLIPRECT_0_TL 6 +#define R600_SCISSOR__PA_SC_CLIPRECT_0_BR 7 +#define R600_SCISSOR__PA_SC_CLIPRECT_1_TL 8 +#define R600_SCISSOR__PA_SC_CLIPRECT_1_BR 9 +#define R600_SCISSOR__PA_SC_CLIPRECT_2_TL 10 +#define R600_SCISSOR__PA_SC_CLIPRECT_2_BR 11 +#define R600_SCISSOR__PA_SC_CLIPRECT_3_TL 12 +#define R600_SCISSOR__PA_SC_CLIPRECT_3_BR 13 +#define R600_SCISSOR__PA_SC_EDGERULE 14 +#define R600_SCISSOR__PA_SC_GENERIC_SCISSOR_TL 15 +#define R600_SCISSOR__PA_SC_GENERIC_SCISSOR_BR 16 +#define R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL 17 +#define R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR 18 +#define R600_SCISSOR_SIZE 19 +#define R600_SCISSOR_PM4 128 +/* R600_BLEND */ +#define R600_BLEND__CB_BLEND_RED 0 +#define R600_BLEND__CB_BLEND_GREEN 1 +#define R600_BLEND__CB_BLEND_BLUE 2 +#define R600_BLEND__CB_BLEND_ALPHA 3 +#define R600_BLEND__CB_BLEND0_CONTROL 4 +#define R600_BLEND__CB_BLEND1_CONTROL 5 +#define R600_BLEND__CB_BLEND2_CONTROL 6 +#define R600_BLEND__CB_BLEND3_CONTROL 7 +#define R600_BLEND__CB_BLEND4_CONTROL 8 +#define R600_BLEND__CB_BLEND5_CONTROL 9 +#define R600_BLEND__CB_BLEND6_CONTROL 10 +#define R600_BLEND__CB_BLEND7_CONTROL 11 +#define R600_BLEND__CB_BLEND_CONTROL 12 +#define R600_BLEND_SIZE 13 +#define R600_BLEND_PM4 128 +/* R600_DSA */ +#define R600_DSA__DB_STENCIL_CLEAR 0 +#define R600_DSA__DB_DEPTH_CLEAR 1 +#define R600_DSA__SX_ALPHA_TEST_CONTROL 2 +#define R600_DSA__DB_STENCILREFMASK 3 +#define R600_DSA__DB_STENCILREFMASK_BF 4 +#define R600_DSA__SX_ALPHA_REF 5 +#define R600_DSA__SPI_FOG_FUNC_SCALE 6 +#define R600_DSA__SPI_FOG_FUNC_BIAS 7 +#define R600_DSA__SPI_FOG_CNTL 8 +#define R600_DSA__DB_DEPTH_CONTROL 9 +#define R600_DSA__DB_SHADER_CONTROL 10 +#define R600_DSA__DB_RENDER_CONTROL 11 +#define R600_DSA__DB_RENDER_OVERRIDE 12 +#define R600_DSA__DB_SRESULTS_COMPARE_STATE1 13 +#define R600_DSA__DB_PRELOAD_CONTROL 14 +#define R600_DSA__DB_ALPHA_TO_MASK 15 +#define R600_DSA_SIZE 16 +#define R600_DSA_PM4 128 +/* R600_VS_SHADER */ +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_0 0 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_1 1 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_2 2 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_3 3 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_4 4 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_5 5 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_6 6 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_7 7 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_8 8 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_9 9 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_10 10 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_11 11 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_12 12 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_13 13 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_14 14 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_15 15 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_16 16 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_17 17 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_18 18 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_19 19 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_20 20 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_21 21 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_22 22 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_23 23 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_24 24 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_25 25 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_26 26 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_27 27 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_28 28 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_29 29 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_30 30 +#define R600_VS_SHADER__SQ_VTX_SEMANTIC_31 31 +#define R600_VS_SHADER__SPI_VS_OUT_ID_0 32 +#define R600_VS_SHADER__SPI_VS_OUT_ID_1 33 +#define R600_VS_SHADER__SPI_VS_OUT_ID_2 34 +#define R600_VS_SHADER__SPI_VS_OUT_ID_3 35 +#define R600_VS_SHADER__SPI_VS_OUT_ID_4 36 +#define R600_VS_SHADER__SPI_VS_OUT_ID_5 37 +#define R600_VS_SHADER__SPI_VS_OUT_ID_6 38 +#define R600_VS_SHADER__SPI_VS_OUT_ID_7 39 +#define R600_VS_SHADER__SPI_VS_OUT_ID_8 40 +#define R600_VS_SHADER__SPI_VS_OUT_ID_9 41 +#define R600_VS_SHADER__SPI_VS_OUT_CONFIG 42 +#define R600_VS_SHADER__SQ_PGM_START_VS 43 +#define R600_VS_SHADER__SQ_PGM_RESOURCES_VS 44 +#define R600_VS_SHADER__SQ_PGM_START_FS 45 +#define R600_VS_SHADER__SQ_PGM_RESOURCES_FS 46 +#define R600_VS_SHADER__SQ_PGM_CF_OFFSET_VS 47 +#define R600_VS_SHADER__SQ_PGM_CF_OFFSET_FS 48 +#define R600_VS_SHADER_SIZE 49 +#define R600_VS_SHADER_PM4 128 +/* R600_PS_SHADER */ +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 0 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_1 1 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_2 2 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_3 3 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_4 4 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_5 5 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_6 6 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_7 7 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_8 8 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_9 9 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_10 10 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_11 11 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_12 12 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_13 13 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_14 14 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_15 15 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_16 16 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_17 17 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_18 18 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_19 19 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_20 20 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_21 21 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_22 22 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_23 23 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_24 24 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_25 25 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_26 26 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_27 27 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_28 28 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_29 29 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_30 30 +#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_31 31 +#define R600_PS_SHADER__SPI_PS_IN_CONTROL_0 32 +#define R600_PS_SHADER__SPI_PS_IN_CONTROL_1 33 +#define R600_PS_SHADER__SPI_INPUT_Z 34 +#define R600_PS_SHADER__SQ_PGM_START_PS 35 +#define R600_PS_SHADER__SQ_PGM_RESOURCES_PS 36 +#define R600_PS_SHADER__SQ_PGM_EXPORTS_PS 37 +#define R600_PS_SHADER__SQ_PGM_CF_OFFSET_PS 38 +#define R600_PS_SHADER_SIZE 39 +#define R600_PS_SHADER_PM4 128 +/* R600_PS_CONSTANT */ +#define R600_PS_CONSTANT__SQ_ALU_CONSTANT0_0 0 +#define R600_PS_CONSTANT__SQ_ALU_CONSTANT1_0 1 +#define R600_PS_CONSTANT__SQ_ALU_CONSTANT2_0 2 +#define R600_PS_CONSTANT__SQ_ALU_CONSTANT3_0 3 +#define R600_PS_CONSTANT_SIZE 4 +#define R600_PS_CONSTANT_PM4 128 +/* R600_VS_CONSTANT */ +#define R600_VS_CONSTANT__SQ_ALU_CONSTANT0_256 0 +#define R600_VS_CONSTANT__SQ_ALU_CONSTANT1_256 1 +#define R600_VS_CONSTANT__SQ_ALU_CONSTANT2_256 2 +#define R600_VS_CONSTANT__SQ_ALU_CONSTANT3_256 3 +#define R600_VS_CONSTANT_SIZE 4 +#define R600_VS_CONSTANT_PM4 128 +/* R600_PS_RESOURCE */ +#define R600_PS_RESOURCE__RESOURCE0_WORD0 0 +#define R600_PS_RESOURCE__RESOURCE0_WORD1 1 +#define R600_PS_RESOURCE__RESOURCE0_WORD2 2 +#define R600_PS_RESOURCE__RESOURCE0_WORD3 3 +#define R600_PS_RESOURCE__RESOURCE0_WORD4 4 +#define R600_PS_RESOURCE__RESOURCE0_WORD5 5 +#define R600_PS_RESOURCE__RESOURCE0_WORD6 6 +#define R600_PS_RESOURCE_SIZE 7 +#define R600_PS_RESOURCE_PM4 128 +/* R600_VS_RESOURCE */ +#define R600_VS_RESOURCE__RESOURCE160_WORD0 0 +#define R600_VS_RESOURCE__RESOURCE160_WORD1 1 +#define R600_VS_RESOURCE__RESOURCE160_WORD2 2 +#define R600_VS_RESOURCE__RESOURCE160_WORD3 3 +#define R600_VS_RESOURCE__RESOURCE160_WORD4 4 +#define R600_VS_RESOURCE__RESOURCE160_WORD5 5 +#define R600_VS_RESOURCE__RESOURCE160_WORD6 6 +#define R600_VS_RESOURCE_SIZE 7 +#define R600_VS_RESOURCE_PM4 128 +/* R600_FS_RESOURCE */ +#define R600_FS_RESOURCE__RESOURCE320_WORD0 0 +#define R600_FS_RESOURCE__RESOURCE320_WORD1 1 +#define R600_FS_RESOURCE__RESOURCE320_WORD2 2 +#define R600_FS_RESOURCE__RESOURCE320_WORD3 3 +#define R600_FS_RESOURCE__RESOURCE320_WORD4 4 +#define R600_FS_RESOURCE__RESOURCE320_WORD5 5 +#define R600_FS_RESOURCE__RESOURCE320_WORD6 6 +#define R600_FS_RESOURCE_SIZE 7 +#define R600_FS_RESOURCE_PM4 128 +/* R600_GS_RESOURCE */ +#define R600_GS_RESOURCE__RESOURCE336_WORD0 0 +#define R600_GS_RESOURCE__RESOURCE336_WORD1 1 +#define R600_GS_RESOURCE__RESOURCE336_WORD2 2 +#define R600_GS_RESOURCE__RESOURCE336_WORD3 3 +#define R600_GS_RESOURCE__RESOURCE336_WORD4 4 +#define R600_GS_RESOURCE__RESOURCE336_WORD5 5 +#define R600_GS_RESOURCE__RESOURCE336_WORD6 6 +#define R600_GS_RESOURCE_SIZE 7 +#define R600_GS_RESOURCE_PM4 128 +/* R600_PS_SAMPLER */ +#define R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD0_0 0 +#define R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD1_0 1 +#define R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0 2 +#define R600_PS_SAMPLER_SIZE 3 +#define R600_PS_SAMPLER_PM4 128 +/* R600_VS_SAMPLER */ +#define R600_VS_SAMPLER__SQ_TEX_SAMPLER_WORD0_18 0 +#define R600_VS_SAMPLER__SQ_TEX_SAMPLER_WORD1_18 1 +#define R600_VS_SAMPLER__SQ_TEX_SAMPLER_WORD2_18 2 +#define R600_VS_SAMPLER_SIZE 3 +#define R600_VS_SAMPLER_PM4 128 +/* R600_GS_SAMPLER */ +#define R600_GS_SAMPLER__SQ_TEX_SAMPLER_WORD0_36 0 +#define R600_GS_SAMPLER__SQ_TEX_SAMPLER_WORD1_36 1 +#define R600_GS_SAMPLER__SQ_TEX_SAMPLER_WORD2_36 2 +#define R600_GS_SAMPLER_SIZE 3 +#define R600_GS_SAMPLER_PM4 128 +/* R600_PS_SAMPLER_BORDER */ +#define R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_RED 0 +#define R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_GREEN 1 +#define R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_BLUE 2 +#define R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_ALPHA 3 +#define R600_PS_SAMPLER_BORDER_SIZE 4 +#define R600_PS_SAMPLER_BORDER_PM4 128 +/* R600_VS_SAMPLER_BORDER */ +#define R600_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_RED 0 +#define R600_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_GREEN 1 +#define R600_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_BLUE 2 +#define R600_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_ALPHA 3 +#define R600_VS_SAMPLER_BORDER_SIZE 4 +#define R600_VS_SAMPLER_BORDER_PM4 128 +/* R600_GS_SAMPLER_BORDER */ +#define R600_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_RED 0 +#define R600_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_GREEN 1 +#define R600_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_BLUE 2 +#define R600_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_ALPHA 3 +#define R600_GS_SAMPLER_BORDER_SIZE 4 +#define R600_GS_SAMPLER_BORDER_PM4 128 +/* R600_CB0 */ +#define R600_CB0__CB_COLOR0_BASE 0 +#define R600_CB0__CB_COLOR0_INFO 1 +#define R600_CB0__CB_COLOR0_SIZE 2 +#define R600_CB0__CB_COLOR0_VIEW 3 +#define R600_CB0__CB_COLOR0_FRAG 4 +#define R600_CB0__CB_COLOR0_TILE 5 +#define R600_CB0__CB_COLOR0_MASK 6 +#define R600_CB0_SIZE 7 +#define R600_CB0_PM4 128 +/* R600_DB */ +#define R600_DB__DB_DEPTH_BASE 0 +#define R600_DB__DB_DEPTH_SIZE 1 +#define R600_DB__DB_DEPTH_VIEW 2 +#define R600_DB__DB_DEPTH_INFO 3 +#define R600_DB__DB_HTILE_SURFACE 4 +#define R600_DB__DB_PREFETCH_LIMIT 5 +#define R600_DB_SIZE 6 +#define R600_DB_PM4 128 +/* R600_VGT */ +#define R600_VGT__VGT_PRIMITIVE_TYPE 0 +#define R600_VGT__VGT_MAX_VTX_INDX 1 +#define R600_VGT__VGT_MIN_VTX_INDX 2 +#define R600_VGT__VGT_INDX_OFFSET 3 +#define R600_VGT__VGT_MULTI_PRIM_IB_RESET_INDX 4 +#define R600_VGT__VGT_DMA_INDEX_TYPE 5 +#define R600_VGT__VGT_PRIMITIVEID_EN 6 +#define R600_VGT__VGT_DMA_NUM_INSTANCES 7 +#define R600_VGT__VGT_MULTI_PRIM_IB_RESET_EN 8 +#define R600_VGT__VGT_INSTANCE_STEP_RATE_0 9 +#define R600_VGT__VGT_INSTANCE_STEP_RATE_1 10 +#define R600_VGT_SIZE 11 +#define R600_VGT_PM4 128 +/* R600_DRAW */ +#define R600_DRAW__VGT_NUM_INDICES 0 +#define R600_DRAW__VGT_DMA_BASE_HI 1 +#define R600_DRAW__VGT_DMA_BASE 2 +#define R600_DRAW__VGT_DRAW_INITIATOR 3 +#define R600_DRAW_SIZE 4 +#define R600_DRAW_PM4 128 + +#endif diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 4e664683017..cb59040395b 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -189,7 +189,8 @@ softpipe_is_format_supported( struct pipe_screen *screen, struct sw_winsys *winsys = softpipe_screen(screen)->winsys; const struct util_format_description *format_desc; - assert(target == PIPE_TEXTURE_1D || + assert(target == PIPE_BUFFER || + target == PIPE_TEXTURE_1D || target == PIPE_TEXTURE_2D || target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE); diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index 75d8afb2ea8..3b30b9e341e 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -239,3 +239,8 @@ void svga_hwtnl_flush_retry( struct svga_context *svga ) assert(ret == 0); } +struct svga_winsys_context * +svga_winsys_context( struct pipe_context *pipe ) +{ + return svga_context( pipe )->swc; +} diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index 099d8f91df3..54d9faeb72a 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -43,21 +43,21 @@ int SVGA_DEBUG = 0; static const struct debug_named_value svga_debug_flags[] = { - { "dma", DEBUG_DMA }, - { "tgsi", DEBUG_TGSI }, - { "pipe", DEBUG_PIPE }, - { "state", DEBUG_STATE }, - { "screen", DEBUG_SCREEN }, - { "tex", DEBUG_TEX }, - { "swtnl", DEBUG_SWTNL }, - { "const", DEBUG_CONSTS }, - { "viewport", DEBUG_VIEWPORT }, - { "views", DEBUG_VIEWS }, - { "perf", DEBUG_PERF }, - { "flush", DEBUG_FLUSH }, - { "sync", DEBUG_SYNC }, - { "cache", DEBUG_CACHE }, - {NULL, 0} + { "dma", DEBUG_DMA, NULL }, + { "tgsi", DEBUG_TGSI, NULL }, + { "pipe", DEBUG_PIPE, NULL }, + { "state", DEBUG_STATE, NULL }, + { "screen", DEBUG_SCREEN, NULL }, + { "tex", DEBUG_TEX, NULL }, + { "swtnl", DEBUG_SWTNL, NULL }, + { "const", DEBUG_CONSTS, NULL }, + { "viewport", DEBUG_VIEWPORT, NULL }, + { "views", DEBUG_VIEWS, NULL }, + { "perf", DEBUG_PERF, NULL }, + { "flush", DEBUG_FLUSH, NULL }, + { "sync", DEBUG_SYNC, NULL }, + { "cache", DEBUG_CACHE, NULL }, + DEBUG_NAMED_VALUE_END }; #endif diff --git a/src/gallium/drivers/svga/svga_winsys.h b/src/gallium/drivers/svga/svga_winsys.h index c155f5dae2f..a2dcc84f7da 100644 --- a/src/gallium/drivers/svga/svga_winsys.h +++ b/src/gallium/drivers/svga/svga_winsys.h @@ -294,6 +294,9 @@ svga_screen_create(struct svga_winsys_screen *sws); struct svga_winsys_screen * svga_winsys_screen(struct pipe_screen *screen); +struct svga_winsys_context * +svga_winsys_context(struct pipe_context *context); + struct pipe_resource * svga_screen_buffer_wrap_surface(struct pipe_screen *screen, enum SVGA3dSurfaceFormat format, diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 7584b8d437b..9ca916fe7b4 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -1407,9 +1407,9 @@ trace_context_transfer_inline_write(struct pipe_context *_context, static const struct debug_named_value rbug_blocker_flags[] = { - {"before", 1}, - {"after", 2}, - {NULL, 0}, + {"before", 1, NULL}, + {"after", 2, NULL}, + DEBUG_NAMED_VALUE_END }; struct pipe_context * |