diff options
Diffstat (limited to 'src/gallium/drivers')
212 files changed, 23125 insertions, 3505 deletions
diff --git a/src/gallium/drivers/i915/Makefile b/src/gallium/drivers/i915/Makefile index b3f387f9335..36197fbc93b 100644 --- a/src/gallium/drivers/i915/Makefile +++ b/src/gallium/drivers/i915/Makefile @@ -21,11 +21,13 @@ C_SOURCES = \ i915_screen.c \ i915_prim_emit.c \ i915_prim_vbuf.c \ + i915_query.c \ i915_resource.c \ i915_resource_texture.c \ i915_resource_buffer.c \ i915_fpc_emit.c \ i915_fpc_translate.c \ + i915_fpc_optimize.c \ i915_surface.c include ../../Makefile.template diff --git a/src/gallium/drivers/i915/SConscript b/src/gallium/drivers/i915/SConscript index 8f5deed64a9..76f597001fe 100644 --- a/src/gallium/drivers/i915/SConscript +++ b/src/gallium/drivers/i915/SConscript @@ -14,8 +14,10 @@ i915 = env.ConvenienceLibrary( 'i915_flush.c', 'i915_fpc_emit.c', 'i915_fpc_translate.c', + 'i915_fpc_optimize.c', 'i915_prim_emit.c', 'i915_prim_vbuf.c', + 'i915_query.c', 'i915_screen.c', 'i915_state.c', 'i915_state_derived.c', diff --git a/src/gallium/drivers/i915/TODO b/src/gallium/drivers/i915/TODO index fba180064c3..c26db198d20 100644 --- a/src/gallium/drivers/i915/TODO +++ b/src/gallium/drivers/i915/TODO @@ -26,5 +26,20 @@ Random list of problems with i915g: - src/xvmc/i915_structs.h in xf86-video-intel has a few more bits of various commands defined. Scavenge them and see what's useful. +- Do smarter remapping. Right now we send everything onto tex coords 0-7. + We could also use diffuse/specular and pack two sets of 2D coords in a single + 4D. Is it a big problem though? We're more limited by the # of texture + indirections and the # of instructions. + +- Leverage draw to enable more caps: + * PIPE_CAP_TGSI_INSTANCEID + * PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS + +- Finish front/back face. We need to add face support to lp_build_system_values_array and use it in draw_llvm.c. + +- Replace constants and immediates which are 0,1,-1 or a combination of those with a swizzle. + +- i915_delete_fs_state doesn't call draw_delete_fragment_shader. Why? + Other bugs can be found here: https://bugs.freedesktop.org/buglist.cgi?bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&component=Drivers/Gallium/i915g diff --git a/src/gallium/drivers/i915/i915_batch.h b/src/gallium/drivers/i915/i915_batch.h index ce2691b2fd7..a1f8bcae802 100644 --- a/src/gallium/drivers/i915/i915_batch.h +++ b/src/gallium/drivers/i915/i915_batch.h @@ -29,6 +29,7 @@ #define I915_BATCH_H #include "i915_batchbuffer.h" +#include "i915_context.h" #define BEGIN_BATCH(dwords) \ @@ -49,11 +50,26 @@ #define FLUSH_BATCH(fence) \ i915_flush(i915, fence) - /************************************************************************ * i915_flush.c */ void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence); +/* + * Flush if the current color buf is idle and we have more than 256 vertices + * queued, or if the current color buf is busy and we have more than 4096 + * vertices queued. + */ +static INLINE void i915_flush_heuristically(struct i915_context* i915, + int num_vertex) +{ + struct i915_winsys *iws = i915->iws; + i915->vertices_since_last_flush += num_vertex; + if ( i915->vertices_since_last_flush > 4096 + || ( i915->vertices_since_last_flush > 256 && + !iws->buffer_is_busy(iws, i915->current.cbuf_bo)) ) + FLUSH_BATCH(NULL); +} + #endif diff --git a/src/gallium/drivers/i915/i915_clear.c b/src/gallium/drivers/i915/i915_clear.c index 4a97746e981..e1d6a749cdc 100644 --- a/src/gallium/drivers/i915/i915_clear.c +++ b/src/gallium/drivers/i915/i915_clear.c @@ -66,7 +66,7 @@ i915_clear_emit(struct pipe_context *pipe, unsigned buffers, const float *rgba, else clear_color = (u_color.ui & 0xffff) | (u_color.ui << 16); - util_pack_color(rgba, PIPE_FORMAT_B8G8R8A8_UNORM, &u_color); + util_pack_color(rgba, cbuf->format, &u_color); clear_color8888 = u_color.ui; } else clear_color = clear_color8888 = 0; @@ -120,6 +120,11 @@ i915_clear_emit(struct pipe_context *pipe, unsigned buffers, const float *rgba, OUT_BATCH_F(desty + height); OUT_BATCH_F(destx); OUT_BATCH_F(desty); + + /* Flush after clear, its expected to be a costly operation. + * This is not required, just a heuristic + */ + FLUSH_BATCH(NULL); } /** diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 7a98ef73c1f..1b30309bb58 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -29,6 +29,7 @@ #include "i915_state.h" #include "i915_screen.h" #include "i915_surface.h" +#include "i915_query.h" #include "i915_batch.h" #include "i915_resource.h" @@ -53,13 +54,11 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) struct i915_context *i915 = i915_context(pipe); struct draw_context *draw = i915->draw; void *mapped_indices = NULL; - unsigned cbuf_dirty; /* * Ack vs contants here, helps ipers a lot. */ - cbuf_dirty = i915->dirty & I915_NEW_VS_CONSTANTS; i915->dirty &= ~I915_NEW_VS_CONSTANTS; if (i915->dirty) @@ -72,15 +71,13 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) mapped_indices = i915_buffer(i915->index_buffer.buffer)->data; draw_set_mapped_index_buffer(draw, mapped_indices); - if (cbuf_dirty) { - if (i915->constants[PIPE_SHADER_VERTEX]) - draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, - i915_buffer(i915->constants[PIPE_SHADER_VERTEX])->data, - (i915->current.num_user_constants[PIPE_SHADER_VERTEX] * - 4 * sizeof(float))); - else - draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, NULL, 0); - } + if (i915->constants[PIPE_SHADER_VERTEX]) + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, + i915_buffer(i915->constants[PIPE_SHADER_VERTEX])->data, + (i915->current.num_user_constants[PIPE_SHADER_VERTEX] * + 4 * sizeof(float))); + else + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, NULL, 0); /* * Do the drawing @@ -102,11 +99,11 @@ static void i915_destroy(struct pipe_context *pipe) struct i915_context *i915 = i915_context(pipe); int i; - draw_destroy(i915->draw); - if (i915->blitter) util_blitter_destroy(i915->blitter); - + + draw_destroy(i915->draw); + if(i915->batch) i915->iws->batchbuffer_destroy(i915->batch); @@ -150,6 +147,8 @@ i915_create_context(struct pipe_screen *screen, void *priv) /* init this before draw */ util_slab_create(&i915->transfer_pool, sizeof(struct pipe_transfer), 16, UTIL_SLAB_SINGLETHREADED); + util_slab_create(&i915->texture_transfer_pool, sizeof(struct i915_transfer), + 16, UTIL_SLAB_SINGLETHREADED); /* Batch stream debugging is a bit hacked up at the moment: */ @@ -170,9 +169,11 @@ i915_create_context(struct pipe_screen *screen, void *priv) i915_init_state_functions(i915); i915_init_flush_functions(i915); i915_init_resource_functions(i915); + i915_init_query_functions(i915); draw_install_aaline_stage(i915->draw, &i915->base); draw_install_aapoint_stage(i915->draw, &i915->base); + draw_enable_point_sprites(i915->draw, TRUE); /* augmented draw pipeline clobbers state functions */ i915_init_fixup_state_functions(i915); diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index 964948edc0e..84862351ffe 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -102,6 +102,8 @@ struct i915_fragment_shader struct tgsi_shader_info info; + struct draw_fragment_shader *draw_data; + uint *program; uint program_len; @@ -260,6 +262,9 @@ struct i915_context { int num_validation_buffers; struct util_slab_mempool transfer_pool; + struct util_slab_mempool texture_transfer_pool; + + int vertices_since_last_flush; /** blitter/hw-clear */ struct blitter_context* blitter; diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c index b4e81147c4f..6d76afa9dbc 100644 --- a/src/gallium/drivers/i915/i915_flush.c +++ b/src/gallium/drivers/i915/i915_flush.c @@ -77,4 +77,5 @@ void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence) i915->static_dirty = ~0; /* kernel emits flushes in between batchbuffers */ i915->flush_dirty = 0; + i915->vertices_since_last_flush = 0; } diff --git a/src/gallium/drivers/i915/i915_fpc.h b/src/gallium/drivers/i915/i915_fpc.h index 2f0f99d0468..b760bc461a1 100644 --- a/src/gallium/drivers/i915/i915_fpc.h +++ b/src/gallium/drivers/i915/i915_fpc.h @@ -33,10 +33,15 @@ #include "i915_context.h" #include "i915_reg.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" #define I915_PROGRAM_SIZE 192 +/* Use those indices for pos/face routing, must be >= I915_TEX_UNITS */ +#define I915_SEMANTIC_POS 10 +#define I915_SEMANTIC_FACE 11 /** @@ -67,13 +72,13 @@ struct i915_fp_compile { uint temp_flag; /**< Tracks temporary regs which are in use */ uint utemp_flag; /**< Tracks TYPE_U temporary regs which are in use */ + uint register_phases[16]; uint nr_tex_indirect; uint nr_tex_insn; uint nr_alu_insn; uint nr_decl_insn; boolean error; /**< Set if i915_program_error() is called */ - uint wpos_tex; uint NumNativeInstructions; uint NumNativeAluInstructions; uint NumNativeTexInstructions; @@ -204,4 +209,90 @@ extern void i915_program_error(struct i915_fp_compile *p, const char *msg, ...); +/*====================================================================== + * i915_fpc_optimize.c + */ + + +struct i915_src_register +{ + unsigned File : 4; /* TGSI_FILE_ */ + unsigned Indirect : 1; /* BOOL */ + unsigned Dimension : 1; /* BOOL */ + int Index : 16; /* SINT */ + unsigned SwizzleX : 3; /* TGSI_SWIZZLE_ */ + unsigned SwizzleY : 3; /* TGSI_SWIZZLE_ */ + unsigned SwizzleZ : 3; /* TGSI_SWIZZLE_ */ + unsigned SwizzleW : 3; /* TGSI_SWIZZLE_ */ + unsigned Absolute : 1; /* BOOL */ + unsigned Negate : 1; /* BOOL */ +}; + +/* Additional swizzle supported in i915 */ +#define TGSI_SWIZZLE_ZERO 4 +#define TGSI_SWIZZLE_ONE 5 + +struct i915_dst_register +{ + unsigned File : 4; /* TGSI_FILE_ */ + unsigned WriteMask : 4; /* TGSI_WRITEMASK_ */ + unsigned Indirect : 1; /* BOOL */ + unsigned Dimension : 1; /* BOOL */ + int Index : 16; /* SINT */ + unsigned Padding : 6; +}; + + +struct i915_full_dst_register +{ + struct i915_dst_register Register; +/* + struct tgsi_src_register Indirect; + struct tgsi_dimension Dimension; + struct tgsi_src_register DimIndirect; +*/ +}; + +struct i915_full_src_register +{ + struct i915_src_register Register; +/* + struct tgsi_src_register Indirect; + struct tgsi_dimension Dimension; + struct tgsi_src_register DimIndirect; +*/ +}; + +struct i915_full_instruction +{ + struct tgsi_instruction Instruction; +/* + struct tgsi_instruction_predicate Predicate; + struct tgsi_instruction_label Label; +*/ + struct tgsi_instruction_texture Texture; + struct i915_full_dst_register Dst[1]; + struct i915_full_src_register Src[3]; +}; + + +union i915_full_token +{ + struct tgsi_token Token; + struct tgsi_full_declaration FullDeclaration; + struct tgsi_full_immediate FullImmediate; + struct i915_full_instruction FullInstruction; + struct tgsi_full_property FullProperty; +}; + +struct i915_token_list +{ + union i915_full_token* Tokens; + unsigned NumTokens; +}; + +extern struct i915_token_list* i915_optimize(const struct tgsi_token *tokens); + +extern void i915_optimize_free(struct i915_token_list* tokens); + #endif diff --git a/src/gallium/drivers/i915/i915_fpc_emit.c b/src/gallium/drivers/i915/i915_fpc_emit.c index 76c24d2b2fd..c4a42df7882 100644 --- a/src/gallium/drivers/i915/i915_fpc_emit.c +++ b/src/gallium/drivers/i915/i915_fpc_emit.c @@ -67,7 +67,7 @@ i915_get_temp(struct i915_fp_compile *p) { int bit = ffs(~p->temp_flag); if (!bit) { - i915_program_error(p, "i915_get_temp: out of temporaries\n"); + i915_program_error(p, "i915_get_temp: out of temporaries"); return 0; } @@ -92,7 +92,7 @@ i915_get_utemp(struct i915_fp_compile * p) { int bit = ffs(~p->utemp_flag); if (!bit) { - i915_program_error(p, "i915_get_utemp: out of temporaries\n"); + i915_program_error(p, "i915_get_utemp: out of temporaries"); return 0; } @@ -128,9 +128,13 @@ i915_emit_decl(struct i915_fp_compile *p, else return reg; - *(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags); - *(p->decl++) = D1_MBZ; - *(p->decl++) = D2_MBZ; + if (p->decl< p->declarations + I915_PROGRAM_SIZE) { + *(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags); + *(p->decl++) = D1_MBZ; + *(p->decl++) = D2_MBZ; + } + else + i915_program_error(p, "Out of declarations"); p->nr_decl_insn++; return reg; @@ -187,9 +191,16 @@ i915_emit_arith(struct i915_fp_compile * p, p->utemp_flag = old_utemp_flag; /* restore */ } - *(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0)); - *(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1)); - *(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2)); + if (p->csr< p->program + I915_PROGRAM_SIZE) { + *(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0)); + *(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1)); + *(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2)); + } + else + i915_program_error(p, "Out of instructions"); + + if (GET_UREG_TYPE(dest) == REG_TYPE_R) + p->register_phases[GET_UREG_NR(dest)] = p->nr_tex_indirect; p->nr_alu_insn++; return dest; @@ -245,17 +256,31 @@ uint i915_emit_texld( struct i915_fp_compile *p, assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); assert(dest == UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest))); - /* is the sampler coord a texcoord input reg? */ - if (GET_UREG_TYPE(coord) != REG_TYPE_T) { - p->nr_tex_indirect++; - } + /* Output register being oC or oD defines a phase boundary */ + if (GET_UREG_TYPE(dest) == REG_TYPE_OC || + GET_UREG_TYPE(dest) == REG_TYPE_OD) + p->nr_tex_indirect++; - *(p->csr++) = (opcode | - T0_DEST( dest ) | - T0_SAMPLER( sampler )); + /* Reading from an r# register whose contents depend on output of the + * current phase defines a phase boundary. + */ + if (GET_UREG_TYPE(coord) == REG_TYPE_R && + p->register_phases[GET_UREG_NR(coord)] == p->nr_tex_indirect) + p->nr_tex_indirect++; + + if (p->csr< p->program + I915_PROGRAM_SIZE) { + *(p->csr++) = (opcode | + T0_DEST( dest ) | + T0_SAMPLER( sampler )); + + *(p->csr++) = T1_ADDRESS_REG( coord ); + *(p->csr++) = T2_MBZ; + } + else + i915_program_error(p, "Out of instructions"); - *(p->csr++) = T1_ADDRESS_REG( coord ); - *(p->csr++) = T2_MBZ; + if (GET_UREG_TYPE(dest) == REG_TYPE_R) + p->register_phases[GET_UREG_NR(dest)] = p->nr_tex_indirect; p->nr_tex_insn++; } @@ -293,7 +318,7 @@ i915_emit_const1f(struct i915_fp_compile * p, float c0) } } - i915_program_error(p, "i915_emit_const1f: out of constants\n"); + i915_program_error(p, "i915_emit_const1f: out of constants"); return 0; } @@ -313,6 +338,8 @@ i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1) if (c1 == 1.0) return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W); + // XXX emit swizzle here for 0, 1, -1 and any combination thereof + // we can use swizzle + neg for that for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { if (ifs->constant_flags[reg] == 0xf || ifs->constant_flags[reg] == I915_CONSTFLAG_USER) @@ -329,12 +356,10 @@ i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1) } } - i915_program_error(p, "i915_emit_const2f: out of constants\n"); + i915_program_error(p, "i915_emit_const2f: out of constants"); return 0; } - - uint i915_emit_const4f(struct i915_fp_compile * p, float c0, float c1, float c2, float c3) @@ -342,6 +367,8 @@ i915_emit_const4f(struct i915_fp_compile * p, struct i915_fragment_shader *ifs = p->shader; unsigned reg; + // XXX emit swizzle here for 0, 1, -1 and any combination thereof + // we can use swizzle + neg for that for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { if (ifs->constant_flags[reg] == 0xf && ifs->constants[reg][0] == c0 && @@ -363,7 +390,7 @@ i915_emit_const4f(struct i915_fp_compile * p, } } - i915_program_error(p, "i915_emit_const4f: out of constants\n"); + i915_program_error(p, "i915_emit_const4f: out of constants"); return 0; } diff --git a/src/gallium/drivers/i915/i915_fpc_optimize.c b/src/gallium/drivers/i915/i915_fpc_optimize.c new file mode 100644 index 00000000000..2b739e9ccb8 --- /dev/null +++ b/src/gallium/drivers/i915/i915_fpc_optimize.c @@ -0,0 +1,259 @@ +/************************************************************************** + * + * Copyright 2011 The Chromium OS authors. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_fpc.h" + +#include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_string.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" + +static boolean same_dst_reg(struct i915_full_dst_register* d1, struct i915_full_dst_register* d2) +{ + return (d1->Register.File == d2->Register.File && + d1->Register.Indirect == d2->Register.Indirect && + d1->Register.Dimension == d2->Register.Dimension && + d1->Register.Index == d2->Register.Index); +} + +static boolean same_src_reg(struct i915_full_src_register* d1, struct i915_full_src_register* d2) +{ + return (d1->Register.File == d2->Register.File && + d1->Register.Indirect == d2->Register.Indirect && + d1->Register.Dimension == d2->Register.Dimension && + d1->Register.Index == d2->Register.Index && + d1->Register.Absolute == d2->Register.Absolute && + d1->Register.Negate == d2->Register.Negate); +} + +static boolean is_unswizzled(struct i915_full_src_register* r, + unsigned write_mask) +{ + if ( write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X) + return FALSE; + if ( write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y) + return FALSE; + if ( write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z) + return FALSE; + if ( write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W) + return FALSE; + return TRUE; +} + +static boolean op_commutes(unsigned opcode) +{ + if (opcode == TGSI_OPCODE_ADD) return TRUE; + if (opcode == TGSI_OPCODE_MUL) return TRUE; + return FALSE; +} + +static unsigned op_neutral_element(unsigned opcode) +{ + if (opcode == TGSI_OPCODE_ADD) + return TGSI_SWIZZLE_ZERO; + if (opcode == TGSI_OPCODE_MUL) + return TGSI_SWIZZLE_ONE; + + debug_printf("Unknown opcode %d\n",opcode); + return TGSI_SWIZZLE_ZERO; +} + +/* + * Sets the swizzle to the neutral element for the operation for the bits + * of writemask which are set, swizzle to identity otherwise. + */ +static void set_neutral_element_swizzle(struct i915_full_src_register* r, + unsigned write_mask, + unsigned neutral) +{ + if ( write_mask & TGSI_WRITEMASK_X ) + r->Register.SwizzleX = neutral; + else + r->Register.SwizzleX = TGSI_SWIZZLE_X; + + if ( write_mask & TGSI_WRITEMASK_Y ) + r->Register.SwizzleY = neutral; + else + r->Register.SwizzleY = TGSI_SWIZZLE_Y; + + if ( write_mask & TGSI_WRITEMASK_Z ) + r->Register.SwizzleZ = neutral; + else + r->Register.SwizzleZ = TGSI_SWIZZLE_Z; + + if ( write_mask & TGSI_WRITEMASK_W ) + r->Register.SwizzleW = neutral; + else + r->Register.SwizzleW = TGSI_SWIZZLE_W; +} + +/* + * Optimize away things like: + * MUL OUT[0].xyz, TEMP[1], TEMP[2] + * MOV OUT[0].w, TEMP[2] + * into: + * MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2] + * This is useful for optimizing texenv. + */ +static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, union i915_full_token* next) +{ + if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && + next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && + op_commutes(current->FullInstruction.Instruction.Opcode) && + current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate && + next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && + same_dst_reg(&next->FullInstruction.Dst[0], &next->FullInstruction.Dst[0]) && + same_src_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[1]) && + is_unswizzled(¤t->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) && + is_unswizzled(¤t->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) && + is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) ) + { + next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; + + set_neutral_element_swizzle(¤t->FullInstruction.Src[1], 0, 0); + set_neutral_element_swizzle(¤t->FullInstruction.Src[0], + next->FullInstruction.Dst[0].Register.WriteMask, + op_neutral_element(current->FullInstruction.Instruction.Opcode)); + + current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask | + next->FullInstruction.Dst[0].Register.WriteMask; + return; + } + + if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && + next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && + op_commutes(current->FullInstruction.Instruction.Opcode) && + current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate && + next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && + same_dst_reg(&next->FullInstruction.Dst[0], &next->FullInstruction.Dst[0]) && + same_src_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[0]) && + is_unswizzled(¤t->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) && + is_unswizzled(¤t->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) && + is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) ) + { + next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; + + set_neutral_element_swizzle(¤t->FullInstruction.Src[0], 0, 0); + set_neutral_element_swizzle(¤t->FullInstruction.Src[1], + next->FullInstruction.Dst[0].Register.WriteMask, + op_neutral_element(current->FullInstruction.Instruction.Opcode)); + + current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask | + next->FullInstruction.Dst[0].Register.WriteMask; + return; + } +} + +static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i) +{ + o->File = i->File; + o->Indirect = i->Indirect; + o->Dimension = i->Dimension; + o->Index = i->Index; + o->SwizzleX = i->SwizzleX; + o->SwizzleY = i->SwizzleY; + o->SwizzleZ = i->SwizzleZ; + o->SwizzleW = i->SwizzleW; + o->Absolute = i->Absolute; + o->Negate = i->Negate; +} + +static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_register* i) +{ + o->File = i->File; + o->WriteMask = i->WriteMask; + o->Indirect = i->Indirect; + o->Dimension = i->Dimension; + o->Index = i->Index; +} + +static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_full_instruction* i) +{ + memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction)); + memcpy(&o->Texture, &i->Texture, sizeof(o->Texture)); + + copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register); + + copy_src_reg(&o->Src[0].Register, &i->Src[0].Register); + copy_src_reg(&o->Src[1].Register, &i->Src[1].Register); + copy_src_reg(&o->Src[2].Register, &i->Src[2].Register); +} + +static void copy_token(union i915_full_token* o, union tgsi_full_token* i) +{ + if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) + memcpy(o, i, sizeof(*o)); + else + copy_instruction(&o->FullInstruction, &i->FullInstruction); + +} + +struct i915_token_list* i915_optimize(const struct tgsi_token *tokens) +{ + struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list)); + struct tgsi_parse_context parse; + int i = 0; + + out_tokens->NumTokens = 0; + + /* Count the tokens */ + tgsi_parse_init( &parse, tokens ); + while( !tgsi_parse_end_of_tokens( &parse ) ) { + tgsi_parse_token( &parse ); + out_tokens->NumTokens++; + } + tgsi_parse_free (&parse); + + /* Allocate our tokens */ + out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens); + + tgsi_parse_init( &parse, tokens ); + while( !tgsi_parse_end_of_tokens( &parse ) ) { + tgsi_parse_token( &parse ); + copy_token(&out_tokens->Tokens[i] , &parse.FullToken); + + if (i > 0) + i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); + + i++; + } + tgsi_parse_free (&parse); + + return out_tokens; +} + +void i915_optimize_free(struct i915_token_list* tokens) +{ + free(tokens->Tokens); + free(tokens); +} + + diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index 27f100843bf..a4ea9127976 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -41,6 +41,9 @@ #include "draw/draw_vertex.h" +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif /** * Simple pass-through fragment shader to use when we don't have @@ -72,19 +75,33 @@ static unsigned passthrough[] = /* 1, -1/3!, 1/5!, -1/7! */ -static const float sin_constants[4] = { 1.0, +static const float scs_sin_constants[4] = { 1.0, -1.0f / (3 * 2 * 1), 1.0f / (5 * 4 * 3 * 2 * 1), -1.0f / (7 * 6 * 5 * 4 * 3 * 2 * 1) }; /* 1, -1/2!, 1/4!, -1/6! */ -static const float cos_constants[4] = { 1.0, +static const float scs_cos_constants[4] = { 1.0, -1.0f / (2 * 1), 1.0f / (4 * 3 * 2 * 1), -1.0f / (6 * 5 * 4 * 3 * 2 * 1) }; +/* 2*pi, -(2*pi)^3/3!, (2*pi)^5/5!, -(2*pi)^7/7! */ +static const float sin_constants[4] = { 2.0 * M_PI, + -8.0f * M_PI * M_PI * M_PI / (3 * 2 * 1), + 32.0f * M_PI * M_PI * M_PI * M_PI * M_PI / (5 * 4 * 3 * 2 * 1), + -128.0f * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI / (7 * 6 * 5 * 4 * 3 * 2 * 1) +}; + +/* 1, -(2*pi)^2/2!, (2*pi)^4/4!, -(2*pi)^6/6! */ +static const float cos_constants[4] = { 1.0, + -4.0f * M_PI * M_PI / (2 * 1), + 16.0f * M_PI * M_PI * M_PI * M_PI / (4 * 3 * 2 * 1), + -64.0f * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI / (6 * 5 * 4 * 3 * 2 * 1) +}; + /** @@ -155,7 +172,7 @@ static uint get_mapping(struct i915_fragment_shader* fs, int unit) */ static uint src_vector(struct i915_fp_compile *p, - const struct tgsi_full_src_register *source, + const struct i915_full_src_register *source, struct i915_fragment_shader* fs) { uint index = source->Register.Index; @@ -185,12 +202,12 @@ src_vector(struct i915_fp_compile *p, switch (sem_name) { case TGSI_SEMANTIC_POSITION: - debug_printf("SKIP SEM POS\n"); - /* - assert(p->wpos_tex != -1); - src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL); - */ - break; + { + /* for fragcoord */ + int real_tex_unit = get_mapping(fs, I915_SEMANTIC_POS); + src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL); + break; + } case TGSI_SEMANTIC_COLOR: if (sem_ind == 0) { src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL); @@ -212,6 +229,13 @@ src_vector(struct i915_fp_compile *p, src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL); break; } + case TGSI_SEMANTIC_FACE: + { + /* for back/front faces */ + int real_tex_unit = get_mapping(fs, I915_SEMANTIC_FACE); + src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_X); + break; + } default: i915_program_error(p, "Bad source->Index"); return 0; @@ -237,7 +261,6 @@ src_vector(struct i915_fp_compile *p, source->Register.SwizzleZ, source->Register.SwizzleW); - /* There's both negate-all-components and per-component negation. * Try to handle both here. */ @@ -252,6 +275,9 @@ src_vector(struct i915_fp_compile *p, /* XXX enable these assertions, or fix things */ assert(!source->Register.Absolute); #endif + if (source->Register.Absolute) + debug_printf("Unhandled absolute value\n"); + return src; } @@ -261,7 +287,7 @@ src_vector(struct i915_fp_compile *p, */ static uint get_result_vector(struct i915_fp_compile *p, - const struct tgsi_full_dst_register *dest) + const struct i915_full_dst_register *dest) { switch (dest->Register.File) { case TGSI_FILE_OUTPUT: @@ -290,7 +316,7 @@ get_result_vector(struct i915_fp_compile *p, * Compute flags for saturation and writemask. */ static uint -get_result_flags(const struct tgsi_full_instruction *inst) +get_result_flags(const struct i915_full_instruction *inst) { const uint writeMask = inst->Dst[0].Register.WriteMask; @@ -352,7 +378,7 @@ translate_tex_src_target(struct i915_fp_compile *p, uint tex) */ static void emit_tex(struct i915_fp_compile *p, - const struct tgsi_full_instruction *inst, + const struct i915_full_instruction *inst, uint opcode, struct i915_fragment_shader* fs) { @@ -378,7 +404,7 @@ emit_tex(struct i915_fp_compile *p, */ static void emit_simple_arith(struct i915_fp_compile *p, - const struct tgsi_full_instruction *inst, + const struct i915_full_instruction *inst, uint opcode, uint numArgs, struct i915_fragment_shader* fs) { @@ -403,11 +429,11 @@ emit_simple_arith(struct i915_fp_compile *p, /** As above, but swap the first two src regs */ static void emit_simple_arith_swap2(struct i915_fp_compile *p, - const struct tgsi_full_instruction *inst, + const struct i915_full_instruction *inst, uint opcode, uint numArgs, struct i915_fragment_shader* fs) { - struct tgsi_full_instruction inst2; + struct i915_full_instruction inst2; assert(numArgs == 2); @@ -419,23 +445,19 @@ emit_simple_arith_swap2(struct i915_fp_compile *p, emit_simple_arith(p, &inst2, opcode, numArgs, fs); } - -#ifndef M_PI -#define M_PI 3.14159265358979323846 -#endif - /* * Translate TGSI instruction to i915 instruction. * * Possible concerns: * + * DDX, DDY -- return 0 * SIN, COS -- could use another taylor step? * LIT -- results seem a little different to sw mesa * LOG -- different to mesa on negative numbers, but this is conformant. */ static void i915_translate_instruction(struct i915_fp_compile *p, - const struct tgsi_full_instruction *inst, + const struct i915_full_instruction *inst, struct i915_fragment_shader *fs) { uint writemask; @@ -477,13 +499,6 @@ i915_translate_instruction(struct i915_fp_compile *p, i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); - /* By choosing different taylor constants, could get rid of this mul: - */ - i915_emit_arith(p, - A0_MUL, - tmp, A0_DEST_CHANNEL_X, 0, - tmp, i915_emit_const1f(p, (float) (M_PI * 2.0)), 0); - /* * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1 @@ -516,6 +531,18 @@ i915_translate_instruction(struct i915_fp_compile *p, i915_emit_const4fv(p, cos_constants), 0); break; + case TGSI_OPCODE_DDX: + case TGSI_OPCODE_DDY: + /* XXX We just output 0 here */ + debug_printf("Punting DDX/DDX\n"); + src0 = get_result_vector(p, &inst->Dst[0]); + i915_emit_arith(p, + A0_MOV, + get_result_vector(p, &inst->Dst[0]), + get_result_flags(inst), 0, + swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0); + break; + case TGSI_OPCODE_DP2: src0 = src_vector(p, &inst->Src[0], fs); src1 = src_vector(p, &inst->Src[1], fs); @@ -701,6 +728,9 @@ i915_translate_instruction(struct i915_fp_compile *p, emit_simple_arith(p, inst, A0_MUL, 2, fs); break; + case TGSI_OPCODE_NOP: + break; + case TGSI_OPCODE_POW: src0 = src_vector(p, &inst->Src[0], fs); src1 = src_vector(p, &inst->Src[1], fs); @@ -754,9 +784,9 @@ i915_translate_instruction(struct i915_fp_compile *p, * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x - * scs.x = DP4 t1, sin_constants + * scs.x = DP4 t1, scs_sin_constants * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 - * scs.y = DP4 t1, cos_constants + * scs.y = DP4 t1, scs_cos_constants */ i915_emit_arith(p, A0_MUL, @@ -791,7 +821,7 @@ i915_translate_instruction(struct i915_fp_compile *p, get_result_vector(p, &inst->Dst[0]), A0_DEST_CHANNEL_Y, 0, swizzle(tmp1, W, Z, Y, X), - i915_emit_const4fv(p, sin_constants), 0); + i915_emit_const4fv(p, scs_sin_constants), 0); } if (writemask & TGSI_WRITEMASK_X) { @@ -806,7 +836,7 @@ i915_translate_instruction(struct i915_fp_compile *p, get_result_vector(p, &inst->Dst[0]), A0_DEST_CHANNEL_X, 0, swizzle(tmp, ONE, Z, Y, X), - i915_emit_const4fv(p, cos_constants), 0); + i915_emit_const4fv(p, scs_cos_constants), 0); } break; @@ -853,13 +883,6 @@ i915_translate_instruction(struct i915_fp_compile *p, i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); - /* By choosing different taylor constants, could get rid of this mul: - */ - i915_emit_arith(p, - A0_MUL, - tmp, A0_DEST_CHANNEL_X, 0, - tmp, i915_emit_const1f(p, (float) (M_PI * 2.0)), 0); - /* * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x @@ -907,7 +930,7 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_SNE: - /* if we're neither < nor > then we're != */ + /* if we're < or > then we're != */ src0 = src_vector(p, &inst->Src[0], fs); src1 = src_vector(p, &inst->Src[1], fs); tmp = i915_get_utemp(p); @@ -1024,105 +1047,107 @@ i915_translate_instruction(struct i915_fp_compile *p, } -/** - * Translate TGSI fragment shader into i915 hardware instructions. - * \param p the translation state - * \param tokens the TGSI token array - */ -static void -i915_translate_instructions(struct i915_fp_compile *p, - const struct tgsi_token *tokens, - struct i915_fragment_shader *fs) +static void i915_translate_token(struct i915_fp_compile *p, + const union i915_full_token* token, + struct i915_fragment_shader *fs) { struct i915_fragment_shader *ifs = p->shader; - struct tgsi_parse_context parse; - - tgsi_parse_init( &parse, tokens ); - - while( !tgsi_parse_end_of_tokens( &parse ) ) { - - tgsi_parse_token( &parse ); + switch( token->Token.Type ) { + case TGSI_TOKEN_TYPE_PROPERTY: + /* + * We only support one cbuf, but we still need to ignore the property + * correctly so we don't hit the assert at the end of the switch case. + */ + assert(token->FullProperty.Property.PropertyName == + TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS); + break; - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_PROPERTY: - /* - * We only support one cbuf, but we still need to ignore the property - * correctly so we don't hit the assert at the end of the switch case. - */ - assert(parse.FullToken.FullProperty.Property.PropertyName == - TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS); - break; - case TGSI_TOKEN_TYPE_DECLARATION: - if (parse.FullToken.FullDeclaration.Declaration.File - == TGSI_FILE_CONSTANT) { - uint i; - for (i = parse.FullToken.FullDeclaration.Range.First; - i <= parse.FullToken.FullDeclaration.Range.Last; - i++) { - assert(ifs->constant_flags[i] == 0x0); - ifs->constant_flags[i] = I915_CONSTFLAG_USER; - ifs->num_constants = MAX2(ifs->num_constants, i + 1); - } + case TGSI_TOKEN_TYPE_DECLARATION: + if (token->FullDeclaration.Declaration.File + == TGSI_FILE_CONSTANT) { + uint i; + for (i = token->FullDeclaration.Range.First; + i <= token->FullDeclaration.Range.Last; + i++) { + assert(ifs->constant_flags[i] == 0x0); + ifs->constant_flags[i] = I915_CONSTFLAG_USER; + ifs->num_constants = MAX2(ifs->num_constants, i + 1); } - else if (parse.FullToken.FullDeclaration.Declaration.File - == TGSI_FILE_TEMPORARY) { - uint i; - for (i = parse.FullToken.FullDeclaration.Range.First; - i <= parse.FullToken.FullDeclaration.Range.Last; - i++) { - assert(i < I915_MAX_TEMPORARY); + } + else if (token->FullDeclaration.Declaration.File + == TGSI_FILE_TEMPORARY) { + uint i; + for (i = token->FullDeclaration.Range.First; + i <= token->FullDeclaration.Range.Last; + i++) { + if (i >= I915_MAX_TEMPORARY) + debug_printf("Too many temps (%d)\n",i); + else /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ p->temp_flag |= (1 << i); /* mark temp as used */ - } } - break; + } + break; - case TGSI_TOKEN_TYPE_IMMEDIATE: - { - const struct tgsi_full_immediate *imm - = &parse.FullToken.FullImmediate; - const uint pos = p->num_immediates++; - uint j; - assert( imm->Immediate.NrTokens <= 4 + 1 ); - for (j = 0; j < imm->Immediate.NrTokens - 1; j++) { - p->immediates[pos][j] = imm->u[j].Float; - } + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + const struct tgsi_full_immediate *imm + = &token->FullImmediate; + const uint pos = p->num_immediates++; + uint j; + assert( imm->Immediate.NrTokens <= 4 + 1 ); + for (j = 0; j < imm->Immediate.NrTokens - 1; j++) { + p->immediates[pos][j] = imm->u[j].Float; } - break; + } + break; - case TGSI_TOKEN_TYPE_INSTRUCTION: - if (p->first_instruction) { - /* resolve location of immediates */ - uint i, j; - for (i = 0; i < p->num_immediates; i++) { - /* find constant slot for this immediate */ - for (j = 0; j < I915_MAX_CONSTANT; j++) { - if (ifs->constant_flags[j] == 0x0) { - memcpy(ifs->constants[j], - p->immediates[i], - 4 * sizeof(float)); - /*printf("immediate %d maps to const %d\n", i, j);*/ - ifs->constant_flags[j] = 0xf; /* all four comps used */ - p->immediates_map[i] = j; - ifs->num_constants = MAX2(ifs->num_constants, j + 1); - break; - } + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (p->first_instruction) { + /* resolve location of immediates */ + uint i, j; + for (i = 0; i < p->num_immediates; i++) { + /* find constant slot for this immediate */ + for (j = 0; j < I915_MAX_CONSTANT; j++) { + if (ifs->constant_flags[j] == 0x0) { + memcpy(ifs->constants[j], + p->immediates[i], + 4 * sizeof(float)); + /*printf("immediate %d maps to const %d\n", i, j);*/ + ifs->constant_flags[j] = 0xf; /* all four comps used */ + p->immediates_map[i] = j; + ifs->num_constants = MAX2(ifs->num_constants, j + 1); + break; } } - - p->first_instruction = FALSE; } - i915_translate_instruction(p, &parse.FullToken.FullInstruction, fs); - break; - - default: - assert( 0 ); + p->first_instruction = FALSE; } - } /* while */ + i915_translate_instruction(p, &token->FullInstruction, fs); + break; + + default: + assert( 0 ); + } - tgsi_parse_free (&parse); +} + +/** + * Translate TGSI fragment shader into i915 hardware instructions. + * \param p the translation state + * \param tokens the TGSI token array + */ +static void +i915_translate_instructions(struct i915_fp_compile *p, + const struct i915_token_list *tokens, + struct i915_fragment_shader *fs) +{ + int i; + for(i = 0; i<tokens->NumTokens; i++) { + i915_translate_token(p, &tokens->Tokens[i], fs); + } } @@ -1144,6 +1169,8 @@ i915_init_compile(struct i915_context *i915, ifs->num_constants = 0; memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags)); + memset(&p->register_phases, 0, sizeof(p->register_phases)); + for (i = 0; i < I915_TEX_UNITS; i++) ifs->generic_mapping[i] = -1; @@ -1161,8 +1188,6 @@ i915_init_compile(struct i915_context *i915, p->temp_flag = ~0x0 << I915_MAX_TEMPORARY; p->utemp_flag = ~0x7; - p->wpos_tex = -1; - /* initialize the first program word */ *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM; @@ -1181,7 +1206,7 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) unsigned long decl_size = (unsigned long) (p->decl - p->declarations); if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT) - i915_program_error(p, "Exceeded max nr indirect texture lookups"); + debug_printf("Exceeded max nr indirect texture lookups\n"); if (p->nr_tex_insn > I915_MAX_TEX_INSN) i915_program_error(p, "Exceeded max TEX instructions"); @@ -1234,40 +1259,6 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) } -/** - * Find an unused texture coordinate slot to use for fragment WPOS. - * Update p->fp->wpos_tex with the result (-1 if no used texcoord slot is found). - */ -static void -i915_find_wpos_space(struct i915_fp_compile *p) -{ -#if 0 - const uint inputs - = p->shader->inputs_read | (1 << TGSI_ATTRIB_POS); /*XXX hack*/ - uint i; - - p->wpos_tex = -1; - - if (inputs & (1 << TGSI_ATTRIB_POS)) { - for (i = 0; i < I915_TEX_UNITS; i++) { - if ((inputs & (1 << (TGSI_ATTRIB_TEX0 + i))) == 0) { - p->wpos_tex = i; - return; - } - } - - i915_program_error(p, "No free texcoord for wpos value"); - } -#else - if (p->shader->info.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) { - /* frag shader using the fragment position input */ -#if 0 - assert(0); -#endif - } -#endif -} - @@ -1300,6 +1291,7 @@ i915_translate_fragment_program( struct i915_context *i915, { struct i915_fp_compile *p; const struct tgsi_token *tokens = fs->state.tokens; + struct i915_token_list* i_tokens; #if 0 tgsi_dump(tokens, 0); @@ -1314,10 +1306,11 @@ i915_translate_fragment_program( struct i915_context *i915, } p = i915_init_compile(i915, fs); - i915_find_wpos_space(p); - i915_translate_instructions(p, tokens, fs); + i_tokens = i915_optimize(tokens); + i915_translate_instructions(p, i_tokens, fs); i915_fixup_depth_write(p); i915_fini_compile(i915, p); + i915_optimize_free(i_tokens); } diff --git a/src/gallium/drivers/i915/i915_prim_emit.c b/src/gallium/drivers/i915/i915_prim_emit.c index 85656cd7846..1acde97d4bd 100644 --- a/src/gallium/drivers/i915/i915_prim_emit.c +++ b/src/gallium/drivers/i915/i915_prim_emit.c @@ -166,6 +166,8 @@ emit_prim( struct draw_stage *stage, for (i = 0; i < nr; i++) emit_hw_vertex(i915, prim->v[i]); + + i915_flush_heuristically(i915, nr); } diff --git a/src/gallium/drivers/i915/i915_prim_vbuf.c b/src/gallium/drivers/i915/i915_prim_vbuf.c index 79db3b650eb..d8ae1de2963 100644 --- a/src/gallium/drivers/i915/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915/i915_prim_vbuf.c @@ -487,6 +487,7 @@ draw_arrays_fallback(struct vbuf_render *render, draw_arrays_generate_indices(render, start, nr, i915_render->fallback); + i915_flush_heuristically(i915, nr_indices); out: return; } @@ -534,6 +535,7 @@ i915_vbuf_render_draw_arrays(struct vbuf_render *render, nr); OUT_BATCH(start); /* Beginning vertex index */ + i915_flush_heuristically(i915, nr); out: return; } @@ -657,6 +659,7 @@ i915_vbuf_render_draw_elements(struct vbuf_render *render, save_nr_indices, i915_render->fallback); + i915_flush_heuristically(i915, nr_indices); out: return; } diff --git a/src/gallium/drivers/i915/i915_query.c b/src/gallium/drivers/i915/i915_query.c new file mode 100644 index 00000000000..c886df74bad --- /dev/null +++ b/src/gallium/drivers/i915/i915_query.c @@ -0,0 +1,86 @@ +/************************************************************************** + * + * Copyright 2011 The Chromium OS authors. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Fake occlusion queries which return 0, it's better than crashing */ + +#include "pipe/p_compiler.h" + +#include "util/u_memory.h" + +#include "i915_context.h" +#include "i915_query.h" + +struct i915_query +{ + unsigned query; +}; + +static struct pipe_query *i915_create_query(struct pipe_context *ctx, + unsigned query_type) +{ + struct i915_query *query = CALLOC_STRUCT( i915_query ); + + return (struct pipe_query *)query; +} + +static void i915_destroy_query(struct pipe_context *ctx, + struct pipe_query *query) +{ + FREE(query); +} + +static void i915_begin_query(struct pipe_context *ctx, + struct pipe_query *query) +{ +} + +static void i915_end_query(struct pipe_context *ctx, struct pipe_query *query) +{ +} + +static boolean i915_get_query_result(struct pipe_context *ctx, + struct pipe_query *query, + boolean wait, + void *vresult) +{ + uint64_t *result = (uint64_t*)vresult; + + /* 2* viewport Max */ + *result = 512*1024*1024; + return TRUE; +} + +void +i915_init_query_functions(struct i915_context *i915) +{ + i915->base.create_query = i915_create_query; + i915->base.destroy_query = i915_destroy_query; + i915->base.begin_query = i915_begin_query; + i915->base.end_query = i915_end_query; + i915->base.get_query_result = i915_get_query_result; +} + diff --git a/src/gallium/drivers/i915/i915_query.h b/src/gallium/drivers/i915/i915_query.h new file mode 100644 index 00000000000..2c689ea6b1c --- /dev/null +++ b/src/gallium/drivers/i915/i915_query.h @@ -0,0 +1,36 @@ +/************************************************************************** + * + * Copyright 2011 The Chromium OS authors. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef I915_QUERY_H +#define I915_QUERY_H + +struct i915_context; +struct pipe_context; + +void i915_init_query_functions( struct i915_context *i915 ); + +#endif /* I915_QUERY_H */ diff --git a/src/gallium/drivers/i915/i915_resource.c b/src/gallium/drivers/i915/i915_resource.c index 7f52ba11d61..b4719af1fb6 100644 --- a/src/gallium/drivers/i915/i915_resource.c +++ b/src/gallium/drivers/i915/i915_resource.c @@ -7,12 +7,12 @@ static struct pipe_resource * i915_resource_create(struct pipe_screen *screen, - const struct pipe_resource *template) + const struct pipe_resource *template) { if (template->target == PIPE_BUFFER) return i915_buffer_create(screen, template); else - return i915_texture_create(screen, template); + return i915_texture_create(screen, template, FALSE); } diff --git a/src/gallium/drivers/i915/i915_resource.h b/src/gallium/drivers/i915/i915_resource.h index c15ecdfc22a..14eed2c4a79 100644 --- a/src/gallium/drivers/i915/i915_resource.h +++ b/src/gallium/drivers/i915/i915_resource.h @@ -45,6 +45,15 @@ struct i915_buffer { boolean free_on_destroy; }; + +/* Texture transfer. */ +struct i915_transfer { + /* Base class. */ + struct pipe_transfer b; + struct pipe_resource *staging_texture; +}; + + #define I915_MAX_TEXTURE_2D_LEVELS 12 /* max 2048x2048 */ #define I915_MAX_TEXTURE_3D_LEVELS 9 /* max 256x256x256 */ @@ -101,7 +110,8 @@ static INLINE struct i915_buffer *i915_buffer(struct pipe_resource *resource) struct pipe_resource * i915_texture_create(struct pipe_screen *screen, - const struct pipe_resource *template); + const struct pipe_resource *template, + boolean force_untiled); struct pipe_resource * i915_texture_from_handle(struct pipe_screen * screen, diff --git a/src/gallium/drivers/i915/i915_resource_texture.c b/src/gallium/drivers/i915/i915_resource_texture.c index b74b19d0fe4..0b6424f8d16 100644 --- a/src/gallium/drivers/i915/i915_resource_texture.c +++ b/src/gallium/drivers/i915/i915_resource_texture.c @@ -37,6 +37,7 @@ #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_rect.h" #include "i915_context.h" #include "i915_resource.h" @@ -710,7 +711,7 @@ i915_texture_destroy(struct pipe_screen *screen, FREE(tex); } -static struct pipe_transfer * +static struct pipe_transfer * i915_texture_get_transfer(struct pipe_context *pipe, struct pipe_resource *resource, unsigned level, @@ -719,19 +720,45 @@ i915_texture_get_transfer(struct pipe_context *pipe, { struct i915_context *i915 = i915_context(pipe); struct i915_texture *tex = i915_texture(resource); - struct pipe_transfer *transfer = util_slab_alloc(&i915->transfer_pool); + struct i915_transfer *transfer = util_slab_alloc(&i915->texture_transfer_pool); + boolean use_staging_texture = FALSE; if (transfer == NULL) return NULL; - transfer->resource = resource; - transfer->level = level; - transfer->usage = usage; - transfer->box = *box; - transfer->stride = tex->stride; - /* FIXME: layer_stride */ + transfer->b.resource = resource; + transfer->b.level = level; + transfer->b.usage = usage; + transfer->b.box = *box; + transfer->b.stride = tex->stride; + transfer->staging_texture = NULL; + /* XXX: handle depth textures everyhwere*/ + transfer->b.layer_stride = 0; + transfer->b.data = NULL; + + /* if we use staging transfers, only support textures we can render to, + * because we need that for u_blitter */ + if (i915->blitter && + i915_is_format_supported(NULL, /* screen */ + transfer->b.resource->format, + 0, /* target */ + 1, /* sample count */ + PIPE_BIND_RENDER_TARGET) && + (usage & PIPE_TRANSFER_WRITE) && + !(usage & (PIPE_TRANSFER_READ | PIPE_TRANSFER_DONTBLOCK | PIPE_TRANSFER_UNSYNCHRONIZED))) + use_staging_texture = TRUE; + + use_staging_texture = FALSE; + + if (use_staging_texture) { + /* + * Allocate the untiled staging texture. + * If the alloc fails, transfer->staging_texture is NULL and we fallback to a map() + */ + transfer->staging_texture = i915_texture_create(pipe->screen, resource, TRUE); + } - return transfer; + return (struct pipe_transfer*)transfer; } static void @@ -739,17 +766,33 @@ i915_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *transfer) { struct i915_context *i915 = i915_context(pipe); - util_slab_free(&i915->transfer_pool, transfer); + struct i915_transfer *itransfer = (struct i915_transfer*)transfer; + + if ((itransfer->staging_texture) && + (transfer->usage & PIPE_TRANSFER_WRITE)) { + struct pipe_box sbox; + + u_box_origin_2d(itransfer->b.box.width, itransfer->b.box.height, &sbox); + pipe->resource_copy_region(pipe, itransfer->b.resource, itransfer->b.level, + itransfer->b.box.x, itransfer->b.box.y, itransfer->b.box.z, + itransfer->staging_texture, + 0, &sbox); + pipe->flush(pipe, NULL); + pipe_resource_reference(&itransfer->staging_texture, NULL); + } + + util_slab_free(&i915->texture_transfer_pool, itransfer); } static void * i915_texture_transfer_map(struct pipe_context *pipe, struct pipe_transfer *transfer) { - struct pipe_resource *resource = transfer->resource; - struct i915_texture *tex = i915_texture(resource); + struct i915_transfer *itransfer = (struct i915_transfer*)transfer; + struct pipe_resource *resource = itransfer->b.resource; + struct i915_texture *tex = NULL; struct i915_winsys *iws = i915_screen(pipe->screen)->iws; - struct pipe_box *box = &transfer->box; + struct pipe_box *box = &itransfer->b.box; enum pipe_format format = resource->format; unsigned offset; char *map; @@ -757,18 +800,25 @@ i915_texture_transfer_map(struct pipe_context *pipe, if (resource->target != PIPE_TEXTURE_3D && resource->target != PIPE_TEXTURE_CUBE) assert(box->z == 0); - offset = i915_texture_offset(tex, transfer->level, box->z); - /* TODO this is a sledgehammer */ - pipe->flush(pipe, NULL); + if (itransfer->staging_texture) { + tex = i915_texture(itransfer->staging_texture); + } else { + /* TODO this is a sledgehammer */ + tex = i915_texture(resource); + pipe->flush(pipe, NULL); + } + + offset = i915_texture_offset(tex, itransfer->b.level, box->z); map = iws->buffer_map(iws, tex->buffer, - (transfer->usage & PIPE_TRANSFER_WRITE) ? TRUE : FALSE); - if (map == NULL) + (itransfer->b.usage & PIPE_TRANSFER_WRITE) ? TRUE : FALSE); + if (map == NULL) { return NULL; + } return map + offset + - box->y / util_format_get_blockheight(format) * transfer->stride + + box->y / util_format_get_blockheight(format) * itransfer->b.stride + box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); } @@ -776,14 +826,106 @@ static void i915_texture_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) { - struct i915_texture *tex = i915_texture(transfer->resource); + struct i915_transfer *itransfer = (struct i915_transfer*)transfer; + struct i915_texture *tex = i915_texture(itransfer->b.resource); struct i915_winsys *iws = i915_screen(tex->b.b.screen)->iws; + + if (itransfer->staging_texture) + tex = i915_texture(itransfer->staging_texture); + iws->buffer_unmap(iws, tex->buffer); } +static void i915_transfer_inline_write( struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned layer_stride) +{ + struct pipe_transfer *transfer = NULL; + struct i915_transfer *itransfer = NULL; + const uint8_t *src_data = data; + unsigned i; + + transfer = pipe->get_transfer(pipe, + resource, + level, + usage, + box ); + if (transfer == NULL) + goto out; + + itransfer = (struct i915_transfer*)transfer; + + if (itransfer->staging_texture) { + struct i915_texture *tex = i915_texture(itransfer->staging_texture); + enum pipe_format format = tex->b.b.format; + struct i915_winsys *iws = i915_screen(tex->b.b.screen)->iws; + size_t offset; + size_t size; + + offset = i915_texture_offset(tex, transfer->level, transfer->box.z); + + for (i = 0; i < box->depth; i++) { + if (!tex->b.b.last_level && + tex->b.b.width0 == transfer->box.width) { + unsigned nby = util_format_get_nblocksy(format, transfer->box.y); + assert(!offset); + assert(!transfer->box.x); + assert(tex->stride == transfer->stride); + + offset += tex->stride * nby; + size = util_format_get_2d_size(format, transfer->stride, + transfer->box.height); + iws->buffer_write(iws, tex->buffer, offset, size, transfer->data); + + } else { + unsigned nby = util_format_get_nblocksy(format, transfer->box.y); + int i; + offset += util_format_get_stride(format, transfer->box.x); + size = transfer->stride; + + for (i = 0; i < nby; i++) { + iws->buffer_write(iws, tex->buffer, offset, size, transfer->data); + offset += tex->stride; + } + } + offset += layer_stride; + } + } else { + uint8_t *map = pipe_transfer_map(pipe, &itransfer->b); + if (map == NULL) + goto nomap; + + for (i = 0; i < box->depth; i++) { + util_copy_rect(map, + resource->format, + itransfer->b.stride, /* bytes */ + 0, 0, + box->width, + box->height, + src_data, + stride, /* bytes */ + 0, 0); + map += itransfer->b.layer_stride; + src_data += layer_stride; + } +nomap: + if (map) + pipe_transfer_unmap(pipe, &itransfer->b); + } + +out: + if (itransfer) + pipe_transfer_destroy(pipe, &itransfer->b); +} -struct u_resource_vtbl i915_texture_vtbl = + +struct u_resource_vtbl i915_texture_vtbl = { i915_texture_get_handle, /* get_handle */ i915_texture_destroy, /* resource_destroy */ @@ -792,7 +934,7 @@ struct u_resource_vtbl i915_texture_vtbl = i915_texture_transfer_map, /* transfer_map */ u_default_transfer_flush_region, /* transfer_flush_region */ i915_texture_transfer_unmap, /* transfer_unmap */ - u_default_transfer_inline_write /* transfer_inline_write */ + i915_transfer_inline_write /* transfer_inline_write */ }; @@ -800,7 +942,8 @@ struct u_resource_vtbl i915_texture_vtbl = struct pipe_resource * i915_texture_create(struct pipe_screen *screen, - const struct pipe_resource *template) + const struct pipe_resource *template, + boolean force_untiled) { struct i915_screen *is = i915_screen(screen); struct i915_winsys *iws = is->iws; @@ -815,7 +958,10 @@ i915_texture_create(struct pipe_screen *screen, pipe_reference_init(&tex->b.b.reference, 1); tex->b.b.screen = screen; - tex->tiling = i915_texture_tiling(is, tex); + if (force_untiled) + tex->tiling = I915_TILE_NONE; + else + tex->tiling = i915_texture_tiling(is, tex); if (is->is_i945) { if (!i945_texture_layout(tex)) @@ -836,7 +982,7 @@ i915_texture_create(struct pipe_screen *screen, buf_usage = I915_NEW_TEXTURE; tex->buffer = iws->buffer_create_tiled(iws, &tex->stride, tex->total_nblocksy, - &tex->tiling, buf_usage); + &tex->tiling, buf_usage); if (!tex->buffer) goto fail; diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index 5b3af2519fc..c108c702983 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -109,17 +109,17 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) case PIPE_CAP_ANISOTROPIC_FILTER: case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: case PIPE_CAP_NPOT_TEXTURES: + case PIPE_CAP_POINT_SPRITE: case PIPE_CAP_PRIMITIVE_RESTART: /* draw module */ case PIPE_CAP_TEXTURE_MIRROR_REPEAT: case PIPE_CAP_TEXTURE_SHADOW_MAP: case PIPE_CAP_TWO_SIDED_STENCIL: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: return 1; /* Features that should be supported (boolean caps). */ /* XXX: Just test the code */ case PIPE_CAP_BLEND_EQUATION_SEPARATE: - /* XXX: No code but hw supports it */ - case PIPE_CAP_POINT_SPRITE: /* Also lie about these when asked to (needed for GLSL / GL 2.0) */ return is->debug.lie ? 1 : 0; @@ -129,7 +129,6 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_INDEP_BLEND_FUNC: case PIPE_CAP_TGSI_INSTANCEID: - case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: case PIPE_CAP_SHADER_STENCIL_EXPORT: case PIPE_CAP_TEXTURE_MIRROR_CLAMP: case PIPE_CAP_TEXTURE_SWIZZLE: @@ -256,7 +255,7 @@ i915_get_paramf(struct pipe_screen *screen, enum pipe_cap cap) } } -static boolean +boolean i915_is_format_supported(struct pipe_screen *screen, enum pipe_format format, enum pipe_texture_target target, @@ -266,7 +265,10 @@ i915_is_format_supported(struct pipe_screen *screen, static const enum pipe_format tex_supported[] = { PIPE_FORMAT_B8G8R8A8_UNORM, PIPE_FORMAT_B8G8R8X8_UNORM, + PIPE_FORMAT_R8G8B8A8_UNORM, + PIPE_FORMAT_R8G8B8X8_UNORM, PIPE_FORMAT_B5G6R5_UNORM, + PIPE_FORMAT_B10G10R10A2_UNORM, PIPE_FORMAT_L8_UNORM, PIPE_FORMAT_A8_UNORM, PIPE_FORMAT_I8_UNORM, @@ -285,7 +287,12 @@ i915_is_format_supported(struct pipe_screen *screen, }; static const enum pipe_format render_supported[] = { PIPE_FORMAT_B8G8R8A8_UNORM, + PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_B5G6R5_UNORM, + PIPE_FORMAT_B10G10R10A2_UNORM, + PIPE_FORMAT_L8_UNORM, + PIPE_FORMAT_A8_UNORM, + PIPE_FORMAT_I8_UNORM, PIPE_FORMAT_NONE /* list terminator */ }; static const enum pipe_format depth_supported[] = { diff --git a/src/gallium/drivers/i915/i915_screen.h b/src/gallium/drivers/i915/i915_screen.h index cfc585b5350..9f2004eb942 100644 --- a/src/gallium/drivers/i915/i915_screen.h +++ b/src/gallium/drivers/i915/i915_screen.h @@ -65,5 +65,11 @@ i915_screen(struct pipe_screen *pscreen) return (struct i915_screen *) pscreen; } +boolean +i915_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned tex_usage); #endif /* I915_SCREEN_H */ diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index 1b57c5776f2..2812de1fe80 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -146,6 +146,7 @@ i915_create_blend_state(struct pipe_context *pipe, if (blend->dither) cso_data->LIS5 |= S5_COLOR_DITHER_ENABLE; + /* XXX here take the target fixup into account */ if ((blend->rt[0].colormask & PIPE_MASK_R) == 0) cso_data->LIS5 |= S5_WRITEDISABLE_RED; @@ -243,10 +244,10 @@ i915_create_sampler_state(struct pipe_context *pipe, /* Shadow: */ - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { cso->state[0] |= (SS2_SHADOW_ENABLE | - i915_translate_compare_func(sampler->compare_func)); + i915_translate_shadow_compare_func(sampler->compare_func)); minFilt = FILTER_4X4_FLAT; magFilt = FILTER_4X4_FLAT; @@ -466,6 +467,7 @@ i915_create_fs_state(struct pipe_context *pipe, if (!ifs) return NULL; + ifs->draw_data = draw_create_fragment_shader(i915->draw, templ); ifs->state.tokens = tgsi_dup_tokens(templ->tokens); tgsi_scan_shader(templ->tokens, &ifs->info); @@ -495,6 +497,8 @@ i915_bind_fs_state(struct pipe_context *pipe, void *shader) i915->fs = (struct i915_fragment_shader*) shader; + draw_bind_fragment_shader(i915->draw, (i915->fs ? i915->fs->draw_data : NULL)); + i915->dirty |= I915_NEW_FS; } @@ -503,12 +507,14 @@ void i915_delete_fs_state(struct pipe_context *pipe, void *shader) { struct i915_fragment_shader *ifs = (struct i915_fragment_shader *) shader; - if (ifs->program) + if (ifs->program) { FREE(ifs->program); + ifs->program = NULL; + FREE((struct tgsi_token *)ifs->state.tokens); + ifs->state.tokens = NULL; + } ifs->program_len = 0; - FREE((struct tgsi_token *)ifs->state.tokens); - FREE(ifs); } diff --git a/src/gallium/drivers/i915/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c index 392ba191140..e01f16e715c 100644 --- a/src/gallium/drivers/i915/i915_state_derived.c +++ b/src/gallium/drivers/i915/i915_state_derived.c @@ -33,6 +33,7 @@ #include "i915_context.h" #include "i915_state.h" #include "i915_debug.h" +#include "i915_fpc.h" #include "i915_reg.h" static uint find_mapping(const struct i915_fragment_shader* fs, int unit) @@ -58,12 +59,12 @@ static void calculate_vertex_layout(struct i915_context *i915) const struct i915_fragment_shader *fs = i915->fs; const enum interp_mode colorInterp = i915->rasterizer->color_interp; struct vertex_info vinfo; - boolean texCoords[I915_TEX_UNITS], colors[2], fog, needW; + boolean texCoords[I915_TEX_UNITS], colors[2], fog, needW, face; uint i; int src; memset(texCoords, 0, sizeof(texCoords)); - colors[0] = colors[1] = fog = needW = FALSE; + colors[0] = colors[1] = fog = needW = face = FALSE; memset(&vinfo, 0, sizeof(vinfo)); /* Determine which fragment program inputs are needed. Setup HW vertex @@ -72,6 +73,10 @@ static void calculate_vertex_layout(struct i915_context *i915) for (i = 0; i < fs->info.num_inputs; i++) { switch (fs->info.input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: + { + uint unit = I915_SEMANTIC_POS; + texCoords[find_mapping(fs, unit)] = TRUE; + } break; case TGSI_SEMANTIC_COLOR: assert(fs->info.input_semantic_index[i] < 2); @@ -80,7 +85,6 @@ static void calculate_vertex_layout(struct i915_context *i915) case TGSI_SEMANTIC_GENERIC: { /* texcoords/varyings/other generic */ - /* XXX handle back/front face and point size */ uint unit = fs->info.input_semantic_index[i]; texCoords[find_mapping(fs, unit)] = TRUE; @@ -90,7 +94,11 @@ static void calculate_vertex_layout(struct i915_context *i915) case TGSI_SEMANTIC_FOG: fog = TRUE; break; + case TGSI_SEMANTIC_FACE: + face = TRUE; + break; default: + debug_printf("Unknown input type %d\n", fs->info.input_semantic_name[i]); assert(0); } } @@ -147,6 +155,20 @@ static void calculate_vertex_layout(struct i915_context *i915) vinfo.hwfmt[1] |= hwtc << (i * 4); } + /* front/back face */ + if (face) { + uint slot = find_mapping(fs, I915_SEMANTIC_FACE); + debug_printf("Front/back face is broken\n"); + /* XXX Because of limitations in the draw module, currently src will be 0 + * for SEMANTIC_FACE, so this aliases to POS. We need to fix in the draw + * module by adding an extra shader output. + */ + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_FACE, 0); + draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_CONSTANT, src); + vinfo.hwfmt[1] &= ~(TEXCOORDFMT_NOT_PRESENT << (slot * 4)); + vinfo.hwfmt[1] |= TEXCOORDFMT_1D << (slot * 4); + } + draw_compute_vertex_size(&vinfo); if (memcmp(&i915->current.vertex_info, &vinfo, sizeof(vinfo))) { diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index 0155cd83510..4f447962bb9 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -34,7 +34,9 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" +#include "pipe/p_format.h" +#include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -128,7 +130,7 @@ validate_immediate(struct i915_context *i915, unsigned *batch_space) static void emit_immediate(struct i915_context *i915) { - /* remove unwatned bits and S7 */ + /* remove unwanted bits and S7 */ unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 | 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 | @@ -341,21 +343,84 @@ emit_constants(struct i915_context *i915) } } +static const struct +{ + enum pipe_format format; + uint hw_swizzle; +} fixup_formats[] = { + { PIPE_FORMAT_R8G8B8A8_UNORM, 0x21030000 /* BGRA */}, + { PIPE_FORMAT_L8_UNORM, 0x00030000 /* RRRA */}, + { PIPE_FORMAT_I8_UNORM, 0x00030000 /* RRRA */}, + { PIPE_FORMAT_A8_UNORM, 0x33330000 /* AAAA */}, + { PIPE_FORMAT_NONE, 0x00000000}, +}; + +static uint need_target_fixup(struct pipe_surface* p) +{ + enum pipe_format f; + /* if we don't have a surface bound yet, we don't need to fixup the shader */ + if (!p) + return 0; + + f = p->format; + for(int i=0; fixup_formats[i].format != PIPE_FORMAT_NONE; i++) + if (fixup_formats[i].format == f) + return 1; + + return 0; +} + +static uint fixup_swizzle(enum pipe_format f) +{ + for(int i=0; fixup_formats[i].format != PIPE_FORMAT_NONE; i++) + if (fixup_formats[i].format == f) + return fixup_formats[i].hw_swizzle; + + return 0; +} + static void validate_program(struct i915_context *i915, unsigned *batch_space) { - *batch_space = i915->fs->program_len; + struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; + uint additional_size = need_target_fixup(cbuf_surface); + + /* we need more batch space if we want to emulate rgba framebuffers */ + *batch_space = i915->fs->program_len + 3 * additional_size; } static void emit_program(struct i915_context *i915) { - uint i; - /* we should always have, at least, a pass-through program */ - assert(i915->fs->program_len > 0); - for (i = 0; i < i915->fs->program_len; i++) { - OUT_BATCH(i915->fs->program[i]); - } + struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; + uint target_fixup = need_target_fixup(cbuf_surface); + uint i; + + /* we should always have, at least, a pass-through program */ + assert(i915->fs->program_len > 0); + + { + /* first word has the size, we have to adjust that */ + uint size = (i915->fs->program[0]); + size += target_fixup * 3; + OUT_BATCH(size); + } + + /* output the declarations of the program */ + for (i=1 ; i < i915->fs->program_len; i++) + OUT_BATCH(i915->fs->program[i]); + + /* we emit an additional mov with swizzle to fake RGBA framebuffers */ + if (target_fixup) { + /* mov out_color, out_color.zyxw */ + OUT_BATCH(A0_MOV | + (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | + A0_DEST_CHANNEL_ALL | + (REG_TYPE_OC << A0_SRC0_TYPE_SHIFT) | + (T_DIFFUSE << A0_SRC0_NR_SHIFT)); + OUT_BATCH(fixup_swizzle(cbuf_surface->format)); + OUT_BATCH(0); + } } static void diff --git a/src/gallium/drivers/i915/i915_state_inlines.h b/src/gallium/drivers/i915/i915_state_inlines.h index b589117fbfe..aa992f75c51 100644 --- a/src/gallium/drivers/i915/i915_state_inlines.h +++ b/src/gallium/drivers/i915/i915_state_inlines.h @@ -60,6 +60,31 @@ i915_translate_compare_func(unsigned func) } static INLINE unsigned +i915_translate_shadow_compare_func(unsigned func) +{ + switch (func) { + case PIPE_FUNC_NEVER: + return COMPAREFUNC_ALWAYS; + case PIPE_FUNC_LESS: + return COMPAREFUNC_LEQUAL; + case PIPE_FUNC_LEQUAL: + return COMPAREFUNC_LESS; + case PIPE_FUNC_GREATER: + return COMPAREFUNC_GEQUAL; + case PIPE_FUNC_GEQUAL: + return COMPAREFUNC_GREATER; + case PIPE_FUNC_NOTEQUAL: + return COMPAREFUNC_EQUAL; + case PIPE_FUNC_EQUAL: + return COMPAREFUNC_NOTEQUAL; + case PIPE_FUNC_ALWAYS: + return COMPAREFUNC_NEVER; + default: + return COMPAREFUNC_NEVER; + } +} + +static INLINE unsigned i915_translate_stencil_op(unsigned op) { switch (op) { diff --git a/src/gallium/drivers/i915/i915_state_sampler.c b/src/gallium/drivers/i915/i915_state_sampler.c index be70e7a92c9..0103f7c3530 100644 --- a/src/gallium/drivers/i915/i915_state_sampler.c +++ b/src/gallium/drivers/i915/i915_state_sampler.c @@ -62,6 +62,7 @@ static void update_map(struct i915_context *i915, uint unit, const struct i915_texture *tex, const struct i915_sampler_state *sampler, + const struct pipe_sampler_view* view, uint state[2]); @@ -161,9 +162,10 @@ static void update_samplers(struct i915_context *i915) i915->current.sampler[unit]); /* the result */ update_map(i915, unit, - texture, /* texture */ - i915->sampler[unit], /* sampler state */ - i915->current.texbuffer[unit]); /* the result */ + texture, /* texture */ + i915->sampler[unit], /* sampler state */ + i915->fragment_sampler_views[unit], /* sampler view */ + i915->current.texbuffer[unit]); /* the result */ i915->current.sampler_enable_nr++; i915->current.sampler_enable_flags |= (1 << unit); @@ -180,13 +182,21 @@ struct i915_tracked_state i915_hw_samplers = { }; - /*********************************************************************** * Sampler views */ -static uint translate_texture_format(enum pipe_format pipeFormat) +static uint translate_texture_format(enum pipe_format pipeFormat, + const struct pipe_sampler_view* view) { + if ( (view->swizzle_r != PIPE_SWIZZLE_RED || + view->swizzle_g != PIPE_SWIZZLE_GREEN || + view->swizzle_b != PIPE_SWIZZLE_BLUE || + view->swizzle_a != PIPE_SWIZZLE_ALPHA ) && + pipeFormat != PIPE_FORMAT_Z24_UNORM_S8_USCALED && + pipeFormat != PIPE_FORMAT_Z24X8_UNORM ) + debug_printf("i915: unsupported texture swizzle for format %d\n", pipeFormat); + switch (pipeFormat) { case PIPE_FORMAT_L8_UNORM: return MAPSURF_8BIT | MT_8BIT_L8; @@ -202,16 +212,16 @@ static uint translate_texture_format(enum pipe_format pipeFormat) return MAPSURF_16BIT | MT_16BIT_ARGB1555; case PIPE_FORMAT_B4G4R4A4_UNORM: return MAPSURF_16BIT | MT_16BIT_ARGB4444; + case PIPE_FORMAT_B10G10R10A2_UNORM: + return MAPSURF_32BIT | MT_32BIT_ARGB2101010; case PIPE_FORMAT_B8G8R8A8_UNORM: return MAPSURF_32BIT | MT_32BIT_ARGB8888; case PIPE_FORMAT_B8G8R8X8_UNORM: return MAPSURF_32BIT | MT_32BIT_XRGB8888; case PIPE_FORMAT_R8G8B8A8_UNORM: return MAPSURF_32BIT | MT_32BIT_ABGR8888; -#if 0 case PIPE_FORMAT_R8G8B8X8_UNORM: return MAPSURF_32BIT | MT_32BIT_XBGR8888; -#endif case PIPE_FORMAT_YUYV: return (MAPSURF_422 | MT_422_YCRCB_NORMAL); case PIPE_FORMAT_UYVY: @@ -232,7 +242,25 @@ static uint translate_texture_format(enum pipe_format pipeFormat) return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5); case PIPE_FORMAT_Z24_UNORM_S8_USCALED: case PIPE_FORMAT_Z24X8_UNORM: - return (MAPSURF_32BIT | MT_32BIT_xI824); + { + if ( view->swizzle_r == PIPE_SWIZZLE_RED && + view->swizzle_g == PIPE_SWIZZLE_RED && + view->swizzle_b == PIPE_SWIZZLE_RED && + view->swizzle_a == PIPE_SWIZZLE_ONE) + return (MAPSURF_32BIT | MT_32BIT_xA824); + if ( view->swizzle_r == PIPE_SWIZZLE_RED && + view->swizzle_g == PIPE_SWIZZLE_RED && + view->swizzle_b == PIPE_SWIZZLE_RED && + view->swizzle_a == PIPE_SWIZZLE_RED) + return (MAPSURF_32BIT | MT_32BIT_xI824); + if ( view->swizzle_r == PIPE_SWIZZLE_ZERO && + view->swizzle_g == PIPE_SWIZZLE_ZERO && + view->swizzle_b == PIPE_SWIZZLE_ZERO && + view->swizzle_a == PIPE_SWIZZLE_RED) + return (MAPSURF_32BIT | MT_32BIT_xL824); + debug_printf("i915: unsupported depth swizzle\n"); + return (MAPSURF_32BIT | MT_32BIT_xL824); + } default: debug_printf("i915: translate_texture_format() bad image format %x\n", pipeFormat); @@ -262,6 +290,7 @@ static void update_map(struct i915_context *i915, uint unit, const struct i915_texture *tex, const struct i915_sampler_state *sampler, + const struct pipe_sampler_view* view, uint state[2]) { const struct pipe_resource *pt = &tex->b.b; @@ -275,7 +304,7 @@ static void update_map(struct i915_context *i915, assert(height); assert(depth); - format = translate_texture_format(pt->format); + format = translate_texture_format(pt->format, view); pitch = tex->stride; assert(format); @@ -318,8 +347,9 @@ static void update_maps(struct i915_context *i915) update_map(i915, unit, - texture, /* texture */ - i915->sampler[unit], /* sampler state */ + texture, /* texture */ + i915->sampler[unit], /* sampler state */ + i915->fragment_sampler_views[unit], /* sampler view */ i915->current.texbuffer[unit]); } } diff --git a/src/gallium/drivers/i915/i915_state_static.c b/src/gallium/drivers/i915/i915_state_static.c index 2865298318c..0e4000bc2ab 100644 --- a/src/gallium/drivers/i915/i915_state_static.c +++ b/src/gallium/drivers/i915/i915_state_static.c @@ -42,6 +42,18 @@ static unsigned translate_format(enum pipe_format format) return COLOR_BUF_ARGB8888; case PIPE_FORMAT_B5G6R5_UNORM: return COLOR_BUF_RGB565; + case PIPE_FORMAT_B5G5R5A1_UNORM: + return COLOR_BUF_ARGB1555; + case PIPE_FORMAT_R8G8B8A8_UNORM: + return COLOR_BUF_ARGB8888; + case PIPE_FORMAT_B4G4R4A4_UNORM: + return COLOR_BUF_ARGB4444; + case PIPE_FORMAT_B10G10R10A2_UNORM: + return COLOR_BUF_ARGB2101010; + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + return COLOR_BUF_8BIT; default: assert(0); return 0; @@ -137,7 +149,8 @@ static void update_framebuffer(struct i915_context *i915) i915->static_dirty |= I915_DST_RECT; } - i915->hardware_dirty |= I915_HW_STATIC; + /* we also send a new program to make sure the fixup for RGBA surfaces happens */ + i915->hardware_dirty |= I915_HW_STATIC | I915_HW_PROGRAM; /* flush the cache in case we sample from the old renderbuffers */ i915_set_flush_dirty(i915, I915_FLUSH_CACHE); diff --git a/src/gallium/drivers/i915/i915_winsys.h b/src/gallium/drivers/i915/i915_winsys.h index 21cfdc9613e..20438609e07 100644 --- a/src/gallium/drivers/i915/i915_winsys.h +++ b/src/gallium/drivers/i915/i915_winsys.h @@ -207,6 +207,12 @@ struct i915_winsys { void (*buffer_destroy)(struct i915_winsys *iws, struct i915_winsys_buffer *buffer); + + /** + * Check if a buffer is busy. + */ + boolean (*buffer_is_busy)(struct i915_winsys *iws, + struct i915_winsys_buffer *buffer); /*@}*/ diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index ba9705bebee..f9301354fc5 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -51,6 +51,7 @@ C_SOURCES = \ CPP_SOURCES = \ PROGS := lp_test_format \ + lp_test_arit \ lp_test_blend \ lp_test_conv \ lp_test_printf \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index c10a8cbc12c..2b232a524ae 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -79,17 +79,18 @@ llvmpipe = env.ConvenienceLibrary( env.Alias('llvmpipe', llvmpipe) -if env['platform'] != 'embedded': +if not env['embedded']: env = env.Clone() env.Prepend(LIBS = [llvmpipe] + gallium) tests = [ + 'arit', 'format', 'blend', 'conv', - 'printf', - 'sincos', + 'printf', + 'sincos', ] if not env['msvc']: diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 268f0fa034b..ce92a80721a 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -68,10 +68,17 @@ lp_jit_create_types(struct llvmpipe_context *lp) elem_types[LP_JIT_TEXTURE_BORDER_COLOR] = LLVMArrayType(LLVMFloatTypeInContext(lc), 4); +#if HAVE_LLVM >= 0x0300 + texture_type = LLVMStructCreateNamed(gallivm->context, "texture"); + LLVMStructSetBody(texture_type, elem_types, + Elements(elem_types), 0); +#else texture_type = LLVMStructTypeInContext(lc, elem_types, Elements(elem_types), 0); + LLVMAddTypeName(gallivm->module, "texture", texture_type); LLVMInvalidateStructLayout(gallivm->target, texture_type); +#endif LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, width, gallivm->target, texture_type, @@ -112,8 +119,6 @@ lp_jit_create_types(struct llvmpipe_context *lp) LP_CHECK_STRUCT_SIZE(struct lp_jit_texture, gallivm->target, texture_type); - - LLVMAddTypeName(gallivm->module, "texture", texture_type); } /* struct lp_jit_context */ @@ -129,11 +134,19 @@ lp_jit_create_types(struct llvmpipe_context *lp) elem_types[LP_JIT_CTX_TEXTURES] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); +#if HAVE_LLVM >= 0x0300 + context_type = LLVMStructCreateNamed(gallivm->context, "context"); + LLVMStructSetBody(context_type, elem_types, + Elements(elem_types), 0); +#else context_type = LLVMStructTypeInContext(lc, elem_types, Elements(elem_types), 0); LLVMInvalidateStructLayout(gallivm->target, context_type); + LLVMAddTypeName(gallivm->module, "context", context_type); +#endif + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, constants, gallivm->target, context_type, LP_JIT_CTX_CONSTANTS); @@ -155,8 +168,6 @@ lp_jit_create_types(struct llvmpipe_context *lp) LP_CHECK_STRUCT_SIZE(struct lp_jit_context, gallivm->target, context_type); - LLVMAddTypeName(gallivm->module, "context", context_type); - lp->jit_context_ptr_type = LLVMPointerType(context_type, 0); } diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 036a6e0c379..e3f8c19679f 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -93,7 +93,9 @@ llvmpipe_get_vendor(struct pipe_screen *screen) static const char * llvmpipe_get_name(struct pipe_screen *screen) { - return "llvmpipe"; + static char buf[100]; + snprintf(buf, sizeof(buf), "llvmpipe (LLVM 0x%x)", HAVE_LLVM); + return buf; } @@ -423,7 +425,7 @@ llvmpipe_create_screen(struct sw_winsys *winsys) lp_jit_screen_init(screen); screen->num_threads = util_cpu_caps.nr_cpus > 1 ? util_cpu_caps.nr_cpus : 0; -#ifdef PIPE_OS_EMBEDDED +#ifdef PIPE_SUBSYSTEM_EMBEDDED screen->num_threads = 0; #endif screen->num_threads = debug_get_num_option("LP_NUM_THREADS", screen->num_threads); diff --git a/src/gallium/drivers/llvmpipe/lp_test_arit.c b/src/gallium/drivers/llvmpipe/lp_test_arit.c new file mode 100644 index 00000000000..f0e43e0f9cc --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_test_arit.c @@ -0,0 +1,294 @@ +/************************************************************************** + * + * Copyright 2011 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> + +#include "util/u_pointer.h" +#include "util/u_memory.h" + +#include "gallivm/lp_bld.h" +#include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_arit.h" + +#include "lp_test.h" + + +void +write_tsv_header(FILE *fp) +{ + fprintf(fp, + "result\t" + "format\n"); + + fflush(fp); +} + + +typedef float (*unary_func_t)(float); + + +/** + * Describe a test case of one unary function. + */ +struct unary_test_t +{ + /* + * Test name -- name of the mathematical function under test. + */ + + const char *name; + + LLVMValueRef + (*builder)(struct lp_build_context *bld, LLVMValueRef a); + + /* + * Reference (pure-C) function. + */ + float + (*ref)(float a); + + /* + * Test values. + */ + const float *values; + unsigned num_values; +}; + + +const float exp2_values[] = { + -60, + -4, + -2, + -1, + -1e-007, + 0, + 1e-007, + 1, + 2, + 4, + 60 +}; + + +const float log2_values[] = { +#if 0 + /* + * Smallest denormalized number; meant just for experimentation, but not + * validation. + */ + 1.4012984643248171e-45, +#endif + 1e-007, + 0.5, + 1, + 2, + 4, + 100000, + 1e+018 +}; + + +static float rsqrtf(float x) +{ + return 1.0/sqrt(x); +} + + +const float rsqrt_values[] = { + -1, -1e-007, + 1e-007, 1, + -4, -1, + 1, 4, + -1e+035, -100000, + 100000, 1e+035, +}; + + +const float sincos_values[] = { + -5*M_PI/4, + -4*M_PI/4, + -4*M_PI/4, + -3*M_PI/4, + -2*M_PI/4, + -1*M_PI/4, + 1*M_PI/4, + 2*M_PI/4, + 3*M_PI/4, + 4*M_PI/4, + 5*M_PI/4, +}; + + +/* + * Unary test cases. + */ + +static const struct unary_test_t unary_tests[] = { + {"exp2", &lp_build_exp2, &exp2f, exp2_values, Elements(exp2_values)}, + {"log2", &lp_build_log2, &log2f, log2_values, Elements(log2_values)}, + {"exp", &lp_build_exp, &expf, exp2_values, Elements(exp2_values)}, + {"log", &lp_build_log, &logf, log2_values, Elements(log2_values)}, + {"rsqrt", &lp_build_rsqrt, &rsqrtf, rsqrt_values, Elements(rsqrt_values)}, + {"sin", &lp_build_sin, &sinf, sincos_values, Elements(sincos_values)}, + {"cos", &lp_build_cos, &cosf, sincos_values, Elements(sincos_values)}, +}; + + +/* + * Build LLVM function that exercises the unary operator builder. + */ +static LLVMValueRef +build_unary_test_func(struct gallivm_state *gallivm, + LLVMModuleRef module, + LLVMContextRef context, + const struct unary_test_t *test) +{ + LLVMTypeRef i32t = LLVMInt32TypeInContext(context); + LLVMTypeRef f32t = LLVMFloatTypeInContext(context); + LLVMTypeRef v4f32t = LLVMVectorType(f32t, 4); + LLVMTypeRef args[1] = { f32t }; + LLVMValueRef func = LLVMAddFunction(module, test->name, LLVMFunctionType(f32t, args, Elements(args), 0)); + LLVMValueRef arg1 = LLVMGetParam(func, 0); + LLVMBuilderRef builder = gallivm->builder; + LLVMBasicBlockRef block = LLVMAppendBasicBlockInContext(context, func, "entry"); + LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0); + LLVMValueRef ret; + + struct lp_build_context bld; + + lp_build_context_init(&bld, gallivm, lp_float32_vec4_type()); + + LLVMSetFunctionCallConv(func, LLVMCCallConv); + + LLVMPositionBuilderAtEnd(builder, block); + + /* scalar to vector */ + arg1 = LLVMBuildInsertElement(builder, LLVMGetUndef(v4f32t), arg1, index0, ""); + + ret = test->builder(&bld, arg1); + + /* vector to scalar */ + ret = LLVMBuildExtractElement(builder, ret, index0, ""); + + LLVMBuildRet(builder, ret); + return func; +} + + +/* + * Test one LLVM unary arithmetic builder function. + */ +static boolean +test_unary(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, const struct unary_test_t *test) +{ + LLVMModuleRef module = gallivm->module; + LLVMValueRef test_func; + LLVMExecutionEngineRef engine = gallivm->engine; + LLVMContextRef context = gallivm->context; + char *error = NULL; + unary_func_t test_func_jit; + boolean success = TRUE; + int i; + + test_func = build_unary_test_func(gallivm, module, context, test); + + if (LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { + printf("LLVMVerifyModule: %s\n", error); + LLVMDumpModule(module); + abort(); + } + LLVMDisposeMessage(error); + + test_func_jit = (unary_func_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_func)); + + for (i = 0; i < test->num_values; ++i) { + float value = test->values[i]; + float ref = test->ref(value); + float src = test_func_jit(value); + + double error = fabs(src - ref); + double precision = error ? -log2(error/fabs(ref)) : FLT_MANT_DIG; + + bool pass = precision >= 20.0; + + if (isnan(ref)) { + continue; + } + + if (!pass || verbose) { + printf("%s(%.9g): ref = %.9g, src = %.9g, precision = %f bits, %s\n", + test->name, value, ref, src, precision, + pass ? "PASS" : "FAIL"); + } + + if (!pass) { + success = FALSE; + } + } + + LLVMFreeMachineCodeForFunction(engine, test_func); + + return success; +} + + +boolean +test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) +{ + boolean success = TRUE; + int i; + + for (i = 0; i < Elements(unary_tests); ++i) { + if (!test_unary(gallivm, verbose, fp, &unary_tests[i])) { + success = FALSE; + } + } + + return success; +} + + +boolean +test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, + unsigned long n) +{ + /* + * Not randomly generated test cases, so test all. + */ + + return test_all(gallivm, verbose, fp); +} + + +boolean +test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) +{ + return TRUE; +} diff --git a/src/gallium/drivers/nouveau/Makefile b/src/gallium/drivers/nouveau/Makefile index 3210d1ff77b..aae6d9889bb 100644 --- a/src/gallium/drivers/nouveau/Makefile +++ b/src/gallium/drivers/nouveau/Makefile @@ -10,6 +10,7 @@ LIBRARY_INCLUDES = \ C_SOURCES = nouveau_screen.c \ nouveau_fence.c \ nouveau_mm.c \ - nouveau_buffer.c + nouveau_buffer.c \ + nouveau_video.c include ../../Makefile.template diff --git a/src/gallium/drivers/nouveau/nouveau_context.h b/src/gallium/drivers/nouveau/nouveau_context.h index 696e0d3f24e..19bf7c84ac7 100644 --- a/src/gallium/drivers/nouveau/nouveau_context.h +++ b/src/gallium/drivers/nouveau/nouveau_context.h @@ -23,4 +23,7 @@ nouveau_context(struct pipe_context *pipe) return (struct nouveau_context *)pipe; } +void +nouveau_context_init_vdec(struct nouveau_context *); + #endif diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index 401155bba6e..223e7682ccd 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -81,20 +81,6 @@ nouveau_screen_bo_new(struct pipe_screen *pscreen, unsigned alignment, return bo; } -struct nouveau_bo * -nouveau_screen_bo_user(struct pipe_screen *pscreen, void *ptr, unsigned bytes) -{ - struct nouveau_device *dev = nouveau_screen(pscreen)->device; - struct nouveau_bo *bo = NULL; - int ret; - - ret = nouveau_bo_user(dev, ptr, bytes, &bo); - if (ret) - return NULL; - - return bo; -} - void * nouveau_screen_bo_map(struct pipe_screen *pscreen, struct nouveau_bo *bo, diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h index 186ada39677..cf291c6c595 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.h +++ b/src/gallium/drivers/nouveau/nouveau_screen.h @@ -47,8 +47,6 @@ nouveau_screen(struct pipe_screen *pscreen) struct nouveau_bo * nouveau_screen_bo_new(struct pipe_screen *pscreen, unsigned alignment, unsigned usage, unsigned bind, unsigned size); -struct nouveau_bo * -nouveau_screen_bo_user(struct pipe_screen *pscreen, void *ptr, unsigned bytes); void * nouveau_screen_bo_map(struct pipe_screen *pscreen, struct nouveau_bo *pb, @@ -78,6 +76,7 @@ nouveau_screen_bo_from_handle(struct pipe_screen *pscreen, int nouveau_screen_init(struct nouveau_screen *, struct nouveau_device *); void nouveau_screen_fini(struct nouveau_screen *); +void nouveau_screen_init_vdec(struct nouveau_screen *); #ifndef NOUVEAU_NVC0 diff --git a/src/gallium/drivers/nouveau/nouveau_video.c b/src/gallium/drivers/nouveau/nouveau_video.c new file mode 100644 index 00000000000..32f038dae61 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_video.c @@ -0,0 +1,39 @@ + +#include "vl/vl_decoder.h" +#include "vl/vl_video_buffer.h" + +#include "nouveau/nouveau_screen.h" +#include "nouveau/nouveau_context.h" + +static int +nouveau_screen_get_video_param(struct pipe_screen *pscreen, + enum pipe_video_profile profile, + enum pipe_video_cap param) +{ + switch (param) { + case PIPE_VIDEO_CAP_SUPPORTED: + return vl_profile_supported(pscreen, profile); + case PIPE_VIDEO_CAP_NPOT_TEXTURES: + return 1; + case PIPE_VIDEO_CAP_MAX_WIDTH: + case PIPE_VIDEO_CAP_MAX_HEIGHT: + return vl_video_buffer_max_size(pscreen); + default: + debug_printf("unknown video param: %d\n", param); + return 0; + } +} + +void +nouveau_screen_init_vdec(struct nouveau_screen *screen) +{ + screen->base.get_video_param = nouveau_screen_get_video_param; + screen->base.is_video_format_supported = vl_video_buffer_is_format_supported; +} + +void +nouveau_context_init_vdec(struct nouveau_context *nv) +{ + nv->pipe.create_video_decoder = vl_create_decoder; + nv->pipe.create_video_buffer = vl_video_buffer_create; +} diff --git a/src/gallium/drivers/nv50/nv50_3d.xml.h b/src/gallium/drivers/nv50/nv50_3d.xml.h index 41a380ec2ec..1bde07fa3af 100644 --- a/src/gallium/drivers/nv50/nv50_3d.xml.h +++ b/src/gallium/drivers/nv50/nv50_3d.xml.h @@ -777,7 +777,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_8_8 0x03000000 #define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_16 0x03600000 #define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_8 0x03a00000 -#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_2_10_10_10 0x06000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_10_10_10_2 0x06000000 #define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE__MASK 0x38000000 #define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE__SHIFT 27 #define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_SNORM 0x08000000 @@ -1935,7 +1935,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_8_8 0x00c00000 #define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_16 0x00d80000 #define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_8 0x00e80000 -#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_2_10_10_10 0x01800000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_10_10_10_2 0x01800000 #define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE__MASK 0x7e000000 #define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE__SHIFT 25 #define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT 0x7e000000 diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 632ca4daf74..0d464063b5b 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -60,13 +60,13 @@ nv50_texture_barrier(struct pipe_context *pipe) void nv50_default_flush_notify(struct nouveau_channel *chan) { - struct nv50_context *nv50 = chan->user_private; + struct nv50_screen *screen = chan->user_private; - if (!nv50) + if (!screen) return; - nouveau_fence_update(&nv50->screen->base, TRUE); - nouveau_fence_next(&nv50->screen->base); + nouveau_fence_update(&screen->base, TRUE); + nouveau_fence_next(&screen->base); } static void @@ -100,10 +100,8 @@ nv50_destroy(struct pipe_context *pipe) draw_destroy(nv50->draw); - if (nv50->screen->cur_ctx == nv50) { - nv50->screen->base.channel->user_private = NULL; + if (nv50->screen->cur_ctx == nv50) nv50->screen->cur_ctx = NULL; - } FREE(nv50); } @@ -140,7 +138,6 @@ nv50_create(struct pipe_screen *pscreen, void *priv) if (!screen->cur_ctx) screen->cur_ctx = nv50; - screen->base.channel->user_private = nv50; screen->base.channel->flush_notify = nv50_default_flush_notify; nv50_init_query_functions(nv50); @@ -152,6 +149,8 @@ nv50_create(struct pipe_screen *pscreen, void *priv) assert(nv50->draw); draw_set_rasterize_stage(nv50->draw, nv50_draw_render_stage(nv50)); + nouveau_context_init_vdec(&nv50->base); + return pipe; } @@ -168,6 +167,7 @@ nv50_bufctx_add_resident(struct nv50_context *nv50, int ctx, if (!resource->bo) return; + nv50->residents_size += sizeof(struct resident); /* We don't need to reference the resource here, it will be referenced * in the context/state, and bufctx will be reset when state changes. @@ -189,6 +189,7 @@ nv50_bufctx_del_resident(struct nv50_context *nv50, int ctx, top = util_dynarray_pop_ptr(&nv50->residents[ctx], struct resident); if (rsd != top) *rsd = *top; + nv50->residents_size -= sizeof(struct resident); break; } } @@ -201,11 +202,15 @@ nv50_bufctx_emit_relocs(struct nv50_context *nv50) struct util_dynarray *array; unsigned ctx, i, n; + n = nv50->residents_size / sizeof(struct resident); + n += NV50_SCREEN_RESIDENT_BO_COUNT; + + MARK_RING(nv50->screen->base.channel, n, n); + for (ctx = 0; ctx < NV50_BUFCTX_COUNT; ++ctx) { array = &nv50->residents[ctx]; n = array->size / sizeof(struct resident); - MARK_RING(nv50->screen->base.channel, n, n); for (i = 0; i < n; ++i) { rsd = util_dynarray_element(array, struct resident, i); diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 3f031994f0a..284db69e312 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -18,6 +18,7 @@ #include "nv50_screen.h" #include "nv50_program.h" #include "nv50_resource.h" +#include "nv50_transfer.h" #include "nouveau/nouveau_context.h" #include "nouveau/nv_object.xml.h" @@ -64,6 +65,7 @@ struct nv50_context { struct nv50_screen *screen; struct util_dynarray residents[NV50_BUFCTX_COUNT]; + unsigned residents_size; uint32_t dirty; @@ -129,20 +131,6 @@ nv50_context(struct pipe_context *pipe) return (struct nv50_context *)pipe; } -struct nv50_surface { - struct pipe_surface base; - uint32_t offset; - uint32_t width; - uint16_t height; - uint16_t depth; -}; - -static INLINE struct nv50_surface * -nv50_surface(struct pipe_surface *ps) -{ - return (struct nv50_surface *)ps; -} - /* nv50_context.c */ struct pipe_context *nv50_create(struct pipe_screen *, void *); @@ -156,6 +144,7 @@ void nv50_bufctx_del_resident(struct nv50_context *, int ctx, static INLINE void nv50_bufctx_reset(struct nv50_context *nv50, int ctx) { + nv50->residents_size -= nv50->residents[ctx].size; util_dynarray_resize(&nv50->residents[ctx], 0); } @@ -182,7 +171,8 @@ void nv50_validate_derived_rs(struct nv50_context *); extern void nv50_init_state_functions(struct nv50_context *); /* nv50_state_validate.c */ -extern boolean nv50_state_validate(struct nv50_context *); +/* @words: check for space before emitting relocs */ +extern boolean nv50_state_validate(struct nv50_context *, unsigned words); /* nv50_surface.c */ extern void nv50_clear(struct pipe_context *, unsigned buffers, @@ -200,6 +190,11 @@ nv50_create_sampler_view(struct pipe_context *, /* nv50_transfer.c */ void +nv50_m2mf_transfer_rect(struct pipe_screen *pscreen, + const struct nv50_m2mf_rect *dst, + const struct nv50_m2mf_rect *src, + uint32_t nblocksx, uint32_t nblocksy); +void nv50_sifc_linear_u8(struct nouveau_context *pipe, struct nouveau_bo *dst, unsigned offset, unsigned domain, unsigned size, void *data); diff --git a/src/gallium/drivers/nv50/nv50_defs.xml.h b/src/gallium/drivers/nv50/nv50_defs.xml.h index 1bf2f802b56..27046e9e564 100644 --- a/src/gallium/drivers/nv50/nv50_defs.xml.h +++ b/src/gallium/drivers/nv50/nv50_defs.xml.h @@ -1,5 +1,5 @@ -#ifndef NV50_DEFS_XML -#define NV50_DEFS_XML +#ifndef RNNDB_NV50_DEFS_XML +#define RNNDB_NV50_DEFS_XML /* Autogenerated file, DO NOT EDIT manually! @@ -8,10 +8,11 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37) -- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37) +- rnndb/nv50_defs.xml ( 5468 bytes, from 2011-07-09 13:43:58) +- ./rnndb/copyright.xml ( 6452 bytes, from 2011-07-09 13:43:58) +- ./rnndb/nvchipsets.xml ( 3617 bytes, from 2011-07-09 13:43:58) -Copyright (C) 2006-2010 by the following authors: +Copyright (C) 2006-2011 by the following authors: - Artur Huillet <[email protected]> (ahuillet) - Ben Skeggs (darktama, darktama_) - B. R. <[email protected]> (koala_br) @@ -22,7 +23,7 @@ Copyright (C) 2006-2010 by the following authors: - Dmitry Eremin-Solenikov <[email protected]> (lumag) - EdB <[email protected]> (edb_) - Erik Waling <[email protected]> (erikwaling) -- Francisco Jerez <[email protected]> (curro, curro_, currojerez) +- Francisco Jerez <[email protected]> (curro) - imirkin <[email protected]> (imirkin) - jb17bsome <[email protected]> (jb17bsome) - Jeremy Kolb <[email protected]> (kjeremy) @@ -70,44 +71,50 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#define NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT 0x000000c0 -#define NV50_SURFACE_FORMAT_R32G32B32A32_SINT 0x000000c1 -#define NV50_SURFACE_FORMAT_R32G32B32A32_UINT 0x000000c2 -#define NV50_SURFACE_FORMAT_R32G32B32X32_FLOAT 0x000000c3 -#define NV50_SURFACE_FORMAT_R16G16B16A16_UNORM 0x000000c6 -#define NV50_SURFACE_FORMAT_R16G16B16A16_SNORM 0x000000c7 -#define NV50_SURFACE_FORMAT_R16G16B16A16_SINT 0x000000c8 -#define NV50_SURFACE_FORMAT_R16G16B16A16_UINT 0x000000c9 -#define NV50_SURFACE_FORMAT_R16G16B16A16_FLOAT 0x000000ca -#define NV50_SURFACE_FORMAT_R32G32_FLOAT 0x000000cb -#define NV50_SURFACE_FORMAT_R32G32_SINT 0x000000cc -#define NV50_SURFACE_FORMAT_R32G32_UINT 0x000000cd -#define NV50_SURFACE_FORMAT_R16G16B16X16_FLOAT 0x000000ce -#define NV50_SURFACE_FORMAT_A8R8G8B8_UNORM 0x000000cf -#define NV50_SURFACE_FORMAT_A8R8G8B8_SRGB 0x000000d0 -#define NV50_SURFACE_FORMAT_A2B10G10R10_UNORM 0x000000d1 -#define NV50_SURFACE_FORMAT_A2B10G10R10_UINT 0x000000d2 -#define NV50_SURFACE_FORMAT_A8B8G8R8_UNORM 0x000000d5 -#define NV50_SURFACE_FORMAT_A8B8G8R8_SRGB 0x000000d6 -#define NV50_SURFACE_FORMAT_A8B8G8R8_SNORM 0x000000d7 -#define NV50_SURFACE_FORMAT_A8B8G8R8_SINT 0x000000d8 -#define NV50_SURFACE_FORMAT_A8B8G8R8_UINT 0x000000d9 -#define NV50_SURFACE_FORMAT_R16G16_UNORM 0x000000da -#define NV50_SURFACE_FORMAT_R16G16_SNORM 0x000000db -#define NV50_SURFACE_FORMAT_R16G16_SINT 0x000000dc -#define NV50_SURFACE_FORMAT_R16G16_UINT 0x000000dd -#define NV50_SURFACE_FORMAT_R16G16_FLOAT 0x000000de -#define NV50_SURFACE_FORMAT_A2R10G10B10_UNORM 0x000000df -#define NV50_SURFACE_FORMAT_B10G11R11_FLOAT 0x000000e0 +#define NV50_SURFACE_FORMAT_BITMAP 0x0000001c +#define NV50_SURFACE_FORMAT_UNK1D 0x0000001d +#define NV50_SURFACE_FORMAT_RGBA32_FLOAT 0x000000c0 +#define NV50_SURFACE_FORMAT_RGBA32_SINT 0x000000c1 +#define NV50_SURFACE_FORMAT_RGBA32_UINT 0x000000c2 +#define NV50_SURFACE_FORMAT_RGBX32_FLOAT 0x000000c3 +#define NV50_SURFACE_FORMAT_RGBX32_SINT 0x000000c4 +#define NV50_SURFACE_FORMAT_RGBX32_UINT 0x000000c5 +#define NV50_SURFACE_FORMAT_RGBA16_UNORM 0x000000c6 +#define NV50_SURFACE_FORMAT_RGBA16_SNORM 0x000000c7 +#define NV50_SURFACE_FORMAT_RGBA16_SINT 0x000000c8 +#define NV50_SURFACE_FORMAT_RGBA16_UINT 0x000000c9 +#define NV50_SURFACE_FORMAT_RGBA16_FLOAT 0x000000ca +#define NV50_SURFACE_FORMAT_RG32_FLOAT 0x000000cb +#define NV50_SURFACE_FORMAT_RG32_SINT 0x000000cc +#define NV50_SURFACE_FORMAT_RG32_UINT 0x000000cd +#define NV50_SURFACE_FORMAT_RGBX16_FLOAT 0x000000ce +#define NV50_SURFACE_FORMAT_BGRA8_UNORM 0x000000cf +#define NV50_SURFACE_FORMAT_BGRA8_SRGB 0x000000d0 +#define NV50_SURFACE_FORMAT_RGB10_A2_UNORM 0x000000d1 +#define NV50_SURFACE_FORMAT_RGB10_A2_UINT 0x000000d2 +#define NV50_SURFACE_FORMAT_RGBA8_UNORM 0x000000d5 +#define NV50_SURFACE_FORMAT_RGBA8_SRGB 0x000000d6 +#define NV50_SURFACE_FORMAT_RGBA8_SNORM 0x000000d7 +#define NV50_SURFACE_FORMAT_RGBA8_SINT 0x000000d8 +#define NV50_SURFACE_FORMAT_RGBA8_UINT 0x000000d9 +#define NV50_SURFACE_FORMAT_RG16_UNORM 0x000000da +#define NV50_SURFACE_FORMAT_RG16_SNORM 0x000000db +#define NV50_SURFACE_FORMAT_RG16_SINT 0x000000dc +#define NV50_SURFACE_FORMAT_RG16_UINT 0x000000dd +#define NV50_SURFACE_FORMAT_RG16_FLOAT 0x000000de +#define NV50_SURFACE_FORMAT_BGR10_A2_UNORM 0x000000df +#define NV50_SURFACE_FORMAT_R11G11B10_FLOAT 0x000000e0 +#define NV50_SURFACE_FORMAT_R32_SINT 0x000000e3 +#define NV50_SURFACE_FORMAT_R32_UINT 0x000000e4 #define NV50_SURFACE_FORMAT_R32_FLOAT 0x000000e5 -#define NV50_SURFACE_FORMAT_X8R8G8B8_UNORM 0x000000e6 -#define NV50_SURFACE_FORMAT_X8R8G8B8_SRGB 0x000000e7 -#define NV50_SURFACE_FORMAT_R5G6B5_UNORM 0x000000e8 -#define NV50_SURFACE_FORMAT_A1R5G5B5_UNORM 0x000000e9 -#define NV50_SURFACE_FORMAT_R8G8_UNORM 0x000000ea -#define NV50_SURFACE_FORMAT_R8G8_SNORM 0x000000eb -#define NV50_SURFACE_FORMAT_R8G8_SINT 0x000000ec -#define NV50_SURFACE_FORMAT_R8G8_UINT 0x000000ed +#define NV50_SURFACE_FORMAT_BGRX8_UNORM 0x000000e6 +#define NV50_SURFACE_FORMAT_BGRX8_SRGB 0x000000e7 +#define NV50_SURFACE_FORMAT_B5G6R5_UNORM 0x000000e8 +#define NV50_SURFACE_FORMAT_BGR5_A1_UNORM 0x000000e9 +#define NV50_SURFACE_FORMAT_RG8_UNORM 0x000000ea +#define NV50_SURFACE_FORMAT_RG8_SNORM 0x000000eb +#define NV50_SURFACE_FORMAT_RG8_SINT 0x000000ec +#define NV50_SURFACE_FORMAT_RG8_UINT 0x000000ed #define NV50_SURFACE_FORMAT_R16_UNORM 0x000000ee #define NV50_SURFACE_FORMAT_R16_SNORM 0x000000ef #define NV50_SURFACE_FORMAT_R16_SINT 0x000000f0 @@ -118,19 +125,24 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV50_SURFACE_FORMAT_R8_SINT 0x000000f5 #define NV50_SURFACE_FORMAT_R8_UINT 0x000000f6 #define NV50_SURFACE_FORMAT_A8_UNORM 0x000000f7 -#define NV50_SURFACE_FORMAT_X1R5G5B5_UNORM 0x000000f8 -#define NV50_SURFACE_FORMAT_X8B8G8R8_UNORM 0x000000f9 -#define NV50_SURFACE_FORMAT_X8B8G8R8_SRGB 0x000000fa +#define NV50_SURFACE_FORMAT_BGR5_X1_UNORM 0x000000f8 +#define NV50_SURFACE_FORMAT_RGBX8_UNORM 0x000000f9 +#define NV50_SURFACE_FORMAT_RGBX8_SRGB 0x000000fa +#define NV50_SURFACE_FORMAT_BGR5_X1_UNORM_UNKFB 0x000000fb +#define NV50_SURFACE_FORMAT_BGR5_X1_UNORM_UNKFC 0x000000fc +#define NV50_SURFACE_FORMAT_BGRX8_UNORM_UNKFD 0x000000fd +#define NV50_SURFACE_FORMAT_BGRX8_UNORM_UNKFE 0x000000fe +#define NV50_SURFACE_FORMAT_Y32_UINT_UNKFF 0x000000ff #define NV50_ZETA_FORMAT_Z32_FLOAT 0x0000000a #define NV50_ZETA_FORMAT_Z16_UNORM 0x00000013 -#define NV50_ZETA_FORMAT_Z24S8_UNORM 0x00000014 -#define NV50_ZETA_FORMAT_X8Z24_UNORM 0x00000015 -#define NV50_ZETA_FORMAT_S8Z24_UNORM 0x00000016 -#define NV50_ZETA_FORMAT_UNK18 0x00000018 -#define NV50_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM 0x00000019 -#define NV50_ZETA_FORMAT_UNK1D 0x0000001d -#define NV50_ZETA_FORMAT_UNK1E 0x0000001e -#define NV50_ZETA_FORMAT_UNK1F 0x0000001f +#define NV50_ZETA_FORMAT_S8_Z24_UNORM 0x00000014 +#define NV50_ZETA_FORMAT_Z24_X8_UNORM 0x00000015 +#define NV50_ZETA_FORMAT_Z24_S8_UNORM 0x00000016 +#define NV50_ZETA_FORMAT_Z24_C8_UNORM 0x00000018 +#define NV50_ZETA_FORMAT_Z32_S8_X24_FLOAT 0x00000019 +#define NV50_ZETA_FORMAT_Z24_X8_S8_C8_X16_UNORM 0x0000001d +#define NV50_ZETA_FORMAT_Z32_X8_C8_X16_FLOAT 0x0000001e +#define NV50_ZETA_FORMAT_Z32_S8_C8_X16_FLOAT 0x0000001f #define NV50_QUERY__SIZE 0x00000010 #define NV50_QUERY_COUNTER 0x00000000 @@ -139,4 +151,4 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV50_QUERY_TIME 0x00000008 -#endif /* NV50_DEFS_XML */ +#endif /* RNNDB_NV50_DEFS_XML */ diff --git a/src/gallium/drivers/nv50/nv50_formats.c b/src/gallium/drivers/nv50/nv50_formats.c index 96ed9a7d6d4..34502d0a397 100644 --- a/src/gallium/drivers/nv50/nv50_formats.c +++ b/src/gallium/drivers/nv50/nv50_formats.c @@ -65,28 +65,32 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = { /* COMMON FORMATS */ - [PIPE_FORMAT_B8G8R8A8_UNORM] = { NV50_SURFACE_FORMAT_A8R8G8B8_UNORM, + [PIPE_FORMAT_B8G8R8A8_UNORM] = { NV50_SURFACE_FORMAT_BGRA8_UNORM, A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, - [PIPE_FORMAT_B8G8R8X8_UNORM] = { NV50_SURFACE_FORMAT_X8R8G8B8_UNORM, + [PIPE_FORMAT_B8G8R8X8_UNORM] = { NV50_SURFACE_FORMAT_BGRX8_UNORM, A_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, - [PIPE_FORMAT_B8G8R8A8_SRGB] = { NV50_SURFACE_FORMAT_A8R8G8B8_SRGB, + [PIPE_FORMAT_B8G8R8A8_SRGB] = { NV50_SURFACE_FORMAT_BGRA8_SRGB, A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_B8G8R8X8_SRGB] = { NV50_SURFACE_FORMAT_X8R8G8B8_SRGB, + [PIPE_FORMAT_B8G8R8X8_SRGB] = { NV50_SURFACE_FORMAT_BGRX8_SRGB, A_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_B5G6R5_UNORM] = { NV50_SURFACE_FORMAT_R5G6B5_UNORM, + [PIPE_FORMAT_B5G6R5_UNORM] = { NV50_SURFACE_FORMAT_B5G6R5_UNORM, B_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 5_6_5, 1), SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, - [PIPE_FORMAT_B5G5R5A1_UNORM] = { NV50_SURFACE_FORMAT_A1R5G5B5_UNORM, - B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 1_5_5_5, 1), + [PIPE_FORMAT_B5G5R5A1_UNORM] = { NV50_SURFACE_FORMAT_BGR5_A1_UNORM, + B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 5_5_5_1, 1), + SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, + + [PIPE_FORMAT_B5G5R5X1_UNORM] = { NV50_SURFACE_FORMAT_BGR5_X1_UNORM, + B_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 5_5_5_1, 1), SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, [PIPE_FORMAT_B4G4R4A4_UNORM] = { 0, @@ -97,12 +101,12 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = B_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 4_4_4_4, 1), SAMPLER_VIEW }, - [PIPE_FORMAT_R10G10B10A2_UNORM] = { NV50_SURFACE_FORMAT_A2B10G10R10_UNORM, - A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 0), + [PIPE_FORMAT_R10G10B10A2_UNORM] = { NV50_SURFACE_FORMAT_RGB10_A2_UNORM, + A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 10_10_10_2, 0), SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER | SCANOUT }, - [PIPE_FORMAT_B10G10R10A2_UNORM] = { NV50_SURFACE_FORMAT_A2R10G10B10_UNORM, - A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 1), + [PIPE_FORMAT_B10G10R10A2_UNORM] = { NV50_SURFACE_FORMAT_BGR10_A2_UNORM, + A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 10_10_10_2, 1), SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER }, /* DEPTH/STENCIL FORMATS */ @@ -111,25 +115,24 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = B_(C0, C0, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, Z16, 0), SAMPLER_VIEW | DEPTH_STENCIL }, - [PIPE_FORMAT_Z24_UNORM_S8_USCALED] = { NV50_ZETA_FORMAT_S8Z24_UNORM, - B_(C0, C0, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, S8Z24, 0), + [PIPE_FORMAT_Z24_UNORM_S8_USCALED] = { NV50_ZETA_FORMAT_Z24_S8_UNORM, + B_(C0, C1, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, Z24_S8, 0), SAMPLER_VIEW | DEPTH_STENCIL }, - [PIPE_FORMAT_Z24X8_UNORM] = { NV50_ZETA_FORMAT_X8Z24_UNORM, - B_(C0, C0, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, X8Z24, 0), + [PIPE_FORMAT_Z24X8_UNORM] = { NV50_ZETA_FORMAT_Z24_X8_UNORM, + B_(C0, C0, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, Z24_X8, 0), SAMPLER_VIEW | DEPTH_STENCIL }, - [PIPE_FORMAT_S8_USCALED_Z24_UNORM] = { NV50_ZETA_FORMAT_Z24S8_UNORM, - B_(C1, C1, C1, ONE_FLOAT, UINT, UNORM, UINT, UINT, Z24S8, 0), + [PIPE_FORMAT_S8_USCALED_Z24_UNORM] = { NV50_ZETA_FORMAT_S8_Z24_UNORM, + B_(C1, C0, C1, ONE_FLOAT, UINT, UNORM, UINT, UINT, S8_Z24, 0), SAMPLER_VIEW | DEPTH_STENCIL }, [PIPE_FORMAT_Z32_FLOAT] = { NV50_ZETA_FORMAT_Z32_FLOAT, B_(C0, C0, C0, ONE_FLOAT, FLOAT, UINT, UINT, UINT, Z32, 0), SAMPLER_VIEW | DEPTH_STENCIL }, - [PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED] = { - NV50_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM, - B_(C0, C0, C0, ONE_FLOAT, FLOAT, UINT, UINT, UINT, X24S8Z32, 0), + [PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED] = { NV50_ZETA_FORMAT_Z32_S8_X24_FLOAT, + B_(C0, C0, C0, ONE_FLOAT, FLOAT, UINT, UINT, UINT, Z32_S8_X24, 0), SAMPLER_VIEW | DEPTH_STENCIL }, /* LUMINANCE, ALPHA, INTENSITY */ @@ -278,15 +281,15 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = /* FLOAT 16 */ - [PIPE_FORMAT_R16G16B16A16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16B16A16_FLOAT, + [PIPE_FORMAT_R16G16B16A16_FLOAT] = { NV50_SURFACE_FORMAT_RGBA16_FLOAT, A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_R16G16B16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16B16X16_FLOAT, + [PIPE_FORMAT_R16G16B16_FLOAT] = { NV50_SURFACE_FORMAT_RGBX16_FLOAT, A_(C0, C1, C2, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16, 0), - VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + VERTEX_BUFFER }, - [PIPE_FORMAT_R16G16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16_FLOAT, + [PIPE_FORMAT_R16G16_FLOAT] = { NV50_SURFACE_FORMAT_RG16_FLOAT, A_(C0, C1, ZERO, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, @@ -296,15 +299,15 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = /* FLOAT 32 */ - [PIPE_FORMAT_R32G32B32A32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT, + [PIPE_FORMAT_R32G32B32A32_FLOAT] = { NV50_SURFACE_FORMAT_RGBA32_FLOAT, A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_R32G32B32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32B32X32_FLOAT, + [PIPE_FORMAT_R32G32B32_FLOAT] = { NV50_SURFACE_FORMAT_RGBX32_FLOAT, A_(C0, C1, C2, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32, 0), - VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + VERTEX_BUFFER }, - [PIPE_FORMAT_R32G32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32_FLOAT, + [PIPE_FORMAT_R32G32_FLOAT] = { NV50_SURFACE_FORMAT_RG32_FLOAT, A_(C0, C1, ZERO, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 32_32, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, @@ -314,12 +317,12 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = /* ODD FORMATS */ - [PIPE_FORMAT_R11G11B10_FLOAT] = { NV50_SURFACE_FORMAT_B10G11R11_FLOAT, - B_(C0, C1, C2, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 10_11_11, 0), + [PIPE_FORMAT_R11G11B10_FLOAT] = { NV50_SURFACE_FORMAT_R11G11B10_FLOAT, + B_(C0, C1, C2, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 11_11_10, 0), SAMPLER_VIEW | RENDER_TARGET }, [PIPE_FORMAT_R9G9B9E5_FLOAT] = { 0, - B_(C0, C1, C2, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, E5_9_9_9, 0), + B_(C0, C1, C2, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 9_9_9_E5, 0), SAMPLER_VIEW }, /* SNORM 32 */ @@ -330,7 +333,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = [PIPE_FORMAT_R32G32B32_SNORM] = { 0, A_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 32_32_32, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + VERTEX_BUFFER }, [PIPE_FORMAT_R32G32_SNORM] = { 0, A_(C0, C1, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 32_32, 0), @@ -348,7 +351,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = [PIPE_FORMAT_R32G32B32_UNORM] = { 0, A_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 32_32_32, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + VERTEX_BUFFER }, [PIPE_FORMAT_R32G32_UNORM] = { 0, A_(C0, C1, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 32_32, 0), @@ -360,15 +363,15 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = /* SNORM 16 */ - [PIPE_FORMAT_R16G16B16A16_SNORM] = { NV50_SURFACE_FORMAT_R16G16B16A16_SNORM, + [PIPE_FORMAT_R16G16B16A16_SNORM] = { NV50_SURFACE_FORMAT_RGBA16_SNORM, A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16_16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, [PIPE_FORMAT_R16G16B16_SNORM] = { 0, A_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 16_16_16, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + VERTEX_BUFFER }, - [PIPE_FORMAT_R16G16_SNORM] = { NV50_SURFACE_FORMAT_R16G16_SNORM, + [PIPE_FORMAT_R16G16_SNORM] = { NV50_SURFACE_FORMAT_RG16_SNORM, A_(C0, C1, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, @@ -378,15 +381,15 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = /* UNORM 16 */ - [PIPE_FORMAT_R16G16B16A16_UNORM] = { NV50_SURFACE_FORMAT_R16G16B16A16_UNORM, + [PIPE_FORMAT_R16G16B16A16_UNORM] = { NV50_SURFACE_FORMAT_RGBA16_UNORM, A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16_16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, [PIPE_FORMAT_R16G16B16_UNORM] = { 0, A_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 16_16_16, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + VERTEX_BUFFER }, - [PIPE_FORMAT_R16G16_UNORM] = { NV50_SURFACE_FORMAT_R16G16_UNORM, + [PIPE_FORMAT_R16G16_UNORM] = { NV50_SURFACE_FORMAT_RG16_UNORM, A_(C0, C1, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, @@ -396,15 +399,15 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = /* SNORM 8 */ - [PIPE_FORMAT_R8G8B8A8_SNORM] = { NV50_SURFACE_FORMAT_A8B8G8R8_SNORM, + [PIPE_FORMAT_R8G8B8A8_SNORM] = { NV50_SURFACE_FORMAT_RGBA8_SNORM, A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 8_8_8_8, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, [PIPE_FORMAT_R8G8B8_SNORM] = { 0, A_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 8_8_8, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + VERTEX_BUFFER }, - [PIPE_FORMAT_R8G8_SNORM] = { NV50_SURFACE_FORMAT_R8G8_SNORM, + [PIPE_FORMAT_R8G8_SNORM] = { NV50_SURFACE_FORMAT_RG8_SNORM, A_(C0, C1, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 8_8, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, @@ -414,23 +417,23 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = /* UNORM 8 */ - [PIPE_FORMAT_R8G8B8A8_UNORM] = { NV50_SURFACE_FORMAT_A8B8G8R8_UNORM, + [PIPE_FORMAT_R8G8B8A8_UNORM] = { NV50_SURFACE_FORMAT_RGBA8_UNORM, A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_R8G8B8A8_SRGB] = { NV50_SURFACE_FORMAT_A8B8G8R8_SRGB, - A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0), + [PIPE_FORMAT_R8G8B8A8_SRGB] = { NV50_SURFACE_FORMAT_RGBA8_SRGB, + B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0), SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_R8G8B8_UNORM] = { NV50_SURFACE_FORMAT_X8B8G8R8_UNORM, - A_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0), - VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + [PIPE_FORMAT_R8G8B8X8_UNORM] = { NV50_SURFACE_FORMAT_RGBX8_UNORM, + B_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0), + SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_R8G8B8_SRGB] = { NV50_SURFACE_FORMAT_X8B8G8R8_SRGB, + [PIPE_FORMAT_R8G8B8_UNORM] = { 0, A_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0), - SAMPLER_VIEW | RENDER_TARGET }, + VERTEX_BUFFER }, - [PIPE_FORMAT_R8G8_UNORM] = { NV50_SURFACE_FORMAT_R8G8_UNORM, + [PIPE_FORMAT_R8G8_UNORM] = { NV50_SURFACE_FORMAT_RG8_UNORM, A_(C0, C1, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, @@ -549,11 +552,11 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = /* OTHER FORMATS */ [PIPE_FORMAT_R8G8_B8G8_UNORM] = { 0, - B_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, C1_C2_C1_C0, 0), + B_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, U8_YA8_V8_YB8, 0), SAMPLER_VIEW }, [PIPE_FORMAT_G8R8_G8B8_UNORM] = { 0, - B_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, C2_C1_C0_C1, 0), + B_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, YA8_U8_YB8_V8, 0), SAMPLER_VIEW }, [PIPE_FORMAT_R8SG8SB8UX8U_NORM] = { 0, @@ -561,11 +564,11 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = SAMPLER_VIEW }, [PIPE_FORMAT_R5SG5SB6U_NORM] = { 0, - B_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 6_5_5, 0), + B_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 5_5_6, 0), SAMPLER_VIEW }, - [PIPE_FORMAT_R1_UNORM] = { 0, - B_(C0, ZERO, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, BITMAP_8X8, 0), + [PIPE_FORMAT_R1_UNORM] = { NV50_SURFACE_FORMAT_BITMAP, + B_(C0, ZERO, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, BITMAP, 0), SAMPLER_VIEW }, [PIPE_FORMAT_A8B8G8R8_UNORM] = { 0, diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 486b368ae98..bc81604508b 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -27,52 +27,90 @@ #include "nv50_context.h" #include "nv50_resource.h" -#include "nv50_transfer.h" static INLINE uint32_t -get_tile_dims(unsigned nx, unsigned ny, unsigned nz) +nv50_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz) { - uint32_t tile_mode = 0x00; - - if (ny > 32) tile_mode = 0x04; /* height 128 tiles */ - else - if (ny > 16) tile_mode = 0x03; /* height 64 tiles */ - else - if (ny > 8) tile_mode = 0x02; /* height 32 tiles */ - else - if (ny > 4) tile_mode = 0x01; /* height 16 tiles */ - - if (nz == 1) - return tile_mode; - else - if (tile_mode > 0x02) - tile_mode = 0x02; - - if (nz > 16 && tile_mode < 0x02) - return tile_mode | 0x50; /* depth 32 tiles */ - if (nz > 8) return tile_mode | 0x40; /* depth 16 tiles */ - if (nz > 4) return tile_mode | 0x30; /* depth 8 tiles */ - if (nz > 2) return tile_mode | 0x20; /* depth 4 tiles */ - - return tile_mode | 0x10; + return nvc0_tex_choose_tile_dims(nx, ny * 2, nz) >> 4; } -static INLINE unsigned -calc_zslice_offset(uint32_t tile_mode, unsigned z, unsigned pitch, unsigned nbh) +static uint32_t +nv50_mt_choose_storage_type(struct nv50_miptree *mt, boolean compressed) { - unsigned tile_h = NV50_TILE_HEIGHT(tile_mode); - unsigned tile_d_shift = NV50_TILE_DIM_SHIFT(tile_mode, 1); - unsigned tile_d = 1 << tile_d_shift; + const unsigned ms = util_logbase2(mt->base.base.nr_samples); - /* stride_2d == to next slice within this volume tile */ - /* stride_3d == size (in bytes) of a volume tile */ - unsigned stride_2d = tile_h * NV50_TILE_PITCH(tile_mode); - unsigned stride_3d = tile_d * align(nbh, tile_h) * pitch; + uint32_t tile_flags; + + if (mt->base.base.bind & PIPE_BIND_CURSOR) + return NOUVEAU_BO_TILE_SCANOUT; + + switch (mt->base.base.format) { + case PIPE_FORMAT_Z16_UNORM: + tile_flags = 0x6c00 + (ms << 8); + break; + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + tile_flags = 0x1800 + (ms << 8); + break; + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + tile_flags = 0x22800 + (ms << 8); + break; + case PIPE_FORMAT_Z32_FLOAT: + tile_flags = 0x4000 + (ms << 8); + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + tile_flags = 0x6000 + (ms << 8); + break; + default: + switch (util_format_get_blocksizebits(mt->base.base.format)) { + case 128: + assert(ms < 3); + tile_flags = 0x7400; + break; + case 64: + switch (ms) { + case 2: tile_flags = 0x17c00; break; + case 3: tile_flags = 0x17d00; break; + default: + tile_flags = 0x7000; + break; + } + break; + case 32: + if (mt->base.base.bind & PIPE_BIND_SCANOUT) { + assert(ms == 0); + tile_flags = 0x7a00; + } else { + switch (ms) { + case 2: tile_flags = 0x17800; break; + case 3: tile_flags = 0x17900; break; + default: + tile_flags = 0x7000; + break; + } + } + break; + case 16: + case 8: + tile_flags = 0x7000; + break; + default: + return 0; + } + if (mt->base.base.bind & PIPE_BIND_CURSOR) + tile_flags = 0; + } + + if (mt->base.base.bind & (PIPE_BIND_SCANOUT | PIPE_BIND_CURSOR)) + tile_flags |= NOUVEAU_BO_TILE_SCANOUT; + + if (!compressed) + tile_flags &= ~0x30000; - return (z & (tile_d - 1)) * stride_2d + (z >> tile_d_shift) * stride_3d; + return tile_flags; } -static void +void nv50_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt) { struct nv50_miptree *mt = nv50_miptree(pt); @@ -82,7 +120,7 @@ nv50_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt) FREE(mt); } -static boolean +boolean nv50_miptree_get_handle(struct pipe_screen *pscreen, struct pipe_resource *pt, struct winsys_handle *whandle) @@ -108,88 +146,95 @@ const struct u_resource_vtbl nv50_miptree_vtbl = nv50_miptree_destroy, /* resource_destroy */ nv50_miptree_transfer_new, /* get_transfer */ nv50_miptree_transfer_del, /* transfer_destroy */ - nv50_miptree_transfer_map, /* transfer_map */ + nv50_miptree_transfer_map, /* transfer_map */ u_default_transfer_flush_region, /* transfer_flush_region */ nv50_miptree_transfer_unmap, /* transfer_unmap */ u_default_transfer_inline_write /* transfer_inline_write */ }; -struct pipe_resource * -nv50_miptree_create(struct pipe_screen *pscreen, - const struct pipe_resource *templ) +static INLINE boolean +nv50_miptree_init_ms_mode(struct nv50_miptree *mt) { - struct nouveau_device *dev = nouveau_screen(pscreen)->device; - struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree); - struct pipe_resource *pt = &mt->base.base; - int ret; - unsigned w, h, d, l, alloc_size; - uint32_t tile_flags; - - if (!mt) - return NULL; - - mt->base.vtbl = &nv50_miptree_vtbl; - *pt = *templ; - pipe_reference_init(&pt->reference, 1); - pt->screen = pscreen; - - mt->layout_3d = pt->target == PIPE_TEXTURE_3D; - - w = pt->width0; - h = pt->height0; - d = mt->layout_3d ? pt->depth0 : 1; - - switch (pt->format) { - case PIPE_FORMAT_Z16_UNORM: - tile_flags = 0x6c00; + switch (mt->base.base.nr_samples) { + case 8: + mt->ms_mode = NV50_3D_MULTISAMPLE_MODE_MS8; + mt->ms_x = 2; + mt->ms_y = 1; break; - case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - tile_flags = 0x1800; - break; - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - tile_flags = 0x2800; + case 4: + mt->ms_mode = NV50_3D_MULTISAMPLE_MODE_MS4; + mt->ms_x = 1; + mt->ms_y = 1; break; - case PIPE_FORMAT_R32G32B32A32_FLOAT: - case PIPE_FORMAT_R32G32B32_FLOAT: - tile_flags = 0x7400; + case 2: + mt->ms_mode = NV50_3D_MULTISAMPLE_MODE_MS2; + mt->ms_x = 1; break; - case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: - tile_flags = 0x6000; + case 1: + case 0: + mt->ms_mode = NV50_3D_MULTISAMPLE_MODE_MS1; break; default: - if (pt->bind & PIPE_BIND_CURSOR) - tile_flags = 0; - else - if ((pt->bind & PIPE_BIND_SCANOUT) && - util_format_get_blocksizebits(pt->format) == 32) - tile_flags = 0x7a00; - else - tile_flags = 0x7000; - break; + NOUVEAU_ERR("invalid nr_samples: %u\n", mt->base.base.nr_samples); + return FALSE; } - if (pt->bind & (PIPE_BIND_SCANOUT | PIPE_BIND_CURSOR)) - tile_flags |= NOUVEAU_BO_TILE_SCANOUT; + return TRUE; +} + +boolean +nv50_miptree_init_layout_linear(struct nv50_miptree *mt) +{ + struct pipe_resource *pt = &mt->base.base; + + if (util_format_is_depth_or_stencil(pt->format)) + return FALSE; + + if ((pt->last_level > 0) || (pt->depth0 > 1) || (pt->array_size > 1)) + return FALSE; + if (mt->ms_x | mt->ms_y) + return FALSE; + + mt->level[0].pitch = align(pt->width0, 64); + + mt->total_size = mt->level[0].pitch * pt->height0; + + return TRUE; +} + +static void +nv50_miptree_init_layout_tiled(struct nv50_miptree *mt) +{ + struct pipe_resource *pt = &mt->base.base; + unsigned w, h, d, l; + const unsigned blocksize = util_format_get_blocksize(pt->format); + + mt->layout_3d = pt->target == PIPE_TEXTURE_3D; + + w = pt->width0 << mt->ms_x; + h = pt->height0 << mt->ms_y; /* For 3D textures, a mipmap is spanned by all the layers, for array * textures and cube maps, each layer contains its own mipmaps. */ + d = mt->layout_3d ? pt->depth0 : 1; + for (l = 0; l <= pt->last_level; ++l) { struct nv50_miptree_level *lvl = &mt->level[l]; + unsigned tsx, tsy, tsz; unsigned nbx = util_format_get_nblocksx(pt->format, w); unsigned nby = util_format_get_nblocksy(pt->format, h); - unsigned blocksize = util_format_get_blocksize(pt->format); lvl->offset = mt->total_size; - if (tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK) - lvl->tile_mode = get_tile_dims(nbx, nby, d); + lvl->tile_mode = nv50_tex_choose_tile_dims(nbx, nby, d); - lvl->pitch = align(nbx * blocksize, NV50_TILE_PITCH(lvl->tile_mode)); + tsx = NV50_TILE_SIZE_X(lvl->tile_mode); /* x is tile row pitch in bytes */ + tsy = NV50_TILE_SIZE_Y(lvl->tile_mode); + tsz = NV50_TILE_SIZE_Z(lvl->tile_mode); - mt->total_size += lvl->pitch * - align(nby, NV50_TILE_HEIGHT(lvl->tile_mode)) * - align(d, NV50_TILE_DEPTH(lvl->tile_mode)); + lvl->pitch = align(nbx * blocksize, tsx); + + mt->total_size += lvl->pitch * align(nby, tsy) * align(d, tsz); w = u_minify(w, 1); h = u_minify(h, 1); @@ -201,10 +246,43 @@ nv50_miptree_create(struct pipe_screen *pscreen, NV50_TILE_SIZE(mt->level[0].tile_mode)); mt->total_size = mt->layer_stride * pt->array_size; } +} + +struct pipe_resource * +nv50_miptree_create(struct pipe_screen *pscreen, + const struct pipe_resource *templ) +{ + struct nouveau_device *dev = nouveau_screen(pscreen)->device; + struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree); + struct pipe_resource *pt = &mt->base.base; + int ret; + uint32_t tile_flags; + + if (!mt) + return NULL; + + mt->base.vtbl = &nv50_miptree_vtbl; + *pt = *templ; + pipe_reference_init(&pt->reference, 1); + pt->screen = pscreen; - alloc_size = mt->total_size; + tile_flags = nv50_mt_choose_storage_type(mt, TRUE); - ret = nouveau_bo_new_tile(dev, NOUVEAU_BO_VRAM, 256, alloc_size, + if (!nv50_miptree_init_ms_mode(mt)) { + FREE(mt); + return NULL; + } + + if (tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK) { + nv50_miptree_init_layout_tiled(mt); + } else + if (!nv50_miptree_init_layout_linear(mt)) { + FREE(mt); + return NULL; + } + + ret = nouveau_bo_new_tile(dev, NOUVEAU_BO_VRAM, 4096, + mt->total_size, mt->level[0].tile_mode, tile_flags, &mt->base.bo); if (ret) { @@ -255,58 +333,92 @@ nv50_miptree_from_handle(struct pipe_screen *pscreen, } +/* Offset of zslice @z from start of level @l. */ +INLINE unsigned +nv50_mt_zslice_offset(const struct nv50_miptree *mt, unsigned l, unsigned z) +{ + const struct pipe_resource *pt = &mt->base.base; + + unsigned tds = NV50_TILE_SHIFT_Z(mt->level[l].tile_mode); + unsigned ths = NV50_TILE_SHIFT_Y(mt->level[l].tile_mode); + + unsigned nby = util_format_get_nblocksy(pt->format, + u_minify(pt->height0, l)); + + /* to next 2D tile slice within a 3D tile */ + unsigned stride_2d = NV50_TILE_SIZE_2D(mt->level[l].tile_mode); + + /* to slice in the next (in z direction) 3D tile */ + unsigned stride_3d = (align(nby, (1 << ths)) * mt->level[l].pitch) << tds; + + return (z & ((1 << tds) - 1)) * stride_2d + (z >> tds) * stride_3d; +} + /* Surface functions. */ -struct pipe_surface * -nv50_miptree_surface_new(struct pipe_context *pipe, - struct pipe_resource *pt, - const struct pipe_surface *templ) +struct nv50_surface * +nv50_surface_from_miptree(struct nv50_miptree *mt, + const struct pipe_surface *templ) { - struct nv50_miptree *mt = nv50_miptree(pt); /* guaranteed */ - struct nv50_surface *ns; struct pipe_surface *ps; - struct nv50_miptree_level *lvl = &mt->level[templ->u.tex.level]; - - ns = CALLOC_STRUCT(nv50_surface); + struct nv50_surface *ns = CALLOC_STRUCT(nv50_surface); if (!ns) return NULL; ps = &ns->base; pipe_reference_init(&ps->reference, 1); - pipe_resource_reference(&ps->texture, pt); - ps->context = pipe; + pipe_resource_reference(&ps->texture, &mt->base.base); + ps->format = templ->format; ps->usage = templ->usage; ps->u.tex.level = templ->u.tex.level; ps->u.tex.first_layer = templ->u.tex.first_layer; ps->u.tex.last_layer = templ->u.tex.last_layer; - ns->width = u_minify(pt->width0, ps->u.tex.level); - ns->height = u_minify(pt->height0, ps->u.tex.level); + ns->width = u_minify(mt->base.base.width0, ps->u.tex.level); + ns->height = u_minify(mt->base.base.height0, ps->u.tex.level); ns->depth = ps->u.tex.last_layer - ps->u.tex.first_layer + 1; - ns->offset = lvl->offset; + ns->offset = mt->level[templ->u.tex.level].offset; /* comment says there are going to be removed, but they're used by the st */ ps->width = ns->width; ps->height = ns->height; - if (mt->layout_3d) { - unsigned zslice = ps->u.tex.first_layer; + ns->width <<= mt->ms_x; + ns->height <<= mt->ms_y; - /* TODO: re-layout the texture to use only depth 1 tiles in this case: */ - if (ns->depth > 1 && (zslice & (NV50_TILE_DEPTH(lvl->tile_mode) - 1))) - NOUVEAU_ERR("Creating unsupported 3D surface of slices [%u:%u].\n", - zslice, ps->u.tex.last_layer); + return ns; +} - ns->offset += calc_zslice_offset(lvl->tile_mode, zslice, lvl->pitch, - util_format_get_nblocksy(pt->format, - ns->height)); - } else { - ns->offset += mt->layer_stride * ps->u.tex.first_layer; +struct pipe_surface * +nv50_miptree_surface_new(struct pipe_context *pipe, + struct pipe_resource *pt, + const struct pipe_surface *templ) +{ + struct nv50_miptree *mt = nv50_miptree(pt); + struct nv50_surface *ns = nv50_surface_from_miptree(mt, templ); + if (!ns) + return NULL; + ns->base.context = pipe; + + if (ns->base.u.tex.first_layer) { + const unsigned l = ns->base.u.tex.level; + const unsigned z = ns->base.u.tex.first_layer; + + if (mt->layout_3d) { + ns->offset += nv50_mt_zslice_offset(mt, l, z); + + /* TODO: switch to depth 1 tiles; but actually this shouldn't happen */ + if (ns->depth > 1 && + (z & (NV50_TILE_SIZE_Z(mt->level[l].tile_mode) - 1))) + NOUVEAU_ERR("Creating unsupported 3D surface !\n"); + } else { + ns->offset += mt->layer_stride * z; + } } - return ps; + return &ns->base; } void diff --git a/src/gallium/drivers/nv50/nv50_resource.h b/src/gallium/drivers/nv50/nv50_resource.h index 0e9f0a2557e..66d21209be2 100644 --- a/src/gallium/drivers/nv50/nv50_resource.h +++ b/src/gallium/drivers/nv50/nv50_resource.h @@ -9,22 +9,32 @@ #include "nouveau/nouveau_buffer.h" #undef NOUVEAU_NVC0 +#ifndef __NVC0_RESOURCE_H__ /* make sure we don't use these in nvc0: */ + void nv50_init_resource_functions(struct pipe_context *pcontext); void nv50_screen_init_resource_functions(struct pipe_screen *pscreen); -#define NV50_TILE_DIM_SHIFT(m, d) (((m) >> (d * 4)) & 0xf) -#define NV50_TILE_PITCH(m) (64 << 0) -#define NV50_TILE_HEIGHT(m) ( 4 << NV50_TILE_DIM_SHIFT(m, 0)) -#define NV50_TILE_DEPTH(m) ( 1 << NV50_TILE_DIM_SHIFT(m, 1)) +#define NV50_TILE_SHIFT_X(m) 6 +#define NV50_TILE_SHIFT_Y(m) ((((m) >> 0) & 0xf) + 2) +#define NV50_TILE_SHIFT_Z(m) ((((m) >> 4) & 0xf) + 0) + +#define NV50_TILE_SIZE_X(m) 64 +#define NV50_TILE_SIZE_Y(m) ( 4 << (((m) >> 0) & 0xf)) +#define NV50_TILE_SIZE_Z(m) ( 1 << (((m) >> 4) & 0xf)) + +#define NV50_TILE_SIZE_2D(m) (NV50_TILE_SIZE_X(m) << NV50_TILE_SHIFT_Y(m)) -#define NV50_TILE_SIZE_2D(m) ((64 * 4) << \ - NV50_TILE_DIM_SHIFT(m, 0)) +#define NV50_TILE_SIZE(m) (NV50_TILE_SIZE_2D(m) << NV50_TILE_SHIFT_Z(m)) + +#endif /* __NVC0_RESOURCE_H__ */ + +uint32_t +nvc0_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz); -#define NV50_TILE_SIZE(m) (NV50_TILE_SIZE_2D(m) << NV50_TILE_DIM_SHIFT(m, 1)) struct nv50_miptree_level { uint32_t offset; @@ -40,6 +50,9 @@ struct nv50_miptree { uint32_t total_size; uint32_t layer_stride; boolean layout_3d; /* TRUE if layer count varies with mip level */ + uint8_t ms_x; /* log2 of number of samples in x/y dimension */ + uint8_t ms_y; + uint8_t ms_mode; }; static INLINE struct nv50_miptree * @@ -50,21 +63,73 @@ nv50_miptree(struct pipe_resource *pt) /* Internal functions: */ +boolean +nv50_miptree_init_layout_linear(struct nv50_miptree *mt); + struct pipe_resource * nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_resource *tmp); +void +nv50_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt); + struct pipe_resource * nv50_miptree_from_handle(struct pipe_screen *pscreen, const struct pipe_resource *template, struct winsys_handle *whandle); +boolean +nv50_miptree_get_handle(struct pipe_screen *pscreen, + struct pipe_resource *pt, + struct winsys_handle *whandle); + +struct nv50_surface { + struct pipe_surface base; + uint32_t offset; + uint32_t width; + uint16_t height; + uint16_t depth; +}; + +static INLINE struct nv50_surface * +nv50_surface(struct pipe_surface *ps) +{ + return (struct nv50_surface *)ps; +} + +#ifndef __NVC0_RESOURCE_H__ + +unsigned +nv50_mt_zslice_offset(const struct nv50_miptree *mt, unsigned l, unsigned z); + struct pipe_surface * nv50_miptree_surface_new(struct pipe_context *, struct pipe_resource *, const struct pipe_surface *templ); +struct pipe_transfer * +nv50_miptree_transfer_new(struct pipe_context *pcontext, + struct pipe_resource *pt, + unsigned level, + unsigned usage, + const struct pipe_box *box); +void +nv50_miptree_transfer_del(struct pipe_context *pcontext, + struct pipe_transfer *ptx); +void * +nv50_miptree_transfer_map(struct pipe_context *pcontext, + struct pipe_transfer *ptx); +void +nv50_miptree_transfer_unmap(struct pipe_context *pcontext, + struct pipe_transfer *ptx); + +#endif /* __NVC0_RESOURCE_H__ */ + +struct nv50_surface * +nv50_surface_from_miptree(struct nv50_miptree *mt, + const struct pipe_surface *templ); + void nv50_miptree_surface_del(struct pipe_context *, struct pipe_surface *); -#endif +#endif /* __NV50_RESOURCE_H__ */ diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 7e436fd47d8..581aad19627 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -43,7 +43,9 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, unsigned sample_count, unsigned bindings) { - if (sample_count > 1) + if (sample_count > 2 && sample_count != 4 && sample_count != 8) + return FALSE; + if (sample_count == 8 && util_format_get_blocksizebits(format) >= 128) return FALSE; if (!util_format_is_supported(format, bindings)) @@ -89,6 +91,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_SHADOW_MAP: case PIPE_CAP_NPOT_TEXTURES: case PIPE_CAP_ANISOTROPIC_FILTER: + case PIPE_CAP_SCALED_RESOLVE: return 1; case PIPE_CAP_SEAMLESS_CUBE_MAP: return nv50_screen(pscreen)->tesla->grclass >= NVA0_3D; @@ -217,6 +220,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen) nouveau_fence_wait(screen->base.fence.current); nouveau_fence_ref (NULL, &screen->base.fence.current); } + screen->base.channel->user_private = NULL; nouveau_bo_ref(NULL, &screen->code); nouveau_bo_ref(NULL, &screen->tls_bo); @@ -302,6 +306,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) FAIL_SCREEN_INIT("nouveau_screen_init failed: %d\n", ret); chan = screen->base.channel; + chan->user_private = screen; pscreen->winsys = ws; pscreen->destroy = nv50_screen_destroy; @@ -313,6 +318,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) nv50_screen_init_resource_functions(pscreen); + nouveau_screen_init_vdec(&screen->base); + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, &screen->fence.bo); if (ret) @@ -600,6 +607,9 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) screen->mm_VRAM_fe0 = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, 0xfe0); + if (!nv50_blitctx_create(screen)) + goto fail; + nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE); return pscreen; diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index aea434b8679..315ca80c0d2 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -19,6 +19,10 @@ struct nv50_context; #define NV50_SCRATCH_SIZE (2 << 20) #define NV50_SCRATCH_NR_BUFFERS 2 +#define NV50_SCREEN_RESIDENT_BO_COUNT 5 + +struct nv50_blitctx; + struct nv50_screen { struct nouveau_screen base; struct nouveau_winsys *nvws; @@ -37,6 +41,8 @@ struct nv50_screen { struct nouveau_resource *gp_code_heap; struct nouveau_resource *fp_code_heap; + struct nv50_blitctx *blitctx; + struct { void **entries; int next; @@ -69,6 +75,8 @@ nv50_screen(struct pipe_screen *screen) return (struct nv50_screen *)screen; } +boolean nv50_blitctx_create(struct nv50_screen *); + void nv50_screen_make_buffers_resident(struct nv50_screen *); int nv50_screen_tic_alloc(struct nv50_screen *, void *); diff --git a/src/gallium/drivers/nv50/nv50_shader_state.c b/src/gallium/drivers/nv50/nv50_shader_state.c index e5b10c37bef..d73f7c7f213 100644 --- a/src/gallium/drivers/nv50/nv50_shader_state.c +++ b/src/gallium/drivers/nv50/nv50_shader_state.c @@ -130,13 +130,14 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *prog) int ret; unsigned size; - if (prog->translated) + if (!prog->translated) { + prog->translated = nv50_program_translate(prog); + if (!prog->translated) + return FALSE; + } else + if (prog->res) return TRUE; - prog->translated = nv50_program_translate(prog); - if (!prog->translated) - return FALSE; - if (prog->type == PIPE_SHADER_FRAGMENT) heap = nv50->screen->fp_code_heap; else if (prog->type == PIPE_SHADER_GEOMETRY) heap = nv50->screen->gp_code_heap; diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index fb125f3a8d8..49ea646c77c 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -119,6 +119,7 @@ nv50_blend_state_create(struct pipe_context *pipe, struct nv50_blend_stateobj *so = CALLOC_STRUCT(nv50_blend_stateobj); int i; boolean emit_common_func = cso->rt[0].blend_enable; + uint32_t ms; if (nv50_context(pipe)->screen->tesla->grclass >= NVA3_3D) { SB_BEGIN_3D(so, BLEND_INDEPENDENT, 1); @@ -190,6 +191,15 @@ nv50_blend_state_create(struct pipe_context *pipe, SB_DATA (so, nv50_colormask(cso->rt[0].colormask)); } + ms = 0; + if (cso->alpha_to_coverage) + ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE; + if (cso->alpha_to_one) + ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE; + + SB_BEGIN_3D(so, MULTISAMPLE_CTRL, 1); + SB_DATA (so, ms); + assert(so->size <= (sizeof(so->state) / sizeof(so->state[0]))); return so; } @@ -237,6 +247,9 @@ nv50_rasterizer_state_create(struct pipe_context *pipe, SB_BEGIN_3D(so, FRAG_COLOR_CLAMP_EN, 1); SB_DATA (so, cso->clamp_fragment_color ? 0x11111111 : 0x00000000); + SB_BEGIN_3D(so, MULTISAMPLE_ENABLE, 1); + SB_DATA (so, cso->multisample); + SB_BEGIN_3D(so, LINE_WIDTH, 1); SB_DATA (so, fui(cso->line_width)); SB_BEGIN_3D(so, LINE_SMOOTH_ENABLE, 1); diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 11561f5a8e6..44f2d25c1a7 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -8,6 +8,7 @@ nv50_validate_fb(struct nv50_context *nv50) struct nouveau_channel *chan = nv50->screen->base.channel; struct pipe_framebuffer_state *fb = &nv50->framebuffer; unsigned i; + unsigned ms_mode = NV50_3D_MULTISAMPLE_MODE_MS1; boolean serialize = FALSE; nv50_bufctx_reset(nv50, NV50_BUFCTX_FRAME); @@ -38,6 +39,8 @@ nv50_validate_fb(struct nv50_context *nv50) BEGIN_RING(chan, RING_3D(RT_ARRAY_MODE), 1); OUT_RING (chan, sf->depth); + ms_mode = mt->ms_mode; + if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING) serialize = TRUE; mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; @@ -69,6 +72,8 @@ nv50_validate_fb(struct nv50_context *nv50) OUT_RING (chan, sf->height); OUT_RING (chan, (unk << 16) | sf->depth); + ms_mode = mt->ms_mode; + if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING) serialize = TRUE; mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; @@ -81,6 +86,9 @@ nv50_validate_fb(struct nv50_context *nv50) OUT_RING (chan, 0); } + BEGIN_RING(chan, RING_3D(MULTISAMPLE_MODE), 1); + OUT_RING (chan, ms_mode); + BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); OUT_RING (chan, fb->width << 16); OUT_RING (chan, fb->height << 16); @@ -258,6 +266,26 @@ nv50_validate_rasterizer(struct nv50_context *nv50) } static void +nv50_validate_sample_mask(struct nv50_context *nv50) +{ + struct nouveau_channel *chan = nv50->screen->base.channel; + + unsigned mask[4] = + { + nv50->sample_mask & 0xffff, + nv50->sample_mask & 0xffff, + nv50->sample_mask & 0xffff, + nv50->sample_mask & 0xffff + }; + + BEGIN_RING(chan, RING_3D(MSAA_MASK(0)), 4); + OUT_RING (chan, mask[0]); + OUT_RING (chan, mask[1]); + OUT_RING (chan, mask[2]); + OUT_RING (chan, mask[3]); +} + +static void nv50_switch_pipe_context(struct nv50_context *ctx_to) { struct nv50_context *ctx_from = ctx_to->screen->cur_ctx; @@ -282,8 +310,7 @@ nv50_switch_pipe_context(struct nv50_context *ctx_to) if (!ctx_to->zsa) ctx_to->dirty &= ~NV50_NEW_ZSA; - ctx_to->screen->base.channel->user_private = ctx_to->screen->cur_ctx = - ctx_to; + ctx_to->screen->cur_ctx = ctx_to; } static struct state_validate { @@ -293,6 +320,7 @@ static struct state_validate { { nv50_validate_fb, NV50_NEW_FRAMEBUFFER }, { nv50_validate_blend, NV50_NEW_BLEND }, { nv50_validate_zsa, NV50_NEW_ZSA }, + { nv50_validate_sample_mask, NV50_NEW_SAMPLE_MASK }, { nv50_validate_rasterizer, NV50_NEW_RASTERIZER }, { nv50_validate_blend_colour, NV50_NEW_BLEND_COLOUR }, { nv50_validate_stencil_ref, NV50_NEW_STENCIL_REF }, @@ -322,7 +350,7 @@ static struct state_validate { #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) boolean -nv50_state_validate(struct nv50_context *nv50) +nv50_state_validate(struct nv50_context *nv50, unsigned words) { unsigned i; @@ -339,6 +367,8 @@ nv50_state_validate(struct nv50_context *nv50) nv50->dirty = 0; } + MARK_RING(nv50->screen->base.channel, words, 0); + nv50_bufctx_emit_relocs(nv50); return TRUE; diff --git a/src/gallium/drivers/nv50/nv50_stateobj.h b/src/gallium/drivers/nv50/nv50_stateobj.h index 4c98c7e46fc..d367a064d61 100644 --- a/src/gallium/drivers/nv50/nv50_stateobj.h +++ b/src/gallium/drivers/nv50/nv50_stateobj.h @@ -21,13 +21,13 @@ struct nv50_blend_stateobj { struct pipe_blend_state pipe; int size; - uint32_t state[82]; // TODO: allocate less if !independent_blend_enable + uint32_t state[84]; // TODO: allocate less if !independent_blend_enable }; struct nv50_rasterizer_stateobj { struct pipe_rasterizer_state pipe; int size; - uint32_t state[42]; + uint32_t state[44]; }; struct nv50_zsa_stateobj { diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index 3d7e880ccce..1a5077e970b 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -34,25 +34,16 @@ #include "nv50_defs.xml.h" +#define NV50_ENG2D_SUPPORTED_FORMATS 0xff0843e080608409ULL + /* return TRUE for formats that can be converted among each other by NV50_2D */ static INLINE boolean nv50_2d_format_faithful(enum pipe_format format) { - switch (format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - case PIPE_FORMAT_B8G8R8A8_SRGB: - case PIPE_FORMAT_B8G8R8X8_SRGB: - case PIPE_FORMAT_B5G6R5_UNORM: - case PIPE_FORMAT_B5G5R5A1_UNORM: - case PIPE_FORMAT_B10G10R10A2_UNORM: - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R32G32B32A32_FLOAT: - case PIPE_FORMAT_R32G32B32_FLOAT: - return TRUE; - default: - return FALSE; - } + uint8_t id = nv50_format_table[format].rt; + + return (id >= 0xc0) && + (NV50_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0))); } static INLINE uint8_t @@ -63,7 +54,7 @@ nv50_2d_format(enum pipe_format format) /* Hardware values for color formats range from 0xc0 to 0xff, * but the 2D engine doesn't support all of them. */ - if ((id >= 0xc0) && (0xff0843e080608409ULL & (1ULL << (id - 0xc0)))) + if ((id >= 0xc0) && (NV50_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0)))) return id; switch (util_format_get_blocksize(format)) { @@ -72,7 +63,7 @@ nv50_2d_format(enum pipe_format format) case 2: return NV50_SURFACE_FORMAT_R16_UNORM; case 4: - return NV50_SURFACE_FORMAT_A8R8G8B8_UNORM; + return NV50_SURFACE_FORMAT_BGRA8_UNORM; default: return 0; } @@ -96,8 +87,8 @@ nv50_2d_texture_set(struct nouveau_channel *chan, int dst, return 1; } - width = u_minify(mt->base.base.width0, level); - height = u_minify(mt->base.base.height0, level); + width = u_minify(mt->base.base.width0, level) << mt->ms_x; + height = u_minify(mt->base.base.height0, level) << mt->ms_y; offset = mt->level[level].offset; if (!mt->layout_3d) { @@ -152,7 +143,13 @@ nv50_2d_texture_do_copy(struct nouveau_channel *chan, unsigned sx, unsigned sy, unsigned sz, unsigned w, unsigned h) { + static const uint32_t duvdxy[5] = + { + 0x40000000, 0x80000000, 0x00000001, 0x00000002, 0x00000004 + }; + int ret; + uint32_t ctrl; ret = MARK_RING(chan, 2 * 16 + 32, 4); if (ret) @@ -166,24 +163,28 @@ nv50_2d_texture_do_copy(struct nouveau_channel *chan, if (ret) return ret; - /* 0/1 = CENTER/CORNER, 10/00 = POINT/BILINEAR */ + /* NOTE: 2D engine doesn't work for MS8 */ + if (src->ms_x) + ctrl = 0x11; + + /* 0/1 = CENTER/CORNER, 00/10 = POINT/BILINEAR */ BEGIN_RING(chan, RING_2D(BLIT_CONTROL), 1); - OUT_RING (chan, 0); + OUT_RING (chan, ctrl); BEGIN_RING(chan, RING_2D(BLIT_DST_X), 4); - OUT_RING (chan, dx); - OUT_RING (chan, dy); - OUT_RING (chan, w); - OUT_RING (chan, h); + OUT_RING (chan, dx << dst->ms_x); + OUT_RING (chan, dy << dst->ms_y); + OUT_RING (chan, w << dst->ms_x); + OUT_RING (chan, h << dst->ms_y); BEGIN_RING(chan, RING_2D(BLIT_DU_DX_FRACT), 4); - OUT_RING (chan, 0); - OUT_RING (chan, 1); - OUT_RING (chan, 0); - OUT_RING (chan, 1); + OUT_RING (chan, duvdxy[2 + ((int)src->ms_x - (int)dst->ms_x)] & 0xf0000000); + OUT_RING (chan, duvdxy[2 + ((int)src->ms_x - (int)dst->ms_x)] & 0x0000000f); + OUT_RING (chan, duvdxy[2 + ((int)src->ms_y - (int)dst->ms_y)] & 0xf0000000); + OUT_RING (chan, duvdxy[2 + ((int)src->ms_y - (int)dst->ms_y)] & 0x0000000f); BEGIN_RING(chan, RING_2D(BLIT_SRC_X_FRACT), 4); OUT_RING (chan, 0); - OUT_RING (chan, sx); + OUT_RING (chan, sx << src->ms_x); OUT_RING (chan, 0); - OUT_RING (chan, sy); + OUT_RING (chan, sy << src->ms_y); return 0; } @@ -206,6 +207,34 @@ nv50_resource_copy_region(struct pipe_context *pipe, return; } + nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + + if (src->format == dst->format && src->nr_samples == dst->nr_samples) { + struct nv50_m2mf_rect drect, srect; + unsigned i; + unsigned nx = util_format_get_nblocksx(src->format, src_box->width); + unsigned ny = util_format_get_nblocksy(src->format, src_box->height); + + nv50_m2mf_rect_setup(&drect, dst, dst_level, dstx, dsty, dstz); + nv50_m2mf_rect_setup(&srect, src, src_level, + src_box->x, src_box->y, src_box->z); + + for (i = 0; i < src_box->depth; ++i) { + nv50_m2mf_transfer_rect(&screen->base.base, &drect, &srect, nx, ny); + + if (nv50_miptree(dst)->layout_3d) + drect.z++; + else + drect.base += nv50_miptree(dst)->layer_stride; + + if (nv50_miptree(src)->layout_3d) + srect.z++; + else + srect.base += nv50_miptree(src)->layer_stride; + } + return; + } + assert((src->format == dst->format) || (nv50_2d_format_faithful(src->format) && nv50_2d_format_faithful(dst->format))); @@ -339,7 +368,7 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers, /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */ nv50->dirty &= NV50_NEW_FRAMEBUFFER; - if (!nv50_state_validate(nv50)) + if (!nv50_state_validate(nv50, 9 + (fb->nr_cbufs * 2))) return; if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) { @@ -376,12 +405,546 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers, nv50->dirty = dirty & ~NV50_NEW_FRAMEBUFFER; } + +struct nv50_blitctx +{ + struct nv50_screen *screen; + struct { + struct pipe_framebuffer_state fb; + struct nv50_program *vp; + struct nv50_program *gp; + struct nv50_program *fp; + unsigned num_textures[3]; + unsigned num_samplers[3]; + struct pipe_sampler_view *texture; + struct nv50_tsc_entry *sampler; + unsigned dirty; + unsigned clip_nr; + } saved; + struct nv50_program vp; + struct nv50_program fp; + struct nv50_tsc_entry sampler[2]; /* nearest, bilinear */ + uint32_t fp_offset; + uint16_t color_mask; + uint8_t filter; +}; + +static void +nv50_blitctx_make_vp(struct nv50_blitctx *blit) +{ + static const uint32_t code[] = + { + 0x10000001, /* mov b32 o[0x00] s[0x00] */ /* HPOS.x */ + 0x0423c788, + 0x10000205, /* mov b32 o[0x04] s[0x04] */ /* HPOS.y */ + 0x0423c788, + 0x10000409, /* mov b32 o[0x08] s[0x08] */ /* TEXC.x */ + 0x0423c788, + 0x1000060d, /* mov b32 o[0x0c] s[0x0c] */ /* TEXC.y */ + 0x0423c788, + 0x10000811, /* exit mov b32 o[0x10] s[0x10] */ /* TEXC.z */ + 0x0423c789, + }; + + blit->vp.type = PIPE_SHADER_VERTEX; + blit->vp.translated = TRUE; + blit->vp.code = (uint32_t *)code; /* const_cast */ + blit->vp.code_size = sizeof(code); + blit->vp.max_gpr = 4; + blit->vp.max_out = 5; + blit->vp.out_nr = 2; + blit->vp.out[0].mask = 0x3; + blit->vp.out[0].sn = TGSI_SEMANTIC_POSITION; + blit->vp.out[1].hw = 2; + blit->vp.out[1].mask = 0x7; + blit->vp.out[1].sn = TGSI_SEMANTIC_GENERIC; + blit->vp.vp.attrs[0] = 0x73; + blit->vp.vp.psiz = 0x40; + blit->vp.vp.edgeflag = 0x40; +} + +static void +nv50_blitctx_make_fp(struct nv50_blitctx *blit) +{ + static const uint32_t code[] = + { + /* 3 coords RGBA in, RGBA out, also for Z32_FLOAT(_S8X24_USCALED) */ + 0x80000000, /* interp $r0 v[0x0] */ + 0x80010004, /* interp $r1 v[0x4] */ + 0x80020009, /* interp $r2 flat v[0x8] */ + 0x00040780, + 0xf6800001, /* texauto live { $r0,1,2,3 } $t0 $s0 { $r0,1,2 } */ + 0x0000c785, /* exit */ + + /* 3 coords ZS in, S encoded in R, Z encoded in GBA (8_UNORM) */ + 0x80000000, /* interp $r0 v[0x00] */ + 0x80010004, /* interp $r1 v[0x04] */ + 0x80020009, /* interp $r2 flat v[0x8] */ + 0x00040780, + 0xf6800001, /* texauto live { $r0,1,#,# } $t0 $s0 { $r0,1,2 } */ + 0x00000784, + 0xc03f0009, /* mul f32 $r2 $r0 (2^24 - 1) */ + 0x04b7ffff, + 0xa0000201, /* cvt f32 $r0 s32 $r1 */ + 0x44014780, + 0xa0000409, /* cvt rni s32 $r2 f32 $r2 */ + 0x8c004780, + 0xc0010001, /* mul f32 $r0 $r0 1/0xff */ + 0x03b8080b, + 0xd03f0405, /* and b32 $r1 $r2 0x0000ff */ + 0x0000000f, + 0xd000040d, /* and b32 $r3 $r2 0xff0000 */ + 0x000ff003, + 0xd0000409, /* and b32 $r2 $r2 0x00ff00 */ + 0x00000ff3, + 0xa0000205, /* cvt f32 $r1 s32 $r1 */ + 0x44014780, + 0xa000060d, /* cvt f32 $r3 s32 $r3 */ + 0x44014780, + 0xa0000409, /* cvt f32 $r2 s32 $r2 */ + 0x44014780, + 0xc0010205, /* mul f32 $r1 $r1 1/0x0000ff */ + 0x03b8080b, + 0xc001060d, /* mul f32 $r3 $r3 1/0x00ff00 */ + 0x0338080b, + 0xc0010409, /* mul f32 $r2 $r2 1/0xff0000 */ + 0x0378080b, + 0xf0000001, /* exit never nop */ + 0xe0000001, + + /* 3 coords ZS in, Z encoded in RGB, S encoded in A (U8_UNORM) */ + 0x80000000, /* interp $r0 v[0x00] */ + 0x80010004, /* interp $r1 v[0x04] */ + 0x80020009, /* interp $r2 flat v[0x8] */ + 0x00040780, + 0xf6800001, /* texauto live { $r0,1,#,# } $t0 $s0 { $r0,1,2 } */ + 0x00000784, + 0xc03f0009, /* mul f32 $r2 $r0 (2^24 - 1) */ + 0x04b7ffff, + 0xa0000281, /* cvt f32 $r3 s32 $r1 */ + 0x44014780, + 0xa0000409, /* cvt rni s32 $r2 f32 $r2 */ + 0x8c004780, + 0xc001060d, /* mul f32 $r3 $r3 1/0xff */ + 0x03b8080b, + 0xd03f0401, /* and b32 $r0 $r2 0x0000ff */ + 0x0000000f, + 0xd0000405, /* and b32 $r1 $r2 0x00ff00 */ + 0x00000ff3, + 0xd0000409, /* and b32 $r2 $r2 0xff0000 */ + 0x000ff003, + 0xa0000001, /* cvt f32 $r0 s32 $r0 */ + 0x44014780, + 0xa0000205, /* cvt f32 $r1 s32 $r1 */ + 0x44014780, + 0xa0000409, /* cvt f32 $r2 s32 $r2 */ + 0x44014780, + 0xc0010001, /* mul f32 $r0 $r0 1/0x0000ff */ + 0x03b8080b, + 0xc0010205, /* mul f32 $r1 $r1 1/0x00ff00 */ + 0x0378080b, + 0xc0010409, /* mul f32 $r2 $r2 1/0xff0000 */ + 0x0338080b, + 0xf0000001, /* exit never nop */ + 0xe0000001 + }; + + blit->fp.type = PIPE_SHADER_FRAGMENT; + blit->fp.translated = TRUE; + blit->fp.code = (uint32_t *)code; /* const_cast */ + blit->fp.code_size = sizeof(code); + blit->fp.max_gpr = 4; + blit->fp.max_out = 4; + blit->fp.in_nr = 1; + blit->fp.in[0].mask = 0x7; /* last component flat */ + blit->fp.in[0].linear = 1; + blit->fp.in[0].sn = TGSI_SEMANTIC_GENERIC; + blit->fp.out_nr = 1; + blit->fp.out[0].mask = 0xf; + blit->fp.out[0].sn = TGSI_SEMANTIC_COLOR; + blit->fp.fp.interp = 0x00020403; + blit->fp.gp.primid = 0x80; +} + +static void +nv50_blitctx_make_sampler(struct nv50_blitctx *blit) +{ + /* clamp to edge, min/max lod = 0, nearest filtering */ + + blit->sampler[0].id = -1; + + blit->sampler[0].tsc[0] = 0x00000092; + blit->sampler[0].tsc[1] = 0x00000051; + + /* clamp to edge, min/max lod = 0, bilinear filtering */ + + blit->sampler[1].id = -1; + + blit->sampler[1].tsc[0] = 0x00000092; + blit->sampler[1].tsc[1] = 0x00000062; +} + +/* Since shaders cannot export stencil, we cannot copy stencil values when + * rendering to ZETA, so we attach the ZS surface to a colour render target. + */ +static INLINE enum pipe_format +nv50_blit_zeta_to_colour_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: return PIPE_FORMAT_R16_UNORM; + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: return PIPE_FORMAT_R8G8B8A8_UNORM; + case PIPE_FORMAT_Z32_FLOAT: return PIPE_FORMAT_R32_FLOAT; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: return PIPE_FORMAT_R32G32_FLOAT; + default: + assert(0); + return PIPE_FORMAT_NONE; + } +} + +static void +nv50_blitctx_get_color_mask_and_fp(struct nv50_blitctx *blit, + enum pipe_format format, uint8_t mask) +{ + blit->color_mask = 0; + + switch (format) { + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + blit->fp_offset = 160; + if (mask & PIPE_MASK_Z) + blit->color_mask |= 0x0111; + if (mask & PIPE_MASK_S) + blit->color_mask |= 0x1000; + break; + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + blit->fp_offset = 24; + if (mask & PIPE_MASK_Z) + blit->color_mask |= 0x1110; + if (mask & PIPE_MASK_S) + blit->color_mask |= 0x0001; + break; + default: + blit->fp_offset = 0; + if (mask & (PIPE_MASK_R | PIPE_MASK_Z)) blit->color_mask |= 0x0001; + if (mask & (PIPE_MASK_G | PIPE_MASK_S)) blit->color_mask |= 0x0010; + if (mask & PIPE_MASK_B) blit->color_mask |= 0x0100; + if (mask & PIPE_MASK_A) blit->color_mask |= 0x1000; + break; + } +} + +static void +nv50_blit_set_dst(struct nv50_context *nv50, + struct pipe_resource *res, unsigned level, unsigned layer) +{ + struct pipe_context *pipe = &nv50->base.pipe; + struct pipe_surface templ; + + if (util_format_is_depth_or_stencil(res->format)) + templ.format = nv50_blit_zeta_to_colour_format(res->format); + else + templ.format = res->format; + + templ.usage = PIPE_USAGE_STREAM; + templ.u.tex.level = level; + templ.u.tex.first_layer = templ.u.tex.last_layer = layer; + + nv50->framebuffer.cbufs[0] = nv50_miptree_surface_new(pipe, res, &templ); + nv50->framebuffer.nr_cbufs = 1; + nv50->framebuffer.zsbuf = NULL; + nv50->framebuffer.width = nv50->framebuffer.cbufs[0]->width; + nv50->framebuffer.height = nv50->framebuffer.cbufs[0]->height; +} + +static INLINE void +nv50_blit_fixup_tic_entry(struct pipe_sampler_view *view) +{ + struct nv50_tic_entry *ent = nv50_tic_entry(view); + + ent->tic[2] &= ~(1 << 31); /* scaled coordinates, ok with 3d textures ? */ + + /* magic: */ + + ent->tic[3] = 0x20000000; /* affects quality of near vertical edges in MS8 */ +} + +static void +nv50_blit_set_src(struct nv50_context *nv50, + struct pipe_resource *res, unsigned level, unsigned layer) +{ + struct pipe_context *pipe = &nv50->base.pipe; + struct pipe_sampler_view templ; + + templ.format = res->format; + templ.u.tex.first_layer = templ.u.tex.last_layer = layer; + templ.u.tex.first_level = templ.u.tex.last_level = level; + templ.swizzle_r = PIPE_SWIZZLE_RED; + templ.swizzle_g = PIPE_SWIZZLE_GREEN; + templ.swizzle_b = PIPE_SWIZZLE_BLUE; + templ.swizzle_a = PIPE_SWIZZLE_ALPHA; + + nv50->textures[2][0] = nv50_create_sampler_view(pipe, res, &templ); + + nv50_blit_fixup_tic_entry(nv50->textures[2][0]); + + nv50->num_textures[0] = nv50->num_textures[1] = 0; + nv50->num_textures[2] = 1; +} + +static void +nv50_blitctx_prepare_state(struct nv50_blitctx *blit) +{ + struct nouveau_channel *chan = blit->screen->base.channel; + + /* blend state */ + BEGIN_RING(chan, RING_3D(COLOR_MASK(0)), 1); + OUT_RING (chan, blit->color_mask); + BEGIN_RING(chan, RING_3D(BLEND_ENABLE(0)), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(LOGIC_OP_ENABLE), 1); + OUT_RING (chan, 0); + + /* rasterizer state */ +#ifndef NV50_SCISSORS_CLIPPING + BEGIN_RING(chan, RING_3D(SCISSOR_ENABLE(0)), 1); + OUT_RING (chan, 1); +#endif + BEGIN_RING(chan, RING_3D(VERTEX_TWO_SIDE_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(FRAG_COLOR_CLAMP_EN), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(MULTISAMPLE_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(MSAA_MASK(0)), 4); + OUT_RING (chan, 0xffff); + OUT_RING (chan, 0xffff); + OUT_RING (chan, 0xffff); + OUT_RING (chan, 0xffff); + BEGIN_RING(chan, RING_3D(POLYGON_MODE_FRONT), 3); + OUT_RING (chan, NV50_3D_POLYGON_MODE_FRONT_FILL); + OUT_RING (chan, NV50_3D_POLYGON_MODE_BACK_FILL); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(CULL_FACE_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(POLYGON_STIPPLE_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(POLYGON_OFFSET_FILL_ENABLE), 1); + OUT_RING (chan, 0); + + /* zsa state */ + BEGIN_RING(chan, RING_3D(DEPTH_TEST_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(STENCIL_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(ALPHA_TEST_ENABLE), 1); + OUT_RING (chan, 0); +} + +static void +nv50_blitctx_pre_blit(struct nv50_blitctx *blit, struct nv50_context *nv50) +{ + int s; + + blit->saved.fb.width = nv50->framebuffer.width; + blit->saved.fb.height = nv50->framebuffer.height; + blit->saved.fb.nr_cbufs = nv50->framebuffer.nr_cbufs; + blit->saved.fb.cbufs[0] = nv50->framebuffer.cbufs[0]; + blit->saved.fb.zsbuf = nv50->framebuffer.zsbuf; + + blit->saved.vp = nv50->vertprog; + blit->saved.gp = nv50->gmtyprog; + blit->saved.fp = nv50->fragprog; + + nv50->vertprog = &blit->vp; + nv50->gmtyprog = NULL; + nv50->fragprog = &blit->fp; + + blit->saved.clip_nr = nv50->clip.nr; + + nv50->clip.nr = 0; + + for (s = 0; s < 3; ++s) { + blit->saved.num_textures[s] = nv50->num_textures[s]; + blit->saved.num_samplers[s] = nv50->num_samplers[s]; + } + blit->saved.texture = nv50->textures[2][0]; + blit->saved.sampler = nv50->samplers[2][0]; + + nv50->samplers[2][0] = &blit->sampler[blit->filter]; + + nv50->num_samplers[0] = nv50->num_samplers[1] = 0; + nv50->num_samplers[2] = 1; + + blit->saved.dirty = nv50->dirty; + + nv50->dirty = + NV50_NEW_FRAMEBUFFER | + NV50_NEW_VERTPROG | NV50_NEW_FRAGPROG | NV50_NEW_GMTYPROG | + NV50_NEW_TEXTURES | NV50_NEW_SAMPLERS; +} + +static void +nv50_blitctx_post_blit(struct nv50_context *nv50, struct nv50_blitctx *blit) +{ + int s; + + pipe_surface_reference(&nv50->framebuffer.cbufs[0], NULL); + + nv50->framebuffer.width = blit->saved.fb.width; + nv50->framebuffer.height = blit->saved.fb.height; + nv50->framebuffer.nr_cbufs = blit->saved.fb.nr_cbufs; + nv50->framebuffer.cbufs[0] = blit->saved.fb.cbufs[0]; + nv50->framebuffer.zsbuf = blit->saved.fb.zsbuf; + + nv50->vertprog = blit->saved.vp; + nv50->gmtyprog = blit->saved.gp; + nv50->fragprog = blit->saved.fp; + + nv50->clip.nr = blit->saved.clip_nr; + + pipe_sampler_view_reference(&nv50->textures[2][0], NULL); + + for (s = 0; s < 3; ++s) { + nv50->num_textures[s] = blit->saved.num_textures[s]; + nv50->num_samplers[s] = blit->saved.num_samplers[s]; + } + nv50->textures[2][0] = blit->saved.texture; + nv50->samplers[2][0] = blit->saved.sampler; + + nv50->dirty = blit->saved.dirty | + (NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR | NV50_NEW_SAMPLE_MASK | + NV50_NEW_RASTERIZER | NV50_NEW_ZSA | NV50_NEW_BLEND | + NV50_NEW_TEXTURES | NV50_NEW_SAMPLERS | + NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG | NV50_NEW_FRAGPROG); +} + +static void +nv50_resource_resolve(struct pipe_context *pipe, + const struct pipe_resolve_info *info) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_screen *screen = nv50->screen; + struct nv50_blitctx *blit = screen->blitctx; + struct nouveau_channel *chan = screen->base.channel; + struct pipe_resource *src = info->src.res; + struct pipe_resource *dst = info->dst.res; + float x0, x1, y0, y1, z; + float x_range, y_range; + + nv50_blitctx_get_color_mask_and_fp(blit, dst->format, info->mask); + + blit->filter = util_format_is_depth_or_stencil(dst->format) ? 0 : 1; + + nv50_blitctx_pre_blit(blit, nv50); + + nv50_blit_set_dst(nv50, dst, info->dst.level, info->dst.layer); + nv50_blit_set_src(nv50, src, 0, info->src.layer); + + nv50_blitctx_prepare_state(blit); + + nv50_state_validate(nv50, 36); + + x_range = + (float)(info->src.x1 - info->src.x0) / + (float)(info->dst.x1 - info->dst.x0); + y_range = + (float)(info->src.y1 - info->src.y0) / + (float)(info->dst.y1 - info->dst.y0); + + x0 = (float)info->src.x0 - x_range * (float)info->dst.x0; + y0 = (float)info->src.y0 - y_range * (float)info->dst.y0; + + x1 = x0 + 16384.0f * x_range; + y1 = y0 + 16384.0f * y_range; + + x0 *= (float)(1 << nv50_miptree(src)->ms_x); + x1 *= (float)(1 << nv50_miptree(src)->ms_x); + y0 *= (float)(1 << nv50_miptree(src)->ms_y); + y1 *= (float)(1 << nv50_miptree(src)->ms_y); + + z = (float)info->src.layer; + + BEGIN_RING(chan, RING_3D(FP_START_ID), 1); + OUT_RING (chan, + blit->fp.code_base + blit->fp_offset); + + BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSFORM_EN), 1); + OUT_RING (chan, 0); + + /* Draw a large triangle in screen coordinates covering the whole + * render target, with scissors defining the destination region. + * The vertex is supplied with non-normalized texture coordinates + * arranged in a way to yield the desired offset and scale. + */ + + BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2); + OUT_RING (chan, (info->dst.x1 << 16) | info->dst.x0); + OUT_RING (chan, (info->dst.y1 << 16) | info->dst.y0); + + BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1); + OUT_RING (chan, NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES); + BEGIN_RING(chan, RING_3D(VTX_ATTR_3F_X(1)), 3); + OUT_RINGf (chan, x0); + OUT_RINGf (chan, y0); + OUT_RINGf (chan, z); + BEGIN_RING(chan, RING_3D(VTX_ATTR_2F_X(0)), 2); + OUT_RINGf (chan, 0.0f); + OUT_RINGf (chan, 0.0f); + BEGIN_RING(chan, RING_3D(VTX_ATTR_3F_X(1)), 3); + OUT_RINGf (chan, x1); + OUT_RINGf (chan, y0); + OUT_RINGf (chan, z); + BEGIN_RING(chan, RING_3D(VTX_ATTR_2F_X(0)), 2); + OUT_RINGf (chan, 16384 << nv50_miptree(dst)->ms_x); + OUT_RINGf (chan, 0.0f); + BEGIN_RING(chan, RING_3D(VTX_ATTR_3F_X(1)), 3); + OUT_RINGf (chan, x0); + OUT_RINGf (chan, y1); + OUT_RINGf (chan, z); + BEGIN_RING(chan, RING_3D(VTX_ATTR_2F_X(0)), 2); + OUT_RINGf (chan, 0.0f); + OUT_RINGf (chan, 16384 << nv50_miptree(dst)->ms_y); + BEGIN_RING(chan, RING_3D(VERTEX_END_GL), 1); + OUT_RING (chan, 0); + + /* re-enable normally constant state */ + + BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSFORM_EN), 1); + OUT_RING (chan, 1); + + nv50_blitctx_post_blit(nv50, blit); +} + +boolean +nv50_blitctx_create(struct nv50_screen *screen) +{ + screen->blitctx = CALLOC_STRUCT(nv50_blitctx); + if (!screen->blitctx) { + NOUVEAU_ERR("failed to allocate blit context\n"); + return FALSE; + } + + screen->blitctx->screen = screen; + + nv50_blitctx_make_vp(screen->blitctx); + nv50_blitctx_make_fp(screen->blitctx); + + nv50_blitctx_make_sampler(screen->blitctx); + + screen->blitctx->color_mask = 0x1111; + + return TRUE; +} + void nv50_init_surface_functions(struct nv50_context *nv50) { struct pipe_context *pipe = &nv50->base.pipe; pipe->resource_copy_region = nv50_resource_copy_region; + pipe->resource_resolve = nv50_resource_resolve; pipe->clear_render_target = nv50_clear_render_target; pipe->clear_depth_stencil = nv50_clear_depth_stencil; } diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index 9192d2e2590..73db9ca4fd1 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -159,13 +159,13 @@ nv50_create_sampler_view(struct pipe_context *pipe, else tic[3] = 0x00300000; - tic[4] = (1 << 31) | mt->base.base.width0; + tic[4] = (1 << 31) | (mt->base.base.width0 << mt->ms_x); - tic[5] = mt->base.base.height0 & 0xffff; + tic[5] = (mt->base.base.height0 << mt->ms_y) & 0xffff; tic[5] |= depth << 16; tic[5] |= mt->base.base.last_level << 28; - tic[6] = 0x03000000; + tic[6] = (mt->ms_x > 1) ? 0x88000000 : 0x03000000; /* sampling points */ tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level; diff --git a/src/gallium/drivers/nv50/nv50_texture.xml.h b/src/gallium/drivers/nv50/nv50_texture.xml.h index e0cbbdf0d7b..08f6efdd7bf 100644 --- a/src/gallium/drivers/nv50/nv50_texture.xml.h +++ b/src/gallium/drivers/nv50/nv50_texture.xml.h @@ -1,5 +1,5 @@ -#ifndef NV50_TEXTURE_XML -#define NV50_TEXTURE_XML +#ifndef RNNDB_NV50_TEXTURE_XML +#define RNNDB_NV50_TEXTURE_XML /* Autogenerated file, DO NOT EDIT manually! @@ -8,8 +8,10 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- nv50_texture.xml ( 8377 bytes, from 2011-02-12 12:05:21) -- copyright.xml ( 6452 bytes, from 2010-11-25 23:28:20) +- rnndb/nv50_texture.xml ( 7947 bytes, from 2011-07-09 13:43:58) +- ./rnndb/copyright.xml ( 6452 bytes, from 2011-07-09 13:43:58) +- ./rnndb/nvchipsets.xml ( 3617 bytes, from 2011-07-09 13:43:58) +- ./rnndb/nv50_defs.xml ( 5468 bytes, from 2011-07-09 13:43:58) Copyright (C) 2006-2011 by the following authors: - Artur Huillet <[email protected]> (ahuillet) @@ -115,52 +117,52 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV50_TIC_0_FMT_32_32_32_32 0x00000001 #define NV50_TIC_0_FMT_16_16_16_16 0x00000003 #define NV50_TIC_0_FMT_32_32 0x00000004 -#define NV50_TIC_0_FMT_32_8 0x00000005 +#define NV50_TIC_0_FMT_32_8_X24 0x00000005 #define NV50_TIC_0_FMT_8_8_8_8 0x00000008 -#define NV50_TIC_0_FMT_2_10_10_10 0x00000009 +#define NV50_TIC_0_FMT_10_10_10_2 0x00000009 #define NV50_TIC_0_FMT_16_16 0x0000000c -#define NV50_TIC_0_FMT_8_24 0x0000000d -#define NV50_TIC_0_FMT_24_8 0x0000000e +#define NV50_TIC_0_FMT_24_8 0x0000000d +#define NV50_TIC_0_FMT_8_24 0x0000000e #define NV50_TIC_0_FMT_32 0x0000000f #define NV50_TIC_0_FMT_BPTC_FLOAT 0x00000010 #define NV50_TIC_0_FMT_BPTC_UFLOAT 0x00000011 #define NV50_TIC_0_FMT_4_4_4_4 0x00000012 -#define NV50_TIC_0_FMT_5_5_5_1 0x00000013 -#define NV50_TIC_0_FMT_1_5_5_5 0x00000014 +#define NV50_TIC_0_FMT_1_5_5_5 0x00000013 +#define NV50_TIC_0_FMT_5_5_5_1 0x00000014 #define NV50_TIC_0_FMT_5_6_5 0x00000015 -#define NV50_TIC_0_FMT_6_5_5 0x00000016 +#define NV50_TIC_0_FMT_5_5_6 0x00000016 #define NV50_TIC_0_FMT_BPTC 0x00000017 #define NV50_TIC_0_FMT_8_8 0x00000018 #define NV50_TIC_0_FMT_16 0x0000001b #define NV50_TIC_0_FMT_8 0x0000001d #define NV50_TIC_0_FMT_4_4 0x0000001e -#define NV50_TIC_0_FMT_BITMAP_8X8 0x0000001f -#define NV50_TIC_0_FMT_E5_9_9_9 0x00000020 -#define NV50_TIC_0_FMT_10_11_11 0x00000021 -#define NV50_TIC_0_FMT_C1_C2_C1_C0 0x00000022 -#define NV50_TIC_0_FMT_C2_C1_C0_C1 0x00000023 +#define NV50_TIC_0_FMT_BITMAP 0x0000001f +#define NV50_TIC_0_FMT_9_9_9_E5 0x00000020 +#define NV50_TIC_0_FMT_11_11_10 0x00000021 +#define NV50_TIC_0_FMT_U8_YA8_V8_YB8 0x00000022 +#define NV50_TIC_0_FMT_YA8_U8_YB8_V8 0x00000023 #define NV50_TIC_0_FMT_DXT1 0x00000024 #define NV50_TIC_0_FMT_DXT3 0x00000025 #define NV50_TIC_0_FMT_DXT5 0x00000026 #define NV50_TIC_0_FMT_RGTC1 0x00000027 #define NV50_TIC_0_FMT_RGTC2 0x00000028 -#define NV50_TIC_0_FMT_Z24S8 0x00000029 -#define NV50_TIC_0_FMT_S8Z24 0x0000002a -#define NV50_TIC_0_FMT_X8Z24 0x0000002b -#define NV50_TIC_0_FMT_C8Z24_MS4_CS4 0x0000002c -#define NV50_TIC_0_FMT_C8Z24_MS8_CS8 0x0000002d -#define NV50_TIC_0_FMT_C8Z24_MS4_CS12 0x0000002e +#define NV50_TIC_0_FMT_S8_Z24 0x00000029 +#define NV50_TIC_0_FMT_Z24_S8 0x0000002a +#define NV50_TIC_0_FMT_Z24_X8 0x0000002b +#define NV50_TIC_0_FMT_Z24_C8_MS4_CS4 0x0000002c +#define NV50_TIC_0_FMT_Z24_C8_MS8_CS8 0x0000002d +#define NV50_TIC_0_FMT_Z24_C8_MS4_CS12 0x0000002e #define NV50_TIC_0_FMT_Z32 0x0000002f -#define NV50_TIC_0_FMT_X24S8Z32 0x00000030 -#define NV50_TIC_0_FMT_X16C8S8X8Z24_MS4_CS4 0x00000031 -#define NV50_TIC_0_FMT_X16C8S8X8Z24_MS8_CS8 0x00000032 -#define NV50_TIC_0_FMT_X16C8X8Z32_MS4_CS4 0x00000033 -#define NV50_TIC_0_FMT_X16C8X8Z32_MS8_CS8 0x00000034 -#define NV50_TIC_0_FMT_X16C8S8Z32_MS4_CS4 0x00000035 -#define NV50_TIC_0_FMT_X16C8S8Z32_MS8_CS8 0x00000036 -#define NV50_TIC_0_FMT_X16C8S8X8Z24_MS4_CS12 0x00000037 -#define NV50_TIC_0_FMT_X16C8X8Z32_MS4_CS12 0x00000038 -#define NV50_TIC_0_FMT_X16C8S8Z32_MS4_CS12 0x00000039 +#define NV50_TIC_0_FMT_Z32_S8_X24 0x00000030 +#define NV50_TIC_0_FMT_Z24_X8_S8_C8_X16_MS4_CS4 0x00000031 +#define NV50_TIC_0_FMT_Z24_X8_S8_C8_X16_MS8_CS8 0x00000032 +#define NV50_TIC_0_FMT_Z32_X8_C8_X16_MS4_CS4 0x00000033 +#define NV50_TIC_0_FMT_Z32_X8_C8_X16_MS8_CS8 0x00000034 +#define NV50_TIC_0_FMT_Z32_S8_C8_X16_MS4_CS4 0x00000035 +#define NV50_TIC_0_FMT_Z32_S8_C8_X16_MS8_CS8 0x00000036 +#define NV50_TIC_0_FMT_Z24_X8_S8_C8_X16_MS4_CS12 0x00000037 +#define NV50_TIC_0_FMT_Z32_X8_C8_X16_MS4_CS12 0x00000038 +#define NV50_TIC_0_FMT_Z32_S8_C8_X16_MS4_CS12 0x00000039 #define NV50_TIC_0_FMT_Z16 0x0000003a #define NV50_TIC_1 0x00000004 @@ -215,6 +217,19 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV50_TIC_7_BASE_LEVEL__SHIFT 0 #define NV50_TIC_7_MAX_LEVEL__MASK 0x000000f0 #define NV50_TIC_7_MAX_LEVEL__SHIFT 4 +#define NV50_TIC_7_MS_MODE__MASK 0x0000f000 +#define NV50_TIC_7_MS_MODE__SHIFT 12 +#define NV50_TIC_7_MS_MODE_MS1 0x00000000 +#define NV50_TIC_7_MS_MODE_MS2 0x00001000 +#define NV50_TIC_7_MS_MODE_MS4 0x00002000 +#define NV50_TIC_7_MS_MODE_MS8 0x00003000 +#define NVA3_TIC_7_MS_MODE_MS8_ALT 0x00004000 +#define NVA3_TIC_7_MS_MODE_MS2_ALT 0x00005000 +#define NVC0_TIC_7_MS_MODE_UNK6 0x00006000 +#define NV50_TIC_7_MS_MODE_MS4_CS4 0x00008000 +#define NV50_TIC_7_MS_MODE_MS4_CS12 0x00009000 +#define NV50_TIC_7_MS_MODE_MS8_CS8 0x0000a000 +#define NVC0_TIC_7_MS_MODE_MS8_CS24 0x0000b000 #define NV50_TSC__SIZE 0x00000020 #define NV50_TSC_0 0x00000000 @@ -276,4 +291,4 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV50_TSC_7_BORDER_COLOR_ALPHA__SHIFT 0 -#endif /* NV50_TEXTURE_XML */ +#endif /* RNNDB_NV50_TEXTURE_XML */ diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index d9fb22aa673..0ff79eb908a 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -2,7 +2,6 @@ #include "util/u_format.h" #include "nv50_context.h" -#include "nv50_transfer.h" #include "nv50_defs.xml.h" @@ -13,7 +12,44 @@ struct nv50_transfer { uint32_t nblocksy; }; -static void +void +nv50_m2mf_rect_setup(struct nv50_m2mf_rect *rect, + struct pipe_resource *restrict res, unsigned l, + unsigned x, unsigned y, unsigned z) +{ + struct nv50_miptree *mt = nv50_miptree(res); + const unsigned w = u_minify(res->width0, l); + const unsigned h = u_minify(res->height0, l); + + rect->bo = mt->base.bo; + rect->domain = mt->base.domain; + rect->base = mt->level[l].offset; + rect->pitch = mt->level[l].pitch; + if (util_format_is_plain(res->format)) { + rect->width = w << mt->ms_x; + rect->height = h << mt->ms_y; + rect->x = x << mt->ms_x; + rect->y = y << mt->ms_y; + } else { + rect->width = util_format_get_nblocksx(res->format, w); + rect->height = util_format_get_nblocksy(res->format, h); + rect->x = util_format_get_nblocksx(res->format, x); + rect->y = util_format_get_nblocksy(res->format, y); + } + rect->tile_mode = mt->level[l].tile_mode; + rect->cpp = util_format_get_blocksize(res->format); + + if (mt->layout_3d) { + rect->z = z; + rect->depth = u_minify(res->depth0, l); + } else { + rect->base += z * mt->layer_stride; + rect->z = 0; + rect->depth = 1; + } +} + +void nv50_m2mf_transfer_rect(struct pipe_screen *pscreen, const struct nv50_m2mf_rect *dst, const struct nv50_m2mf_rect *src, @@ -202,26 +238,14 @@ nv50_miptree_transfer_new(struct pipe_context *pctx, struct nv50_context *nv50 = nv50_context(pctx); struct pipe_screen *pscreen = pctx->screen; struct nouveau_device *dev = nv50->screen->base.device; - struct nv50_miptree *mt = nv50_miptree(res); - struct nv50_miptree_level *lvl = &mt->level[level]; + const struct nv50_miptree *mt = nv50_miptree(res); struct nv50_transfer *tx; uint32_t size; - uint32_t w, h, d, z, layer; int ret; if (usage & PIPE_TRANSFER_MAP_DIRECTLY) return NULL; - if (mt->layout_3d) { - z = box->z; - d = u_minify(res->depth0, level); - layer = 0; - } else { - z = 0; - d = 1; - layer = box->z; - } - tx = CALLOC_STRUCT(nv50_transfer); if (!tx) return NULL; @@ -232,28 +256,18 @@ nv50_miptree_transfer_new(struct pipe_context *pctx, tx->base.usage = usage; tx->base.box = *box; - tx->nblocksx = util_format_get_nblocksx(res->format, box->width); - tx->nblocksy = util_format_get_nblocksy(res->format, box->height); + if (util_format_is_plain(res->format)) { + tx->nblocksx = box->width << mt->ms_x; + tx->nblocksy = box->height << mt->ms_x; + } else { + tx->nblocksx = util_format_get_nblocksx(res->format, box->width); + tx->nblocksy = util_format_get_nblocksy(res->format, box->height); + } tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format); tx->base.layer_stride = tx->nblocksy * tx->base.stride; - w = u_minify(res->width0, level); - h = u_minify(res->height0, level); - - tx->rect[0].cpp = tx->rect[1].cpp = util_format_get_blocksize(res->format); - - tx->rect[0].bo = mt->base.bo; - tx->rect[0].base = lvl->offset + layer * mt->layer_stride; - tx->rect[0].tile_mode = lvl->tile_mode; - tx->rect[0].x = util_format_get_nblocksx(res->format, box->x); - tx->rect[0].y = util_format_get_nblocksy(res->format, box->y); - tx->rect[0].z = z; - tx->rect[0].width = util_format_get_nblocksx(res->format, w); - tx->rect[0].height = util_format_get_nblocksy(res->format, h); - tx->rect[0].depth = d; - tx->rect[0].pitch = lvl->pitch; - tx->rect[0].domain = NOUVEAU_BO_VRAM; + nv50_m2mf_rect_setup(&tx->rect[0], res, level, box->x, box->y, box->z); size = tx->base.layer_stride; @@ -264,6 +278,7 @@ nv50_miptree_transfer_new(struct pipe_context *pctx, return NULL; } + tx->rect[1].cpp = tx->rect[0].cpp; tx->rect[1].width = tx->nblocksx; tx->rect[1].height = tx->nblocksy; tx->rect[1].depth = 1; @@ -272,6 +287,7 @@ nv50_miptree_transfer_new(struct pipe_context *pctx, if (usage & PIPE_TRANSFER_READ) { unsigned base = tx->rect[0].base; + unsigned z = tx->rect[0].z; unsigned i; for (i = 0; i < box->depth; ++i) { nv50_m2mf_transfer_rect(pscreen, &tx->rect[1], &tx->rect[0], diff --git a/src/gallium/drivers/nv50/nv50_transfer.h b/src/gallium/drivers/nv50/nv50_transfer.h index d3259ef4a5d..c58cb0008df 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.h +++ b/src/gallium/drivers/nv50/nv50_transfer.h @@ -4,22 +4,6 @@ #include "pipe/p_state.h" -struct pipe_transfer * -nv50_miptree_transfer_new(struct pipe_context *pcontext, - struct pipe_resource *pt, - unsigned level, - unsigned usage, - const struct pipe_box *box); -void -nv50_miptree_transfer_del(struct pipe_context *pcontext, - struct pipe_transfer *ptx); -void * -nv50_miptree_transfer_map(struct pipe_context *pcontext, - struct pipe_transfer *ptx); -void -nv50_miptree_transfer_unmap(struct pipe_context *pcontext, - struct pipe_transfer *ptx); - struct nv50_m2mf_rect { struct nouveau_bo *bo; uint32_t base; @@ -35,4 +19,9 @@ struct nv50_m2mf_rect { uint16_t cpp; }; +void +nv50_m2mf_rect_setup(struct nv50_m2mf_rect *rect, + struct pipe_resource *restrict res, unsigned l, + unsigned x, unsigned y, unsigned z); + #endif diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index abdb9ce2f93..1c8347a793a 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -389,11 +389,11 @@ nv50_prim_gl(unsigned prim) static void nv50_draw_vbo_flush_notify(struct nouveau_channel *chan) { - struct nv50_context *nv50 = chan->user_private; + struct nv50_screen *screen = chan->user_private; - nouveau_fence_update(&nv50->screen->base, TRUE); + nouveau_fence_update(&screen->base, TRUE); - nv50_bufctx_emit_relocs(nv50); + nv50_bufctx_emit_relocs(screen->cur_ctx); } static void @@ -404,9 +404,6 @@ nv50_draw_arrays(struct nv50_context *nv50, struct nouveau_channel *chan = nv50->screen->base.channel; unsigned prim; - chan->flush_notify = nv50_draw_vbo_flush_notify; - chan->user_private = nv50; - prim = nv50_prim_gl(mode); while (instance_count--) { @@ -420,8 +417,6 @@ nv50_draw_arrays(struct nv50_context *nv50, prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; } - - chan->flush_notify = nv50_default_flush_notify; } static void @@ -523,9 +518,6 @@ nv50_draw_elements(struct nv50_context *nv50, boolean shorten, unsigned prim; const unsigned index_size = nv50->idxbuf.index_size; - chan->flush_notify = nv50_draw_vbo_flush_notify; - chan->user_private = nv50; - prim = nv50_prim_gl(mode); if (index_bias != nv50->state.index_bias) { @@ -631,8 +623,6 @@ nv50_draw_elements(struct nv50_context *nv50, boolean shorten, prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; } } - - chan->flush_notify = nv50_default_flush_notify; } void @@ -657,10 +647,13 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) if (nv50->vbo_user && !(nv50->dirty & (NV50_NEW_VERTEX | NV50_NEW_ARRAYS))) nv50_update_user_vbufs(nv50); - nv50_state_validate(nv50); + nv50_state_validate(nv50, 8); /* 8 as minimum, we use flush_notify here */ + + chan->flush_notify = nv50_draw_vbo_flush_notify; if (nv50->vbo_fifo) { nv50_push_vbo(nv50, info); + chan->flush_notify = nv50_default_flush_notify; return; } @@ -712,6 +705,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) info->mode, info->start, info->count, info->instance_count, info->index_bias); } + chan->flush_notify = nv50_default_flush_notify; nv50_release_user_vbufs(nv50); } diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h index 2ca0bc23836..63016372827 100644 --- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h +++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h @@ -146,6 +146,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_TFB_ENABLE 0x00000744 +#define NVC0_3D_SAMPLE_SHADING 0x00000754 +#define NVC0_3D_SAMPLE_SHADING_MIN_SAMPLES__MASK 0x0000000f +#define NVC0_3D_SAMPLE_SHADING_MIN_SAMPLES__SHIFT 0 +#define NVC0_3D_SAMPLE_SHADING_ENABLE 0x00000010 + #define NVC0_3D_LOCAL_BASE 0x0000077c #define NVC0_3D_LOCAL_ADDRESS_HIGH 0x00000790 @@ -419,7 +424,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_VERTEX_ATTRIB_FORMAT(i0) (0x00001160 + 0x4*(i0)) #define NVC0_3D_VERTEX_ATTRIB_FORMAT__ESIZE 0x00000004 #define NVC0_3D_VERTEX_ATTRIB_FORMAT__LEN 0x00000020 -#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__MASK 0x0000003f +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__MASK 0x0000001f #define NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT 0 #define NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST 0x00000040 #define NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__MASK 0x001fff80 @@ -438,8 +443,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8 0x03000000 #define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16 0x03600000 #define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8 0x03a00000 -#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_2_10_10_10 0x06000000 -#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE__MASK 0x78000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_10_10_10_2 0x06000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE__MASK 0x38000000 #define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE__SHIFT 27 #define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SNORM 0x08000000 #define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UNORM 0x10000000 @@ -819,13 +824,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_LAYER_USE_GP 0x00010000 #define NVC0_3D_MULTISAMPLE_MODE 0x000015d0 -#define NVC0_3D_MULTISAMPLE_MODE_1X 0x00000000 -#define NVC0_3D_MULTISAMPLE_MODE_2XMS 0x00000001 -#define NVC0_3D_MULTISAMPLE_MODE_4XMS 0x00000002 -#define NVC0_3D_MULTISAMPLE_MODE_8XMS 0x00000003 -#define NVC0_3D_MULTISAMPLE_MODE_4XMS_4XCS 0x00000008 -#define NVC0_3D_MULTISAMPLE_MODE_4XMS_12XCS 0x00000009 -#define NVC0_3D_MULTISAMPLE_MODE_8XMS_8XCS 0x0000000a +#define NVC0_3D_MULTISAMPLE_MODE_MS1 0x00000000 +#define NVC0_3D_MULTISAMPLE_MODE_MS2 0x00000001 +#define NVC0_3D_MULTISAMPLE_MODE_MS4 0x00000002 +#define NVC0_3D_MULTISAMPLE_MODE_MS8 0x00000003 +#define NVC0_3D_MULTISAMPLE_MODE_MS8_ALT 0x00000004 +#define NVC0_3D_MULTISAMPLE_MODE_MS2_ALT 0x00000005 +#define NVC0_3D_MULTISAMPLE_MODE_UNK6 0x00000006 +#define NVC0_3D_MULTISAMPLE_MODE_MS4_CS4 0x00000008 +#define NVC0_3D_MULTISAMPLE_MODE_MS4_CS12 0x00000009 +#define NVC0_3D_MULTISAMPLE_MODE_MS8_CS8 0x0000000a +#define NVC0_3D_MULTISAMPLE_MODE_MS8_CS24 0x0000000b #define NVC0_3D_VERTEX_BEGIN_D3D 0x000015d4 #define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE__MASK 0x0fffffff diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c index 2f2a3da7c44..360afbb943e 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nvc0/nvc0_context.c @@ -89,10 +89,8 @@ nvc0_destroy(struct pipe_context *pipe) draw_destroy(nvc0->draw); - if (nvc0->screen->cur_ctx == nvc0) { - nvc0->screen->base.channel->user_private = NULL; + if (nvc0->screen->cur_ctx == nvc0) nvc0->screen->cur_ctx = NULL; - } FREE(nvc0); } @@ -100,13 +98,13 @@ nvc0_destroy(struct pipe_context *pipe) void nvc0_default_flush_notify(struct nouveau_channel *chan) { - struct nvc0_context *nvc0 = chan->user_private; + struct nvc0_screen *screen = chan->user_private; - if (!nvc0) + if (!screen) return; - nouveau_fence_update(&nvc0->screen->base, TRUE); - nouveau_fence_next(&nvc0->screen->base); + nouveau_fence_update(&screen->base, TRUE); + nouveau_fence_next(&screen->base); } struct pipe_context * @@ -141,7 +139,6 @@ nvc0_create(struct pipe_screen *pscreen, void *priv) if (!screen->cur_ctx) screen->cur_ctx = nvc0; - screen->base.channel->user_private = nvc0; screen->base.channel->flush_notify = nvc0_default_flush_notify; nvc0_init_query_functions(nvc0); @@ -153,6 +150,8 @@ nvc0_create(struct pipe_screen *pscreen, void *priv) assert(nvc0->draw); draw_set_rasterize_stage(nvc0->draw, nvc0_draw_render_stage(nvc0)); + nouveau_context_init_vdec(&nvc0->base); + return pipe; } @@ -169,6 +168,7 @@ nvc0_bufctx_add_resident(struct nvc0_context *nvc0, int ctx, if (!resource->bo) return; + nvc0->residents_size += sizeof(struct resident); /* We don't need to reference the resource here, it will be referenced * in the context/state, and bufctx will be reset when state changes. @@ -190,6 +190,7 @@ nvc0_bufctx_del_resident(struct nvc0_context *nvc0, int ctx, top = util_dynarray_pop_ptr(&nvc0->residents[ctx], struct resident); if (rsd != top) *rsd = *top; + nvc0->residents_size -= sizeof(struct resident); break; } } @@ -202,11 +203,15 @@ nvc0_bufctx_emit_relocs(struct nvc0_context *nvc0) struct util_dynarray *array; unsigned ctx, i, n; + n = nvc0->residents_size / sizeof(struct resident); + n += NVC0_SCREEN_RESIDENT_BO_COUNT; + + MARK_RING(nvc0->screen->base.channel, n, n); + for (ctx = 0; ctx < NVC0_BUFCTX_COUNT; ++ctx) { array = &nvc0->residents[ctx]; n = array->size / sizeof(struct resident); - MARK_RING(nvc0->screen->base.channel, n, n); for (i = 0; i < n; ++i) { rsd = util_dynarray_element(array, struct resident, i); diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h index f97141dd46e..bf891649a57 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -19,6 +19,8 @@ #include "nvc0_program.h" #include "nvc0_resource.h" +#include "nv50/nv50_transfer.h" + #include "nouveau/nouveau_context.h" #include "nvc0_3ddefs.xml.h" @@ -62,6 +64,7 @@ struct nvc0_context { struct nvc0_screen *screen; struct util_dynarray residents[NVC0_BUFCTX_COUNT]; + unsigned residents_size; uint32_t dirty; @@ -136,20 +139,6 @@ nvc0_context(struct pipe_context *pipe) return (struct nvc0_context *)pipe; } -struct nvc0_surface { - struct pipe_surface base; - uint32_t offset; - uint32_t width; - uint16_t height; - uint16_t depth; -}; - -static INLINE struct nvc0_surface * -nvc0_surface(struct pipe_surface *ps) -{ - return (struct nvc0_surface *)ps; -} - /* nvc0_context.c */ struct pipe_context *nvc0_create(struct pipe_screen *, void *); @@ -163,6 +152,7 @@ void nvc0_bufctx_del_resident(struct nvc0_context *, int ctx, static INLINE void nvc0_bufctx_reset(struct nvc0_context *nvc0, int ctx) { + nvc0->residents_size -= nvc0->residents[ctx].size; util_dynarray_resize(&nvc0->residents[ctx], 0); } @@ -207,6 +197,11 @@ nvc0_create_sampler_view(struct pipe_context *, /* nvc0_transfer.c */ void +nvc0_m2mf_transfer_rect(struct pipe_screen *pscreen, + const struct nv50_m2mf_rect *dst, + const struct nv50_m2mf_rect *src, + uint32_t nblocksx, uint32_t nblocksy); +void nvc0_m2mf_push_linear(struct nouveau_context *nv, struct nouveau_bo *dst, unsigned offset, unsigned domain, unsigned size, void *data); diff --git a/src/gallium/drivers/nvc0/nvc0_formats.c b/src/gallium/drivers/nvc0/nvc0_formats.c index 81077a7fa80..8dd4419436d 100644 --- a/src/gallium/drivers/nvc0/nvc0_formats.c +++ b/src/gallium/drivers/nvc0/nvc0_formats.c @@ -66,33 +66,33 @@ const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = { /* COMMON FORMATS */ - [PIPE_FORMAT_B8G8R8A8_UNORM] = { NV50_SURFACE_FORMAT_A8R8G8B8_UNORM, + [PIPE_FORMAT_B8G8R8A8_UNORM] = { NV50_SURFACE_FORMAT_BGRA8_UNORM, A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, - [PIPE_FORMAT_B8G8R8X8_UNORM] = { NV50_SURFACE_FORMAT_X8R8G8B8_UNORM, + [PIPE_FORMAT_B8G8R8X8_UNORM] = { NV50_SURFACE_FORMAT_BGRX8_UNORM, A_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, - [PIPE_FORMAT_B8G8R8A8_SRGB] = { NV50_SURFACE_FORMAT_A8R8G8B8_SRGB, + [PIPE_FORMAT_B8G8R8A8_SRGB] = { NV50_SURFACE_FORMAT_BGRA8_SRGB, A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_B8G8R8X8_SRGB] = { NV50_SURFACE_FORMAT_X8R8G8B8_SRGB, + [PIPE_FORMAT_B8G8R8X8_SRGB] = { NV50_SURFACE_FORMAT_BGRX8_SRGB, A_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_B5G6R5_UNORM] = { NV50_SURFACE_FORMAT_R5G6B5_UNORM, + [PIPE_FORMAT_B5G6R5_UNORM] = { NV50_SURFACE_FORMAT_B5G6R5_UNORM, B_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 5_6_5, 1), SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, - [PIPE_FORMAT_B5G5R5A1_UNORM] = { NV50_SURFACE_FORMAT_A1R5G5B5_UNORM, - B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 1_5_5_5, 1), + [PIPE_FORMAT_B5G5R5A1_UNORM] = { NV50_SURFACE_FORMAT_BGR5_A1_UNORM, + B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 5_5_5_1, 1), SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, - [PIPE_FORMAT_B5G5R5X1_UNORM] = { 0, - B_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 1_5_5_5, 1), - SAMPLER_VIEW | SCANOUT }, + [PIPE_FORMAT_B5G5R5X1_UNORM] = { NV50_SURFACE_FORMAT_BGR5_X1_UNORM, + B_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 5_5_5_1, 1), + SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, [PIPE_FORMAT_B4G4R4A4_UNORM] = { 0, B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 4_4_4_4, 1), @@ -102,12 +102,12 @@ const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = B_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 4_4_4_4, 1), SAMPLER_VIEW }, - [PIPE_FORMAT_R10G10B10A2_UNORM] = { NV50_SURFACE_FORMAT_A2B10G10R10_UNORM, - A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 0), + [PIPE_FORMAT_R10G10B10A2_UNORM] = { NV50_SURFACE_FORMAT_RGB10_A2_UNORM, + A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 10_10_10_2, 0), SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER | SCANOUT }, - [PIPE_FORMAT_B10G10R10A2_UNORM] = { NV50_SURFACE_FORMAT_A2R10G10B10_UNORM, - A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 1), + [PIPE_FORMAT_B10G10R10A2_UNORM] = { NV50_SURFACE_FORMAT_BGR10_A2_UNORM, + A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 10_10_10_2, 1), SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER }, /* DEPTH/STENCIL FORMATS */ @@ -116,25 +116,24 @@ const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = B_(C0, C0, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, Z16, 0), SAMPLER_VIEW | DEPTH_STENCIL }, - [PIPE_FORMAT_Z24_UNORM_S8_USCALED] = { NV50_ZETA_FORMAT_S8Z24_UNORM, - B_(C0, C0, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, S8Z24, 0), + [PIPE_FORMAT_Z24_UNORM_S8_USCALED] = { NV50_ZETA_FORMAT_Z24_S8_UNORM, + B_(C0, C0, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, Z24_S8, 0), SAMPLER_VIEW | DEPTH_STENCIL }, - [PIPE_FORMAT_Z24X8_UNORM] = { NV50_ZETA_FORMAT_X8Z24_UNORM, - B_(C0, C0, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, X8Z24, 0), + [PIPE_FORMAT_Z24X8_UNORM] = { NV50_ZETA_FORMAT_Z24_X8_UNORM, + B_(C0, C0, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, Z24_X8, 0), SAMPLER_VIEW | DEPTH_STENCIL }, - [PIPE_FORMAT_S8_USCALED_Z24_UNORM] = { NV50_ZETA_FORMAT_Z24S8_UNORM, - B_(C1, C1, C1, ONE_FLOAT, UINT, UNORM, UINT, UINT, Z24S8, 0), + [PIPE_FORMAT_S8_USCALED_Z24_UNORM] = { NV50_ZETA_FORMAT_S8_Z24_UNORM, + B_(C1, C1, C1, ONE_FLOAT, UINT, UNORM, UINT, UINT, S8_Z24, 0), SAMPLER_VIEW | DEPTH_STENCIL }, [PIPE_FORMAT_Z32_FLOAT] = { NV50_ZETA_FORMAT_Z32_FLOAT, B_(C0, C0, C0, ONE_FLOAT, FLOAT, UINT, UINT, UINT, Z32, 0), SAMPLER_VIEW | DEPTH_STENCIL }, - [PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED] = { - NV50_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM, - B_(C0, C0, C0, ONE_FLOAT, FLOAT, UINT, UINT, UINT, X24S8Z32, 0), + [PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED] = { NV50_ZETA_FORMAT_Z32_S8_X24_FLOAT, + B_(C0, C0, C0, ONE_FLOAT, FLOAT, UINT, UINT, UINT, Z32_S8_X24, 0), SAMPLER_VIEW | DEPTH_STENCIL }, /* LUMINANCE, ALPHA, INTENSITY */ @@ -283,15 +282,15 @@ const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = /* FLOAT 16 */ - [PIPE_FORMAT_R16G16B16A16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16B16A16_FLOAT, + [PIPE_FORMAT_R16G16B16A16_FLOAT] = { NV50_SURFACE_FORMAT_RGBA16_FLOAT, A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_R16G16B16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16B16X16_FLOAT, + [PIPE_FORMAT_R16G16B16_FLOAT] = { NV50_SURFACE_FORMAT_RGBX16_FLOAT, A_(C0, C1, C2, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16, 0), - VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + VERTEX_BUFFER }, - [PIPE_FORMAT_R16G16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16_FLOAT, + [PIPE_FORMAT_R16G16_FLOAT] = { NV50_SURFACE_FORMAT_RG16_FLOAT, A_(C0, C1, ZERO, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, @@ -301,15 +300,15 @@ const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = /* FLOAT 32 */ - [PIPE_FORMAT_R32G32B32A32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT, + [PIPE_FORMAT_R32G32B32A32_FLOAT] = { NV50_SURFACE_FORMAT_RGBA32_FLOAT, A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_R32G32B32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32B32X32_FLOAT, + [PIPE_FORMAT_R32G32B32_FLOAT] = { NV50_SURFACE_FORMAT_RGBX32_FLOAT, A_(C0, C1, C2, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32, 0), - VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + VERTEX_BUFFER }, - [PIPE_FORMAT_R32G32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32_FLOAT, + [PIPE_FORMAT_R32G32_FLOAT] = { NV50_SURFACE_FORMAT_RG32_FLOAT, A_(C0, C1, ZERO, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 32_32, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, @@ -319,12 +318,12 @@ const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = /* ODD FORMATS */ - [PIPE_FORMAT_R11G11B10_FLOAT] = { NV50_SURFACE_FORMAT_B10G11R11_FLOAT, - B_(C0, C1, C2, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 10_11_11, 0), + [PIPE_FORMAT_R11G11B10_FLOAT] = { NV50_SURFACE_FORMAT_R11G11B10_FLOAT, + B_(C0, C1, C2, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 11_11_10, 0), SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER }, [PIPE_FORMAT_R9G9B9E5_FLOAT] = { 0, - B_(C0, C1, C2, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, E5_9_9_9, 0), + B_(C0, C1, C2, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 9_9_9_E5, 0), SAMPLER_VIEW }, /* SNORM 32 */ @@ -335,7 +334,7 @@ const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = [PIPE_FORMAT_R32G32B32_SNORM] = { 0, A_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 32_32_32, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + VERTEX_BUFFER }, [PIPE_FORMAT_R32G32_SNORM] = { 0, A_(C0, C1, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 32_32, 0), @@ -353,7 +352,7 @@ const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = [PIPE_FORMAT_R32G32B32_UNORM] = { 0, A_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 32_32_32, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + VERTEX_BUFFER }, [PIPE_FORMAT_R32G32_UNORM] = { 0, A_(C0, C1, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 32_32, 0), @@ -365,15 +364,15 @@ const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = /* SNORM 16 */ - [PIPE_FORMAT_R16G16B16A16_SNORM] = { NV50_SURFACE_FORMAT_R16G16B16A16_SNORM, + [PIPE_FORMAT_R16G16B16A16_SNORM] = { NV50_SURFACE_FORMAT_RGBA16_SNORM, A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16_16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, [PIPE_FORMAT_R16G16B16_SNORM] = { 0, A_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 16_16_16, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + VERTEX_BUFFER }, - [PIPE_FORMAT_R16G16_SNORM] = { NV50_SURFACE_FORMAT_R16G16_SNORM, + [PIPE_FORMAT_R16G16_SNORM] = { NV50_SURFACE_FORMAT_RG16_SNORM, A_(C0, C1, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, @@ -383,15 +382,15 @@ const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = /* UNORM 16 */ - [PIPE_FORMAT_R16G16B16A16_UNORM] = { NV50_SURFACE_FORMAT_R16G16B16A16_UNORM, + [PIPE_FORMAT_R16G16B16A16_UNORM] = { NV50_SURFACE_FORMAT_RGBA16_UNORM, A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16_16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, [PIPE_FORMAT_R16G16B16_UNORM] = { 0, A_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 16_16_16, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + VERTEX_BUFFER }, - [PIPE_FORMAT_R16G16_UNORM] = { NV50_SURFACE_FORMAT_R16G16_UNORM, + [PIPE_FORMAT_R16G16_UNORM] = { NV50_SURFACE_FORMAT_RG16_UNORM, A_(C0, C1, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, @@ -401,15 +400,15 @@ const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = /* SNORM 8 */ - [PIPE_FORMAT_R8G8B8A8_SNORM] = { NV50_SURFACE_FORMAT_A8B8G8R8_SNORM, + [PIPE_FORMAT_R8G8B8A8_SNORM] = { NV50_SURFACE_FORMAT_RGBA8_SNORM, A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 8_8_8_8, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, [PIPE_FORMAT_R8G8B8_SNORM] = { 0, A_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 8_8_8, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + VERTEX_BUFFER }, - [PIPE_FORMAT_R8G8_SNORM] = { NV50_SURFACE_FORMAT_R8G8_SNORM, + [PIPE_FORMAT_R8G8_SNORM] = { NV50_SURFACE_FORMAT_RG8_SNORM, A_(C0, C1, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 8_8, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, @@ -419,23 +418,23 @@ const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = /* UNORM 8 */ - [PIPE_FORMAT_R8G8B8A8_UNORM] = { NV50_SURFACE_FORMAT_A8B8G8R8_UNORM, + [PIPE_FORMAT_R8G8B8A8_UNORM] = { NV50_SURFACE_FORMAT_RGBA8_UNORM, A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_R8G8B8A8_SRGB] = { NV50_SURFACE_FORMAT_A8B8G8R8_SRGB, - A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0), + [PIPE_FORMAT_R8G8B8A8_SRGB] = { NV50_SURFACE_FORMAT_RGBA8_SRGB, + B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0), SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_R8G8B8_UNORM] = { NV50_SURFACE_FORMAT_X8B8G8R8_UNORM, - A_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0), - VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + [PIPE_FORMAT_R8G8B8X8_UNORM] = { NV50_SURFACE_FORMAT_RGBX8_UNORM, + B_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0), + SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_R8G8B8_SRGB] = { NV50_SURFACE_FORMAT_X8B8G8R8_SRGB, + [PIPE_FORMAT_R8G8B8_UNORM] = { 0, A_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0), - SAMPLER_VIEW | RENDER_TARGET }, + VERTEX_BUFFER }, - [PIPE_FORMAT_R8G8_UNORM] = { NV50_SURFACE_FORMAT_R8G8_UNORM, + [PIPE_FORMAT_R8G8_UNORM] = { NV50_SURFACE_FORMAT_RG8_UNORM, A_(C0, C1, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, @@ -554,11 +553,11 @@ const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = /* OTHER FORMATS */ [PIPE_FORMAT_R8G8_B8G8_UNORM] = { 0, - B_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, C1_C2_C1_C0, 0), + B_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, U8_YA8_V8_YB8, 0), SAMPLER_VIEW }, [PIPE_FORMAT_G8R8_G8B8_UNORM] = { 0, - B_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, C2_C1_C0_C1, 0), + B_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, YA8_U8_YB8_V8, 0), SAMPLER_VIEW }, [PIPE_FORMAT_R8SG8SB8UX8U_NORM] = { 0, @@ -566,11 +565,11 @@ const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = SAMPLER_VIEW }, [PIPE_FORMAT_R5SG5SB6U_NORM] = { 0, - B_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 6_5_5, 0), + B_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 5_5_6, 0), SAMPLER_VIEW }, [PIPE_FORMAT_R1_UNORM] = { 0, - B_(C0, ZERO, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, BITMAP_8X8, 0), + B_(C0, ZERO, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, BITMAP, 0), SAMPLER_VIEW }, [PIPE_FORMAT_A8B8G8R8_UNORM] = { 0, diff --git a/src/gallium/drivers/nvc0/nvc0_miptree.c b/src/gallium/drivers/nvc0/nvc0_miptree.c index bced3245524..a72ddf306cd 100644 --- a/src/gallium/drivers/nvc0/nvc0_miptree.c +++ b/src/gallium/drivers/nvc0/nvc0_miptree.c @@ -27,10 +27,9 @@ #include "nvc0_context.h" #include "nvc0_resource.h" -#include "nvc0_transfer.h" -static INLINE uint32_t -get_tile_dims(unsigned nx, unsigned ny, unsigned nz) +uint32_t +nvc0_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz) { uint32_t tile_mode = 0x000; @@ -57,66 +56,188 @@ get_tile_dims(unsigned nx, unsigned ny, unsigned nz) return tile_mode | 0x100; } -uint32_t -nvc0_miptree_zslice_offset(struct nvc0_miptree *mt, unsigned l, unsigned z) +static uint32_t +nvc0_mt_choose_storage_type(struct nv50_miptree *mt, boolean compressed) { - unsigned nblocksy; /* height of texture level aligned to tile height */ + const unsigned ms = util_logbase2(mt->base.base.nr_samples); - unsigned stride_2d; /* to next slice within a 3D tile */ - unsigned stride_3d; /* to slice in the next (in z direction !) 3D tile */ + uint32_t tile_flags; - unsigned tile_d_shift = NVC0_TILE_DIM_SHIFT(mt->level[l].tile_mode, 2); - unsigned tile_d = 1 << tile_d_shift; + compressed = FALSE; /* not yet supported */ - nblocksy = util_format_get_nblocksy(mt->base.base.format, - u_minify(mt->base.base.height0, l)); + if (mt->base.base.bind & PIPE_BIND_CURSOR) + return NOUVEAU_BO_TILE_SCANOUT; - nblocksy = align(nblocksy, NVC0_TILE_HEIGHT(mt->level[l].tile_mode)); + switch (mt->base.base.format) { + case PIPE_FORMAT_Z16_UNORM: + if (compressed) + tile_flags = 0x0200 + (ms << 8); + else + tile_flags = 0x0100; + break; + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + if (compressed) + tile_flags = 0x5100 + (ms << 8); + else + tile_flags = 0x4600; + break; + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + if (compressed) + tile_flags = 0x1700 + (ms << 8); + else + tile_flags = 0x1100; + break; + case PIPE_FORMAT_Z32_FLOAT: + if (compressed) + tile_flags = 0x8600 + (ms << 8); + else + tile_flags = 0x7b00; + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + if (compressed) + tile_flags = 0xce00 + (ms << 8); + else + tile_flags = 0xc300; + break; + default: + switch (util_format_get_blocksizebits(mt->base.base.format)) { + case 128: + if (compressed) + tile_flags = 0xf400 + (ms << 9); + else + tile_flags = 0xfe00; + break; + case 64: + if (compressed) { + switch (ms) { + case 0: tile_flags = 0xe600; break; + case 1: tile_flags = 0xeb00; break; + case 2: tile_flags = 0xed00; break; + case 3: tile_flags = 0xf200; break; + default: + return 0; + } + } else { + tile_flags = 0xfe00; + } + break; + case 32: + if (compressed) { + switch (ms) { + case 0: tile_flags = 0xdb00; break; + case 1: tile_flags = 0xdd00; break; + case 2: tile_flags = 0xdf00; break; + case 3: tile_flags = 0xe400; break; + default: + return 0; + } + } else { + tile_flags = 0xfe00; + } + break; + case 16: + case 8: + tile_flags = 0xfe00; + break; + default: + return 0; + } + break; + } - stride_2d = NVC0_TILE_SIZE_2D(mt->level[l].tile_mode); + if (mt->base.base.bind & PIPE_BIND_SCANOUT) + tile_flags |= NOUVEAU_BO_TILE_SCANOUT; - stride_3d = (nblocksy * mt->level[l].pitch) << tile_d_shift; + return tile_flags; +} - return (z & (tile_d - 1)) * stride_2d + (z >> tile_d_shift) * stride_3d; +static INLINE boolean +nvc0_miptree_init_ms_mode(struct nv50_miptree *mt) +{ + switch (mt->base.base.nr_samples) { + case 8: + mt->ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS8; + mt->ms_x = 2; + mt->ms_y = 1; + break; + case 4: + mt->ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS4; + mt->ms_x = 1; + mt->ms_y = 1; + break; + case 2: + mt->ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS2; + mt->ms_x = 1; + break; + case 1: + case 0: + mt->ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1; + break; + default: + NOUVEAU_ERR("invalid nr_samples: %u\n", mt->base.base.nr_samples); + return FALSE; + } + return TRUE; } +boolean +nv50_miptree_init_layout_linear(struct nv50_miptree *); + static void -nvc0_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt) +nvc0_miptree_init_layout_tiled(struct nv50_miptree *mt) { - struct nvc0_miptree *mt = nvc0_miptree(pt); + struct pipe_resource *pt = &mt->base.base; + unsigned w, h, d, l; + const unsigned blocksize = util_format_get_blocksize(pt->format); - nouveau_screen_bo_release(pscreen, mt->base.bo); + mt->layout_3d = pt->target == PIPE_TEXTURE_3D; - FREE(mt); -} + w = pt->width0 << mt->ms_x; + h = pt->height0 << mt->ms_y; -static boolean -nvc0_miptree_get_handle(struct pipe_screen *pscreen, - struct pipe_resource *pt, - struct winsys_handle *whandle) -{ - struct nvc0_miptree *mt = nvc0_miptree(pt); - unsigned stride; + /* For 3D textures, a mipmap is spanned by all the layers, for array + * textures and cube maps, each layer contains its own mipmaps. + */ + d = mt->layout_3d ? pt->depth0 : 1; - if (!mt || !mt->base.bo) - return FALSE; + for (l = 0; l <= pt->last_level; ++l) { + struct nv50_miptree_level *lvl = &mt->level[l]; + unsigned tsx, tsy, tsz; + unsigned nbx = util_format_get_nblocksx(pt->format, w); + unsigned nby = util_format_get_nblocksy(pt->format, h); + + lvl->offset = mt->total_size; + + lvl->tile_mode = nvc0_tex_choose_tile_dims(nbx, nby, d); + + tsx = NVC0_TILE_SIZE_X(lvl->tile_mode); /* x is tile row pitch in bytes */ + tsy = NVC0_TILE_SIZE_Y(lvl->tile_mode); + tsz = NVC0_TILE_SIZE_Z(lvl->tile_mode); - stride = util_format_get_stride(mt->base.base.format, - mt->base.base.width0); + lvl->pitch = align(nbx * blocksize, tsx); - return nouveau_screen_bo_get_handle(pscreen, - mt->base.bo, - stride, - whandle); + mt->total_size += lvl->pitch * align(nby, tsy) * align(d, tsz); + + w = u_minify(w, 1); + h = u_minify(h, 1); + d = u_minify(d, 1); + } + + if (pt->array_size > 1) { + mt->layer_stride = align(mt->total_size, + NVC0_TILE_SIZE(mt->level[0].tile_mode)); + mt->total_size = mt->layer_stride * pt->array_size; + } } const struct u_resource_vtbl nvc0_miptree_vtbl = { - nvc0_miptree_get_handle, /* get_handle */ - nvc0_miptree_destroy, /* resource_destroy */ + nv50_miptree_get_handle, /* get_handle */ + nv50_miptree_destroy, /* resource_destroy */ nvc0_miptree_transfer_new, /* get_transfer */ nvc0_miptree_transfer_del, /* transfer_destroy */ - nvc0_miptree_transfer_map, /* transfer_map */ + nvc0_miptree_transfer_map, /* transfer_map */ u_default_transfer_flush_region, /* transfer_flush_region */ nvc0_miptree_transfer_unmap, /* transfer_unmap */ u_default_transfer_inline_write /* transfer_inline_write */ @@ -127,10 +248,9 @@ nvc0_miptree_create(struct pipe_screen *pscreen, const struct pipe_resource *templ) { struct nouveau_device *dev = nouveau_screen(pscreen)->device; - struct nvc0_miptree *mt = CALLOC_STRUCT(nvc0_miptree); + struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree); struct pipe_resource *pt = &mt->base.base; int ret; - unsigned w, h, d, l, alloc_size; uint32_t tile_flags; if (!mt) @@ -141,84 +261,23 @@ nvc0_miptree_create(struct pipe_screen *pscreen, pipe_reference_init(&pt->reference, 1); pt->screen = pscreen; - mt->layout_3d = pt->target == PIPE_TEXTURE_3D; - - w = pt->width0; - h = pt->height0; - d = mt->layout_3d ? pt->depth0 : 1; - - switch (pt->format) { - case PIPE_FORMAT_Z16_UNORM: - tile_flags = 0x0700; /* COMPRESSED */ - tile_flags = 0x0100; /* NORMAL */ - break; - case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - tile_flags = 0x5300; /* MSAA 4, COMPRESSED */ - tile_flags = 0x4600; /* NORMAL */ - break; - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - tile_flags = 0x1100; /* NORMAL */ - if (w * h >= 128 * 128 && 0) - tile_flags = 0x1700; /* COMPRESSED, requires magic */ - break; - case PIPE_FORMAT_R32G32B32A32_FLOAT: - tile_flags = 0xf500; /* COMPRESSED */ - tile_flags = 0xf700; /* MSAA 2 */ - tile_flags = 0xf900; /* MSAA 4 */ - tile_flags = 0xfe00; /* NORMAL */ - break; - case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: - tile_flags = 0xce00; /* COMPRESSED */ - tile_flags = 0xcf00; /* MSAA 2, COMPRESSED */ - tile_flags = 0xd000; /* MSAA 4, COMPRESSED */ - tile_flags = 0xc300; /* NORMAL */ - break; - case PIPE_FORMAT_R16G16B16A16_UNORM: - tile_flags = 0xe900; /* COMPRESSED */ - tile_flags = 0xfe00; /* NORMAL */ - break; - default: - tile_flags = 0xe000; /* MSAA 4, COMPRESSED 32 BIT */ - tile_flags = 0xfe00; /* NORMAL 32 BIT */ - if (w * h >= 128 * 128 && 0) - tile_flags = 0xdb00; /* COMPRESSED 32 BIT, requires magic */ - break; - } - - /* For 3D textures, a mipmap is spanned by all the layers, for array - * textures and cube maps, each layer contains its own mipmaps. - */ - for (l = 0; l <= pt->last_level; ++l) { - struct nvc0_miptree_level *lvl = &mt->level[l]; - unsigned nbx = util_format_get_nblocksx(pt->format, w); - unsigned nby = util_format_get_nblocksy(pt->format, h); - unsigned blocksize = util_format_get_blocksize(pt->format); - - lvl->offset = mt->total_size; - lvl->tile_mode = get_tile_dims(nbx, nby, d); - lvl->pitch = align(nbx * blocksize, NVC0_TILE_PITCH(lvl->tile_mode)); + tile_flags = nvc0_mt_choose_storage_type(mt, TRUE); - mt->total_size += lvl->pitch * - align(nby, NVC0_TILE_HEIGHT(lvl->tile_mode)) * - align(d, NVC0_TILE_DEPTH(lvl->tile_mode)); - - w = u_minify(w, 1); - h = u_minify(h, 1); - d = u_minify(d, 1); + if (!nvc0_miptree_init_ms_mode(mt)) { + FREE(mt); + return NULL; } - if (pt->array_size > 1) { - mt->layer_stride = align(mt->total_size, - NVC0_TILE_SIZE(mt->level[0].tile_mode)); - mt->total_size = mt->layer_stride * pt->array_size; + if (tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK) { + nvc0_miptree_init_layout_tiled(mt); + } else + if (!nv50_miptree_init_layout_linear(mt)) { + FREE(mt); + return NULL; } - alloc_size = mt->total_size; - if (tile_flags == 0x1700) - alloc_size *= 3; /* HiZ, XXX: correct size */ - - ret = nouveau_bo_new_tile(dev, NOUVEAU_BO_VRAM, 256, alloc_size, + ret = nouveau_bo_new_tile(dev, NOUVEAU_BO_VRAM, 4096, + mt->total_size, mt->level[0].tile_mode, tile_flags, &mt->base.bo); if (ret) { @@ -230,44 +289,26 @@ nvc0_miptree_create(struct pipe_screen *pscreen, return pt; } -struct pipe_resource * -nvc0_miptree_from_handle(struct pipe_screen *pscreen, - const struct pipe_resource *templ, - struct winsys_handle *whandle) +/* Offset of zslice @z from start of level @l. */ +INLINE unsigned +nvc0_mt_zslice_offset(const struct nv50_miptree *mt, unsigned l, unsigned z) { - struct nvc0_miptree *mt; - unsigned stride; - - /* only supports 2D, non-mipmapped textures for the moment */ - if ((templ->target != PIPE_TEXTURE_2D && - templ->target != PIPE_TEXTURE_RECT) || - templ->last_level != 0 || - templ->depth0 != 1 || - templ->array_size > 1) - return NULL; + const struct pipe_resource *pt = &mt->base.base; - mt = CALLOC_STRUCT(nvc0_miptree); - if (!mt) - return NULL; + unsigned tds = NVC0_TILE_SHIFT_Z(mt->level[l].tile_mode); + unsigned ths = NVC0_TILE_SHIFT_Y(mt->level[l].tile_mode); - mt->base.bo = nouveau_screen_bo_from_handle(pscreen, whandle, &stride); - if (mt->base.bo == NULL) { - FREE(mt); - return NULL; - } + unsigned nby = util_format_get_nblocksy(pt->format, + u_minify(pt->height0, l)); - mt->base.base = *templ; - mt->base.vtbl = &nvc0_miptree_vtbl; - pipe_reference_init(&mt->base.base.reference, 1); - mt->base.base.screen = pscreen; - mt->level[0].pitch = stride; - mt->level[0].offset = 0; - mt->level[0].tile_mode = mt->base.bo->tile_mode; - - /* no need to adjust bo reference count */ - return &mt->base.base; -} + /* to next 2D tile slice within a 3D tile */ + unsigned stride_2d = NVC0_TILE_SIZE_2D(mt->level[l].tile_mode); + + /* to slice in the next (in z direction) 3D tile */ + unsigned stride_3d = (align(nby, (1 << ths)) * mt->level[l].pitch) << tds; + return (z & (1 << (tds - 1))) * stride_2d + (z >> tds) * stride_3d; +} /* Surface functions. */ @@ -277,43 +318,9 @@ nvc0_miptree_surface_new(struct pipe_context *pipe, struct pipe_resource *pt, const struct pipe_surface *templ) { - struct nvc0_miptree *mt = nvc0_miptree(pt); /* guaranteed */ - struct nvc0_surface *ns; - struct pipe_surface *ps; - struct nvc0_miptree_level *lvl = &mt->level[templ->u.tex.level]; - - ns = CALLOC_STRUCT(nvc0_surface); + struct nv50_surface *ns = nv50_surface_from_miptree(nv50_miptree(pt), templ); if (!ns) return NULL; - ps = &ns->base; - - pipe_reference_init(&ps->reference, 1); - pipe_resource_reference(&ps->texture, pt); - ps->context = pipe; - ps->format = templ->format; - ps->usage = templ->usage; - ps->u.tex.level = templ->u.tex.level; - ps->u.tex.first_layer = templ->u.tex.first_layer; - ps->u.tex.last_layer = templ->u.tex.last_layer; - - ns->width = u_minify(pt->width0, ps->u.tex.level); - ns->height = u_minify(pt->height0, ps->u.tex.level); - ns->depth = ps->u.tex.last_layer - ps->u.tex.first_layer + 1; - ns->offset = lvl->offset; - - /* comment says there are going to be removed, but they're used by the st */ - ps->width = ns->width; - ps->height = ns->height; - - return ps; -} - -void -nvc0_miptree_surface_del(struct pipe_context *pipe, struct pipe_surface *ps) -{ - struct nvc0_surface *s = nvc0_surface(ps); - - pipe_resource_reference(&ps->texture, NULL); - - FREE(s); + ns->base.context = pipe; + return &ns->base; } diff --git a/src/gallium/drivers/nvc0/nvc0_resource.c b/src/gallium/drivers/nvc0/nvc0_resource.c index 44e66314e7e..a0289728ee9 100644 --- a/src/gallium/drivers/nvc0/nvc0_resource.c +++ b/src/gallium/drivers/nvc0/nvc0_resource.c @@ -21,10 +21,14 @@ nvc0_resource_from_handle(struct pipe_screen * screen, const struct pipe_resource *templ, struct winsys_handle *whandle) { - if (templ->target == PIPE_BUFFER) + if (templ->target == PIPE_BUFFER) { return NULL; - else - return nvc0_miptree_from_handle(screen, templ, whandle); + } else { + struct pipe_resource *res = nv50_miptree_from_handle(screen, + templ, whandle); + nv04_resource(res)->vtbl = &nvc0_miptree_vtbl; + return res; + } } void @@ -37,7 +41,7 @@ nvc0_init_resource_functions(struct pipe_context *pcontext) pcontext->transfer_destroy = u_transfer_destroy_vtbl; pcontext->transfer_inline_write = u_transfer_inline_write_vtbl; pcontext->create_surface = nvc0_miptree_surface_new; - pcontext->surface_destroy = nvc0_miptree_surface_del; + pcontext->surface_destroy = nv50_miptree_surface_del; } void diff --git a/src/gallium/drivers/nvc0/nvc0_resource.h b/src/gallium/drivers/nvc0/nvc0_resource.h index f1c445b5152..6d946c83834 100644 --- a/src/gallium/drivers/nvc0/nvc0_resource.h +++ b/src/gallium/drivers/nvc0/nvc0_resource.h @@ -2,53 +2,29 @@ #ifndef __NVC0_RESOURCE_H__ #define __NVC0_RESOURCE_H__ -#include "util/u_transfer.h" -#include "util/u_double_list.h" -#define NOUVEAU_NVC0 -#include "nouveau/nouveau_winsys.h" -#include "nouveau/nouveau_fence.h" -#include "nouveau/nouveau_buffer.h" -#undef NOUVEAU_NVC0 +#include "nv50/nv50_resource.h" -void -nvc0_init_resource_functions(struct pipe_context *pcontext); -void -nvc0_screen_init_resource_functions(struct pipe_screen *pscreen); +#define NVC0_TILE_SHIFT_X(m) ((((m) >> 0) & 0xf) + 6) +#define NVC0_TILE_SHIFT_Y(m) ((((m) >> 4) & 0xf) + 3) +#define NVC0_TILE_SHIFT_Z(m) ((((m) >> 8) & 0xf) + 0) -#define NVC0_TILE_DIM_SHIFT(m, d) (((m) >> (d * 4)) & 0xf) +#define NVC0_TILE_SIZE_X(m) (64 << (((m) >> 0) & 0xf)) +#define NVC0_TILE_SIZE_Y(m) ( 8 << (((m) >> 4) & 0xf)) +#define NVC0_TILE_SIZE_Z(m) ( 1 << (((m) >> 8) & 0xf)) -#define NVC0_TILE_PITCH(m) (64 << NVC0_TILE_DIM_SHIFT(m, 0)) -#define NVC0_TILE_HEIGHT(m) ( 8 << NVC0_TILE_DIM_SHIFT(m, 1)) -#define NVC0_TILE_DEPTH(m) ( 1 << NVC0_TILE_DIM_SHIFT(m, 2)) +/* it's ok to mask only in the end because max value is 3 * 5 */ -#define NVC0_TILE_SIZE_2D(m) (((64 * 8) << \ - NVC0_TILE_DIM_SHIFT(m, 0)) << \ - NVC0_TILE_DIM_SHIFT(m, 1)) +#define NVC0_TILE_SIZE_2D(m) ((64 * 8) << (((m) + ((m) >> 4)) & 0xf)) -#define NVC0_TILE_SIZE(m) (NVC0_TILE_SIZE_2D(m) << NVC0_TILE_DIM_SHIFT(m, 2)) +#define NVC0_TILE_SIZE(m) ((64 * 8) << (((m) + ((m) >> 4) + ((m) >> 8)) & 0xf)) -struct nvc0_miptree_level { - uint32_t offset; - uint32_t pitch; - uint32_t tile_mode; -}; -#define NVC0_MAX_TEXTURE_LEVELS 16 - -struct nvc0_miptree { - struct nv04_resource base; - struct nvc0_miptree_level level[NVC0_MAX_TEXTURE_LEVELS]; - uint32_t total_size; - uint32_t layer_stride; - boolean layout_3d; /* TRUE if layer count varies with mip level */ -}; +void +nvc0_init_resource_functions(struct pipe_context *pcontext); -static INLINE struct nvc0_miptree * -nvc0_miptree(struct pipe_resource *pt) -{ - return (struct nvc0_miptree *)pt; -} +void +nvc0_screen_init_resource_functions(struct pipe_screen *pscreen); /* Internal functions: */ @@ -56,20 +32,30 @@ struct pipe_resource * nvc0_miptree_create(struct pipe_screen *pscreen, const struct pipe_resource *tmp); -struct pipe_resource * -nvc0_miptree_from_handle(struct pipe_screen *pscreen, - const struct pipe_resource *template, - struct winsys_handle *whandle); +const struct u_resource_vtbl nvc0_miptree_vtbl; struct pipe_surface * nvc0_miptree_surface_new(struct pipe_context *, struct pipe_resource *, const struct pipe_surface *templ); -void -nvc0_miptree_surface_del(struct pipe_context *, struct pipe_surface *); +unsigned +nvc0_mt_zslice_offset(const struct nv50_miptree *, unsigned l, unsigned z); -uint32_t -nvc0_miptree_zslice_offset(struct nvc0_miptree *, unsigned l, unsigned z); +struct pipe_transfer * +nvc0_miptree_transfer_new(struct pipe_context *pcontext, + struct pipe_resource *pt, + unsigned level, + unsigned usage, + const struct pipe_box *box); +void +nvc0_miptree_transfer_del(struct pipe_context *pcontext, + struct pipe_transfer *ptx); +void * +nvc0_miptree_transfer_map(struct pipe_context *pcontext, + struct pipe_transfer *ptx); +void +nvc0_miptree_transfer_unmap(struct pipe_context *pcontext, + struct pipe_transfer *ptx); #endif diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index 52143981500..c79256a6ba2 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -24,6 +24,9 @@ #include "util/u_format_s3tc.h" #include "pipe/p_screen.h" +#include "vl/vl_decoder.h" +#include "vl/vl_video_buffer.h" + #include "nvc0_context.h" #include "nvc0_screen.h" @@ -37,7 +40,7 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen, unsigned sample_count, unsigned bindings) { - if (sample_count > 1) + if (sample_count > 2 && sample_count != 4 && sample_count != 8) return FALSE; if (!util_format_is_supported(format, bindings)) @@ -200,8 +203,11 @@ nvc0_screen_destroy(struct pipe_screen *pscreen) { struct nvc0_screen *screen = nvc0_screen(pscreen); - nouveau_fence_wait(screen->base.fence.current); - nouveau_fence_ref(NULL, &screen->base.fence.current); + if (screen->base.fence.current) { + nouveau_fence_wait(screen->base.fence.current); + nouveau_fence_ref(NULL, &screen->base.fence.current); + } + screen->base.channel->user_private = NULL; nouveau_bo_ref(NULL, &screen->text); nouveau_bo_ref(NULL, &screen->tls); @@ -360,6 +366,7 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) return NULL; } chan = screen->base.channel; + chan->user_private = screen; pscreen->winsys = ws; pscreen->destroy = nvc0_screen_destroy; @@ -371,6 +378,8 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) nvc0_screen_init_resource_functions(pscreen); + nouveau_screen_init_vdec(&screen->base); + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, &screen->fence.bo); if (ret) @@ -435,7 +444,7 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) BEGIN_RING(chan, RING_3D(MULTISAMPLE_ENABLE), 1); OUT_RING (chan, 0); BEGIN_RING(chan, RING_3D(MULTISAMPLE_MODE), 1); - OUT_RING (chan, NVC0_3D_MULTISAMPLE_MODE_1X); + OUT_RING (chan, NVC0_3D_MULTISAMPLE_MODE_MS1); BEGIN_RING(chan, RING_3D(MULTISAMPLE_CTRL), 1); OUT_RING (chan, 0); BEGIN_RING(chan, RING_3D(LINE_WIDTH_SEPARATE), 1); diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h index 94bf0cf3481..a3133b28876 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nvc0/nvc0_screen.h @@ -4,6 +4,7 @@ #define NOUVEAU_NVC0 #include "nouveau/nouveau_screen.h" #include "nouveau/nouveau_mm.h" +#include "nouveau/nouveau_fence.h" #undef NOUVEAU_NVC0 #include "nvc0_winsys.h" #include "nvc0_stateobj.h" @@ -16,6 +17,8 @@ struct nvc0_context; #define NVC0_SCRATCH_SIZE (2 << 20) #define NVC0_SCRATCH_NR_BUFFERS 2 +#define NVC0_SCREEN_RESIDENT_BO_COUNT 5 + struct nvc0_screen { struct nouveau_screen base; struct nouveau_winsys *nvws; diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c index b0b2065167e..9f9921cc09e 100644 --- a/src/gallium/drivers/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nvc0/nvc0_state.c @@ -88,6 +88,7 @@ nvc0_blend_state_create(struct pipe_context *pipe, { struct nvc0_blend_stateobj *so = CALLOC_STRUCT(nvc0_blend_stateobj); int i; + uint32_t ms; so->pipe = *cso; @@ -144,6 +145,15 @@ nvc0_blend_state_create(struct pipe_context *pipe, SB_DATA(so, nvc0_colormask(cso->rt[i].colormask)); } + ms = 0; + if (cso->alpha_to_coverage) + ms |= NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE; + if (cso->alpha_to_one) + ms |= NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE; + + SB_BEGIN_3D(so, MULTISAMPLE_CTRL, 1); + SB_DATA (so, ms); + assert(so->size <= (sizeof(so->state) / sizeof(so->state[0]))); return so; } @@ -190,6 +200,8 @@ nvc0_rasterizer_state_create(struct pipe_context *pipe, SB_BEGIN_3D(so, FRAG_COLOR_CLAMP_EN, 1); SB_DATA (so, cso->clamp_fragment_color ? 0x11111111 : 0x00000000); + SB_IMMED_3D(so, MULTISAMPLE_ENABLE, cso->multisample); + SB_IMMED_3D(so, LINE_SMOOTH_ENABLE, cso->line_smooth); if (cso->line_smooth) SB_BEGIN_3D(so, LINE_WIDTH_SMOOTH, 1); diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c index 9b2a28150b1..968d7a7bd49 100644 --- a/src/gallium/drivers/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c @@ -8,8 +8,8 @@ nvc0_validate_zcull(struct nvc0_context *nvc0) { struct nouveau_channel *chan = nvc0->screen->base.channel; struct pipe_framebuffer_state *fb = &nvc0->framebuffer; - struct nvc0_surface *sf = nvc0_surface(fb->zsbuf); - struct nvc0_miptree *mt = nvc0_miptree(sf->base.texture); + struct nv50_surface *sf = nv50_surface(fb->zsbuf); + struct nv50_miptree *mt = nv50_miptree(sf->base.texture); struct nouveau_bo *bo = mt->base.bo; uint32_t size; uint32_t offset = align(mt->total_size, 1 << 17); @@ -59,6 +59,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0) struct nouveau_channel *chan = nvc0->screen->base.channel; struct pipe_framebuffer_state *fb = &nvc0->framebuffer; unsigned i; + unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1; boolean serialize = FALSE; nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_FRAME); @@ -72,8 +73,8 @@ nvc0_validate_fb(struct nvc0_context *nvc0) MARK_RING(chan, 9 * fb->nr_cbufs, 2 * fb->nr_cbufs); for (i = 0; i < fb->nr_cbufs; ++i) { - struct nvc0_miptree *mt = nvc0_miptree(fb->cbufs[i]->texture); - struct nvc0_surface *sf = nvc0_surface(fb->cbufs[i]); + struct nv50_miptree *mt = nv50_miptree(fb->cbufs[i]->texture); + struct nv50_surface *sf = nv50_surface(fb->cbufs[i]); struct nouveau_bo *bo = mt->base.bo; uint32_t offset = sf->offset; @@ -89,6 +90,8 @@ nvc0_validate_fb(struct nvc0_context *nvc0) OUT_RING (chan, mt->layer_stride >> 2); OUT_RING (chan, sf->base.u.tex.first_layer); + ms_mode = mt->ms_mode; + if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING) serialize = TRUE; mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; @@ -100,8 +103,8 @@ nvc0_validate_fb(struct nvc0_context *nvc0) } if (fb->zsbuf) { - struct nvc0_miptree *mt = nvc0_miptree(fb->zsbuf->texture); - struct nvc0_surface *sf = nvc0_surface(fb->zsbuf); + struct nv50_miptree *mt = nv50_miptree(fb->zsbuf->texture); + struct nv50_surface *sf = nv50_surface(fb->zsbuf); struct nouveau_bo *bo = mt->base.bo; int unk = mt->base.base.target == PIPE_TEXTURE_2D; uint32_t offset = sf->offset; @@ -123,6 +126,8 @@ nvc0_validate_fb(struct nvc0_context *nvc0) BEGIN_RING(chan, RING_3D(ZETA_BASE_LAYER), 1); OUT_RING (chan, sf->base.u.tex.first_layer); + ms_mode = mt->ms_mode; + if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING) serialize = TRUE; mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; @@ -135,6 +140,8 @@ nvc0_validate_fb(struct nvc0_context *nvc0) OUT_RING (chan, 0); } + IMMED_RING(chan, RING_3D(MULTISAMPLE_MODE), ms_mode); + if (serialize) { BEGIN_RING(chan, RING_3D(SERIALIZE), 1); OUT_RING (chan, 0); @@ -390,6 +397,28 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) } static void +nvc0_validate_sample_mask(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + + unsigned mask[4] = + { + nvc0->sample_mask & 0xffff, + nvc0->sample_mask & 0xffff, + nvc0->sample_mask & 0xffff, + nvc0->sample_mask & 0xffff + }; + + BEGIN_RING(chan, RING_3D(MSAA_MASK(0)), 4); + OUT_RING (chan, mask[0]); + OUT_RING (chan, mask[1]); + OUT_RING (chan, mask[2]); + OUT_RING (chan, mask[3]); + BEGIN_RING(chan, RING_3D(SAMPLE_SHADING), 1); + OUT_RING (chan, 0x01); +} + +static void nvc0_validate_derived_1(struct nvc0_context *nvc0) { struct nouveau_channel *chan = nvc0->screen->base.channel; @@ -428,8 +457,7 @@ nvc0_switch_pipe_context(struct nvc0_context *ctx_to) if (!ctx_to->zsa) ctx_to->dirty &= ~NVC0_NEW_ZSA; - ctx_to->screen->base.channel->user_private = ctx_to->screen->cur_ctx = - ctx_to; + ctx_to->screen->cur_ctx = ctx_to; } static struct state_validate { @@ -439,6 +467,7 @@ static struct state_validate { { nvc0_validate_fb, NVC0_NEW_FRAMEBUFFER }, { nvc0_validate_blend, NVC0_NEW_BLEND }, { nvc0_validate_zsa, NVC0_NEW_ZSA }, + { nvc0_validate_sample_mask, NVC0_NEW_SAMPLE_MASK }, { nvc0_validate_rasterizer, NVC0_NEW_RASTERIZER }, { nvc0_validate_blend_colour, NVC0_NEW_BLEND_COLOUR }, { nvc0_validate_stencil_ref, NVC0_NEW_STENCIL_REF }, diff --git a/src/gallium/drivers/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nvc0/nvc0_stateobj.h index e0fe9df25d7..0686c52fd0c 100644 --- a/src/gallium/drivers/nvc0/nvc0_stateobj.h +++ b/src/gallium/drivers/nvc0/nvc0_stateobj.h @@ -19,13 +19,13 @@ struct nvc0_blend_stateobj { struct pipe_blend_state pipe; int size; - uint32_t state[70]; + uint32_t state[72]; }; struct nvc0_rasterizer_stateobj { struct pipe_rasterizer_state pipe; int size; - uint32_t state[38]; + uint32_t state[39]; }; struct nvc0_zsa_stateobj { diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c index 6be3702bddc..67bba3c6cc3 100644 --- a/src/gallium/drivers/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nvc0/nvc0_surface.c @@ -31,7 +31,6 @@ #include "nvc0_context.h" #include "nvc0_resource.h" -#include "nvc0_transfer.h" #include "nv50/nv50_defs.xml.h" @@ -63,11 +62,11 @@ nvc0_2d_format(enum pipe_format format) case 2: return NV50_SURFACE_FORMAT_R16_UNORM; case 4: - return NV50_SURFACE_FORMAT_A8R8G8B8_UNORM; + return NV50_SURFACE_FORMAT_BGRA8_UNORM; case 8: - return NV50_SURFACE_FORMAT_R16G16B16A16_UNORM; + return NV50_SURFACE_FORMAT_RGBA16_UNORM; case 16: - return NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT; + return NV50_SURFACE_FORMAT_RGBA32_FLOAT; default: return 0; } @@ -75,7 +74,7 @@ nvc0_2d_format(enum pipe_format format) static int nvc0_2d_texture_set(struct nouveau_channel *chan, int dst, - struct nvc0_miptree *mt, unsigned level, unsigned layer) + struct nv50_miptree *mt, unsigned level, unsigned layer) { struct nouveau_bo *bo = mt->base.bo; uint32_t width, height, depth; @@ -91,8 +90,8 @@ nvc0_2d_texture_set(struct nouveau_channel *chan, int dst, return 1; } - width = u_minify(mt->base.base.width0, level); - height = u_minify(mt->base.base.height0, level); + width = u_minify(mt->base.base.width0, level) << mt->ms_x; + height = u_minify(mt->base.base.height0, level) << mt->ms_y; depth = u_minify(mt->base.base.depth0, level); /* layer has to be < depth, and depth > tile depth / 2 */ @@ -103,7 +102,7 @@ nvc0_2d_texture_set(struct nouveau_channel *chan, int dst, depth = 1; } else if (!dst) { - offset += nvc0_miptree_zslice_offset(mt, level, layer); + offset += nvc0_mt_zslice_offset(mt, level, layer); layer = 0; } @@ -145,13 +144,19 @@ nvc0_2d_texture_set(struct nouveau_channel *chan, int dst, static int nvc0_2d_texture_do_copy(struct nouveau_channel *chan, - struct nvc0_miptree *dst, unsigned dst_level, + struct nv50_miptree *dst, unsigned dst_level, unsigned dx, unsigned dy, unsigned dz, - struct nvc0_miptree *src, unsigned src_level, + struct nv50_miptree *src, unsigned src_level, unsigned sx, unsigned sy, unsigned sz, unsigned w, unsigned h) { + static const uint32_t duvdxy[5] = + { + 0x40000000, 0x80000000, 0x00000001, 0x00000002, 0x00000004 + }; + int ret; + uint32_t ctrl = 0x00; ret = MARK_RING(chan, 2 * 16 + 32, 4); if (ret) @@ -165,66 +170,33 @@ nvc0_2d_texture_do_copy(struct nouveau_channel *chan, if (ret) return ret; - /* 0/1 = CENTER/CORNER, 10/00 = POINT/BILINEAR */ + /* NOTE: 2D engine doesn't work for MS8 */ + if (src->ms_x) + ctrl = 0x11; + + /* 0/1 = CENTER/CORNER, 00/10 = POINT/BILINEAR */ BEGIN_RING(chan, RING_2D(BLIT_CONTROL), 1); - OUT_RING (chan, 0); + OUT_RING (chan, ctrl); BEGIN_RING(chan, RING_2D(BLIT_DST_X), 4); - OUT_RING (chan, dx); - OUT_RING (chan, dy); - OUT_RING (chan, w); - OUT_RING (chan, h); + OUT_RING (chan, dx << dst->ms_x); + OUT_RING (chan, dy << dst->ms_y); + OUT_RING (chan, w << dst->ms_x); + OUT_RING (chan, h << dst->ms_y); BEGIN_RING(chan, RING_2D(BLIT_DU_DX_FRACT), 4); - OUT_RING (chan, 0); - OUT_RING (chan, 1); - OUT_RING (chan, 0); - OUT_RING (chan, 1); + OUT_RING (chan, duvdxy[2 + ((int)src->ms_x - (int)dst->ms_x)] & 0xf0000000); + OUT_RING (chan, duvdxy[2 + ((int)src->ms_x - (int)dst->ms_x)] & 0x0000000f); + OUT_RING (chan, duvdxy[2 + ((int)src->ms_y - (int)dst->ms_y)] & 0xf0000000); + OUT_RING (chan, duvdxy[2 + ((int)src->ms_y - (int)dst->ms_y)] & 0x0000000f); BEGIN_RING(chan, RING_2D(BLIT_SRC_X_FRACT), 4); OUT_RING (chan, 0); - OUT_RING (chan, sx); + OUT_RING (chan, sx << src->ms_x); OUT_RING (chan, 0); - OUT_RING (chan, sy); + OUT_RING (chan, sy << src->ms_x); return 0; } static void -nvc0_setup_m2mf_rect(struct nvc0_m2mf_rect *rect, - struct pipe_resource *restrict res, unsigned l, - unsigned x, unsigned y, unsigned z) -{ - struct nvc0_miptree *mt = nvc0_miptree(res); - const unsigned w = u_minify(res->width0, l); - const unsigned h = u_minify(res->height0, l); - - rect->bo = mt->base.bo; - rect->domain = mt->base.domain; - rect->base = mt->level[l].offset; - rect->pitch = mt->level[l].pitch; - if (util_format_is_plain(res->format)) { - rect->width = w; - rect->height = h; - rect->x = x; - rect->y = y; - } else { - rect->width = util_format_get_nblocksx(res->format, w); - rect->height = util_format_get_nblocksy(res->format, h); - rect->x = util_format_get_nblocksx(res->format, x); - rect->y = util_format_get_nblocksy(res->format, y); - } - rect->tile_mode = mt->level[l].tile_mode; - rect->cpp = util_format_get_blocksize(res->format); - - if (mt->layout_3d) { - rect->z = z; - rect->depth = u_minify(res->depth0, l); - } else { - rect->base += z * mt->layer_stride; - rect->z = 0; - rect->depth = 1; - } -} - -static void nvc0_resource_copy_region(struct pipe_context *pipe, struct pipe_resource *dst, unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz, @@ -244,28 +216,28 @@ nvc0_resource_copy_region(struct pipe_context *pipe, nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; - if (src->format == dst->format) { - struct nvc0_m2mf_rect drect, srect; + if (src->format == dst->format && src->nr_samples == dst->nr_samples) { + struct nv50_m2mf_rect drect, srect; unsigned i; unsigned nx = util_format_get_nblocksx(src->format, src_box->width); unsigned ny = util_format_get_nblocksy(src->format, src_box->height); - nvc0_setup_m2mf_rect(&drect, dst, dst_level, dstx, dsty, dstz); - nvc0_setup_m2mf_rect(&srect, src, src_level, + nv50_m2mf_rect_setup(&drect, dst, dst_level, dstx, dsty, dstz); + nv50_m2mf_rect_setup(&srect, src, src_level, src_box->x, src_box->y, src_box->z); for (i = 0; i < src_box->depth; ++i) { nvc0_m2mf_transfer_rect(&screen->base.base, &drect, &srect, nx, ny); - if (nvc0_miptree(dst)->layout_3d) + if (nv50_miptree(dst)->layout_3d) drect.z++; else - drect.base += nvc0_miptree(dst)->layer_stride; + drect.base += nv50_miptree(dst)->layer_stride; - if (nvc0_miptree(src)->layout_3d) + if (nv50_miptree(src)->layout_3d) srect.z++; else - srect.base += nvc0_miptree(src)->layer_stride; + srect.base += nv50_miptree(src)->layer_stride; } return; } @@ -275,9 +247,9 @@ nvc0_resource_copy_region(struct pipe_context *pipe, for (; dst_layer < dstz + src_box->depth; ++dst_layer, ++src_layer) { ret = nvc0_2d_texture_do_copy(screen->base.channel, - nvc0_miptree(dst), dst_level, + nv50_miptree(dst), dst_level, dstx, dsty, dst_layer, - nvc0_miptree(src), src_level, + nv50_miptree(src), src_level, src_box->x, src_box->y, src_layer, src_box->width, src_box->height); if (ret) @@ -295,8 +267,8 @@ nvc0_clear_render_target(struct pipe_context *pipe, struct nvc0_context *nv50 = nvc0_context(pipe); struct nvc0_screen *screen = nv50->screen; struct nouveau_channel *chan = screen->base.channel; - struct nvc0_miptree *mt = nvc0_miptree(dst->texture); - struct nvc0_surface *sf = nvc0_surface(dst); + struct nv50_miptree *mt = nv50_miptree(dst->texture); + struct nv50_surface *sf = nv50_surface(dst); struct nouveau_bo *bo = mt->base.bo; BEGIN_RING(chan, RING_3D(CLEAR_COLOR(0)), 4); @@ -347,8 +319,8 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe, struct nvc0_context *nv50 = nvc0_context(pipe); struct nvc0_screen *screen = nv50->screen; struct nouveau_channel *chan = screen->base.channel; - struct nvc0_miptree *mt = nvc0_miptree(dst->texture); - struct nvc0_surface *sf = nvc0_surface(dst); + struct nv50_miptree *mt = nv50_miptree(dst->texture); + struct nv50_surface *sf = nv50_surface(dst); struct nouveau_bo *bo = mt->base.bo; uint32_t mode = 0; int unk = mt->base.base.target == PIPE_TEXTURE_2D; diff --git a/src/gallium/drivers/nvc0/nvc0_tex.c b/src/gallium/drivers/nvc0/nvc0_tex.c index 24850b19986..0cbb4b33b59 100644 --- a/src/gallium/drivers/nvc0/nvc0_tex.c +++ b/src/gallium/drivers/nvc0/nvc0_tex.c @@ -60,7 +60,7 @@ nvc0_create_sampler_view(struct pipe_context *pipe, uint32_t swz[4]; uint32_t depth; struct nv50_tic_entry *view; - struct nvc0_miptree *mt = nvc0_miptree(texture); + struct nv50_miptree *mt = nv50_miptree(texture); boolean tex_int; view = MALLOC_STRUCT(nv50_tic_entry); @@ -114,6 +114,7 @@ nvc0_create_sampler_view(struct pipe_context *pipe, depth = MAX2(mt->base.base.array_size, mt->base.base.depth0); if (mt->base.base.target == PIPE_TEXTURE_1D_ARRAY || + /* mt->base.base.target == PIPE_TEXTURE_2D_ARRAY_MS || */ mt->base.base.target == PIPE_TEXTURE_2D_ARRAY) { /* there doesn't seem to be a base layer field in TIC */ tic[1] = view->pipe.u.tex.first_layer * mt->layer_stride; @@ -124,6 +125,7 @@ nvc0_create_sampler_view(struct pipe_context *pipe, case PIPE_TEXTURE_1D: tic[2] |= NV50_TIC_2_TARGET_1D; break; +/* case PIPE_TEXTURE_2D_MS: */ case PIPE_TEXTURE_2D: tic[2] |= NV50_TIC_2_TARGET_2D; break; @@ -143,6 +145,7 @@ nvc0_create_sampler_view(struct pipe_context *pipe, case PIPE_TEXTURE_1D_ARRAY: tic[2] |= NV50_TIC_2_TARGET_1D_ARRAY; break; +/* case PIPE_TEXTURE_2D_ARRAY_MS: */ case PIPE_TEXTURE_2D_ARRAY: tic[2] |= NV50_TIC_2_TARGET_2D_ARRAY; break; @@ -159,16 +162,22 @@ nvc0_create_sampler_view(struct pipe_context *pipe, else tic[3] = 0x00300000; - tic[4] = (1 << 31) | mt->base.base.width0; + tic[4] = (1 << 31) | (mt->base.base.width0 << mt->ms_x); - tic[5] = mt->base.base.height0 & 0xffff; + tic[5] = (mt->base.base.height0 << mt->ms_y) & 0xffff; tic[5] |= depth << 16; tic[5] |= mt->base.base.last_level << 28; - tic[6] = 0x03000000; + tic[6] = (mt->ms_x > 1) ? 0x88000000 : 0x03000000; /* sampling points */ tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level; + /* + if (mt->base.base.target == PIPE_TEXTURE_2D_MS || + mt->base.base.target == PIPE_TEXTURE_2D_ARRAY_MS) + tic[7] |= mt->ms_mode << 12; + */ + return &view->pipe; } @@ -189,7 +198,7 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s) OUT_RING (chan, (i << 1) | 0); continue; } - res = &nvc0_miptree(tic->pipe.texture)->base; + res = &nv50_miptree(tic->pipe.texture)->base; if (tic->id < 0) { uint32_t offset = tic->tic[1]; diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.c b/src/gallium/drivers/nvc0/nvc0_transfer.c index 0509113e005..ecc9e213250 100644 --- a/src/gallium/drivers/nvc0/nvc0_transfer.c +++ b/src/gallium/drivers/nvc0/nvc0_transfer.c @@ -2,13 +2,12 @@ #include "util/u_format.h" #include "nvc0_context.h" -#include "nvc0_transfer.h" #include "nv50/nv50_defs.xml.h" struct nvc0_transfer { struct pipe_transfer base; - struct nvc0_m2mf_rect rect[2]; + struct nv50_m2mf_rect rect[2]; uint32_t nblocksx; uint16_t nblocksy; uint16_t nlayers; @@ -16,8 +15,8 @@ struct nvc0_transfer { void nvc0_m2mf_transfer_rect(struct pipe_screen *pscreen, - const struct nvc0_m2mf_rect *dst, - const struct nvc0_m2mf_rect *src, + const struct nv50_m2mf_rect *dst, + const struct nv50_m2mf_rect *src, uint32_t nblocksx, uint32_t nblocksy) { struct nouveau_channel *chan = nouveau_screen(pscreen)->channel; @@ -174,9 +173,10 @@ nvc0_m2mf_copy_linear(struct nouveau_context *nv, } } +#if 0 static void nvc0_m2mf_push_rect(struct pipe_screen *pscreen, - const struct nvc0_m2mf_rect *dst, + const struct nv50_m2mf_rect *dst, const void *data, unsigned nblocksx, unsigned nblocksy) { @@ -228,6 +228,7 @@ nvc0_m2mf_push_rect(struct pipe_screen *pscreen, nblocksy -= line_count; } } +#endif struct pipe_transfer * nvc0_miptree_transfer_new(struct pipe_context *pctx, @@ -239,11 +240,9 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, struct nvc0_context *nvc0 = nvc0_context(pctx); struct pipe_screen *pscreen = pctx->screen; struct nouveau_device *dev = nvc0->screen->base.device; - struct nvc0_miptree *mt = nvc0_miptree(res); - struct nvc0_miptree_level *lvl = &mt->level[level]; + struct nv50_miptree *mt = nv50_miptree(res); struct nvc0_transfer *tx; uint32_t size; - uint32_t w, h, d, z, layer; int ret; if (usage & PIPE_TRANSFER_MAP_DIRECTLY) @@ -253,45 +252,25 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, if (!tx) return NULL; - if (mt->layout_3d) { - z = box->z; - d = u_minify(res->depth0, level); - layer = 0; - } else { - z = 0; - d = 1; - layer = box->z; - } - tx->nlayers = box->depth; - pipe_resource_reference(&tx->base.resource, res); tx->base.level = level; tx->base.usage = usage; tx->base.box = *box; - tx->nblocksx = util_format_get_nblocksx(res->format, box->width); - tx->nblocksy = util_format_get_nblocksy(res->format, box->height); + if (util_format_is_plain(res->format)) { + tx->nblocksx = box->width << mt->ms_x; + tx->nblocksy = box->height << mt->ms_y; + } else { + tx->nblocksx = util_format_get_nblocksx(res->format, box->width); + tx->nblocksy = util_format_get_nblocksy(res->format, box->height); + } + tx->nlayers = box->depth; tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format); tx->base.layer_stride = tx->nblocksy * tx->base.stride; - w = u_minify(res->width0, level); - h = u_minify(res->height0, level); - - tx->rect[0].cpp = tx->rect[1].cpp = util_format_get_blocksize(res->format); - - tx->rect[0].bo = mt->base.bo; - tx->rect[0].base = lvl->offset + layer * mt->layer_stride; - tx->rect[0].tile_mode = lvl->tile_mode; - tx->rect[0].x = util_format_get_nblocksx(res->format, box->x); - tx->rect[0].y = util_format_get_nblocksy(res->format, box->y); - tx->rect[0].z = z; - tx->rect[0].width = util_format_get_nblocksx(res->format, w); - tx->rect[0].height = util_format_get_nblocksy(res->format, h); - tx->rect[0].depth = d; - tx->rect[0].pitch = lvl->pitch; - tx->rect[0].domain = NOUVEAU_BO_VRAM; + nv50_m2mf_rect_setup(&tx->rect[0], res, level, box->x, box->y, box->z); size = tx->base.layer_stride; @@ -302,6 +281,7 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, return NULL; } + tx->rect[1].cpp = tx->rect[0].cpp; tx->rect[1].width = tx->nblocksx; tx->rect[1].height = tx->nblocksy; tx->rect[1].depth = 1; @@ -310,6 +290,7 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, if (usage & PIPE_TRANSFER_READ) { unsigned base = tx->rect[0].base; + unsigned z = tx->rect[0].z; unsigned i; for (i = 0; i < tx->nlayers; ++i) { nvc0_m2mf_transfer_rect(pscreen, &tx->rect[1], &tx->rect[0], @@ -334,7 +315,7 @@ nvc0_miptree_transfer_del(struct pipe_context *pctx, { struct pipe_screen *pscreen = pctx->screen; struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer; - struct nvc0_miptree *mt = nvc0_miptree(tx->base.resource); + struct nv50_miptree *mt = nv50_miptree(tx->base.resource); unsigned i; if (tx->base.usage & PIPE_TRANSFER_WRITE) { diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.h b/src/gallium/drivers/nvc0/nvc0_transfer.h deleted file mode 100644 index 803ee3463ec..00000000000 --- a/src/gallium/drivers/nvc0/nvc0_transfer.h +++ /dev/null @@ -1,44 +0,0 @@ - -#ifndef __NVC0_TRANSFER_H__ -#define __NVC0_TRANSFER_H__ - -#include "pipe/p_state.h" - -struct pipe_transfer * -nvc0_miptree_transfer_new(struct pipe_context *pcontext, - struct pipe_resource *pt, - unsigned level, - unsigned usage, - const struct pipe_box *box); -void -nvc0_miptree_transfer_del(struct pipe_context *pcontext, - struct pipe_transfer *ptx); -void * -nvc0_miptree_transfer_map(struct pipe_context *pcontext, - struct pipe_transfer *ptx); -void -nvc0_miptree_transfer_unmap(struct pipe_context *pcontext, - struct pipe_transfer *ptx); - -struct nvc0_m2mf_rect { - struct nouveau_bo *bo; - uint32_t base; - unsigned domain; - uint32_t pitch; - uint32_t width; - uint32_t x; - uint32_t height; - uint32_t y; - uint16_t depth; - uint16_t z; - uint16_t tile_mode; - uint16_t cpp; -}; - -void -nvc0_m2mf_transfer_rect(struct pipe_screen *pscreen, - const struct nvc0_m2mf_rect *dst, - const struct nvc0_m2mf_rect *src, - uint32_t nblocksx, uint32_t nblocksy); - -#endif diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c index 6bbcf2447ec..8a5bf8dc582 100644 --- a/src/gallium/drivers/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c @@ -367,11 +367,11 @@ nvc0_prim_gl(unsigned prim) static void nvc0_draw_vbo_flush_notify(struct nouveau_channel *chan) { - struct nvc0_context *nvc0 = chan->user_private; + struct nvc0_screen *screen = chan->user_private; - nouveau_fence_update(&nvc0->screen->base, TRUE); + nouveau_fence_update(&screen->base, TRUE); - nvc0_bufctx_emit_relocs(nvc0); + nvc0_bufctx_emit_relocs(screen->cur_ctx); } static void @@ -382,9 +382,6 @@ nvc0_draw_arrays(struct nvc0_context *nvc0, struct nouveau_channel *chan = nvc0->screen->base.channel; unsigned prim; - chan->flush_notify = nvc0_draw_vbo_flush_notify; - chan->user_private = nvc0; - prim = nvc0_prim_gl(mode); while (instance_count--) { @@ -397,8 +394,6 @@ nvc0_draw_arrays(struct nvc0_context *nvc0, prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; } - - chan->flush_notify = nvc0_default_flush_notify; } static void @@ -500,9 +495,6 @@ nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten, unsigned prim; const unsigned index_size = nvc0->idxbuf.index_size; - chan->flush_notify = nvc0_draw_vbo_flush_notify; - chan->user_private = nvc0; - prim = nvc0_prim_gl(mode); if (index_bias != nvc0->state.index_bias) { @@ -568,8 +560,6 @@ nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten, prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; } } - - chan->flush_notify = nvc0_default_flush_notify; } void @@ -596,8 +586,11 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) nvc0_state_validate(nvc0); + chan->flush_notify = nvc0_draw_vbo_flush_notify; + if (nvc0->vbo_fifo) { nvc0_push_vbo(nvc0, info); + chan->flush_notify = nvc0_default_flush_notify; return; } @@ -648,6 +641,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) info->mode, info->start, info->count, info->instance_count, info->index_bias); } + chan->flush_notify = nvc0_default_flush_notify; nvc0_release_user_vbufs(nvc0); } diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c index 2b1510264a1..e2cdcf636f9 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.c +++ b/src/gallium/drivers/nvfx/nvfx_context.c @@ -1,6 +1,8 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" #include "util/u_framebuffer.h" +#include "vl/vl_decoder.h" +#include "vl/vl_video_buffer.h" #include "nvfx_context.h" #include "nvfx_screen.h" @@ -24,9 +26,21 @@ nvfx_flush(struct pipe_context *pipe, OUT_RING(chan, 1); }*/ - FIRE_RING(chan); - if (fence) + if (fence) { + /* horrific hack to make glFinish() work in the absence of + * having proper fences in nvfx. a pending rewrite will + * fix this properly, but may be a while off. + */ + MARK_RING(chan, 1, 1); + OUT_RELOC(chan, screen->fence, 0, NOUVEAU_BO_WR | + NOUVEAU_BO_DUMMY, 0, 0); + FIRE_RING(chan); + nouveau_bo_map(screen->fence, NOUVEAU_BO_RDWR); + nouveau_bo_unmap(screen->fence); *fence = NULL; + } else { + FIRE_RING(chan); + } } static void @@ -76,6 +90,9 @@ nvfx_create(struct pipe_screen *pscreen, void *priv) nvfx->pipe.clear = nvfx_clear; nvfx->pipe.flush = nvfx_flush; + nvfx->pipe.create_video_decoder = vl_create_decoder; + nvfx->pipe.create_video_buffer = vl_video_buffer_create; + nvfx->is_nv4x = screen->is_nv4x; nvfx->use_nv4x = screen->use_nv4x; /* TODO: it seems that nv30 might have fixed function clipping usable with vertex programs diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c index d880b12fcaa..0e8f96772c6 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -3,6 +3,8 @@ #include "util/u_format.h" #include "util/u_format_s3tc.h" #include "util/u_simple_screen.h" +#include "vl/vl_decoder.h" +#include "vl/vl_video_buffer.h" #include "nouveau/nouveau_screen.h" #include "nouveau/nv_object.xml.h" @@ -33,6 +35,9 @@ nvfx_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 1; case PIPE_CAP_GLSL: return 1; + case PIPE_CAP_SM3: + /* TODO: >= nv4x support Shader Model 3.0 */ + return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: @@ -208,6 +213,24 @@ nvfx_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_cap param) } } +static int +nvfx_screen_get_video_param(struct pipe_screen *screen, + enum pipe_video_profile profile, + enum pipe_video_cap param) +{ + switch (param) { + case PIPE_VIDEO_CAP_SUPPORTED: + return vl_profile_supported(screen, profile); + case PIPE_VIDEO_CAP_NPOT_TEXTURES: + return 0; + case PIPE_VIDEO_CAP_MAX_WIDTH: + case PIPE_VIDEO_CAP_MAX_HEIGHT: + return vl_video_buffer_max_size(screen); + default: + return 0; + } +} + static boolean nvfx_screen_is_format_supported(struct pipe_screen *pscreen, enum pipe_format format, @@ -306,6 +329,7 @@ nvfx_screen_destroy(struct pipe_screen *pscreen) nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->eng3d); nvfx_screen_surface_takedown(pscreen); + nouveau_bo_ref(NULL, &screen->fence); nouveau_screen_fini(&screen->base); @@ -467,9 +491,17 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) pscreen->get_param = nvfx_screen_get_param; pscreen->get_shader_param = nvfx_screen_get_shader_param; pscreen->get_paramf = nvfx_screen_get_paramf; + pscreen->get_video_param = nvfx_screen_get_video_param; pscreen->is_format_supported = nvfx_screen_is_format_supported; + pscreen->is_video_format_supported = vl_video_buffer_is_format_supported; pscreen->context_create = nvfx_create; + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 4096, &screen->fence); + if (ret) { + nvfx_screen_destroy(pscreen); + return NULL; + } + switch (dev->chipset & 0xf0) { case 0x30: if (NV30_3D_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) diff --git a/src/gallium/drivers/nvfx/nvfx_screen.h b/src/gallium/drivers/nvfx/nvfx_screen.h index b1f07187c78..02e7c5d1cad 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.h +++ b/src/gallium/drivers/nvfx/nvfx_screen.h @@ -11,6 +11,7 @@ struct nvfx_screen { struct nouveau_screen base; struct nouveau_winsys *nvws; + struct nouveau_bo *fence; struct nvfx_context *cur_ctx; diff --git a/src/gallium/drivers/nvfx/nvfx_state.h b/src/gallium/drivers/nvfx/nvfx_state.h index 8fafca1950c..15e1cbb1986 100644 --- a/src/gallium/drivers/nvfx/nvfx_state.h +++ b/src/gallium/drivers/nvfx/nvfx_state.h @@ -2,6 +2,7 @@ #define __NVFX_STATE_H__ #include "pipe/p_state.h" +#include "pipe/p_video_state.h" #include "tgsi/tgsi_scan.h" #include "nouveau/nouveau_statebuf.h" #include "util/u_dynarray.h" diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index 4088216adcb..4f021276a8f 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -26,19 +26,51 @@ C_SOURCES = \ r300_texture.c \ r300_texture_desc.c \ r300_tgsi_to_rc.c \ - r300_transfer.c + r300_transfer.c \ + \ + compiler/radeon_code.c \ + compiler/radeon_compiler.c \ + compiler/radeon_compiler_util.c \ + compiler/radeon_emulate_branches.c \ + compiler/radeon_emulate_loops.c \ + compiler/radeon_program.c \ + compiler/radeon_program_print.c \ + compiler/radeon_opcodes.c \ + compiler/radeon_program_alu.c \ + compiler/radeon_program_pair.c \ + compiler/radeon_program_tex.c \ + compiler/radeon_pair_translate.c \ + compiler/radeon_pair_schedule.c \ + compiler/radeon_pair_regalloc.c \ + compiler/radeon_pair_dead_sources.c \ + compiler/radeon_dataflow.c \ + compiler/radeon_dataflow_deadcode.c \ + compiler/radeon_dataflow_swizzles.c \ + compiler/radeon_list.c \ + compiler/radeon_optimize.c \ + compiler/radeon_remove_constants.c \ + compiler/radeon_rename_regs.c \ + compiler/radeon_variable.c \ + compiler/r3xx_fragprog.c \ + compiler/r300_fragprog.c \ + compiler/r300_fragprog_swizzle.c \ + compiler/r300_fragprog_emit.c \ + compiler/r500_fragprog.c \ + compiler/r500_fragprog_emit.c \ + compiler/r3xx_vertprog.c \ + compiler/r3xx_vertprog_dump.c \ + compiler/memory_pool.c \ + \ + $(TOP)/src/glsl/ralloc.c \ + $(TOP)/src/mesa/program/register_allocate.c -LIBRARY_INCLUDES = \ - -I$(TOP)/src/mesa/drivers/dri/r300/compiler \ - -I$(TOP)/include - -COMPILER_ARCHIVE = $(TOP)/src/mesa/drivers/dri/r300/compiler/libr300compiler.a -EXTRA_OBJECTS = \ - $(COMPILER_ARCHIVE) +LIBRARY_INCLUDES = \ + -I$(TOP)/include \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/glsl include ../../Makefile.template -.PHONY: $(COMPILER_ARCHIVE) -$(COMPILER_ARCHIVE): - $(MAKE) -C $(TOP)/src/mesa/drivers/dri/r300/compiler +test: default + @$(MAKE) -s -C compiler/tests/ diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript index 3af157a7956..7ffd1c27c96 100644 --- a/src/gallium/drivers/r300/SConscript +++ b/src/gallium/drivers/r300/SConscript @@ -1,13 +1,11 @@ Import('*') -r300compiler = SConscript('#/src/mesa/drivers/dri/r300/compiler/SConscript') - env = env.Clone() -# add the paths for r300compiler env.Append(CPPPATH = [ - '#/src/mesa/drivers/dri/r300/compiler', '#/include', '#/src/mesa', + '#/src/glsl', + '#/src/mapi', ]) r300 = env.ConvenienceLibrary( @@ -36,7 +34,41 @@ r300 = env.ConvenienceLibrary( 'r300_texture_desc.c', 'r300_tgsi_to_rc.c', 'r300_transfer.c', - ] + r300compiler) + r300compiler + 'compiler/radeon_code.c', + 'compiler/radeon_compiler.c', + 'compiler/radeon_compiler_util.c', + 'compiler/radeon_program.c', + 'compiler/radeon_program_print.c', + 'compiler/radeon_opcodes.c', + 'compiler/radeon_program_alu.c', + 'compiler/radeon_program_pair.c', + 'compiler/radeon_program_tex.c', + 'compiler/radeon_pair_translate.c', + 'compiler/radeon_pair_schedule.c', + 'compiler/radeon_pair_regalloc.c', + 'compiler/radeon_pair_dead_sources.c', + 'compiler/radeon_optimize.c', + 'compiler/radeon_remove_constants.c', + 'compiler/radeon_rename_regs.c', + 'compiler/radeon_emulate_branches.c', + 'compiler/radeon_emulate_loops.c', + 'compiler/radeon_dataflow.c', + 'compiler/radeon_dataflow_deadcode.c', + 'compiler/radeon_dataflow_swizzles.c', + 'compiler/radeon_variable.c', + 'compiler/radeon_list.c', + 'compiler/r3xx_fragprog.c', + 'compiler/r300_fragprog.c', + 'compiler/r300_fragprog_swizzle.c', + 'compiler/r300_fragprog_emit.c', + 'compiler/r500_fragprog.c', + 'compiler/r500_fragprog_emit.c', + 'compiler/r3xx_vertprog.c', + 'compiler/r3xx_vertprog_dump.c', + 'compiler/memory_pool.c', + '#/src/glsl/ralloc.c', + '#/src/mesa/program/register_allocate.c' + ]) env.Alias('r300', r300) diff --git a/src/gallium/drivers/r300/compiler/memory_pool.c b/src/gallium/drivers/r300/compiler/memory_pool.c new file mode 100644 index 00000000000..ddcdddf9e3c --- /dev/null +++ b/src/gallium/drivers/r300/compiler/memory_pool.c @@ -0,0 +1,97 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "memory_pool.h" + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + + +#define POOL_LARGE_ALLOC 4096 +#define POOL_ALIGN 8 + + +struct memory_block { + struct memory_block * next; +}; + +void memory_pool_init(struct memory_pool * pool) +{ + memset(pool, 0, sizeof(struct memory_pool)); +} + + +void memory_pool_destroy(struct memory_pool * pool) +{ + while(pool->blocks) { + struct memory_block * block = pool->blocks; + pool->blocks = block->next; + free(block); + } +} + +static void refill_pool(struct memory_pool * pool) +{ + unsigned int blocksize = pool->total_allocated; + struct memory_block * newblock; + + if (!blocksize) + blocksize = 2*POOL_LARGE_ALLOC; + + newblock = (struct memory_block*)malloc(blocksize); + newblock->next = pool->blocks; + pool->blocks = newblock; + + pool->head = (unsigned char*)(newblock + 1); + pool->end = ((unsigned char*)newblock) + blocksize; + pool->total_allocated += blocksize; +} + + +void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes) +{ + if (bytes < POOL_LARGE_ALLOC) { + void * ptr; + + if (pool->head + bytes > pool->end) + refill_pool(pool); + + assert(pool->head + bytes <= pool->end); + + ptr = pool->head; + + pool->head += bytes; + pool->head = (unsigned char*)(((unsigned long)pool->head + POOL_ALIGN - 1) & ~(POOL_ALIGN - 1)); + + return ptr; + } else { + struct memory_block * block = (struct memory_block*)malloc(bytes + sizeof(struct memory_block)); + + block->next = pool->blocks; + pool->blocks = block; + + return (block + 1); + } +} + + diff --git a/src/gallium/drivers/r300/compiler/memory_pool.h b/src/gallium/drivers/r300/compiler/memory_pool.h new file mode 100644 index 00000000000..42344d0e3ba --- /dev/null +++ b/src/gallium/drivers/r300/compiler/memory_pool.h @@ -0,0 +1,80 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef MEMORY_POOL_H +#define MEMORY_POOL_H + +struct memory_block; + +/** + * Provides a pool of memory that can quickly be allocated from, at the + * cost of being unable to explicitly free one of the allocated blocks. + * Instead, the entire pool can be freed at once. + * + * The idea is to allow one to quickly allocate a flexible amount of + * memory during operations like shader compilation while avoiding + * reference counting headaches. + */ +struct memory_pool { + unsigned char * head; + unsigned char * end; + unsigned int total_allocated; + struct memory_block * blocks; +}; + + +void memory_pool_init(struct memory_pool * pool); +void memory_pool_destroy(struct memory_pool * pool); +void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes); + + +/** + * Generic helper for growing an array that has separate size/count + * and reserved counters to accomodate up to num new element. + * + * type * Array; + * unsigned int Size; + * unsigned int Reserved; + * + * memory_pool_array_reserve(pool, type, Array, Size, Reserved, k); + * assert(Size + k < Reserved); + * + * \note Size is not changed by this macro. + * + * \warning Array, Size, Reserved have to be lvalues and may be evaluated + * several times. + */ +#define memory_pool_array_reserve(pool, type, array, size, reserved, num) do { \ + unsigned int _num = (num); \ + if ((size) + _num > (reserved)) { \ + unsigned int newreserve = (reserved) * 2; \ + type * newarray; \ + if (newreserve < _num) \ + newreserve = 4 * _num; /* arbitrary heuristic */ \ + newarray = memory_pool_malloc((pool), newreserve * sizeof(type)); \ + memcpy(newarray, (array), (size) * sizeof(type)); \ + (array) = newarray; \ + (reserved) = newreserve; \ + } \ +} while(0) + +#endif /* MEMORY_POOL_H */ diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog.c b/src/gallium/drivers/r300/compiler/r300_fragprog.c new file mode 100644 index 00000000000..deba9ca834d --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r300_fragprog.c @@ -0,0 +1,338 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "r300_fragprog.h" + +#include <stdio.h> + +#include "../r300_reg.h" + +static void presub_string(char out[10], unsigned int inst) +{ + switch(inst & 0x600000){ + case R300_ALU_SRCP_1_MINUS_2_SRC0: + sprintf(out, "bias"); + break; + case R300_ALU_SRCP_SRC1_MINUS_SRC0: + sprintf(out, "sub"); + break; + case R300_ALU_SRCP_SRC1_PLUS_SRC0: + sprintf(out, "add"); + break; + case R300_ALU_SRCP_1_MINUS_SRC0: + sprintf(out, "inv "); + break; + } +} + +static int get_msb(unsigned int bit, unsigned int r400_ext_addr) +{ + return (r400_ext_addr & bit) ? 1 << 5 : 0; +} + +/* just some random things... */ +void r300FragmentProgramDump(struct radeon_compiler *c, void *user) +{ + struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; + struct r300_fragment_program_code *code = &compiler->code->code.r300; + int n, i, j; + static int pc = 0; + + fprintf(stderr, "pc=%d*************************************\n", pc++); + + fprintf(stderr, "Hardware program\n"); + fprintf(stderr, "----------------\n"); + if (c->is_r400) { + fprintf(stderr, "code_offset_ext: %08x\n", code->r400_code_offset_ext); + } + + for (n = 0; n <= (code->config & 3); n++) { + uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n]; + unsigned int alu_offset = ((code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT) + + (((code->r400_code_offset_ext >> (24 - (n * 6))) & 0x7) << 6); + unsigned int alu_end = ((code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT) + + (((code->r400_code_offset_ext >> (27 - (n * 6))) & 0x7) << 6); + int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT; + int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT; + + fprintf(stderr, "NODE %d: alu_offset: %u, tex_offset: %d, " + "alu_end: %u, tex_end: %d (code_addr: %08x)\n", n, + alu_offset, tex_offset, alu_end, tex_end, code_addr); + + if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) { + fprintf(stderr, " TEX:\n"); + for (i = tex_offset; + i <= tex_offset + tex_end; + ++i) { + const char *instr; + + switch ((code->tex. + inst[i] >> R300_TEX_INST_SHIFT) & + 15) { + case R300_TEX_OP_LD: + instr = "TEX"; + break; + case R300_TEX_OP_KIL: + instr = "KIL"; + break; + case R300_TEX_OP_TXP: + instr = "TXP"; + break; + case R300_TEX_OP_TXB: + instr = "TXB"; + break; + default: + instr = "UNKNOWN"; + } + + fprintf(stderr, + " %s t%i, %c%i, texture[%i] (%08x)\n", + instr, + (code->tex. + inst[i] >> R300_DST_ADDR_SHIFT) & 31, + 't', + (code->tex. + inst[i] >> R300_SRC_ADDR_SHIFT) & 31, + (code->tex. + inst[i] & R300_TEX_ID_MASK) >> + R300_TEX_ID_SHIFT, + code->tex.inst[i]); + } + } + + for (i = alu_offset; + i <= alu_offset + alu_end; ++i) { + char srcc[4][10], dstc[20]; + char srca[4][10], dsta[20]; + char argc[3][20]; + char arga[3][20]; + char flags[5], tmp[10]; + + for (j = 0; j < 3; ++j) { + int regc = code->alu.inst[i].rgb_addr >> (j * 6); + int rega = code->alu.inst[i].alpha_addr >> (j * 6); + int msbc = get_msb(R400_ADDR_EXT_RGB_MSB_BIT(j), + code->alu.inst[i].r400_ext_addr); + int msba = get_msb(R400_ADDR_EXT_A_MSB_BIT(j), + code->alu.inst[i].r400_ext_addr); + + sprintf(srcc[j], "%c%i", + (regc & 32) ? 'c' : 't', (regc & 31) | msbc); + sprintf(srca[j], "%c%i", + (rega & 32) ? 'c' : 't', (rega & 31) | msba); + } + + dstc[0] = 0; + sprintf(flags, "%s%s%s", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_REG_X) ? "x" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_REG_Y) ? "y" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : ""); + if (flags[0] != 0) { + unsigned int msb = get_msb( + R400_ADDRD_EXT_RGB_MSB_BIT, + code->alu.inst[i].r400_ext_addr); + + sprintf(dstc, "t%i.%s ", + ((code->alu.inst[i]. + rgb_addr >> R300_ALU_DSTC_SHIFT) + & 31) | msb, + flags); + } + sprintf(flags, "%s%s%s", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_OUTPUT_X) ? "x" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_OUTPUT_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(tmp, "o%i.%s", + (code->alu.inst[i]. + rgb_addr >> 29) & 3, + flags); + strcat(dstc, tmp); + } + /* Presub */ + presub_string(srcc[3], code->alu.inst[i].rgb_inst); + presub_string(srca[3], code->alu.inst[i].alpha_inst); + + dsta[0] = 0; + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) { + unsigned int msb = get_msb( + R400_ADDRD_EXT_A_MSB_BIT, + code->alu.inst[i].r400_ext_addr); + sprintf(dsta, "t%i.w ", + ((code->alu.inst[i]. + alpha_addr >> R300_ALU_DSTA_SHIFT) & 31) + | msb); + } + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) { + sprintf(tmp, "o%i.w ", + (code->alu.inst[i]. + alpha_addr >> 25) & 3); + strcat(dsta, tmp); + } + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) { + strcat(dsta, "Z"); + } + + fprintf(stderr, + "%3i: xyz: %3s %3s %3s %5s-> %-20s (%08x)\n" + " w: %3s %3s %3s %5s-> %-20s (%08x)\n", i, + srcc[0], srcc[1], srcc[2], srcc[3], dstc, + code->alu.inst[i].rgb_addr, srca[0], srca[1], + srca[2], srca[3], dsta, + code->alu.inst[i].alpha_addr); + + for (j = 0; j < 3; ++j) { + int regc = code->alu.inst[i].rgb_inst >> (j * 7); + int rega = code->alu.inst[i].alpha_inst >> (j * 7); + int d; + char buf[20]; + + d = regc & 31; + if (d < 12) { + switch (d % 4) { + case R300_ALU_ARGC_SRC0C_XYZ: + sprintf(buf, "%s.xyz", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_XXX: + sprintf(buf, "%s.xxx", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_YYY: + sprintf(buf, "%s.yyy", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_ZZZ: + sprintf(buf, "%s.zzz", + srcc[d / 4]); + break; + } + } else if (d < 15) { + sprintf(buf, "%s.www", srca[d - 12]); + } else if (d < 20 ) { + switch(d) { + case R300_ALU_ARGC_SRCP_XYZ: + sprintf(buf, "srcp.xyz"); + break; + case R300_ALU_ARGC_SRCP_XXX: + sprintf(buf, "srcp.xxx"); + break; + case R300_ALU_ARGC_SRCP_YYY: + sprintf(buf, "srcp.yyy"); + break; + case R300_ALU_ARGC_SRCP_ZZZ: + sprintf(buf, "srcp.zzz"); + break; + case R300_ALU_ARGC_SRCP_WWW: + sprintf(buf, "srcp.www"); + break; + } + } else if (d == 20) { + sprintf(buf, "0.0"); + } else if (d == 21) { + sprintf(buf, "1.0"); + } else if (d == 22) { + sprintf(buf, "0.5"); + } else if (d >= 23 && d < 32) { + d -= 23; + switch (d / 3) { + case 0: + sprintf(buf, "%s.yzx", + srcc[d % 3]); + break; + case 1: + sprintf(buf, "%s.zxy", + srcc[d % 3]); + break; + case 2: + sprintf(buf, "%s.Wzy", + srcc[d % 3]); + break; + } + } else { + sprintf(buf, "%i", d); + } + + sprintf(argc[j], "%s%s%s%s", + (regc & 32) ? "-" : "", + (regc & 64) ? "|" : "", + buf, (regc & 64) ? "|" : ""); + + d = rega & 31; + if (d < 9) { + sprintf(buf, "%s.%c", srcc[d / 3], + 'x' + (char)(d % 3)); + } else if (d < 12) { + sprintf(buf, "%s.w", srca[d - 9]); + } else if (d < 16) { + switch(d) { + case R300_ALU_ARGA_SRCP_X: + sprintf(buf, "srcp.x"); + break; + case R300_ALU_ARGA_SRCP_Y: + sprintf(buf, "srcp.y"); + break; + case R300_ALU_ARGA_SRCP_Z: + sprintf(buf, "srcp.z"); + break; + case R300_ALU_ARGA_SRCP_W: + sprintf(buf, "srcp.w"); + break; + } + } else if (d == 16) { + sprintf(buf, "0.0"); + } else if (d == 17) { + sprintf(buf, "1.0"); + } else if (d == 18) { + sprintf(buf, "0.5"); + } else { + sprintf(buf, "%i", d); + } + + sprintf(arga[j], "%s%s%s%s", + (rega & 32) ? "-" : "", + (rega & 64) ? "|" : "", + buf, (rega & 64) ? "|" : ""); + } + + fprintf(stderr, " xyz: %8s %8s %8s op: %08x %s\n" + " w: %8s %8s %8s op: %08x\n", + argc[0], argc[1], argc[2], + code->alu.inst[i].rgb_inst, + code->alu.inst[i].rgb_inst & R300_ALU_INSERT_NOP ? + "NOP" : "", + arga[0], arga[1],arga[2], + code->alu.inst[i].alpha_inst); + } + } +} diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog.h b/src/gallium/drivers/r300/compiler/r300_fragprog.h new file mode 100644 index 00000000000..0c88bab2f33 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r300_fragprog.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Ben Skeggs <[email protected]> + * Jerome Glisse <[email protected]> + */ +#ifndef __R300_FRAGPROG_H_ +#define __R300_FRAGPROG_H_ + +#include "radeon_compiler.h" +#include "radeon_program.h" + + +extern void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user); + +extern void r300FragmentProgramDump(struct radeon_compiler *c, void *user); + +#endif diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog_emit.c b/src/gallium/drivers/r300/compiler/r300_fragprog_emit.c new file mode 100644 index 00000000000..e6fd1fde62d --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r300_fragprog_emit.c @@ -0,0 +1,536 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * Emit the r300_fragment_program_code that can be understood by the hardware. + * Input is a pre-transformed radeon_program. + * + * \author Ben Skeggs <[email protected]> + * + * \author Jerome Glisse <[email protected]> + */ + +#include "r300_fragprog.h" + +#include "../r300_reg.h" + +#include "radeon_program_pair.h" +#include "r300_fragprog_swizzle.h" + + +struct r300_emit_state { + struct r300_fragment_program_compiler * compiler; + + unsigned current_node : 2; + unsigned node_first_tex : 8; + unsigned node_first_alu : 8; + uint32_t node_flags; +}; + +#define PROG_CODE \ + struct r300_fragment_program_compiler *c = emit->compiler; \ + struct r300_fragment_program_code *code = &c->code->code.r300 + +#define error(fmt, args...) do { \ + rc_error(&c->Base, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ + } while(0) + +static unsigned int get_msbs_alu(unsigned int bits) +{ + return (bits >> 6) & 0x7; +} + +/** + * @param lsbs The number of least significant bits + */ +static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs) +{ + return (bits >> lsbs) & 0x15; +} + +#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> lsbs) & mask) + +/** + * Mark a temporary register as used. + */ +static void use_temporary(struct r300_fragment_program_code *code, unsigned int index) +{ + if (index > code->pixsize) + code->pixsize = index; +} + +static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src) +{ + if (!src.Used) + return 0; + + if (src.File == RC_FILE_CONSTANT) { + return src.Index | (1 << 5); + } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) { + use_temporary(code, src.Index); + return src.Index & 0x1f; + } + + return 0; +} + + +static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode) +{ + switch(opcode) { + case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP; + case RC_OPCODE_CND: return R300_ALU_OUTC_CND; + case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3; + case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4; + case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC; + default: + error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name); + /* fall through */ + case RC_OPCODE_NOP: + /* fall through */ + case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD; + case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX; + case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN; + case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA; + } +} + +static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode) +{ + switch(opcode) { + case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP; + case RC_OPCODE_CND: return R300_ALU_OUTA_CND; + case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4; + case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4; + case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2; + case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC; + case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2; + default: + error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name); + /* fall through */ + case RC_OPCODE_NOP: + /* fall through */ + case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD; + case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX; + case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN; + case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP; + case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ; + } +} + +/** + * Emit one paired ALU instruction. + */ +static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst) +{ + int ip; + int j; + PROG_CODE; + + if (code->alu.length >= c->Base.max_alu_insts) { + error("Too many ALU instructions"); + return 0; + } + + ip = code->alu.length++; + + code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode); + code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode); + + for(j = 0; j < 3; ++j) { + /* Set the RGB address */ + unsigned int src = use_source(code, inst->RGB.Src[j]); + unsigned int arg; + if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j); + + code->alu.inst[ip].rgb_addr |= src << (6*j); + + /* Set the Alpha address */ + src = use_source(code, inst->Alpha.Src[j]); + if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j); + + code->alu.inst[ip].alpha_addr |= src << (6*j); + + arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); + arg |= inst->RGB.Arg[j].Abs << 6; + arg |= inst->RGB.Arg[j].Negate << 5; + code->alu.inst[ip].rgb_inst |= arg << (7*j); + + arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle); + arg |= inst->Alpha.Arg[j].Abs << 6; + arg |= inst->Alpha.Arg[j].Negate << 5; + code->alu.inst[ip].alpha_inst |= arg << (7*j); + } + + /* Presubtract */ + if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { + switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { + case RC_PRESUB_BIAS: + code->alu.inst[ip].rgb_inst |= + R300_ALU_SRCP_1_MINUS_2_SRC0; + break; + case RC_PRESUB_ADD: + code->alu.inst[ip].rgb_inst |= + R300_ALU_SRCP_SRC1_PLUS_SRC0; + break; + case RC_PRESUB_SUB: + code->alu.inst[ip].rgb_inst |= + R300_ALU_SRCP_SRC1_MINUS_SRC0; + break; + case RC_PRESUB_INV: + code->alu.inst[ip].rgb_inst |= + R300_ALU_SRCP_1_MINUS_SRC0; + break; + default: + break; + } + } + + if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { + switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { + case RC_PRESUB_BIAS: + code->alu.inst[ip].alpha_inst |= + R300_ALU_SRCP_1_MINUS_2_SRC0; + break; + case RC_PRESUB_ADD: + code->alu.inst[ip].alpha_inst |= + R300_ALU_SRCP_SRC1_PLUS_SRC0; + break; + case RC_PRESUB_SUB: + code->alu.inst[ip].alpha_inst |= + R300_ALU_SRCP_SRC1_MINUS_SRC0; + break; + case RC_PRESUB_INV: + code->alu.inst[ip].alpha_inst |= + R300_ALU_SRCP_1_MINUS_SRC0; + break; + default: + break; + } + } + + if (inst->RGB.Saturate) + code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP; + if (inst->Alpha.Saturate) + code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP; + + if (inst->RGB.WriteMask) { + use_temporary(code, inst->RGB.DestIndex); + if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT; + code->alu.inst[ip].rgb_addr |= + ((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) | + (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT); + } + if (inst->RGB.OutputWriteMask) { + code->alu.inst[ip].rgb_addr |= + (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) | + R300_RGB_TARGET(inst->RGB.Target); + emit->node_flags |= R300_RGBA_OUT; + } + + if (inst->Alpha.WriteMask) { + use_temporary(code, inst->Alpha.DestIndex); + if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT; + code->alu.inst[ip].alpha_addr |= + ((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) | + R300_ALU_DSTA_REG; + } + if (inst->Alpha.OutputWriteMask) { + code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT | + R300_ALPHA_TARGET(inst->Alpha.Target); + emit->node_flags |= R300_RGBA_OUT; + } + if (inst->Alpha.DepthWriteMask) { + code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH; + emit->node_flags |= R300_W_OUT; + c->code->writes_depth = 1; + } + if (inst->Nop) + code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP; + + return 1; +} + + +/** + * Finish the current node without advancing to the next one. + */ +static int finish_node(struct r300_emit_state * emit) +{ + struct r300_fragment_program_compiler * c = emit->compiler; + struct r300_fragment_program_code *code = &emit->compiler->code->code.r300; + unsigned alu_offset; + unsigned alu_end; + unsigned tex_offset; + unsigned tex_end; + + unsigned int alu_offset_msbs, alu_end_msbs; + + if (code->alu.length == emit->node_first_alu) { + /* Generate a single NOP for this node */ + struct rc_pair_instruction inst; + memset(&inst, 0, sizeof(inst)); + if (!emit_alu(emit, &inst)) + return 0; + } + + alu_offset = emit->node_first_alu; + alu_end = code->alu.length - alu_offset - 1; + tex_offset = emit->node_first_tex; + tex_end = code->tex.length - tex_offset - 1; + + if (code->tex.length == emit->node_first_tex) { + if (emit->current_node > 0) { + error("Node %i has no TEX instructions", emit->current_node); + return 0; + } + + tex_end = 0; + } else { + if (emit->current_node == 0) + code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX; + } + + /* Write the config register. + * Note: The order in which the words for each node are written + * is not correct here and needs to be fixed up once we're entirely + * done + * + * Also note that the register specification from AMD is slightly + * incorrect in its description of this register. */ + code->code_addr[emit->current_node] = + ((alu_offset << R300_ALU_START_SHIFT) + & R300_ALU_START_MASK) + | ((alu_end << R300_ALU_SIZE_SHIFT) + & R300_ALU_SIZE_MASK) + | ((tex_offset << R300_TEX_START_SHIFT) + & R300_TEX_START_MASK) + | ((tex_end << R300_TEX_SIZE_SHIFT) + & R300_TEX_SIZE_MASK) + | emit->node_flags + | (get_msbs_tex(tex_offset, 5) + << R400_TEX_START_MSB_SHIFT) + | (get_msbs_tex(tex_end, 5) + << R400_TEX_SIZE_MSB_SHIFT) + ; + + /* Write r400 extended instruction fields. These will be ignored on + * r300 cards. */ + alu_offset_msbs = get_msbs_alu(alu_offset); + alu_end_msbs = get_msbs_alu(alu_end); + switch(emit->current_node) { + case 0: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START3_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT; + break; + case 1: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START2_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT; + break; + case 2: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START1_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT; + break; + case 3: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START0_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT; + break; + } + return 1; +} + + +/** + * Begin a block of texture instructions. + * Create the necessary indirection. + */ +static int begin_tex(struct r300_emit_state * emit) +{ + PROG_CODE; + + if (code->alu.length == emit->node_first_alu && + code->tex.length == emit->node_first_tex) { + return 1; + } + + if (emit->current_node == 3) { + error("Too many texture indirections"); + return 0; + } + + if (!finish_node(emit)) + return 0; + + emit->current_node++; + emit->node_first_tex = code->tex.length; + emit->node_first_alu = code->alu.length; + emit->node_flags = 0; + return 1; +} + + +static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst) +{ + unsigned int unit; + unsigned int dest; + unsigned int opcode; + PROG_CODE; + + if (code->tex.length >= emit->compiler->Base.max_tex_insts) { + error("Too many TEX instructions"); + return 0; + } + + unit = inst->U.I.TexSrcUnit; + dest = inst->U.I.DstReg.Index; + + switch(inst->U.I.Opcode) { + case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; + case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break; + case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; + case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; + default: + error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name); + return 0; + } + + if (inst->U.I.Opcode == RC_OPCODE_KIL) { + unit = 0; + dest = 0; + } else { + use_temporary(code, dest); + } + + use_temporary(code, inst->U.I.SrcReg[0].Index); + + code->tex.inst[code->tex.length++] = + ((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) + & R300_SRC_ADDR_MASK) + | ((dest << R300_DST_ADDR_SHIFT) + & R300_DST_ADDR_MASK) + | (unit << R300_TEX_ID_SHIFT) + | (opcode << R300_TEX_INST_SHIFT) + | (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ? + R400_SRC_ADDR_EXT_BIT : 0) + | (dest >= R300_PFS_NUM_TEMP_REGS ? + R400_DST_ADDR_EXT_BIT : 0) + ; + return 1; +} + + +/** + * Final compilation step: Turn the intermediate radeon_program into + * machine-readable instructions. + */ +void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) +{ + struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; + struct r300_emit_state emit; + struct r300_fragment_program_code *code = &compiler->code->code.r300; + unsigned int tex_end; + + memset(&emit, 0, sizeof(emit)); + emit.compiler = compiler; + + memset(code, 0, sizeof(struct r300_fragment_program_code)); + + for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next; + inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; + inst = inst->Next) { + if (inst->Type == RC_INSTRUCTION_NORMAL) { + if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) { + begin_tex(&emit); + continue; + } + + emit_tex(&emit, inst); + } else { + emit_alu(&emit, &inst->U.P); + } + } + + if (code->pixsize >= compiler->Base.max_temp_regs) + rc_error(&compiler->Base, "Too many hardware temporaries used.\n"); + + if (compiler->Base.Error) + return; + + /* Finish the program */ + finish_node(&emit); + + code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */ + + /* Set r400 extended instruction fields. These values will be ignored + * on r300 cards. */ + code->r400_code_offset_ext |= + (get_msbs_alu(0) + << R400_ALU_OFFSET_MSB_SHIFT) + | (get_msbs_alu(code->alu.length - 1) + << R400_ALU_SIZE_MSB_SHIFT); + + tex_end = code->tex.length ? code->tex.length - 1 : 0; + code->code_offset = + ((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) + & R300_PFS_CNTL_ALU_OFFSET_MASK) + | (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT) + & R300_PFS_CNTL_ALU_END_MASK) + | ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) + & R300_PFS_CNTL_TEX_OFFSET_MASK) + | ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT) + & R300_PFS_CNTL_TEX_END_MASK) + | (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT) + | (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT) + ; + + if (emit.current_node < 3) { + int shift = 3 - emit.current_node; + int i; + for(i = emit.current_node; i >= 0; --i) + code->code_addr[shift + i] = code->code_addr[i]; + for(i = 0; i < shift; ++i) + code->code_addr[i] = 0; + } + + if (code->pixsize >= R300_PFS_NUM_TEMP_REGS + || code->alu.length > R300_PFS_MAX_ALU_INST + || code->tex.length > R300_PFS_MAX_TEX_INST) { + + code->r390_mode = 1; + } +} diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c b/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c new file mode 100644 index 00000000000..b7bca8c0cfa --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c @@ -0,0 +1,243 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * Utilities to deal with the somewhat odd restriction on R300 fragment + * program swizzles. + */ + +#include "r300_fragprog_swizzle.h" + +#include <stdio.h> + +#include "../r300_reg.h" +#include "radeon_compiler.h" + +#define MAKE_SWZ3(x, y, z) (RC_MAKE_SWIZZLE(RC_SWIZZLE_##x, RC_SWIZZLE_##y, RC_SWIZZLE_##z, RC_SWIZZLE_ZERO)) + +struct swizzle_data { + unsigned int hash; /**< swizzle value this matches */ + unsigned int base; /**< base value for hw swizzle */ + unsigned int stride; /**< difference in base between arg0/1/2 */ + unsigned int srcp_stride; /**< difference in base between arg0/scrp */ +}; + +static const struct swizzle_data native_swizzles[] = { + {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4, 15}, + {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4, 15}, + {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4, 15}, + {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4, 15}, + {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1, 7}, + {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1, 0}, + {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1, 0}, + {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1, 0}, + {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0, 0}, + {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0, 0}, + {MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0, 0} +}; + +static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]); + +/** + * Find a native RGB swizzle that matches the given swizzle. + * Returns 0 if none found. + */ +static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle) +{ + int i, comp; + + for(i = 0; i < num_native_swizzles; ++i) { + const struct swizzle_data* sd = &native_swizzles[i]; + for(comp = 0; comp < 3; ++comp) { + unsigned int swz = GET_SWZ(swizzle, comp); + if (swz == RC_SWIZZLE_UNUSED) + continue; + if (swz != GET_SWZ(sd->hash, comp)) + break; + } + if (comp == 3) + return sd; + } + + return 0; +} + +/** + * Determines if the given swizzle is valid for r300/r400. In most situations + * it is better to use r300_swizzle_is_native() which can be accesed via + * struct radeon_compiler *c; c->SwizzleCaps->IsNative(). + */ +int r300_swizzle_is_native_basic(unsigned int swizzle) +{ + if(lookup_native_swizzle(swizzle)) + return 1; + else + return 0; +} + +/** + * Check whether the given instruction supports the swizzle and negate + * combinations in the given source register. + */ +static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) +{ + const struct swizzle_data* sd; + unsigned int relevant; + int j; + + if (opcode == RC_OPCODE_KIL || + opcode == RC_OPCODE_TEX || + opcode == RC_OPCODE_TXB || + opcode == RC_OPCODE_TXP) { + if (reg.Abs || reg.Negate) + return 0; + + for(j = 0; j < 4; ++j) { + unsigned int swz = GET_SWZ(reg.Swizzle, j); + if (swz == RC_SWIZZLE_UNUSED) + continue; + if (swz != j) + return 0; + } + + return 1; + } + + relevant = 0; + + for(j = 0; j < 3; ++j) + if (GET_SWZ(reg.Swizzle, j) != RC_SWIZZLE_UNUSED) + relevant |= 1 << j; + + if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) + return 0; + + sd = lookup_native_swizzle(reg.Swizzle); + if (!sd || (reg.File == RC_FILE_PRESUB && sd->srcp_stride == 0)) + return 0; + + return 1; +} + + +static void r300_swizzle_split( + struct rc_src_register src, unsigned int mask, + struct rc_swizzle_split * split) +{ + split->NumPhases = 0; + + while(mask) { + unsigned int best_matchcount = 0; + unsigned int best_matchmask = 0; + int i, comp; + + for(i = 0; i < num_native_swizzles; ++i) { + const struct swizzle_data *sd = &native_swizzles[i]; + unsigned int matchcount = 0; + unsigned int matchmask = 0; + for(comp = 0; comp < 3; ++comp) { + unsigned int swz; + if (!GET_BIT(mask, comp)) + continue; + swz = GET_SWZ(src.Swizzle, comp); + if (swz == RC_SWIZZLE_UNUSED) + continue; + if (swz == GET_SWZ(sd->hash, comp)) { + /* check if the negate bit of current component + * is the same for already matched components */ + if (matchmask && (!!(src.Negate & matchmask) != !!(src.Negate & (1 << comp)))) + continue; + + matchcount++; + matchmask |= 1 << comp; + } + } + if (matchcount > best_matchcount) { + best_matchcount = matchcount; + best_matchmask = matchmask; + if (matchmask == (mask & RC_MASK_XYZ)) + break; + } + } + + if (mask & RC_MASK_W) + best_matchmask |= RC_MASK_W; + + split->Phase[split->NumPhases++] = best_matchmask; + mask &= ~best_matchmask; + } +} + +struct rc_swizzle_caps r300_swizzle_caps = { + .IsNative = r300_swizzle_is_native, + .Split = r300_swizzle_split +}; + + +/** + * Translate an RGB (XYZ) swizzle into the hardware code for the given + * instruction source. + */ +unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle) +{ + const struct swizzle_data* sd = lookup_native_swizzle(swizzle); + + if (!sd || (src == RC_PAIR_PRESUB_SRC && sd->srcp_stride == 0)) { + fprintf(stderr, "Not a native swizzle: %08x\n", swizzle); + return 0; + } + + if (src == RC_PAIR_PRESUB_SRC) { + return sd->base + sd->srcp_stride; + } else { + return sd->base + src*sd->stride; + } +} + + +/** + * Translate an Alpha (W) swizzle into the hardware code for the given + * instruction source. + */ +unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle) +{ + unsigned int swz = GET_SWZ(swizzle, 0); + if (src == RC_PAIR_PRESUB_SRC) { + return R300_ALU_ARGA_SRCP_X + swz; + } + if (swz < 3) + return swz + 3*src; + + switch(swz) { + case RC_SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src; + case RC_SWIZZLE_ONE: return R300_ALU_ARGA_ONE; + case RC_SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO; + case RC_SWIZZLE_HALF: return R300_ALU_ARGA_HALF; + default: return R300_ALU_ARGA_ONE; + } +} diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.h b/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.h new file mode 100644 index 00000000000..f2635be140d --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __R300_FRAGPROG_SWIZZLE_H_ +#define __R300_FRAGPROG_SWIZZLE_H_ + +#include "radeon_swizzle.h" + +extern struct rc_swizzle_caps r300_swizzle_caps; + +unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle); +unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle); +int r300_swizzle_is_native_basic(unsigned int swizzle); + +#endif /* __R300_FRAGPROG_SWIZZLE_H_ */ diff --git a/src/gallium/drivers/r300/compiler/r3xx_fragprog.c b/src/gallium/drivers/r300/compiler/r3xx_fragprog.c new file mode 100644 index 00000000000..bb6c010e8e3 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r3xx_fragprog.c @@ -0,0 +1,172 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include <stdio.h> + +#include "radeon_compiler_util.h" +#include "radeon_dataflow.h" +#include "radeon_emulate_branches.h" +#include "radeon_emulate_loops.h" +#include "radeon_program_alu.h" +#include "radeon_program_tex.h" +#include "radeon_rename_regs.h" +#include "radeon_remove_constants.h" +#include "r300_fragprog.h" +#include "r300_fragprog_swizzle.h" +#include "r500_fragprog.h" + + +static void dataflow_outputs_mark_use(void * userdata, void * data, + void (*callback)(void *, unsigned int, unsigned int)) +{ + struct r300_fragment_program_compiler * c = userdata; + callback(data, c->OutputColor[0], RC_MASK_XYZW); + callback(data, c->OutputColor[1], RC_MASK_XYZW); + callback(data, c->OutputColor[2], RC_MASK_XYZW); + callback(data, c->OutputColor[3], RC_MASK_XYZW); + callback(data, c->OutputDepth, RC_MASK_W); +} + +static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user) +{ + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; + struct rc_instruction *rci; + + for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) { + struct rc_sub_instruction * inst = &rci->U.I; + unsigned i; + const struct rc_opcode_info *info = rc_get_opcode_info(inst->Opcode); + + if (inst->DstReg.File != RC_FILE_OUTPUT || inst->DstReg.Index != c->OutputDepth) + continue; + + if (inst->DstReg.WriteMask & RC_MASK_Z) { + inst->DstReg.WriteMask = RC_MASK_W; + } else { + inst->DstReg.WriteMask = 0; + continue; + } + + if (!info->IsComponentwise) { + continue; + } + + for (i = 0; i < info->NumSrcRegs; i++) { + inst->SrcReg[i] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[i]); + } + } +} + +static int radeon_saturate_output( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data) +{ + const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); + + if (!info->HasDstReg || inst->U.I.DstReg.File != RC_FILE_OUTPUT) + return 0; + + inst->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; + return 1; +} + +void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) +{ + int is_r500 = c->Base.is_r500; + int opt = !c->Base.disable_optimizations; + int sat_out = c->state.frag_clamp; + + /* Lists of instruction transformations. */ + struct radeon_program_transformation saturate_output[] = { + { &radeon_saturate_output, c }, + { 0, 0 } + }; + + struct radeon_program_transformation rewrite_tex[] = { + { &radeonTransformTEX, c }, + { 0, 0 } + }; + + struct radeon_program_transformation rewrite_if[] = { + { &r500_transform_IF, 0 }, + {0, 0} + }; + + struct radeon_program_transformation native_rewrite_r500[] = { + { &radeonTransformALU, 0 }, + { &radeonTransformDeriv, 0 }, + { &radeonTransformTrigScale, 0 }, + { 0, 0 } + }; + + struct radeon_program_transformation native_rewrite_r300[] = { + { &radeonTransformALU, 0 }, + { &r300_transform_trig_simple, 0 }, + { 0, 0 } + }; + + /* List of compiler passes. */ + struct radeon_compiler_pass fs_list[] = { + /* NAME DUMP PREDICATE FUNCTION PARAM */ + {"rewrite depth out", 1, 1, rc_rewrite_depth_out, NULL}, + /* This transformation needs to be done before any of the IF + * instructions are modified. */ + {"transform KILP", 1, 1, rc_transform_KILP, NULL}, + {"unroll loops", 1, is_r500, rc_unroll_loops, NULL}, + {"transform loops", 1, !is_r500, rc_transform_loops, NULL}, + {"emulate branches", 1, !is_r500, rc_emulate_branches, NULL}, + {"saturate output writes", 1, sat_out, rc_local_transform, saturate_output}, + {"transform TEX", 1, 1, rc_local_transform, rewrite_tex}, + {"transform IF", 1, is_r500, rc_local_transform, rewrite_if}, + {"native rewrite", 1, is_r500, rc_local_transform, native_rewrite_r500}, + {"native rewrite", 1, !is_r500, rc_local_transform, native_rewrite_r300}, + {"deadcode", 1, opt, rc_dataflow_deadcode, dataflow_outputs_mark_use}, + {"emulate loops", 1, !is_r500, rc_emulate_loops, NULL}, + {"dataflow optimize", 1, opt, rc_optimize, NULL}, + {"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL}, + {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table}, + /* This pass makes it easier for the scheduler to group TEX + * instructions and reduces the chances of creating too + * many texture indirections.*/ + {"register rename", 1, !is_r500, rc_rename_regs, NULL}, + {"pair translate", 1, 1, rc_pair_translate, NULL}, + {"pair scheduling", 1, 1, rc_pair_schedule, NULL}, + {"dead sources", 1, 1, rc_pair_remove_dead_sources, NULL}, + {"register allocation", 1, 1, rc_pair_regalloc, &opt}, + {"final code validation", 0, 1, rc_validate_final_shader, NULL}, + {"machine code generation", 0, is_r500, r500BuildFragmentProgramHwCode, NULL}, + {"machine code generation", 0, !is_r500, r300BuildFragmentProgramHwCode, NULL}, + {"dump machine code", 0, is_r500 && (c->Base.Debug & RC_DBG_LOG), r500FragmentProgramDump, NULL}, + {"dump machine code", 0, !is_r500 && (c->Base.Debug & RC_DBG_LOG), r300FragmentProgramDump, NULL}, + {NULL, 0, 0, NULL, NULL} + }; + + c->Base.type = RC_FRAGMENT_PROGRAM; + c->Base.SwizzleCaps = c->Base.is_r500 ? &r500_swizzle_caps : &r300_swizzle_caps; + + rc_run_compiler(&c->Base, fs_list); + + rc_constants_copy(&c->code->constants, &c->Base.Program.Constants); +} diff --git a/src/gallium/drivers/r300/compiler/r3xx_vertprog.c b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c new file mode 100644 index 00000000000..654f9a070d5 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c @@ -0,0 +1,1045 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include <stdio.h> + +#include "../r300_reg.h" + +#include "radeon_compiler_util.h" +#include "radeon_dataflow.h" +#include "radeon_program_alu.h" +#include "radeon_swizzle.h" +#include "radeon_emulate_branches.h" +#include "radeon_emulate_loops.h" +#include "radeon_remove_constants.h" + +struct loop { + int BgnLoop; + +}; + +/* + * Take an already-setup and valid source then swizzle it appropriately to + * obtain a constant ZERO or ONE source. + */ +#define __CONST(x, y) \ + (PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_src_class(vpi->SrcReg[x].File), \ + RC_MASK_NONE) | (vpi->SrcReg[x].RelAddr << 4)) + + +static unsigned long t_dst_mask(unsigned int mask) +{ + /* RC_MASK_* is equivalent to VSF_FLAG_* */ + return mask & RC_MASK_XYZW; +} + +static unsigned long t_dst_class(rc_register_file file) +{ + switch (file) { + default: + fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file); + /* fall-through */ + case RC_FILE_TEMPORARY: + return PVS_DST_REG_TEMPORARY; + case RC_FILE_OUTPUT: + return PVS_DST_REG_OUT; + case RC_FILE_ADDRESS: + return PVS_DST_REG_A0; + } +} + +static unsigned long t_dst_index(struct r300_vertex_program_code *vp, + struct rc_dst_register *dst) +{ + if (dst->File == RC_FILE_OUTPUT) + return vp->outputs[dst->Index]; + + return dst->Index; +} + +static unsigned long t_src_class(rc_register_file file) +{ + switch (file) { + default: + fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file); + /* fall-through */ + case RC_FILE_NONE: + case RC_FILE_TEMPORARY: + return PVS_SRC_REG_TEMPORARY; + case RC_FILE_INPUT: + return PVS_SRC_REG_INPUT; + case RC_FILE_CONSTANT: + return PVS_SRC_REG_CONSTANT; + } +} + +static int t_src_conflict(struct rc_src_register a, struct rc_src_register b) +{ + unsigned long aclass = t_src_class(a.File); + unsigned long bclass = t_src_class(b.File); + + if (aclass != bclass) + return 0; + if (aclass == PVS_SRC_REG_TEMPORARY) + return 0; + + if (a.RelAddr || b.RelAddr) + return 1; + if (a.Index != b.Index) + return 1; + + return 0; +} + +static inline unsigned long t_swizzle(unsigned int swizzle) +{ + /* this is in fact a NOP as the Mesa RC_SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ + return swizzle; +} + +static unsigned long t_src_index(struct r300_vertex_program_code *vp, + struct rc_src_register *src) +{ + if (src->File == RC_FILE_INPUT) { + assert(vp->inputs[src->Index] != -1); + return vp->inputs[src->Index]; + } else { + if (src->Index < 0) { + fprintf(stderr, + "negative offsets for indirect addressing do not work.\n"); + return 0; + } + return src->Index; + } +} + +/* these two functions should probably be merged... */ + +static unsigned long t_src(struct r300_vertex_program_code *vp, + struct rc_src_register *src) +{ + /* src->Negate uses the RC_MASK_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ + return PVS_SRC_OPERAND(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 1)), + t_swizzle(GET_SWZ(src->Swizzle, 2)), + t_swizzle(GET_SWZ(src->Swizzle, 3)), + t_src_class(src->File), + src->Negate) | + (src->RelAddr << 4) | (src->Abs << 3); +} + +static unsigned long t_src_scalar(struct r300_vertex_program_code *vp, + struct rc_src_register *src) +{ + /* src->Negate uses the RC_MASK_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ + return PVS_SRC_OPERAND(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_src_class(src->File), + src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) | + (src->RelAddr << 4) | (src->Abs << 3); +} + +static int valid_dst(struct r300_vertex_program_code *vp, + struct rc_dst_register *dst) +{ + if (dst->File == RC_FILE_OUTPUT && vp->outputs[dst->Index] == -1) { + return 0; + } else if (dst->File == RC_FILE_ADDRESS) { + assert(dst->Index == 0); + } + + return 1; +} + +static void ei_vector1(struct r300_vertex_program_code *vp, + unsigned int hw_opcode, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, + 0, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); + inst[3] = __CONST(0, RC_SWIZZLE_ZERO); +} + +static void ei_vector2(struct r300_vertex_program_code *vp, + unsigned int hw_opcode, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, + 0, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = t_src(vp, &vpi->SrcReg[1]); + inst[3] = __CONST(1, RC_SWIZZLE_ZERO); +} + +static void ei_math1(struct r300_vertex_program_code *vp, + unsigned int hw_opcode, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, + 1, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); + inst[3] = __CONST(0, RC_SWIZZLE_ZERO); +} + +static void ei_lit(struct r300_vertex_program_code *vp, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ + //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} + + inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, + 1, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + /* NOTE: Users swizzling might not work. */ + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | + (vpi->SrcReg[0].RelAddr << 4); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | + (vpi->SrcReg[0].RelAddr << 4); + inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | + (vpi->SrcReg[0].RelAddr << 4); +} + +static void ei_mad(struct r300_vertex_program_code *vp, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ + unsigned int i; + /* Remarks about hardware limitations of MAD + * (please preserve this comment, as this information is _NOT_ + * in the documentation provided by AMD). + * + * As described in the documentation, MAD with three unique temporary + * source registers requires the use of the macro version. + * + * However (and this is not mentioned in the documentation), apparently + * the macro version is _NOT_ a full superset of the normal version. + * In particular, the macro version does not always work when relative + * addressing is used in the source operands. + * + * This limitation caused incorrect rendering in Sauerbraten's OpenGL + * assembly shader path when using medium quality animations + * (i.e. animations with matrix blending instead of quaternion blending). + * + * Unfortunately, I (nha) have been unable to extract a Piglit regression + * test for this issue - for some reason, it is possible to have vertex + * programs whose prefix is *exactly* the same as the prefix of the + * offending program in Sauerbraten up to the offending instruction + * without causing any trouble. + * + * Bottom line: Only use the macro version only when really necessary; + * according to AMD docs, this should improve performance by one clock + * as a nice side bonus. + */ + if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY && + vpi->SrcReg[1].File == RC_FILE_TEMPORARY && + vpi->SrcReg[2].File == RC_FILE_TEMPORARY && + vpi->SrcReg[0].Index != vpi->SrcReg[1].Index && + vpi->SrcReg[0].Index != vpi->SrcReg[2].Index && + vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) { + inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, + 0, + 1, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + } else { + inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, + 0, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + /* Arguments with constant swizzles still count as a unique + * temporary, so we should make sure these arguments share a + * register index with one of the other arguments. */ + for (i = 0; i < 3; i++) { + unsigned int j; + if (vpi->SrcReg[i].File != RC_FILE_NONE) + continue; + + for (j = 0; j < 3; j++) { + if (i != j) { + vpi->SrcReg[i].Index = + vpi->SrcReg[j].Index; + break; + } + } + } + } + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = t_src(vp, &vpi->SrcReg[1]); + inst[3] = t_src(vp, &vpi->SrcReg[2]); +} + +static void ei_pow(struct r300_vertex_program_code *vp, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, + 1, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); + inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]); +} + +static void mark_write(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + unsigned int * writemasks = userdata; + + if (file != RC_FILE_TEMPORARY) + return; + + if (index >= R300_VS_MAX_TEMPS) + return; + + writemasks[index] |= mask; +} + +static unsigned long t_pred_src(struct r300_vertex_program_compiler * compiler) +{ + return PVS_SRC_OPERAND(compiler->PredicateIndex, + t_swizzle(RC_SWIZZLE_ZERO), + t_swizzle(RC_SWIZZLE_ZERO), + t_swizzle(RC_SWIZZLE_ZERO), + t_swizzle(RC_SWIZZLE_W), + t_src_class(RC_FILE_TEMPORARY), + 0); +} + +static unsigned long t_pred_dst(struct r300_vertex_program_compiler * compiler, + unsigned int hw_opcode, int is_math) +{ + return PVS_OP_DST_OPERAND(hw_opcode, + is_math, + 0, + compiler->PredicateIndex, + RC_MASK_W, + t_dst_class(RC_FILE_TEMPORARY)); + +} + +static void ei_if(struct r300_vertex_program_compiler * compiler, + struct rc_instruction *rci, + unsigned int * inst, + unsigned int branch_depth) +{ + unsigned int predicate_opcode; + int is_math = 0; + + if (!compiler->Base.is_r500) { + rc_error(&compiler->Base,"Opcode IF not supported\n"); + return; + } + + /* Reserve a temporary to use as our predicate stack counter, if we + * don't already have one. */ + if (!compiler->PredicateMask) { + unsigned int writemasks[RC_REGISTER_MAX_INDEX]; + struct rc_instruction * inst; + unsigned int i; + memset(writemasks, 0, sizeof(writemasks)); + for(inst = compiler->Base.Program.Instructions.Next; + inst != &compiler->Base.Program.Instructions; + inst = inst->Next) { + rc_for_all_writes_mask(inst, mark_write, writemasks); + } + for(i = 0; i < compiler->Base.max_temp_regs; i++) { + unsigned int mask = ~writemasks[i] & RC_MASK_XYZW; + /* Only the W component can be used fo the predicate + * stack counter. */ + if (mask & RC_MASK_W) { + compiler->PredicateMask = RC_MASK_W; + compiler->PredicateIndex = i; + break; + } + } + if (i == compiler->Base.max_temp_regs) { + rc_error(&compiler->Base, "No free temporary to use for" + " predicate stack counter.\n"); + return; + } + } + predicate_opcode = + branch_depth ? VE_PRED_SET_NEQ_PUSH : ME_PRED_SET_NEQ; + + rci->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(rci->U.I.SrcReg[0].Swizzle,0)); + if (branch_depth == 0) { + is_math = 1; + predicate_opcode = ME_PRED_SET_NEQ; + inst[1] = t_src(compiler->code, &rci->U.I.SrcReg[0]); + inst[2] = 0; + } else { + predicate_opcode = VE_PRED_SET_NEQ_PUSH; + inst[1] = t_pred_src(compiler); + inst[2] = t_src(compiler->code, &rci->U.I.SrcReg[0]); + } + + inst[0] = t_pred_dst(compiler, predicate_opcode, is_math); + inst[3] = 0; + +} + +static void ei_else(struct r300_vertex_program_compiler * compiler, + unsigned int * inst) +{ + if (!compiler->Base.is_r500) { + rc_error(&compiler->Base,"Opcode ELSE not supported\n"); + return; + } + inst[0] = t_pred_dst(compiler, ME_PRED_SET_INV, 1); + inst[1] = t_pred_src(compiler); + inst[2] = 0; + inst[3] = 0; +} + +static void ei_endif(struct r300_vertex_program_compiler *compiler, + unsigned int * inst) +{ + if (!compiler->Base.is_r500) { + rc_error(&compiler->Base,"Opcode ENDIF not supported\n"); + return; + } + inst[0] = t_pred_dst(compiler, ME_PRED_SET_POP, 1); + inst[1] = t_pred_src(compiler); + inst[2] = 0; + inst[3] = 0; +} + +static void translate_vertex_program(struct radeon_compiler *c, void *user) +{ + struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c; + struct rc_instruction *rci; + + struct loop * loops = NULL; + int current_loop_depth = 0; + int loops_reserved = 0; + + unsigned int branch_depth = 0; + + compiler->code->pos_end = 0; /* Not supported yet */ + compiler->code->length = 0; + compiler->code->num_temporaries = 0; + + compiler->SetHwInputOutput(compiler); + + for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) { + struct rc_sub_instruction *vpi = &rci->U.I; + unsigned int *inst = compiler->code->body.d + compiler->code->length; + const struct rc_opcode_info *info = rc_get_opcode_info(vpi->Opcode); + + /* Skip instructions writing to non-existing destination */ + if (!valid_dst(compiler->code, &vpi->DstReg)) + continue; + + if (info->HasDstReg) { + /* Neither is Saturate. */ + if (vpi->SaturateMode != RC_SATURATE_NONE) { + rc_error(&compiler->Base, "Vertex program does not support the Saturate " + "modifier (yet).\n"); + } + } + + if (compiler->code->length >= c->max_alu_insts * 4) { + rc_error(&compiler->Base, "Vertex program has too many instructions\n"); + return; + } + + assert(compiler->Base.is_r500 || + (vpi->Opcode != RC_OPCODE_SEQ && + vpi->Opcode != RC_OPCODE_SNE)); + + switch (vpi->Opcode) { + case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break; + case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break; + case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break; + case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break; + case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break; + case RC_OPCODE_ELSE: ei_else(compiler, inst); break; + case RC_OPCODE_ENDIF: ei_endif(compiler, inst); branch_depth--; break; + case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break; + case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break; + case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break; + case RC_OPCODE_IF: ei_if(compiler, rci, inst, branch_depth); branch_depth++; break; + case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break; + case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break; + case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break; + case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break; + case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break; + case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break; + case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break; + case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break; + case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break; + case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break; + case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break; + case RC_OPCODE_SEQ: ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); break; + case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break; + case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break; + case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break; + case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break; + case RC_OPCODE_BGNLOOP: + { + struct loop * l; + + if ((!compiler->Base.is_r500 + && loops_reserved >= R300_VS_MAX_LOOP_DEPTH) + || loops_reserved >= R500_VS_MAX_FC_DEPTH) { + rc_error(&compiler->Base, + "Loops are nested too deep."); + return; + } + memory_pool_array_reserve(&compiler->Base.Pool, + struct loop, loops, current_loop_depth, + loops_reserved, 1); + l = &loops[current_loop_depth++]; + memset(l , 0, sizeof(struct loop)); + l->BgnLoop = (compiler->code->length / 4); + continue; + } + case RC_OPCODE_ENDLOOP: + { + struct loop * l; + unsigned int act_addr; + unsigned int last_addr; + unsigned int ret_addr; + + assert(loops); + l = &loops[current_loop_depth - 1]; + act_addr = l->BgnLoop - 1; + last_addr = (compiler->code->length / 4) - 1; + ret_addr = l->BgnLoop; + + if (loops_reserved >= R300_VS_MAX_FC_OPS) { + rc_error(&compiler->Base, + "Too many flow control instructions."); + return; + } + if (compiler->Base.is_r500) { + compiler->code->fc_op_addrs.r500 + [compiler->code->num_fc_ops].lw = + R500_PVS_FC_ACT_ADRS(act_addr) + | R500_PVS_FC_LOOP_CNT_JMP_INST(0xffff) + ; + compiler->code->fc_op_addrs.r500 + [compiler->code->num_fc_ops].uw = + R500_PVS_FC_LAST_INST(last_addr) + | R500_PVS_FC_RTN_INST(ret_addr) + ; + } else { + compiler->code->fc_op_addrs.r300 + [compiler->code->num_fc_ops] = + R300_PVS_FC_ACT_ADRS(act_addr) + | R300_PVS_FC_LOOP_CNT_JMP_INST(0xff) + | R300_PVS_FC_LAST_INST(last_addr) + | R300_PVS_FC_RTN_INST(ret_addr) + ; + } + compiler->code->fc_loop_index[compiler->code->num_fc_ops] = + R300_PVS_FC_LOOP_INIT_VAL(0x0) + | R300_PVS_FC_LOOP_STEP_VAL(0x1) + ; + compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP( + compiler->code->num_fc_ops); + compiler->code->num_fc_ops++; + current_loop_depth--; + continue; + } + + default: + rc_error(&compiler->Base, "Unknown opcode %s\n", info->Name); + return; + } + + /* Non-flow control instructions that are inside an if statement + * need to pay attention to the predicate bit. */ + if (branch_depth + && vpi->Opcode != RC_OPCODE_IF + && vpi->Opcode != RC_OPCODE_ELSE + && vpi->Opcode != RC_OPCODE_ENDIF) { + + inst[0] |= (PVS_DST_PRED_ENABLE_MASK + << PVS_DST_PRED_ENABLE_SHIFT); + inst[0] |= (PVS_DST_PRED_SENSE_MASK + << PVS_DST_PRED_SENSE_SHIFT); + } + + /* Update the number of temporaries. */ + if (info->HasDstReg && vpi->DstReg.File == RC_FILE_TEMPORARY && + vpi->DstReg.Index >= compiler->code->num_temporaries) + compiler->code->num_temporaries = vpi->DstReg.Index + 1; + + for (unsigned i = 0; i < info->NumSrcRegs; i++) + if (vpi->SrcReg[i].File == RC_FILE_TEMPORARY && + vpi->SrcReg[i].Index >= compiler->code->num_temporaries) + compiler->code->num_temporaries = vpi->SrcReg[i].Index + 1; + + if (compiler->PredicateMask) + if (compiler->PredicateIndex >= compiler->code->num_temporaries) + compiler->code->num_temporaries = compiler->PredicateIndex + 1; + + if (compiler->code->num_temporaries > compiler->Base.max_temp_regs) { + rc_error(&compiler->Base, "Too many temporaries.\n"); + return; + } + + compiler->code->length += 4; + + if (compiler->Base.Error) + return; + } +} + +struct temporary_allocation { + unsigned int Allocated:1; + unsigned int HwTemp:15; + struct rc_instruction * LastRead; +}; + +static void allocate_temporary_registers(struct radeon_compiler *c, void *user) +{ + struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c; + struct rc_instruction *inst; + struct rc_instruction *end_loop = NULL; + unsigned int num_orig_temps = 0; + char hwtemps[RC_REGISTER_MAX_INDEX]; + struct temporary_allocation * ta; + unsigned int i, j; + + memset(hwtemps, 0, sizeof(hwtemps)); + + rc_recompute_ips(c); + + /* Pass 1: Count original temporaries. */ + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { + if (inst->U.I.SrcReg[i].Index >= num_orig_temps) + num_orig_temps = inst->U.I.SrcReg[i].Index + 1; + } + } + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) { + if (inst->U.I.DstReg.Index >= num_orig_temps) + num_orig_temps = inst->U.I.DstReg.Index + 1; + } + } + } + + ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool, + sizeof(struct temporary_allocation) * num_orig_temps); + memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps); + + /* Pass 2: Determine original temporary lifetimes */ + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + /* Instructions inside of loops need to use the ENDLOOP + * instruction as their LastRead. */ + if (!end_loop && inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { + int endloops = 1; + struct rc_instruction * ptr; + for(ptr = inst->Next; + ptr != &compiler->Base.Program.Instructions; + ptr = ptr->Next){ + if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) { + endloops++; + } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) { + endloops--; + if (endloops <= 0) { + end_loop = ptr; + break; + } + } + } + } + + if (inst == end_loop) { + end_loop = NULL; + continue; + } + + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { + ta[inst->U.I.SrcReg[i].Index].LastRead = end_loop ? end_loop : inst; + } + } + } + + /* Pass 3: Register allocation */ + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { + unsigned int orig = inst->U.I.SrcReg[i].Index; + inst->U.I.SrcReg[i].Index = ta[orig].HwTemp; + + if (ta[orig].Allocated && inst == ta[orig].LastRead) + hwtemps[ta[orig].HwTemp] = 0; + } + } + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) { + unsigned int orig = inst->U.I.DstReg.Index; + + if (!ta[orig].Allocated) { + for(j = 0; j < c->max_temp_regs; ++j) { + if (!hwtemps[j]) + break; + } + ta[orig].Allocated = 1; + ta[orig].HwTemp = j; + hwtemps[ta[orig].HwTemp] = 1; + } + + inst->U.I.DstReg.Index = ta[orig].HwTemp; + } + } + } +} + +/** + * R3xx-R4xx vertex engine does not support the Absolute source operand modifier + * and the Saturate opcode modifier. Only Absolute is currently transformed. + */ +static int transform_nonnative_modifiers( + struct radeon_compiler *c, + struct rc_instruction *inst, + void* unused) +{ + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + /* Transform ABS(a) to MAX(a, -a). */ + for (i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].Abs) { + struct rc_instruction *new_inst; + unsigned temp; + + inst->U.I.SrcReg[i].Abs = 0; + + temp = rc_find_free_temporary(c); + + new_inst = rc_insert_new_instruction(c, inst->Prev); + new_inst->U.I.Opcode = RC_OPCODE_MAX; + new_inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + new_inst->U.I.DstReg.Index = temp; + new_inst->U.I.SrcReg[0] = inst->U.I.SrcReg[i]; + new_inst->U.I.SrcReg[1] = inst->U.I.SrcReg[i]; + new_inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; + + memset(&inst->U.I.SrcReg[i], 0, sizeof(inst->U.I.SrcReg[i])); + inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[i].Index = temp; + inst->U.I.SrcReg[i].Swizzle = RC_SWIZZLE_XYZW; + } + } + return 1; +} + +/** + * Vertex engine cannot read two inputs or two constants at the same time. + * Introduce intermediate MOVs to temporary registers to account for this. + */ +static int transform_source_conflicts( + struct radeon_compiler *c, + struct rc_instruction* inst, + void* unused) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->NumSrcRegs == 3) { + if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]) + || t_src_conflict(inst->U.I.SrcReg[0], inst->U.I.SrcReg[2])) { + int tmpreg = rc_find_free_temporary(c); + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = tmpreg; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + + reset_srcreg(&inst->U.I.SrcReg[2]); + inst->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[2].Index = tmpreg; + } + } + + if (opcode->NumSrcRegs >= 2) { + if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[0])) { + int tmpreg = rc_find_free_temporary(c); + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = tmpreg; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + + reset_srcreg(&inst->U.I.SrcReg[1]); + inst->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[1].Index = tmpreg; + } + } + + return 1; +} + +static void rc_vs_add_artificial_outputs(struct radeon_compiler *c, void *user) +{ + struct r300_vertex_program_compiler * compiler = (struct r300_vertex_program_compiler*)c; + int i; + + for(i = 0; i < 32; ++i) { + if ((compiler->RequiredOutputs & (1 << i)) && + !(compiler->Base.Program.OutputsWritten & (1 << i))) { + struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev); + inst->U.I.Opcode = RC_OPCODE_MOV; + + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = i; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + + inst->U.I.SrcReg[0].File = RC_FILE_CONSTANT; + inst->U.I.SrcReg[0].Index = 0; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + + compiler->Base.Program.OutputsWritten |= 1 << i; + } + } +} + +static void dataflow_outputs_mark_used(void * userdata, void * data, + void (*callback)(void *, unsigned int, unsigned int)) +{ + struct r300_vertex_program_compiler * c = userdata; + int i; + + for(i = 0; i < 32; ++i) { + if (c->RequiredOutputs & (1 << i)) + callback(data, i, RC_MASK_XYZW); + } +} + +static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) +{ + (void) opcode; + (void) reg; + + return 1; +} + +static void transform_negative_addressing(struct r300_vertex_program_compiler *c, + struct rc_instruction *arl, + struct rc_instruction *end, + int min_offset) +{ + struct rc_instruction *inst, *add; + unsigned const_swizzle; + + /* Transform ARL */ + add = rc_insert_new_instruction(&c->Base, arl->Prev); + add->U.I.Opcode = RC_OPCODE_ADD; + add->U.I.DstReg.File = RC_FILE_TEMPORARY; + add->U.I.DstReg.Index = rc_find_free_temporary(&c->Base); + add->U.I.DstReg.WriteMask = RC_MASK_X; + add->U.I.SrcReg[0] = arl->U.I.SrcReg[0]; + add->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + add->U.I.SrcReg[1].Index = rc_constants_add_immediate_scalar(&c->Base.Program.Constants, + min_offset, &const_swizzle); + add->U.I.SrcReg[1].Swizzle = const_swizzle; + + arl->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + arl->U.I.SrcReg[0].Index = add->U.I.DstReg.Index; + arl->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XXXX; + + /* Rewrite offsets up to and excluding inst. */ + for (inst = arl->Next; inst != end; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + for (unsigned i = 0; i < opcode->NumSrcRegs; i++) + if (inst->U.I.SrcReg[i].RelAddr) + inst->U.I.SrcReg[i].Index -= min_offset; + } +} + +static void rc_emulate_negative_addressing(struct radeon_compiler *compiler, void *user) +{ + struct r300_vertex_program_compiler * c = (struct r300_vertex_program_compiler*)compiler; + struct rc_instruction *inst, *lastARL = NULL; + int min_offset = 0; + + for (inst = c->Base.Program.Instructions.Next; inst != &c->Base.Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (inst->U.I.Opcode == RC_OPCODE_ARL) { + if (lastARL != NULL && min_offset < 0) + transform_negative_addressing(c, lastARL, inst, min_offset); + + lastARL = inst; + min_offset = 0; + continue; + } + + for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].RelAddr && + inst->U.I.SrcReg[i].Index < 0) { + /* ARL must precede any indirect addressing. */ + if (lastARL == NULL) { + rc_error(&c->Base, "Vertex shader: Found relative addressing without ARL."); + return; + } + + if (inst->U.I.SrcReg[i].Index < min_offset) + min_offset = inst->U.I.SrcReg[i].Index; + } + } + } + + if (lastARL != NULL && min_offset < 0) + transform_negative_addressing(c, lastARL, inst, min_offset); +} + +static struct rc_swizzle_caps r300_vertprog_swizzle_caps = { + .IsNative = &swizzle_is_native, + .Split = 0 /* should never be called */ +}; + +void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c) +{ + int is_r500 = c->Base.is_r500; + int opt = !c->Base.disable_optimizations; + + /* Lists of instruction transformations. */ + struct radeon_program_transformation alu_rewrite_r500[] = { + { &r300_transform_vertex_alu, 0 }, + { &r300_transform_trig_scale_vertex, 0 }, + { 0, 0 } + }; + + struct radeon_program_transformation alu_rewrite_r300[] = { + { &r300_transform_vertex_alu, 0 }, + { &r300_transform_trig_simple, 0 }, + { 0, 0 } + }; + + /* Note: These passes have to be done seperately from ALU rewrite, + * otherwise non-native ALU instructions with source conflits + * or non-native modifiers will not be treated properly. + */ + struct radeon_program_transformation emulate_modifiers[] = { + { &transform_nonnative_modifiers, 0 }, + { 0, 0 } + }; + + struct radeon_program_transformation resolve_src_conflicts[] = { + { &transform_source_conflicts, 0 }, + { 0, 0 } + }; + + /* List of compiler passes. */ + struct radeon_compiler_pass vs_list[] = { + /* NAME DUMP PREDICATE FUNCTION PARAM */ + {"add artificial outputs", 0, 1, rc_vs_add_artificial_outputs, NULL}, + {"transform loops", 1, 1, rc_transform_loops, NULL}, + {"emulate branches", 1, !is_r500, rc_emulate_branches, NULL}, + {"emulate negative addressing", 1, 1, rc_emulate_negative_addressing, NULL}, + {"native rewrite", 1, is_r500, rc_local_transform, alu_rewrite_r500}, + {"native rewrite", 1, !is_r500, rc_local_transform, alu_rewrite_r300}, + {"emulate modifiers", 1, !is_r500, rc_local_transform, emulate_modifiers}, + {"deadcode", 1, opt, rc_dataflow_deadcode, dataflow_outputs_mark_used}, + {"dataflow optimize", 1, opt, rc_optimize, NULL}, + /* This pass must be done after optimizations. */ + {"source conflict resolve", 1, 1, rc_local_transform, resolve_src_conflicts}, + {"register allocation", 1, opt, allocate_temporary_registers, NULL}, + {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table}, + {"final code validation", 0, 1, rc_validate_final_shader, NULL}, + {"machine code generation", 0, 1, translate_vertex_program, NULL}, + {"dump machine code", 0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump, NULL}, + {NULL, 0, 0, NULL, NULL} + }; + + c->Base.type = RC_VERTEX_PROGRAM; + c->Base.SwizzleCaps = &r300_vertprog_swizzle_caps; + + rc_run_compiler(&c->Base, vs_list); + + c->code->InputsRead = c->Base.Program.InputsRead; + c->code->OutputsWritten = c->Base.Program.OutputsWritten; + rc_constants_copy(&c->code->constants, &c->Base.Program.Constants); +} diff --git a/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c b/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c new file mode 100644 index 00000000000..2bc0a87eed8 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c @@ -0,0 +1,207 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" +#include "radeon_code.h" +#include "../r300_reg.h" + +#include <stdio.h> + +static char* r300_vs_ve_ops[] = { + /* R300 vector ops */ + " VE_NO_OP", + " VE_DOT_PRODUCT", + " VE_MULTIPLY", + " VE_ADD", + " VE_MULTIPLY_ADD", + " VE_DISTANCE_FACTOR", + " VE_FRACTION", + " VE_MAXIMUM", + " VE_MINIMUM", + "VE_SET_GREATER_THAN_EQUAL", + " VE_SET_LESS_THAN", + " VE_MULTIPLYX2_ADD", + " VE_MULTIPLY_CLAMP", + " VE_FLT2FIX_DX", + " VE_FLT2FIX_DX_RND", + /* R500 vector ops */ + " VE_PRED_SET_EQ_PUSH", + " VE_PRED_SET_GT_PUSH", + " VE_PRED_SET_GTE_PUSH", + " VE_PRED_SET_NEQ_PUSH", + " VE_COND_WRITE_EQ", + " VE_COND_WRITE_GT", + " VE_COND_WRITE_GTE", + " VE_COND_WRITE_NEQ", + " VE_COND_MUX_EQ", + " VE_COND_MUX_GT", + " VE_COND_MUX_GTE", + " VE_SET_GREATER_THAN", + " VE_SET_EQUAL", + " VE_SET_NOT_EQUAL", + " (reserved)", + " (reserved)", + " (reserved)", +}; + +static char* r300_vs_me_ops[] = { + /* R300 math ops */ + " ME_NO_OP", + " ME_EXP_BASE2_DX", + " ME_LOG_BASE2_DX", + " ME_EXP_BASEE_FF", + " ME_LIGHT_COEFF_DX", + " ME_POWER_FUNC_FF", + " ME_RECIP_DX", + " ME_RECIP_FF", + " ME_RECIP_SQRT_DX", + " ME_RECIP_SQRT_FF", + " ME_MULTIPLY", + " ME_EXP_BASE2_FULL_DX", + " ME_LOG_BASE2_FULL_DX", + " ME_POWER_FUNC_FF_CLAMP_B", + "ME_POWER_FUNC_FF_CLAMP_B1", + "ME_POWER_FUNC_FF_CLAMP_01", + " ME_SIN", + " ME_COS", + /* R500 math ops */ + " ME_LOG_BASE2_IEEE", + " ME_RECIP_IEEE", + " ME_RECIP_SQRT_IEEE", + " ME_PRED_SET_EQ", + " ME_PRED_SET_GT", + " ME_PRED_SET_GTE", + " ME_PRED_SET_NEQ", + " ME_PRED_SET_CLR", + " ME_PRED_SET_INV", + " ME_PRED_SET_POP", + " ME_PRED_SET_RESTORE", + " (reserved)", + " (reserved)", + " (reserved)", +}; + +/* XXX refactor to avoid clashing symbols */ +static char* r300_vs_src_debug[] = { + "t", + "i", + "c", + "a", +}; + +static char* r300_vs_dst_debug[] = { + "t", + "a0", + "o", + "ox", + "a", + "i", + "u", + "u", +}; + +static char* r300_vs_swiz_debug[] = { + "X", + "Y", + "Z", + "W", + "0", + "1", + "U", + "U", +}; + + +static void r300_vs_op_dump(uint32_t op) +{ + fprintf(stderr, " dst: %d%s op: ", + (op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]); + if ((op >> PVS_DST_PRED_ENABLE_SHIFT) & 0x1) { + fprintf(stderr, "PRED %u", + (op >> PVS_DST_PRED_SENSE_SHIFT) & 0x1); + } + if (op & 0x80) { + if (op & 0x1) { + fprintf(stderr, "PVS_MACRO_OP_2CLK_M2X_ADD\n"); + } else { + fprintf(stderr, " PVS_MACRO_OP_2CLK_MADD\n"); + } + } else if (op & 0x40) { + fprintf(stderr, "%s\n", r300_vs_me_ops[op & 0x1f]); + } else { + fprintf(stderr, "%s\n", r300_vs_ve_ops[op & 0x1f]); + } +} + +static void r300_vs_src_dump(uint32_t src) +{ + fprintf(stderr, " reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n", + (src >> 5) & 0xff, r300_vs_src_debug[src & 0x3], + src & (1 << 25) ? "-" : " ", + r300_vs_swiz_debug[(src >> 13) & 0x7], + src & (1 << 26) ? "-" : " ", + r300_vs_swiz_debug[(src >> 16) & 0x7], + src & (1 << 27) ? "-" : " ", + r300_vs_swiz_debug[(src >> 19) & 0x7], + src & (1 << 28) ? "-" : " ", + r300_vs_swiz_debug[(src >> 22) & 0x7]); +} + +void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user) +{ + struct r300_vertex_program_compiler *c = (struct r300_vertex_program_compiler*)compiler; + struct r300_vertex_program_code * vs = c->code; + unsigned instrcount = vs->length / 4; + unsigned i; + + fprintf(stderr, "Final vertex program code:\n"); + + for(i = 0; i < instrcount; i++) { + unsigned offset = i*4; + unsigned src; + + fprintf(stderr, "%d: op: 0x%08x", i, vs->body.d[offset]); + r300_vs_op_dump(vs->body.d[offset]); + + for(src = 0; src < 3; ++src) { + fprintf(stderr, " src%i: 0x%08x", src, vs->body.d[offset+1+src]); + r300_vs_src_dump(vs->body.d[offset+1+src]); + } + } + + fprintf(stderr, "Flow Control Ops: 0x%08x\n",vs->fc_ops); + for(i = 0; i < vs->num_fc_ops; i++) { + switch((vs->fc_ops >> (i * 2)) & 0x3 ) { + case 0: fprintf(stderr, "NOP"); break; + case 1: fprintf(stderr, "JUMP"); break; + case 2: fprintf(stderr, "LOOP"); break; + case 3: fprintf(stderr, "JSR"); break; + } + if (c->Base.is_r500) { + fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x\n", + vs->fc_op_addrs.r500[i].uw, + vs->fc_op_addrs.r500[i].lw); + } else { + fprintf(stderr,": 0x%08x\n", vs->fc_op_addrs.r300[i]); + } + } +} diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog.c b/src/gallium/drivers/r300/compiler/r500_fragprog.c new file mode 100644 index 00000000000..cf99f5e4538 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r500_fragprog.c @@ -0,0 +1,539 @@ +/* + * Copyright 2008 Corbin Simpson <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "r500_fragprog.h" + +#include <stdio.h> + +#include "radeon_compiler_util.h" +#include "radeon_list.h" +#include "radeon_variable.h" +#include "../r300_reg.h" + +/** + * Rewrite IF instructions to use the ALU result special register. + */ +int r500_transform_IF( + struct radeon_compiler * c, + struct rc_instruction * inst_if, + void *data) +{ + struct rc_variable * writer; + struct rc_list * writer_list, * list_ptr; + struct rc_list * var_list = rc_get_variables(c); + unsigned int generic_if = 0; + unsigned int alu_chan; + + if (inst_if->U.I.Opcode != RC_OPCODE_IF) { + return 0; + } + + writer_list = rc_variable_list_get_writers( + var_list, inst_if->Type, &inst_if->U.I.SrcReg[0]); + if (!writer_list) { + generic_if = 1; + } else { + + /* Make sure it is safe for the writers to write to + * ALU Result */ + for (list_ptr = writer_list; list_ptr; + list_ptr = list_ptr->Next) { + struct rc_instruction * inst; + writer = list_ptr->Item; + /* We are going to modify the destination register + * of writer, so if it has a reader other than + * inst_if (aka ReaderCount > 1) we must fall back to + * our generic IF. + * If the writer has a lower IP than inst_if, this + * means that inst_if is above the writer in a loop. + * I'm not sure why this would ever happen, but + * if it does we want to make sure we fall back + * to our generic IF. */ + if (writer->ReaderCount > 1 || writer->Inst->IP < inst_if->IP) { + generic_if = 1; + break; + } + + /* The ALU Result is not preserved across IF + * instructions, so if there is another IF + * instruction between writer and inst_if, then + * we need to fall back to generic IF. */ + for (inst = writer->Inst; inst != inst_if; inst = inst->Next) { + const struct rc_opcode_info * info = + rc_get_opcode_info(inst->U.I.Opcode); + if (info->IsFlowControl) { + generic_if = 1; + break; + } + } + if (generic_if) { + break; + } + } + } + + if (GET_SWZ(inst_if->U.I.SrcReg[0].Swizzle, 0) == RC_SWIZZLE_X) { + alu_chan = RC_ALURESULT_X; + } else { + alu_chan = RC_ALURESULT_W; + } + if (generic_if) { + struct rc_instruction * inst_mov = + rc_insert_new_instruction(c, inst_if->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.WriteMask = 0; + inst_mov->U.I.DstReg.File = RC_FILE_NONE; + inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL; + inst_mov->U.I.WriteALUResult = alu_chan; + inst_mov->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0]; + if (alu_chan == RC_ALURESULT_X) { + inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4( + inst_mov->U.I.SrcReg[0].Swizzle, + RC_SWIZZLE_X, RC_SWIZZLE_UNUSED, + RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED); + } else { + inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4( + inst_mov->U.I.SrcReg[0].Swizzle, + RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, + RC_SWIZZLE_UNUSED, RC_SWIZZLE_Z); + } + } else { + rc_compare_func compare_func = RC_COMPARE_FUNC_NEVER; + unsigned int reverse_srcs = 0; + unsigned int preserve_opcode = 0; + for (list_ptr = writer_list; list_ptr; + list_ptr = list_ptr->Next) { + writer = list_ptr->Item; + switch(writer->Inst->U.I.Opcode) { + case RC_OPCODE_SEQ: + compare_func = RC_COMPARE_FUNC_EQUAL; + break; + case RC_OPCODE_SNE: + compare_func = RC_COMPARE_FUNC_NOTEQUAL; + break; + case RC_OPCODE_SLE: + reverse_srcs = 1; + /* Fall through */ + case RC_OPCODE_SGE: + compare_func = RC_COMPARE_FUNC_GEQUAL; + break; + case RC_OPCODE_SGT: + reverse_srcs = 1; + /* Fall through */ + case RC_OPCODE_SLT: + compare_func = RC_COMPARE_FUNC_LESS; + break; + default: + compare_func = RC_COMPARE_FUNC_NOTEQUAL; + preserve_opcode = 1; + break; + } + if (!preserve_opcode) { + writer->Inst->U.I.Opcode = RC_OPCODE_SUB; + } + writer->Inst->U.I.DstReg.WriteMask = 0; + writer->Inst->U.I.DstReg.File = RC_FILE_NONE; + writer->Inst->U.I.WriteALUResult = alu_chan; + writer->Inst->U.I.ALUResultCompare = compare_func; + if (reverse_srcs) { + struct rc_src_register temp_src; + temp_src = writer->Inst->U.I.SrcReg[0]; + writer->Inst->U.I.SrcReg[0] = + writer->Inst->U.I.SrcReg[1]; + writer->Inst->U.I.SrcReg[1] = temp_src; + } + } + } + + inst_if->U.I.SrcReg[0].File = RC_FILE_SPECIAL; + inst_if->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT; + inst_if->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE( + RC_SWIZZLE_X, RC_SWIZZLE_UNUSED, + RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED); + inst_if->U.I.SrcReg[0].Negate = 0; + + return 1; +} + +static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) +{ + unsigned int relevant; + int i; + + if (opcode == RC_OPCODE_TEX || + opcode == RC_OPCODE_TXB || + opcode == RC_OPCODE_TXP || + opcode == RC_OPCODE_TXD || + opcode == RC_OPCODE_TXL || + opcode == RC_OPCODE_KIL) { + if (reg.Abs) + return 0; + + if (opcode == RC_OPCODE_KIL && (reg.Swizzle != RC_SWIZZLE_XYZW || reg.Negate != RC_MASK_NONE)) + return 0; + + for(i = 0; i < 4; ++i) { + unsigned int swz = GET_SWZ(reg.Swizzle, i); + if (swz == RC_SWIZZLE_UNUSED) { + reg.Negate &= ~(1 << i); + continue; + } + if (swz >= 4) + return 0; + } + + if (reg.Negate) + return 0; + + return 1; + } else if (opcode == RC_OPCODE_DDX || opcode == RC_OPCODE_DDY) { + /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles; + * if it doesn't fit perfectly into a .xyzw case... */ + if (reg.Swizzle == RC_SWIZZLE_XYZW && !reg.Abs && !reg.Negate) + return 1; + + return 0; + } else { + /* ALU instructions support almost everything */ + relevant = 0; + for(i = 0; i < 3; ++i) { + unsigned int swz = GET_SWZ(reg.Swizzle, i); + if (swz != RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO) + relevant |= 1 << i; + } + if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) + return 0; + + return 1; + } +} + +/** + * Split source register access. + * + * The only thing we *cannot* do in an ALU instruction is per-component + * negation. + */ +static void r500_swizzle_split(struct rc_src_register src, unsigned int usemask, + struct rc_swizzle_split * split) +{ + unsigned int negatebase[2] = { 0, 0 }; + int i; + + for(i = 0; i < 4; ++i) { + unsigned int swz = GET_SWZ(src.Swizzle, i); + if (swz == RC_SWIZZLE_UNUSED || !GET_BIT(usemask, i)) + continue; + negatebase[GET_BIT(src.Negate, i)] |= 1 << i; + } + + split->NumPhases = 0; + + for(i = 0; i <= 1; ++i) { + if (!negatebase[i]) + continue; + + split->Phase[split->NumPhases++] = negatebase[i]; + } +} + +struct rc_swizzle_caps r500_swizzle_caps = { + .IsNative = r500_swizzle_is_native, + .Split = r500_swizzle_split +}; + +static char *toswiz(int swiz_val) { + switch(swiz_val) { + case 0: return "R"; + case 1: return "G"; + case 2: return "B"; + case 3: return "A"; + case 4: return "0"; + case 5: return "H"; + case 6: return "1"; + case 7: return "U"; + } + return NULL; +} + +static char *toop(int op_val) +{ + char *str = NULL; + switch (op_val) { + case 0: str = "MAD"; break; + case 1: str = "DP3"; break; + case 2: str = "DP4"; break; + case 3: str = "D2A"; break; + case 4: str = "MIN"; break; + case 5: str = "MAX"; break; + case 6: str = "Reserved"; break; + case 7: str = "CND"; break; + case 8: str = "CMP"; break; + case 9: str = "FRC"; break; + case 10: str = "SOP"; break; + case 11: str = "MDH"; break; + case 12: str = "MDV"; break; + } + return str; +} + +static char *to_alpha_op(int op_val) +{ + char *str = NULL; + switch (op_val) { + case 0: str = "MAD"; break; + case 1: str = "DP"; break; + case 2: str = "MIN"; break; + case 3: str = "MAX"; break; + case 4: str = "Reserved"; break; + case 5: str = "CND"; break; + case 6: str = "CMP"; break; + case 7: str = "FRC"; break; + case 8: str = "EX2"; break; + case 9: str = "LN2"; break; + case 10: str = "RCP"; break; + case 11: str = "RSQ"; break; + case 12: str = "SIN"; break; + case 13: str = "COS"; break; + case 14: str = "MDH"; break; + case 15: str = "MDV"; break; + } + return str; +} + +static char *to_mask(int val) +{ + char *str = NULL; + switch(val) { + case 0: str = "NONE"; break; + case 1: str = "R"; break; + case 2: str = "G"; break; + case 3: str = "RG"; break; + case 4: str = "B"; break; + case 5: str = "RB"; break; + case 6: str = "GB"; break; + case 7: str = "RGB"; break; + case 8: str = "A"; break; + case 9: str = "AR"; break; + case 10: str = "AG"; break; + case 11: str = "ARG"; break; + case 12: str = "AB"; break; + case 13: str = "ARB"; break; + case 14: str = "AGB"; break; + case 15: str = "ARGB"; break; + } + return str; +} + +static char *to_texop(int val) +{ + switch(val) { + case 0: return "NOP"; + case 1: return "LD"; + case 2: return "TEXKILL"; + case 3: return "PROJ"; + case 4: return "LODBIAS"; + case 5: return "LOD"; + case 6: return "DXDY"; + } + return NULL; +} + +void r500FragmentProgramDump(struct radeon_compiler *c, void *user) +{ + struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; + struct r500_fragment_program_code *code = &compiler->code->code.r500; + int n, i; + uint32_t inst; + uint32_t inst0; + char *str = NULL; + fprintf(stderr, "R500 Fragment Program:\n--------\n"); + + for (n = 0; n < code->inst_end+1; n++) { + inst0 = inst = code->inst[n].inst0; + fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst); + switch(inst & 0x3) { + case R500_INST_TYPE_ALU: str = "ALU"; break; + case R500_INST_TYPE_OUT: str = "OUT"; break; + case R500_INST_TYPE_FC: str = "FC"; break; + case R500_INST_TYPE_TEX: str = "TEX"; break; + }; + fprintf(stderr,"%s %s %s %s %s ", str, + inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "", + inst & R500_INST_LAST ? "LAST" : "", + inst & R500_INST_NOP ? "NOP" : "", + inst & R500_INST_ALU_WAIT ? "ALU WAIT" : ""); + fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf), + to_mask((inst >> 15) & 0xf)); + + switch(inst0 & 0x3) { + case R500_INST_TYPE_ALU: + case R500_INST_TYPE_OUT: + fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1); + inst = code->inst[n].inst1; + + fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", + inst & 0xff, (inst & (1<<8)) ? 'c' : 't', + (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', + (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', + (inst >> 30)); + + fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2); + inst = code->inst[n].inst2; + fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", + inst & 0xff, (inst & (1<<8)) ? 'c' : 't', + (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', + (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', + (inst >> 30)); + fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3); + inst = code->inst[n].inst3; + fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d targ: %d\n", + (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7), + (inst >> 11) & 0x3, + (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7), + (inst >> 24) & 0x3, (inst >> 29) & 0x3); + + + fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4); + inst = code->inst[n].inst4; + fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d targ %d w:%d\n", to_alpha_op(inst & 0xf), + (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", + (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3, + (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3, + (inst >> 29) & 0x3, + (inst >> 31) & 0x1); + + fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5); + inst = code->inst[n].inst5; + fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf), + (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", + (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7), + (inst >> 23) & 0x3, + (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3); + break; + case R500_INST_TYPE_FC: + fprintf(stderr, "\t2:FC_INST 0x%08x:", code->inst[n].inst2); + inst = code->inst[n].inst2; + /* JUMP_FUNC JUMP_ANY*/ + fprintf(stderr, "0x%02x %1x ", inst >> 8 & 0xff, + (inst & R500_FC_JUMP_ANY) >> 5); + + /* OP */ + switch(inst & 0x7){ + case R500_FC_OP_JUMP: + fprintf(stderr, "JUMP"); + break; + case R500_FC_OP_LOOP: + fprintf(stderr, "LOOP"); + break; + case R500_FC_OP_ENDLOOP: + fprintf(stderr, "ENDLOOP"); + break; + case R500_FC_OP_REP: + fprintf(stderr, "REP"); + break; + case R500_FC_OP_ENDREP: + fprintf(stderr, "ENDREP"); + break; + case R500_FC_OP_BREAKLOOP: + fprintf(stderr, "BREAKLOOP"); + break; + case R500_FC_OP_BREAKREP: + fprintf(stderr, "BREAKREP"); + break; + case R500_FC_OP_CONTINUE: + fprintf(stderr, "CONTINUE"); + break; + } + fprintf(stderr," "); + /* A_OP */ + switch(inst & (0x3 << 6)){ + case R500_FC_A_OP_NONE: + fprintf(stderr, "NONE"); + break; + case R500_FC_A_OP_POP: + fprintf(stderr, "POP"); + break; + case R500_FC_A_OP_PUSH: + fprintf(stderr, "PUSH"); + break; + } + /* B_OP0 B_OP1 */ + for(i=0; i<2; i++){ + fprintf(stderr, " "); + switch(inst & (0x3 << (24 + (i * 2)))){ + /* R500_FC_B_OP0_NONE + * R500_FC_B_OP1_NONE */ + case 0: + fprintf(stderr, "NONE"); + break; + case R500_FC_B_OP0_DECR: + case R500_FC_B_OP1_DECR: + fprintf(stderr, "DECR"); + break; + case R500_FC_B_OP0_INCR: + case R500_FC_B_OP1_INCR: + fprintf(stderr, "INCR"); + break; + } + } + /*POP_CNT B_ELSE */ + fprintf(stderr, " %d %1x", (inst >> 16) & 0x1f, (inst & R500_FC_B_ELSE) >> 4); + inst = code->inst[n].inst3; + /* JUMP_ADDR */ + fprintf(stderr, " %d", inst >> 16); + + if(code->inst[n].inst2 & R500_FC_IGNORE_UNCOVERED){ + fprintf(stderr, " IGN_UNC"); + } + inst = code->inst[n].inst3; + fprintf(stderr, "\n\t3:FC_ADDR 0x%08x:", inst); + fprintf(stderr, "BOOL: 0x%02x, INT: 0x%02x, JUMP_ADDR: %d, JMP_GLBL: %1x\n", + inst & 0x1f, (inst >> 8) & 0x1f, (inst >> 16) & 0x1ff, inst >> 31); + break; + case R500_INST_TYPE_TEX: + inst = code->inst[n].inst1; + fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf, + to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "", + (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED"); + inst = code->inst[n].inst2; + fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst, + inst & 127, inst & (1<<7) ? "(rel)" : "", + toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3), + toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3), + (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "", + toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3), + toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3)); + + fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3); + break; + } + fprintf(stderr,"\n"); + } + +} diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog.h b/src/gallium/drivers/r300/compiler/r500_fragprog.h new file mode 100644 index 00000000000..6aa448cc6f7 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r500_fragprog.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Ben Skeggs <[email protected]> + * Jerome Glisse <[email protected]> + */ +#ifndef __R500_FRAGPROG_H_ +#define __R500_FRAGPROG_H_ + +#include "radeon_compiler.h" +#include "radeon_swizzle.h" + +extern void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user); + +extern void r500FragmentProgramDump(struct radeon_compiler *c, void *user); + +extern struct rc_swizzle_caps r500_swizzle_caps; + +extern int r500_transform_IF( + struct radeon_compiler * c, + struct rc_instruction * inst_if, + void* data); + +#endif diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c b/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c new file mode 100644 index 00000000000..c30cd753d15 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c @@ -0,0 +1,678 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * Copyright 2008 Corbin Simpson <[email protected]> + * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * \author Ben Skeggs <[email protected]> + * + * \author Jerome Glisse <[email protected]> + * + * \author Corbin Simpson <[email protected]> + * + */ + +#include "r500_fragprog.h" + +#include "../r300_reg.h" + +#include "radeon_program_pair.h" + +#define PROG_CODE \ + struct r500_fragment_program_code *code = &c->code->code.r500 + +#define error(fmt, args...) do { \ + rc_error(&c->Base, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ + } while(0) + + +struct branch_info { + int If; + int Else; + int Endif; +}; + +struct r500_loop_info { + int BgnLoop; + + int BranchDepth; + int * Brks; + int BrkCount; + int BrkReserved; + + int * Conts; + int ContCount; + int ContReserved; +}; + +struct emit_state { + struct radeon_compiler * C; + struct r500_fragment_program_code * Code; + + struct branch_info * Branches; + unsigned int CurrentBranchDepth; + unsigned int BranchesReserved; + + struct r500_loop_info * Loops; + unsigned int CurrentLoopDepth; + unsigned int LoopsReserved; + + unsigned int MaxBranchDepth; + +}; + +static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) +{ + switch(opcode) { + case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP; + case RC_OPCODE_CND: return R500_ALU_RGBA_OP_CND; + case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH; + case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV; + case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3; + case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4; + case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC; + default: + error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name); + /* fall through */ + case RC_OPCODE_NOP: + /* fall through */ + case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD; + case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX; + case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN; + case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP; + } +} + +static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) +{ + switch(opcode) { + case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP; + case RC_OPCODE_CND: return R500_ALPHA_OP_CND; + case RC_OPCODE_COS: return R500_ALPHA_OP_COS; + case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH; + case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV; + case RC_OPCODE_DP3: return R500_ALPHA_OP_DP; + case RC_OPCODE_DP4: return R500_ALPHA_OP_DP; + case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2; + case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC; + case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2; + default: + error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name); + /* fall through */ + case RC_OPCODE_NOP: + /* fall through */ + case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD; + case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX; + case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN; + case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP; + case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ; + case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN; + } +} + +static unsigned int fix_hw_swizzle(unsigned int swz) +{ + switch (swz) { + case RC_SWIZZLE_ZERO: + case RC_SWIZZLE_UNUSED: + swz = 4; + break; + case RC_SWIZZLE_HALF: + swz = 5; + break; + case RC_SWIZZLE_ONE: + swz = 6; + break; + } + + return swz; +} + +static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg) +{ + unsigned int t = inst->RGB.Arg[arg].Source; + int comp; + t |= inst->RGB.Arg[arg].Negate << 11; + t |= inst->RGB.Arg[arg].Abs << 12; + + for(comp = 0; comp < 3; ++comp) + t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2); + + return t; +} + +static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i) +{ + unsigned int t = inst->Alpha.Arg[i].Source; + t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2; + t |= inst->Alpha.Arg[i].Negate << 5; + t |= inst->Alpha.Arg[i].Abs << 6; + return t; +} + +static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func) +{ + switch(func) { + case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ; + case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT; + case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE; + case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE; + default: + rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func); + return 0; + } +} + +static void use_temporary(struct r500_fragment_program_code* code, unsigned int index) +{ + if (index > code->max_temp_idx) + code->max_temp_idx = index; +} + +static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src) +{ + /* From docs: + * Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST. + * MSB = 1 << 7 */ + if (!src.Used) + return 1 << 7; + + if (src.File == RC_FILE_CONSTANT) { + return src.Index | R500_RGB_ADDR0_CONST; + } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) { + use_temporary(code, src.Index); + return src.Index; + } + + return 0; +} + +/** + * NOP the specified instruction if it is not a texture lookup. + */ +static void alu_nop(struct r300_fragment_program_compiler *c, int ip) +{ + PROG_CODE; + + if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) { + code->inst[ip].inst0 |= R500_INST_NOP; + } +} + +/** + * Emit a paired ALU instruction. + */ +static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst) +{ + int ip; + PROG_CODE; + + if (code->inst_end >= c->Base.max_alu_insts-1) { + error("emit_alu: Too many instructions"); + return; + } + + ip = ++code->inst_end; + + /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */ + if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX || + inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) { + if (ip > 0) { + alu_nop(c, ip - 1); + } + } + + code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode); + code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode); + + if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) { + code->inst[ip].inst0 = R500_INST_TYPE_OUT; + if (inst->WriteALUResult) { + error("Cannot write output and ALU result at the same time"); + return; + } + } else { + code->inst[ip].inst0 = R500_INST_TYPE_ALU; + } + code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT; + + code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11); + code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0; + code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18); + if (inst->Nop) { + code->inst[ip].inst0 |= R500_INST_NOP; + } + if (inst->Alpha.DepthWriteMask) { + code->inst[ip].inst4 |= R500_ALPHA_W_OMASK; + c->code->writes_depth = 1; + } + + code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex); + code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex); + use_temporary(code, inst->Alpha.DestIndex); + use_temporary(code, inst->RGB.DestIndex); + + if (inst->RGB.Saturate) + code->inst[ip].inst0 |= R500_INST_RGB_CLAMP; + if (inst->Alpha.Saturate) + code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP; + + /* Set the presubtract operation. */ + switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { + case RC_PRESUB_BIAS: + code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0; + break; + case RC_PRESUB_SUB: + code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0; + break; + case RC_PRESUB_ADD: + code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0; + break; + case RC_PRESUB_INV: + code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0; + break; + default: + break; + } + switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { + case RC_PRESUB_BIAS: + code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0; + break; + case RC_PRESUB_SUB: + code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0; + break; + case RC_PRESUB_ADD: + code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0; + break; + case RC_PRESUB_INV: + code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0; + break; + default: + break; + } + + code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0])); + code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1])); + code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2])); + + code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0])); + code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1])); + code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2])); + + code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT; + code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT; + code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT; + + code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT; + code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT; + code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT; + + code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target); + code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target); + + if (inst->WriteALUResult) { + code->inst[ip].inst3 |= R500_ALU_RGB_WMASK; + + if (inst->WriteALUResult == RC_ALURESULT_X) + code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED; + else + code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA; + + code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare); + } +} + +static unsigned int translate_strq_swizzle(unsigned int swizzle) +{ + unsigned int swiz = 0; + int i; + for (i = 0; i < 4; i++) + swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2; + return swiz; +} + +/** + * Emit a single TEX instruction + */ +static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst) +{ + int ip; + PROG_CODE; + + if (code->inst_end >= c->Base.max_alu_insts-1) { + error("emit_tex: Too many instructions"); + return 0; + } + + ip = ++code->inst_end; + + code->inst[ip].inst0 = R500_INST_TYPE_TEX + | (inst->DstReg.WriteMask << 11) + | R500_INST_TEX_SEM_WAIT; + code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit) + | R500_TEX_SEM_ACQUIRE; + + if (inst->TexSrcTarget == RC_TEXTURE_RECT) + code->inst[ip].inst1 |= R500_TEX_UNSCALED; + + switch (inst->Opcode) { + case RC_OPCODE_KIL: + code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL; + break; + case RC_OPCODE_TEX: + code->inst[ip].inst1 |= R500_TEX_INST_LD; + break; + case RC_OPCODE_TXB: + code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS; + break; + case RC_OPCODE_TXP: + code->inst[ip].inst1 |= R500_TEX_INST_PROJ; + break; + case RC_OPCODE_TXD: + code->inst[ip].inst1 |= R500_TEX_INST_DXDY; + break; + case RC_OPCODE_TXL: + code->inst[ip].inst1 |= R500_TEX_INST_LOD; + break; + default: + error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name); + } + + use_temporary(code, inst->SrcReg[0].Index); + if (inst->Opcode != RC_OPCODE_KIL) + use_temporary(code, inst->DstReg.Index); + + code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index) + | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8) + | R500_TEX_DST_ADDR(inst->DstReg.Index) + | (GET_SWZ(inst->TexSwizzle, 0) << 24) + | (GET_SWZ(inst->TexSwizzle, 1) << 26) + | (GET_SWZ(inst->TexSwizzle, 2) << 28) + | (GET_SWZ(inst->TexSwizzle, 3) << 30) + ; + + if (inst->Opcode == RC_OPCODE_TXD) { + use_temporary(code, inst->SrcReg[1].Index); + use_temporary(code, inst->SrcReg[2].Index); + + /* DX and DY parameters are specified in a separate register. */ + code->inst[ip].inst3 = + R500_DX_ADDR(inst->SrcReg[1].Index) | + (translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) | + R500_DY_ADDR(inst->SrcReg[2].Index) | + (translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24); + } + + return 1; +} + +static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst) +{ + unsigned int newip; + + if (s->Code->inst_end >= s->C->max_alu_insts-1) { + rc_error(s->C, "emit_tex: Too many instructions"); + return; + } + + newip = ++s->Code->inst_end; + + /* Currently all loops use the same integer constant to intialize + * the loop variables. */ + if(!s->Code->int_constants[0]) { + s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff); + s->Code->int_constant_count = 1; + } + s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT; + + switch(inst->U.I.Opcode){ + struct branch_info * branch; + struct r500_loop_info * loop; + case RC_OPCODE_BGNLOOP: + memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info, + s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1); + + loop = &s->Loops[s->CurrentLoopDepth++]; + memset(loop, 0, sizeof(struct r500_loop_info)); + loop->BranchDepth = s->CurrentBranchDepth; + loop->BgnLoop = newip; + + s->Code->inst[newip].inst2 = R500_FC_OP_LOOP + | R500_FC_JUMP_FUNC(0x00) + | R500_FC_IGNORE_UNCOVERED + ; + break; + case RC_OPCODE_BRK: + loop = &s->Loops[s->CurrentLoopDepth - 1]; + memory_pool_array_reserve(&s->C->Pool, int, loop->Brks, + loop->BrkCount, loop->BrkReserved, 1); + + loop->Brks[loop->BrkCount++] = newip; + s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP + | R500_FC_JUMP_FUNC(0xff) + | R500_FC_B_OP1_DECR + | R500_FC_B_POP_CNT( + s->CurrentBranchDepth - loop->BranchDepth) + | R500_FC_IGNORE_UNCOVERED + ; + break; + + case RC_OPCODE_CONT: + loop = &s->Loops[s->CurrentLoopDepth - 1]; + memory_pool_array_reserve(&s->C->Pool, int, loop->Conts, + loop->ContCount, loop->ContReserved, 1); + loop->Conts[loop->ContCount++] = newip; + s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE + | R500_FC_JUMP_FUNC(0xff) + | R500_FC_B_OP1_DECR + | R500_FC_B_POP_CNT( + s->CurrentBranchDepth - loop->BranchDepth) + | R500_FC_IGNORE_UNCOVERED + ; + break; + + case RC_OPCODE_ENDLOOP: + { + loop = &s->Loops[s->CurrentLoopDepth - 1]; + /* Emit ENDLOOP */ + s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP + | R500_FC_JUMP_FUNC(0xff) + | R500_FC_JUMP_ANY + | R500_FC_IGNORE_UNCOVERED + ; + /* The constant integer at index 0 is used by all loops. */ + s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0) + | R500_FC_JUMP_ADDR(loop->BgnLoop + 1) + ; + + /* Set jump address and int constant for BGNLOOP */ + s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0) + | R500_FC_JUMP_ADDR(newip) + ; + + /* Set jump address for the BRK instructions. */ + while(loop->BrkCount--) { + s->Code->inst[loop->Brks[loop->BrkCount]].inst3 = + R500_FC_JUMP_ADDR(newip + 1); + } + + /* Set jump address for CONT instructions. */ + while(loop->ContCount--) { + s->Code->inst[loop->Conts[loop->ContCount]].inst3 = + R500_FC_JUMP_ADDR(newip); + } + s->CurrentLoopDepth--; + break; + } + case RC_OPCODE_IF: + if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) { + rc_error(s->C, "Branch depth exceeds hardware limit"); + return; + } + memory_pool_array_reserve(&s->C->Pool, struct branch_info, + s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1); + + branch = &s->Branches[s->CurrentBranchDepth++]; + branch->If = newip; + branch->Else = -1; + branch->Endif = -1; + + if (s->CurrentBranchDepth > s->MaxBranchDepth) + s->MaxBranchDepth = s->CurrentBranchDepth; + + /* actual instruction is filled in at ENDIF time */ + break; + + case RC_OPCODE_ELSE: + if (!s->CurrentBranchDepth) { + rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__); + return; + } + + branch = &s->Branches[s->CurrentBranchDepth - 1]; + branch->Else = newip; + + /* actual instruction is filled in at ENDIF time */ + break; + + case RC_OPCODE_ENDIF: + if (!s->CurrentBranchDepth) { + rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__); + return; + } + + branch = &s->Branches[s->CurrentBranchDepth - 1]; + branch->Endif = newip; + + s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */ + | R500_FC_B_OP0_DECR /* decrement branch counter if stay */ + | R500_FC_B_OP1_NONE /* no branch counter if stay */ + | R500_FC_B_POP_CNT(1) + ; + s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); + s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */ + | R500_FC_B_OP0_INCR /* increment branch counter if stay */ + | R500_FC_IGNORE_UNCOVERED + ; + + if (branch->Else >= 0) { + /* increment branch counter also if jump */ + s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR; + s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1); + + s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_B_ELSE /* all active pixels want to jump */ + | R500_FC_B_OP0_NONE /* no counter op if stay */ + | R500_FC_B_OP1_DECR /* decrement branch counter if jump */ + | R500_FC_B_POP_CNT(1) + ; + s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); + } else { + /* don't touch branch counter on jump */ + s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE; + s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); + } + + + s->CurrentBranchDepth--; + break; + default: + rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name); + } +} + +void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) +{ + struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; + struct emit_state s; + struct r500_fragment_program_code *code = &compiler->code->code.r500; + + memset(&s, 0, sizeof(s)); + s.C = &compiler->Base; + s.Code = code; + + memset(code, 0, sizeof(*code)); + code->max_temp_idx = 1; + code->inst_end = -1; + + for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next; + inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; + inst = inst->Next) { + if (inst->Type == RC_INSTRUCTION_NORMAL) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->IsFlowControl) { + emit_flowcontrol(&s, inst); + } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) { + continue; + } else { + emit_tex(compiler, &inst->U.I); + } + } else { + emit_paired(compiler, &inst->U.P); + } + } + + if (code->max_temp_idx >= compiler->Base.max_temp_regs) + rc_error(&compiler->Base, "Too many hardware temporaries used"); + + if (compiler->Base.Error) + return; + + if (code->inst_end == -1 || + (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { + int ip; + + /* This may happen when dead-code elimination is disabled or + * when most of the fragment program logic is leading to a KIL */ + if (code->inst_end >= compiler->Base.max_alu_insts-1) { + rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions"); + return; + } + + ip = ++code->inst_end; + code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; + } + + /* Enable full flow control mode if we are using loops or have if + * statements nested at least four deep. */ + if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) { + if (code->max_temp_idx < 1) + code->max_temp_idx = 1; + + code->us_fc_ctrl |= R500_FC_FULL_FC_EN; + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_code.c b/src/gallium/drivers/r300/compiler/radeon_code.c new file mode 100644 index 00000000000..6842fb873bc --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_code.c @@ -0,0 +1,187 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_code.h" + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "radeon_program.h" + +void rc_constants_init(struct rc_constant_list * c) +{ + memset(c, 0, sizeof(*c)); +} + +/** + * Copy a constants structure, assuming that the destination structure + * is not initialized. + */ +void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src) +{ + dst->Constants = malloc(sizeof(struct rc_constant) * src->Count); + memcpy(dst->Constants, src->Constants, sizeof(struct rc_constant) * src->Count); + dst->Count = src->Count; + dst->_Reserved = src->Count; +} + +void rc_constants_destroy(struct rc_constant_list * c) +{ + free(c->Constants); + memset(c, 0, sizeof(*c)); +} + +unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant) +{ + unsigned index = c->Count; + + if (c->Count >= c->_Reserved) { + struct rc_constant * newlist; + + c->_Reserved = c->_Reserved * 2; + if (!c->_Reserved) + c->_Reserved = 16; + + newlist = malloc(sizeof(struct rc_constant) * c->_Reserved); + memcpy(newlist, c->Constants, sizeof(struct rc_constant) * c->Count); + + free(c->Constants); + c->Constants = newlist; + } + + c->Constants[index] = *constant; + c->Count++; + + return index; +} + + +/** + * Add a state vector to the constant list, while trying to avoid duplicates. + */ +unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1) +{ + unsigned index; + struct rc_constant constant; + + for(index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_STATE) { + if (c->Constants[index].u.State[0] == state0 && + c->Constants[index].u.State[1] == state1) + return index; + } + } + + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_STATE; + constant.Size = 4; + constant.u.State[0] = state0; + constant.u.State[1] = state1; + + return rc_constants_add(c, &constant); +} + + +/** + * Add an immediate vector to the constant list, while trying to avoid + * duplicates. + */ +unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data) +{ + unsigned index; + struct rc_constant constant; + + for(index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) { + if (!memcmp(c->Constants[index].u.Immediate, data, sizeof(float)*4)) + return index; + } + } + + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_IMMEDIATE; + constant.Size = 4; + memcpy(constant.u.Immediate, data, sizeof(float) * 4); + + return rc_constants_add(c, &constant); +} + + +/** + * Add an immediate scalar to the constant list, while trying to avoid + * duplicates. + */ +unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle) +{ + unsigned index; + int free_index = -1; + struct rc_constant constant; + + for(index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) { + unsigned comp; + for(comp = 0; comp < c->Constants[index].Size; ++comp) { + if (c->Constants[index].u.Immediate[comp] == data) { + *swizzle = RC_MAKE_SWIZZLE_SMEAR(comp); + return index; + } + } + + if (c->Constants[index].Size < 4) + free_index = index; + } + } + + if (free_index >= 0) { + unsigned comp = c->Constants[free_index].Size++; + c->Constants[free_index].u.Immediate[comp] = data; + *swizzle = RC_MAKE_SWIZZLE_SMEAR(comp); + return free_index; + } + + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_IMMEDIATE; + constant.Size = 1; + constant.u.Immediate[0] = data; + *swizzle = RC_SWIZZLE_XXXX; + + return rc_constants_add(c, &constant); +} + +void rc_constants_print(struct rc_constant_list * c) +{ + unsigned int i; + for(i = 0; i < c->Count; i++) { + if (c->Constants[i].Type == RC_CONSTANT_IMMEDIATE) { + float * values = c->Constants[i].u.Immediate; + fprintf(stderr, "CONST[%u] = " + "{ %10.4f %10.4f %10.4f %10.4f }\n", + i, values[0],values[1], values[2], values[3]); + } + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_code.h b/src/gallium/drivers/r300/compiler/radeon_code.h new file mode 100644 index 00000000000..67e6acf8b10 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_code.h @@ -0,0 +1,306 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef RADEON_CODE_H +#define RADEON_CODE_H + +#include <stdint.h> + +#define R300_PFS_MAX_ALU_INST 64 +#define R300_PFS_MAX_TEX_INST 32 +#define R300_PFS_MAX_TEX_INDIRECT 4 +#define R300_PFS_NUM_TEMP_REGS 32 +#define R300_PFS_NUM_CONST_REGS 32 + +#define R400_PFS_MAX_ALU_INST 512 +#define R400_PFS_MAX_TEX_INST 512 + +#define R500_PFS_MAX_INST 512 +#define R500_PFS_NUM_TEMP_REGS 128 +#define R500_PFS_NUM_CONST_REGS 256 +#define R500_PFS_MAX_BRANCH_DEPTH_FULL 32 +#define R500_PFS_MAX_BRANCH_DEPTH_PARTIAL 4 + + +#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0) + +enum { + /** + * External constants are constants whose meaning is unknown to this + * compiler. For example, a Mesa gl_program's constants are turned + * into external constants. + */ + RC_CONSTANT_EXTERNAL = 0, + + RC_CONSTANT_IMMEDIATE, + + /** + * Constant referring to state that is known by this compiler, + * see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state. + */ + RC_CONSTANT_STATE +}; + +enum { + RC_STATE_SHADOW_AMBIENT = 0, + + RC_STATE_R300_WINDOW_DIMENSION, + RC_STATE_R300_TEXRECT_FACTOR, + RC_STATE_R300_TEXSCALE_FACTOR, + RC_STATE_R300_VIEWPORT_SCALE, + RC_STATE_R300_VIEWPORT_OFFSET +}; + +struct rc_constant { + unsigned Type:2; /**< RC_CONSTANT_xxx */ + unsigned Size:3; + + union { + unsigned External; + float Immediate[4]; + unsigned State[2]; + } u; +}; + +struct rc_constant_list { + struct rc_constant * Constants; + unsigned Count; + + unsigned _Reserved; +}; + +void rc_constants_init(struct rc_constant_list * c); +void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src); +void rc_constants_destroy(struct rc_constant_list * c); +unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant); +unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state1, unsigned state2); +unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data); +unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle); +void rc_constants_print(struct rc_constant_list * c); + +/** + * Compare functions. + * + * \note By design, RC_COMPARE_FUNC_xxx + GL_NEVER gives you + * the correct GL compare function. + */ +typedef enum { + RC_COMPARE_FUNC_NEVER = 0, + RC_COMPARE_FUNC_LESS, + RC_COMPARE_FUNC_EQUAL, + RC_COMPARE_FUNC_LEQUAL, + RC_COMPARE_FUNC_GREATER, + RC_COMPARE_FUNC_NOTEQUAL, + RC_COMPARE_FUNC_GEQUAL, + RC_COMPARE_FUNC_ALWAYS +} rc_compare_func; + +/** + * Coordinate wrapping modes. + * + * These are not quite the same as their GL counterparts yet. + */ +typedef enum { + RC_WRAP_NONE = 0, + RC_WRAP_REPEAT, + RC_WRAP_MIRRORED_REPEAT, + RC_WRAP_MIRRORED_CLAMP +} rc_wrap_mode; + +/** + * Stores state that influences the compilation of a fragment program. + */ +struct r300_fragment_program_external_state { + struct { + /** + * This field contains swizzle for some lowering passes + * (shadow comparison, unorm->snorm conversion) + */ + unsigned texture_swizzle:12; + + /** + * If the sampler is used as a shadow sampler, + * this field specifies the compare function. + * + * Otherwise, this field is \ref RC_COMPARE_FUNC_NEVER (aka 0). + * \sa rc_compare_func + */ + unsigned texture_compare_func : 3; + + /** + * No matter what the sampler type is, + * this field turns it into a shadow sampler. + */ + unsigned compare_mode_enabled : 1; + + /** + * If the sampler will receive non-normalized coords, + * this field is set. The scaling factor is given by + * RC_STATE_R300_TEXRECT_FACTOR. + */ + unsigned non_normalized_coords : 1; + + /** + * This field specifies wrapping modes for the sampler. + * + * If this field is \ref RC_WRAP_NONE (aka 0), no wrapping maths + * will be performed on the coordinates. + */ + unsigned wrap_mode : 3; + + /** + * The coords are scaled after applying the wrap mode emulation + * and right before texture fetch. The scaling factor is given by + * RC_STATE_R300_TEXSCALE_FACTOR. */ + unsigned clamp_and_scale_before_fetch : 1; + + /** + * Fetch RGTC1_SNORM or LATC1_SNORM as UNORM and convert UNORM -> SNORM + * in the shader. + */ + unsigned convert_unorm_to_snorm:1; + } unit[16]; + + unsigned frag_clamp:1; +}; + + + +struct r300_fragment_program_node { + int tex_offset; /**< first tex instruction */ + int tex_end; /**< last tex instruction, relative to tex_offset */ + int alu_offset; /**< first ALU instruction */ + int alu_end; /**< last ALU instruction, relative to alu_offset */ + int flags; +}; + +/** + * Stores an R300 fragment program in its compiled-to-hardware form. + */ +struct r300_fragment_program_code { + struct { + unsigned int length; /**< total # of texture instructions used */ + uint32_t inst[R400_PFS_MAX_TEX_INST]; + } tex; + + struct { + unsigned int length; /**< total # of ALU instructions used */ + struct { + uint32_t rgb_inst; + uint32_t rgb_addr; + uint32_t alpha_inst; + uint32_t alpha_addr; + uint32_t r400_ext_addr; + } inst[R400_PFS_MAX_ALU_INST]; + } alu; + + uint32_t config; /* US_CONFIG */ + uint32_t pixsize; /* US_PIXSIZE */ + uint32_t code_offset; /* US_CODE_OFFSET */ + uint32_t r400_code_offset_ext; /* US_CODE_EXT */ + uint32_t code_addr[4]; /* US_CODE_ADDR */ + /*US_CODE_BANK.R390_MODE: Enables 512 instructions and 64 temporaries + * for r400 cards */ + unsigned int r390_mode:1; +}; + + +struct r500_fragment_program_code { + struct { + uint32_t inst0; + uint32_t inst1; + uint32_t inst2; + uint32_t inst3; + uint32_t inst4; + uint32_t inst5; + } inst[R500_PFS_MAX_INST]; + + int inst_end; /* Number of instructions - 1; also, last instruction to be executed */ + + int max_temp_idx; + + uint32_t us_fc_ctrl; + + uint32_t int_constants[32]; + uint32_t int_constant_count; +}; + +struct rX00_fragment_program_code { + union { + struct r300_fragment_program_code r300; + struct r500_fragment_program_code r500; + } code; + + unsigned writes_depth:1; + + struct rc_constant_list constants; + unsigned *constants_remap_table; +}; + + +#define R300_VS_MAX_ALU 256 +#define R300_VS_MAX_ALU_DWORDS (R300_VS_MAX_ALU * 4) +#define R500_VS_MAX_ALU 1024 +#define R500_VS_MAX_ALU_DWORDS (R500_VS_MAX_ALU * 4) +#define R300_VS_MAX_TEMPS 32 +/* This is the max for all chipsets (r300-r500) */ +#define R300_VS_MAX_FC_OPS 16 +/* The r500 maximum depth is not just for loops, but any combination of loops + * and subroutine jumps. */ +#define R500_VS_MAX_FC_DEPTH 8 +#define R300_VS_MAX_LOOP_DEPTH 1 + +#define VSF_MAX_INPUTS 32 +#define VSF_MAX_OUTPUTS 32 + +struct r300_vertex_program_code { + int length; + union { + uint32_t d[R500_VS_MAX_ALU_DWORDS]; + float f[R500_VS_MAX_ALU_DWORDS]; + } body; + + int pos_end; + int num_temporaries; /* Number of temp vars used by program */ + int inputs[VSF_MAX_INPUTS]; + int outputs[VSF_MAX_OUTPUTS]; + + struct rc_constant_list constants; + unsigned *constants_remap_table; + + uint32_t InputsRead; + uint32_t OutputsWritten; + + unsigned int num_fc_ops; + uint32_t fc_ops; + union { + uint32_t r300[R300_VS_MAX_FC_OPS]; + struct { + uint32_t lw; + uint32_t uw; + } r500[R300_VS_MAX_FC_OPS]; + } fc_op_addrs; + int32_t fc_loop_index[R300_VS_MAX_FC_OPS]; +}; + +#endif /* RADEON_CODE_H */ + diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler.c b/src/gallium/drivers/r300/compiler/radeon_compiler.c new file mode 100644 index 00000000000..b7936725d85 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_compiler.c @@ -0,0 +1,489 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> + +#include "radeon_dataflow.h" +#include "radeon_program.h" +#include "radeon_program_pair.h" +#include "radeon_compiler_util.h" + + +void rc_init(struct radeon_compiler * c) +{ + memset(c, 0, sizeof(*c)); + + memory_pool_init(&c->Pool); + c->Program.Instructions.Prev = &c->Program.Instructions; + c->Program.Instructions.Next = &c->Program.Instructions; + c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; +} + +void rc_destroy(struct radeon_compiler * c) +{ + rc_constants_destroy(&c->Program.Constants); + memory_pool_destroy(&c->Pool); + free(c->ErrorMsg); +} + +void rc_debug(struct radeon_compiler * c, const char * fmt, ...) +{ + va_list ap; + + if (!(c->Debug & RC_DBG_LOG)) + return; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +void rc_error(struct radeon_compiler * c, const char * fmt, ...) +{ + va_list ap; + + c->Error = 1; + + if (!c->ErrorMsg) { + /* Only remember the first error */ + char buf[1024]; + int written; + + va_start(ap, fmt); + written = vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + if (written < sizeof(buf)) { + c->ErrorMsg = strdup(buf); + } else { + c->ErrorMsg = malloc(written + 1); + + va_start(ap, fmt); + vsnprintf(c->ErrorMsg, written + 1, fmt, ap); + va_end(ap); + } + } + + if (c->Debug & RC_DBG_LOG) { + fprintf(stderr, "r300compiler error: "); + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + } +} + +int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion) +{ + rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion); + return 1; +} + +/** + * Recompute c->Program.InputsRead and c->Program.OutputsWritten + * based on which inputs and outputs are actually referenced + * in program instructions. + */ +void rc_calculate_inputs_outputs(struct radeon_compiler * c) +{ + struct rc_instruction *inst; + + c->Program.InputsRead = 0; + c->Program.OutputsWritten = 0; + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) + { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + int i; + + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT) + c->Program.InputsRead |= 1 << inst->U.I.SrcReg[i].Index; + } + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) + c->Program.OutputsWritten |= 1 << inst->U.I.DstReg.Index; + } + } +} + +/** + * Rewrite the program such that everything that source the given input + * register will source new_input instead. + */ +void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input) +{ + struct rc_instruction * inst; + + c->Program.InputsRead &= ~(1 << input); + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + for(i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) { + inst->U.I.SrcReg[i].File = new_input.File; + inst->U.I.SrcReg[i].Index = new_input.Index; + inst->U.I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle); + if (!inst->U.I.SrcReg[i].Abs) { + inst->U.I.SrcReg[i].Negate ^= new_input.Negate; + inst->U.I.SrcReg[i].Abs = new_input.Abs; + } + + c->Program.InputsRead |= 1 << new_input.Index; + } + } + } +} + + +/** + * Rewrite the program such that everything that writes into the given + * output register will instead write to new_output. The new_output + * writemask is honoured. + */ +void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask) +{ + struct rc_instruction * inst; + + c->Program.OutputsWritten &= ~(1 << output); + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) { + inst->U.I.DstReg.Index = new_output; + inst->U.I.DstReg.WriteMask &= writemask; + + c->Program.OutputsWritten |= 1 << new_output; + } + } + } +} + + +/** + * Rewrite the program such that a given output is duplicated. + */ +void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output) +{ + unsigned tempreg = rc_find_free_temporary(c); + struct rc_instruction * inst; + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) { + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = tempreg; + } + } + } + + inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = output; + + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = tempreg; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + + inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = dup_output; + + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = tempreg; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + + c->Program.OutputsWritten |= 1 << dup_output; +} + + +/** + * Introduce standard code fragment to deal with fragment.position. + */ +void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input, + int full_vtransform) +{ + unsigned tempregi = rc_find_free_temporary(c); + struct rc_instruction * inst_rcp; + struct rc_instruction * inst_mul; + struct rc_instruction * inst_mad; + struct rc_instruction * inst; + + c->Program.InputsRead &= ~(1 << wpos); + c->Program.InputsRead |= 1 << new_input; + + /* perspective divide */ + inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions); + inst_rcp->U.I.Opcode = RC_OPCODE_RCP; + + inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rcp->U.I.DstReg.Index = tempregi; + inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; + + inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT; + inst_rcp->U.I.SrcReg[0].Index = new_input; + inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; + + inst_mul = rc_insert_new_instruction(c, inst_rcp); + inst_mul->U.I.Opcode = RC_OPCODE_MUL; + + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = tempregi; + inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; + + inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT; + inst_mul->U.I.SrcReg[0].Index = new_input; + + inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mul->U.I.SrcReg[1].Index = tempregi; + inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; + + /* viewport transformation */ + inst_mad = rc_insert_new_instruction(c, inst_mul); + inst_mad->U.I.Opcode = RC_OPCODE_MAD; + + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = tempregi; + inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; + + inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[0].Index = tempregi; + inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; + + inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; + + inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT; + inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0; + + if (full_vtransform) { + inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0); + inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0); + } else { + inst_mad->U.I.SrcReg[1].Index = + inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0); + } + + for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + for(i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && + inst->U.I.SrcReg[i].Index == wpos) { + inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[i].Index = tempregi; + } + } + } +} + + +/** + * The FACE input in hardware contains 1 if it's a back face, 0 otherwise. + * Gallium and OpenGL define it the other way around. + * + * So let's just negate FACE at the beginning of the shader and rewrite the rest + * of the shader to read from the newly allocated temporary. + */ +void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face) +{ + unsigned tempregi = rc_find_free_temporary(c); + struct rc_instruction *inst_add; + struct rc_instruction *inst; + + /* perspective divide */ + inst_add = rc_insert_new_instruction(c, &c->Program.Instructions); + inst_add->U.I.Opcode = RC_OPCODE_ADD; + + inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_add->U.I.DstReg.Index = tempregi; + inst_add->U.I.DstReg.WriteMask = RC_MASK_X; + + inst_add->U.I.SrcReg[0].File = RC_FILE_NONE; + inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; + + inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT; + inst_add->U.I.SrcReg[1].Index = face; + inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; + inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW; + + for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + for(i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && + inst->U.I.SrcReg[i].Index == face) { + inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[i].Index = tempregi; + } + } + } +} + +static void reg_count_callback(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + int *max_reg = userdata; + if (file == RC_FILE_TEMPORARY) + (int)index > *max_reg ? *max_reg = index : 0; +} + +void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s) +{ + int max_reg = -1; + struct rc_instruction * tmp; + memset(s, 0, sizeof(*s)); + + for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions; + tmp = tmp->Next){ + const struct rc_opcode_info * info; + rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg); + if (tmp->Type == RC_INSTRUCTION_NORMAL) { + info = rc_get_opcode_info(tmp->U.I.Opcode); + if (info->Opcode == RC_OPCODE_BEGIN_TEX) + continue; + if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE) + s->num_presub_ops++; + } else { + if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) + s->num_presub_ops++; + if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) + s->num_presub_ops++; + /* Assuming alpha will never be a flow control or + * a tex instruction. */ + if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP) + s->num_alpha_insts++; + if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP) + s->num_rgb_insts++; + info = rc_get_opcode_info(tmp->U.P.RGB.Opcode); + } + if (info->IsFlowControl) + s->num_fc_insts++; + if (info->HasTexture) + s->num_tex_insts++; + s->num_insts++; + } + s->num_temp_regs = max_reg + 1; +} + +static void print_stats(struct radeon_compiler * c) +{ + struct rc_program_stats s; + + if (c->initial_num_insts <= 5) + return; + + rc_get_stats(c, &s); + + switch (c->type) { + case RC_VERTEX_PROGRAM: + fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n" + "~%4u Instructions\n" + "~%4u Flow Control Instructions\n" + "~%4u Temporary Registers\n" + "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n", + s.num_insts, s.num_fc_insts, s.num_temp_regs); + break; + + case RC_FRAGMENT_PROGRAM: + fprintf(stderr,"~~~~~~~~ FRAGMENT PROGRAM ~~~~~~~\n" + "~%4u Instructions\n" + "~%4u Vector Instructions (RGB)\n" + "~%4u Scalar Instructions (Alpha)\n" + "~%4u Flow Control Instructions\n" + "~%4u Texture Instructions\n" + "~%4u Presub Operations\n" + "~%4u Temporary Registers\n" + "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n", + s.num_insts, s.num_rgb_insts, s.num_alpha_insts, + s.num_fc_insts, s.num_tex_insts, s.num_presub_ops, + s.num_temp_regs); + break; + default: + assert(0); + } +} + +static const char *shader_name[RC_NUM_PROGRAM_TYPES] = { + "Vertex Program", + "Fragment Program" +}; + +void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list) +{ + for (unsigned i = 0; list[i].name; i++) { + if (list[i].predicate) { + list[i].run(c, list[i].user); + + if (c->Error) + return; + + if ((c->Debug & RC_DBG_LOG) && list[i].dump) { + fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name); + rc_print_program(&c->Program); + } + } + } +} + +/* Executes a list of compiler passes given in the parameter 'list'. */ +void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list) +{ + struct rc_program_stats s; + + rc_get_stats(c, &s); + c->initial_num_insts = s.num_insts; + + if (c->Debug & RC_DBG_LOG) { + fprintf(stderr, "%s: before compilation\n", shader_name[c->type]); + rc_print_program(&c->Program); + } + + rc_run_compiler_passes(c, list); + + if (c->Debug & RC_DBG_STATS) + print_stats(c); +} + +void rc_validate_final_shader(struct radeon_compiler *c, void *user) +{ + /* Check the number of constants. */ + if (c->Program.Constants.Count > c->max_constants) { + rc_error(c, "Too many constants. Max: %i, Got: %i\n", + c->max_constants, c->Program.Constants.Count); + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler.h b/src/gallium/drivers/r300/compiler/radeon_compiler.h new file mode 100644 index 00000000000..74594af23c2 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_compiler.h @@ -0,0 +1,171 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef RADEON_COMPILER_H +#define RADEON_COMPILER_H + +#include "main/compiler.h" + +#include "memory_pool.h" +#include "radeon_code.h" +#include "radeon_program.h" +#include "radeon_emulate_loops.h" + +#define RC_DBG_LOG (1 << 0) +#define RC_DBG_STATS (1 << 1) + +struct rc_swizzle_caps; + +enum rc_program_type { + RC_VERTEX_PROGRAM, + RC_FRAGMENT_PROGRAM, + RC_NUM_PROGRAM_TYPES +}; + +struct radeon_compiler { + struct memory_pool Pool; + struct rc_program Program; + enum rc_program_type type; + unsigned Debug:2; + unsigned Error:1; + char * ErrorMsg; + + /* Hardware specification. */ + unsigned is_r400:1; + unsigned is_r500:1; + unsigned has_half_swizzles:1; + unsigned has_presub:1; + unsigned disable_optimizations:1; + unsigned max_temp_regs; + unsigned max_constants; + int max_alu_insts; + unsigned max_tex_insts; + + /* Whether to remove unused constants and empty holes in constant space. */ + unsigned remove_unused_constants:1; + + /** + * Variables used internally, not be touched by callers + * of the compiler + */ + /*@{*/ + struct rc_swizzle_caps * SwizzleCaps; + /*@}*/ + + struct emulate_loop_state loop_state; + + unsigned initial_num_insts; /* Number of instructions at start. */ +}; + +void rc_init(struct radeon_compiler * c); +void rc_destroy(struct radeon_compiler * c); + +void rc_debug(struct radeon_compiler * c, const char * fmt, ...); +void rc_error(struct radeon_compiler * c, const char * fmt, ...); + +int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion); + +/** + * This macro acts like an if-statement that can be used to implement + * non-aborting assertions in the compiler. + * + * It checks whether \p cond is true. If not, an internal compiler error is + * flagged and the if-clause is run. + * + * A typical use-case would be: + * + * if (rc_assert(c, condition-that-must-be-true)) + * return; + */ +#define rc_assert(c, cond) \ + (!(cond) && rc_if_fail_helper(c, __FILE__, __LINE__, #cond)) + +void rc_calculate_inputs_outputs(struct radeon_compiler * c); + +void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input); +void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask); +void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output); +void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input, + int full_vtransform); +void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face); + +struct r300_fragment_program_compiler { + struct radeon_compiler Base; + struct rX00_fragment_program_code *code; + /* Optional transformations and features. */ + struct r300_fragment_program_external_state state; + unsigned enable_shadow_ambient; + /* Register corresponding to the depthbuffer. */ + unsigned OutputDepth; + /* Registers corresponding to the four colorbuffers. */ + unsigned OutputColor[4]; + + void * UserData; + void (*AllocateHwInputs)( + struct r300_fragment_program_compiler * c, + void (*allocate)(void * data, unsigned input, unsigned hwreg), + void * mydata); +}; + +void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c); + +struct r300_vertex_program_compiler { + struct radeon_compiler Base; + struct r300_vertex_program_code *code; + uint32_t RequiredOutputs; + + void * UserData; + void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c); + + int PredicateIndex; + unsigned int PredicateMask; +}; + +void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c); +void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user); + +struct radeon_compiler_pass { + const char *name; /* Name of the pass. */ + int dump; /* Dump the program if Debug == 1? */ + int predicate; /* Run this pass? */ + void (*run)(struct radeon_compiler *c, void *user); /* The main entrypoint. */ + void *user; /* Optional parameter which is passed to the run function. */ +}; + +struct rc_program_stats { + unsigned num_insts; + unsigned num_fc_insts; + unsigned num_tex_insts; + unsigned num_rgb_insts; + unsigned num_alpha_insts; + unsigned num_presub_ops; + unsigned num_temp_regs; +}; + +void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s); + +/* Executes a list of compiler passes given in the parameter 'list'. */ +void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list); +void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list); +void rc_validate_final_shader(struct radeon_compiler *c, void *user); + +#endif /* RADEON_COMPILER_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler_util.c b/src/gallium/drivers/r300/compiler/radeon_compiler_util.c new file mode 100644 index 00000000000..2742721f800 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_compiler_util.c @@ -0,0 +1,701 @@ +/* + * Copyright 2010 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + */ + +#include "radeon_compiler_util.h" + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" +/** + */ +unsigned int rc_swizzle_to_writemask(unsigned int swz) +{ + unsigned int mask = 0; + unsigned int i; + + for(i = 0; i < 4; i++) { + mask |= 1 << GET_SWZ(swz, i); + } + mask &= RC_MASK_XYZW; + + return mask; +} + +rc_swizzle get_swz(unsigned int swz, rc_swizzle idx) +{ + if (idx & 0x4) + return idx; + return GET_SWZ(swz, idx); +} + +/** + * The purpose of this function is to standardize the number channels used by + * swizzles. All swizzles regardless of what instruction they are a part of + * should have 4 channels initialized with values. + * @param channels The number of channels in initial_value that have a + * meaningful value. + * @return An initialized swizzle that has all of the unused channels set to + * RC_SWIZZLE_UNUSED. + */ +unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels) +{ + unsigned int i; + for (i = channels; i < 4; i++) { + SET_SWZ(initial_value, i, RC_SWIZZLE_UNUSED); + } + return initial_value; +} + +unsigned int combine_swizzles4(unsigned int src, + rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w) +{ + unsigned int ret = 0; + + ret |= get_swz(src, swz_x); + ret |= get_swz(src, swz_y) << 3; + ret |= get_swz(src, swz_z) << 6; + ret |= get_swz(src, swz_w) << 9; + + return ret; +} + +unsigned int combine_swizzles(unsigned int src, unsigned int swz) +{ + unsigned int ret = 0; + + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X)); + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9; + + return ret; +} + +/** + * @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W + */ +rc_swizzle rc_mask_to_swizzle(unsigned int mask) +{ + switch (mask) { + case RC_MASK_X: return RC_SWIZZLE_X; + case RC_MASK_Y: return RC_SWIZZLE_Y; + case RC_MASK_Z: return RC_SWIZZLE_Z; + case RC_MASK_W: return RC_SWIZZLE_W; + } + return RC_SWIZZLE_UNUSED; +} + +/* Reorder mask bits according to swizzle. */ +unsigned swizzle_mask(unsigned swizzle, unsigned mask) +{ + unsigned ret = 0; + for (unsigned chan = 0; chan < 4; ++chan) { + unsigned swz = GET_SWZ(swizzle, chan); + if (swz < 4) + ret |= GET_BIT(mask, swz) << chan; + } + return ret; +} + +static unsigned int srcs_need_rewrite(const struct rc_opcode_info * info) +{ + if (info->HasTexture) { + return 0; + } + switch (info->Opcode) { + case RC_OPCODE_DP2: + case RC_OPCODE_DP3: + case RC_OPCODE_DP4: + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + return 0; + default: + return 1; + } +} + +/** + * @return A swizzle the results from converting old_swizzle using + * conversion_swizzle + */ +unsigned int rc_adjust_channels( + unsigned int old_swizzle, + unsigned int conversion_swizzle) +{ + unsigned int i; + unsigned int new_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0); + for (i = 0; i < 4; i++) { + unsigned int new_chan = get_swz(conversion_swizzle, i); + if (new_chan == RC_SWIZZLE_UNUSED) { + continue; + } + SET_SWZ(new_swizzle, new_chan, GET_SWZ(old_swizzle, i)); + } + return new_swizzle; +} + +static unsigned int rewrite_writemask( + unsigned int old_mask, + unsigned int conversion_swizzle) +{ + unsigned int new_mask = 0; + unsigned int i; + + for (i = 0; i < 4; i++) { + if (!GET_BIT(old_mask, i) + || GET_SWZ(conversion_swizzle, i) == RC_SWIZZLE_UNUSED) { + continue; + } + new_mask |= (1 << GET_SWZ(conversion_swizzle, i)); + } + + return new_mask; +} + +/** + * This function rewrites the writemask of sub and adjusts the swizzles + * of all its source registers based on the conversion_swizzle. + * conversion_swizzle represents a mapping of the old writemask to the + * new writemask. For a detailed description of how conversion swizzles + * work see rc_rewrite_swizzle(). + */ +void rc_pair_rewrite_writemask( + struct rc_pair_sub_instruction * sub, + unsigned int conversion_swizzle) +{ + const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); + unsigned int i; + + sub->WriteMask = rewrite_writemask(sub->WriteMask, conversion_swizzle); + + if (!srcs_need_rewrite(info)) { + return ; + } + + for (i = 0; i < info->NumSrcRegs; i++) { + sub->Arg[i].Swizzle = + rc_adjust_channels(sub->Arg[i].Swizzle, + conversion_swizzle); + } +} + +static void normal_rewrite_writemask_cb( + void * userdata, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + unsigned int * new_mask = (unsigned int *)userdata; + src->Swizzle = rc_adjust_channels(src->Swizzle, *new_mask); +} + +/** + * This function is the same as rc_pair_rewrite_writemask() except it + * operates on normal instructions. + */ +void rc_normal_rewrite_writemask( + struct rc_instruction * inst, + unsigned int conversion_swizzle) +{ + unsigned int new_mask; + struct rc_sub_instruction * sub = &inst->U.I; + const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); + sub->DstReg.WriteMask = + rewrite_writemask(sub->DstReg.WriteMask, conversion_swizzle); + + if (info->HasTexture) { + unsigned int i; + assert(sub->TexSwizzle == RC_SWIZZLE_XYZW); + for (i = 0; i < 4; i++) { + unsigned int swz = GET_SWZ(conversion_swizzle, i); + if (swz > 3) + continue; + SET_SWZ(sub->TexSwizzle, swz, i); + } + } + + if (!srcs_need_rewrite(info)) { + return; + } + + new_mask = sub->DstReg.WriteMask; + rc_for_all_reads_src(inst, normal_rewrite_writemask_cb, &new_mask); +} + +/** + * This function replaces each value 'swz' in swizzle with the value of + * GET_SWZ(conversion_swizzle, swz). So, if you want to change all the X's + * in swizzle to Y, then conversion_swizzle should be Y___ (0xff9). If you want + * to change all the Y's in swizzle to X, then conversion_swizzle should be + * _X__ (0xfc7). If you want to change the Y's to X and the X's to Y, then + * conversion swizzle should be YX__ (0xfc1). + * @param swizzle The swizzle to change + * @param conversion_swizzle Describes the conversion to perform on the swizzle + * @return A converted swizzle + */ +unsigned int rc_rewrite_swizzle( + unsigned int swizzle, + unsigned int conversion_swizzle) +{ + unsigned int chan; + unsigned int out_swizzle = swizzle; + + for (chan = 0; chan < 4; chan++) { + unsigned int swz = GET_SWZ(swizzle, chan); + unsigned int new_swz; + if (swz > 3) { + SET_SWZ(out_swizzle, chan, swz); + } else { + new_swz = GET_SWZ(conversion_swizzle, swz); + if (new_swz != RC_SWIZZLE_UNUSED) { + SET_SWZ(out_swizzle, chan, new_swz); + } else { + SET_SWZ(out_swizzle, chan, swz); + } + } + } + return out_swizzle; +} + +/** + * Left multiplication of a register with a swizzle + */ +struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg) +{ + struct rc_src_register tmp = srcreg; + int i; + tmp.Swizzle = 0; + tmp.Negate = 0; + for(i = 0; i < 4; ++i) { + rc_swizzle swz = GET_SWZ(swizzle, i); + if (swz < 4) { + tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); + tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; + } else { + tmp.Swizzle |= swz << (i*3); + } + } + return tmp; +} + +void reset_srcreg(struct rc_src_register* reg) +{ + memset(reg, 0, sizeof(struct rc_src_register)); + reg->Swizzle = RC_SWIZZLE_XYZW; +} + +unsigned int rc_src_reads_dst_mask( + rc_register_file src_file, + unsigned int src_idx, + unsigned int src_swz, + rc_register_file dst_file, + unsigned int dst_idx, + unsigned int dst_mask) +{ + if (src_file != dst_file || src_idx != dst_idx) { + return RC_MASK_NONE; + } + return dst_mask & rc_swizzle_to_writemask(src_swz); +} + +/** + * @return A bit mask specifying whether this swizzle will select from an RGB + * source, an Alpha source, or both. + */ +unsigned int rc_source_type_swz(unsigned int swizzle) +{ + unsigned int chan; + unsigned int swz = RC_SWIZZLE_UNUSED; + unsigned int ret = RC_SOURCE_NONE; + + for(chan = 0; chan < 4; chan++) { + swz = GET_SWZ(swizzle, chan); + if (swz == RC_SWIZZLE_W) { + ret |= RC_SOURCE_ALPHA; + } else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y + || swz == RC_SWIZZLE_Z) { + ret |= RC_SOURCE_RGB; + } + } + return ret; +} + +unsigned int rc_source_type_mask(unsigned int mask) +{ + unsigned int ret = RC_SOURCE_NONE; + + if (mask & RC_MASK_XYZ) + ret |= RC_SOURCE_RGB; + + if (mask & RC_MASK_W) + ret |= RC_SOURCE_ALPHA; + + return ret; +} + +struct src_select { + rc_register_file File; + int Index; + unsigned int SrcType; +}; + +struct can_use_presub_data { + struct src_select Selects[5]; + unsigned int SelectCount; + const struct rc_src_register * ReplaceReg; + unsigned int ReplaceRemoved; +}; + +static void can_use_presub_data_add_select( + struct can_use_presub_data * data, + rc_register_file file, + unsigned int index, + unsigned int src_type) +{ + struct src_select * select; + + select = &data->Selects[data->SelectCount++]; + select->File = file; + select->Index = index; + select->SrcType = src_type; +} + +/** + * This callback function counts the number of sources in inst that are + * different from the sources in can_use_presub_data->RemoveSrcs. + */ +static void can_use_presub_read_cb( + void * userdata, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct can_use_presub_data * d = userdata; + + if (!d->ReplaceRemoved && src == d->ReplaceReg) { + d->ReplaceRemoved = 1; + return; + } + + if (src->File == RC_FILE_NONE) + return; + + can_use_presub_data_add_select(d, src->File, src->Index, + rc_source_type_swz(src->Swizzle)); +} + +unsigned int rc_inst_can_use_presub( + struct rc_instruction * inst, + rc_presubtract_op presub_op, + unsigned int presub_writemask, + const struct rc_src_register * replace_reg, + const struct rc_src_register * presub_src0, + const struct rc_src_register * presub_src1) +{ + struct can_use_presub_data d; + unsigned int num_presub_srcs; + unsigned int i; + const struct rc_opcode_info * info = + rc_get_opcode_info(inst->U.I.Opcode); + int rgb_count = 0, alpha_count = 0; + unsigned int src_type0, src_type1; + + if (presub_op == RC_PRESUB_NONE) { + return 1; + } + + if (info->HasTexture) { + return 0; + } + + /* We can't use more than one presubtract value in an + * instruction, unless the two prsubtract operations + * are the same and read from the same registers. + * XXX For now we will limit instructions to only one presubtract + * value.*/ + if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { + return 0; + } + + memset(&d, 0, sizeof(d)); + d.ReplaceReg = replace_reg; + + rc_for_all_reads_src(inst, can_use_presub_read_cb, &d); + + num_presub_srcs = rc_presubtract_src_reg_count(presub_op); + + src_type0 = rc_source_type_swz(presub_src0->Swizzle); + can_use_presub_data_add_select(&d, + presub_src0->File, + presub_src0->Index, + src_type0); + + if (num_presub_srcs > 1) { + src_type1 = rc_source_type_swz(presub_src1->Swizzle); + can_use_presub_data_add_select(&d, + presub_src1->File, + presub_src1->Index, + src_type1); + + /* Even if both of the presub sources read from the same + * register, we still need to use 2 different source selects + * for them, so we need to increment the count to compensate. + */ + if (presub_src0->File == presub_src1->File + && presub_src0->Index == presub_src1->Index) { + if (src_type0 & src_type1 & RC_SOURCE_RGB) { + rgb_count++; + } + if (src_type0 & src_type1 & RC_SOURCE_ALPHA) { + alpha_count++; + } + } + } + + /* Count the number of source selects for Alpha and RGB. If we + * encounter two of the same source selects then we can ignore the + * first one. */ + for (i = 0; i < d.SelectCount; i++) { + unsigned int j; + unsigned int src_type = d.Selects[i].SrcType; + for (j = i + 1; j < d.SelectCount; j++) { + if (d.Selects[i].File == d.Selects[j].File + && d.Selects[i].Index == d.Selects[j].Index) { + src_type &= ~d.Selects[j].SrcType; + } + } + if (src_type & RC_SOURCE_RGB) { + rgb_count++; + } + + if (src_type & RC_SOURCE_ALPHA) { + alpha_count++; + } + } + + if (rgb_count > 3 || alpha_count > 3) { + return 0; + } + + return 1; +} + +struct max_data { + unsigned int Max; + unsigned int HasFileType; + rc_register_file File; +}; + +static void max_callback( + void * userdata, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) +{ + struct max_data * d = (struct max_data*)userdata; + if (file == d->File && (!d->HasFileType || index > d->Max)) { + d->Max = index; + d->HasFileType = 1; + } +} + +/** + * @return The maximum index of the specified register file used by the + * program. + */ +int rc_get_max_index( + struct radeon_compiler * c, + rc_register_file file) +{ + struct max_data data; + struct rc_instruction * inst; + data.Max = 0; + data.HasFileType = 0; + data.File = file; + for (inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + rc_for_all_reads_mask(inst, max_callback, &data); + rc_for_all_writes_mask(inst, max_callback, &data); + } + if (!data.HasFileType) { + return -1; + } else { + return data.Max; + } +} + +static unsigned int get_source_readmask( + struct rc_pair_sub_instruction * sub, + unsigned int source, + unsigned int src_type) +{ + unsigned int i; + unsigned int readmask = 0; + const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); + + for (i = 0; i < info->NumSrcRegs; i++) { + if (sub->Arg[i].Source != source + || src_type != rc_source_type_swz(sub->Arg[i].Swizzle)) { + continue; + } + readmask |= rc_swizzle_to_writemask(sub->Arg[i].Swizzle); + } + return readmask; +} + +/** + * This function attempts to remove a source from a pair instructions. + * @param inst + * @param src_type RC_SOURCE_RGB, RC_SOURCE_ALPHA, or both bitwise or'd + * @param source The index of the source to remove + * @param new_readmask A mask representing the components that are read by + * the source that is intended to replace the one you are removing. If you + * want to remove a source only and not replace it, this parameter should be + * zero. + * @return 1 if the source was successfully removed, 0 if it was not + */ +unsigned int rc_pair_remove_src( + struct rc_instruction * inst, + unsigned int src_type, + unsigned int source, + unsigned int new_readmask) +{ + unsigned int readmask = 0; + + readmask |= get_source_readmask(&inst->U.P.RGB, source, src_type); + readmask |= get_source_readmask(&inst->U.P.Alpha, source, src_type); + + if ((new_readmask & readmask) != readmask) + return 0; + + if (src_type & RC_SOURCE_RGB) { + memset(&inst->U.P.RGB.Src[source], 0, + sizeof(struct rc_pair_instruction_source)); + } + + if (src_type & RC_SOURCE_ALPHA) { + memset(&inst->U.P.Alpha.Src[source], 0, + sizeof(struct rc_pair_instruction_source)); + } + + return 1; +} + +/** + * @return RC_OPCODE_NOOP if inst is not a flow control instruction. + * @return The opcode of inst if it is a flow control instruction. + */ +rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst) +{ + const struct rc_opcode_info * info; + if (inst->Type == RC_INSTRUCTION_NORMAL) { + info = rc_get_opcode_info(inst->U.I.Opcode); + } else { + info = rc_get_opcode_info(inst->U.P.RGB.Opcode); + /*A flow control instruction shouldn't have an alpha + * instruction.*/ + assert(!info->IsFlowControl || + inst->U.P.Alpha.Opcode == RC_OPCODE_NOP); + } + + if (info->IsFlowControl) + return info->Opcode; + else + return RC_OPCODE_NOP; + +} + +/** + * @return The BGNLOOP instruction that starts the loop ended by endloop. + */ +struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop) +{ + unsigned int endloop_count = 0; + struct rc_instruction * inst; + for (inst = endloop->Prev; inst != endloop; inst = inst->Prev) { + rc_opcode op = rc_get_flow_control_inst(inst); + if (op == RC_OPCODE_ENDLOOP) { + endloop_count++; + } else if (op == RC_OPCODE_BGNLOOP) { + if (endloop_count == 0) { + return inst; + } else { + endloop_count--; + } + } + } + return NULL; +} + +/** + * @return The ENDLOOP instruction that ends the loop started by bgnloop. + */ +struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop) +{ + unsigned int bgnloop_count = 0; + struct rc_instruction * inst; + for (inst = bgnloop->Next; inst!=bgnloop; inst = inst->Next) { + rc_opcode op = rc_get_flow_control_inst(inst); + if (op == RC_OPCODE_BGNLOOP) { + bgnloop_count++; + } else if (op == RC_OPCODE_ENDLOOP) { + if (bgnloop_count == 0) { + return inst; + } else { + bgnloop_count--; + } + } + } + return NULL; +} + +/** + * @return A conversion swizzle for converting from old_mask->new_mask + */ +unsigned int rc_make_conversion_swizzle( + unsigned int old_mask, + unsigned int new_mask) +{ + unsigned int conversion_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0); + unsigned int old_idx; + unsigned int new_idx = 0; + for (old_idx = 0; old_idx < 4; old_idx++) { + if (!GET_BIT(old_mask, old_idx)) + continue; + for ( ; new_idx < 4; new_idx++) { + if (GET_BIT(new_mask, new_idx)) { + SET_SWZ(conversion_swizzle, old_idx, new_idx); + new_idx++; + break; + } + } + } + return conversion_swizzle; +} diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler_util.h b/src/gallium/drivers/r300/compiler/radeon_compiler_util.h new file mode 100644 index 00000000000..3730aa888c0 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_compiler_util.h @@ -0,0 +1,89 @@ +#include "radeon_program_constants.h" + +#ifndef RADEON_PROGRAM_UTIL_H +#define RADEON_PROGRAM_UTIL_H + +#include "radeon_opcodes.h" + +struct radeon_compiler; +struct rc_instruction; +struct rc_pair_instruction; +struct rc_pair_sub_instruction; +struct rc_src_register; + +unsigned int rc_swizzle_to_writemask(unsigned int swz); + +rc_swizzle get_swz(unsigned int swz, rc_swizzle idx); + +unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels); + +unsigned int combine_swizzles4(unsigned int src, + rc_swizzle swz_x, rc_swizzle swz_y, + rc_swizzle swz_z, rc_swizzle swz_w); + +unsigned int combine_swizzles(unsigned int src, unsigned int swz); + +rc_swizzle rc_mask_to_swizzle(unsigned int mask); + +unsigned swizzle_mask(unsigned swizzle, unsigned mask); + +unsigned int rc_adjust_channels( + unsigned int old_swizzle, + unsigned int conversion_swizzle); + +void rc_pair_rewrite_writemask( + struct rc_pair_sub_instruction * sub, + unsigned int conversion_swizzle); + +void rc_normal_rewrite_writemask( + struct rc_instruction * inst, + unsigned int conversion_swizzle); + +unsigned int rc_rewrite_swizzle( + unsigned int swizzle, + unsigned int new_mask); + +struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg); + +void reset_srcreg(struct rc_src_register* reg); + +unsigned int rc_src_reads_dst_mask( + rc_register_file src_file, + unsigned int src_idx, + unsigned int src_swz, + rc_register_file dst_file, + unsigned int dst_idx, + unsigned int dst_mask); + +unsigned int rc_source_type_swz(unsigned int swizzle); + +unsigned int rc_source_type_mask(unsigned int mask); + +unsigned int rc_inst_can_use_presub( + struct rc_instruction * inst, + rc_presubtract_op presub_op, + unsigned int presub_writemask, + const struct rc_src_register * replace_reg, + const struct rc_src_register * presub_src0, + const struct rc_src_register * presub_src1); + +int rc_get_max_index( + struct radeon_compiler * c, + rc_register_file file); + +unsigned int rc_pair_remove_src( + struct rc_instruction * inst, + unsigned int src_type, + unsigned int source, + unsigned int new_readmask); + +rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst); + +struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop); +struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop); + +unsigned int rc_make_conversion_swizzle( + unsigned int old_mask, + unsigned int new_mask); + +#endif /* RADEON_PROGRAM_UTIL_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow.c b/src/gallium/drivers/r300/compiler/radeon_dataflow.c new file mode 100644 index 00000000000..a8decacedaf --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_dataflow.c @@ -0,0 +1,892 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * Copyright 2010 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" +#include "radeon_program.h" + +struct read_write_mask_data { + void * UserData; + rc_read_write_mask_fn Cb; +}; + +static void reads_normal_callback( + void * userdata, + struct rc_instruction * fullinst, + struct rc_src_register * src) +{ + struct read_write_mask_data * cb_data = userdata; + unsigned int refmask = 0; + unsigned int chan; + for(chan = 0; chan < 4; chan++) { + refmask |= 1 << GET_SWZ(src->Swizzle, chan); + } + refmask &= RC_MASK_XYZW; + + if (refmask) { + cb_data->Cb(cb_data->UserData, fullinst, src->File, + src->Index, refmask); + } + + if (refmask && src->RelAddr) { + cb_data->Cb(cb_data->UserData, fullinst, RC_FILE_ADDRESS, 0, + RC_MASK_X); + } +} + +static void pair_get_src_refmasks(unsigned int * refmasks, + struct rc_pair_instruction * inst, + unsigned int swz, unsigned int src) +{ + if (swz >= 4) + return; + + if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y || swz == RC_SWIZZLE_Z) { + if(src == RC_PAIR_PRESUB_SRC) { + unsigned int i; + int srcp_regs = + rc_presubtract_src_reg_count( + inst->RGB.Src[src].Index); + for(i = 0; i < srcp_regs; i++) { + refmasks[i] |= 1 << swz; + } + } + else { + refmasks[src] |= 1 << swz; + } + } + + if (swz == RC_SWIZZLE_W) { + if (src == RC_PAIR_PRESUB_SRC) { + unsigned int i; + int srcp_regs = rc_presubtract_src_reg_count( + inst->Alpha.Src[src].Index); + for(i = 0; i < srcp_regs; i++) { + refmasks[i] |= 1 << swz; + } + } + else { + refmasks[src] |= 1 << swz; + } + } +} + +static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) +{ + struct rc_pair_instruction * inst = &fullinst->U.P; + unsigned int refmasks[3] = { 0, 0, 0 }; + + unsigned int arg; + + for(arg = 0; arg < 3; ++arg) { + unsigned int chan; + for(chan = 0; chan < 3; ++chan) { + unsigned int swz_rgb = + GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan); + unsigned int swz_alpha = + GET_SWZ(inst->Alpha.Arg[arg].Swizzle, chan); + pair_get_src_refmasks(refmasks, inst, swz_rgb, + inst->RGB.Arg[arg].Source); + pair_get_src_refmasks(refmasks, inst, swz_alpha, + inst->Alpha.Arg[arg].Source); + } + } + + for(unsigned int src = 0; src < 3; ++src) { + if (inst->RGB.Src[src].Used && (refmasks[src] & RC_MASK_XYZ)) + cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, + refmasks[src] & RC_MASK_XYZ); + + if (inst->Alpha.Src[src].Used && (refmasks[src] & RC_MASK_W)) + cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, RC_MASK_W); + } +} + +static void pair_sub_for_all_args( + struct rc_instruction * fullinst, + struct rc_pair_sub_instruction * sub, + rc_pair_read_arg_fn cb, + void * userdata) +{ + int i; + const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); + + for(i = 0; i < info->NumSrcRegs; i++) { + unsigned int src_type; + + src_type = rc_source_type_swz(sub->Arg[i].Swizzle); + + if (src_type == RC_SOURCE_NONE) + continue; + + if (sub->Arg[i].Source == RC_PAIR_PRESUB_SRC) { + unsigned int presub_type; + unsigned int presub_src_count; + struct rc_pair_instruction_source * src_array; + unsigned int j; + + if (src_type & RC_SOURCE_RGB) { + presub_type = fullinst-> + U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index; + src_array = fullinst->U.P.RGB.Src; + } else { + presub_type = fullinst-> + U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index; + src_array = fullinst->U.P.Alpha.Src; + } + presub_src_count + = rc_presubtract_src_reg_count(presub_type); + for(j = 0; j < presub_src_count; j++) { + cb(userdata, fullinst, &sub->Arg[i], + &src_array[j]); + } + } else { + struct rc_pair_instruction_source * src = + rc_pair_get_src(&fullinst->U.P, &sub->Arg[i]); + if (src) { + cb(userdata, fullinst, &sub->Arg[i], src); + } + } + } +} + +/* This function calls the callback function (cb) for each source used by + * the instruction. + * */ +void rc_for_all_reads_src( + struct rc_instruction * inst, + rc_read_src_fn cb, + void * userdata) +{ + const struct rc_opcode_info * opcode = + rc_get_opcode_info(inst->U.I.Opcode); + + /* This function only works with normal instructions. */ + if (inst->Type != RC_INSTRUCTION_NORMAL) { + assert(0); + return; + } + + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + + if (inst->U.I.SrcReg[src].File == RC_FILE_NONE) + continue; + + if (inst->U.I.SrcReg[src].File == RC_FILE_PRESUB) { + unsigned int i; + unsigned int srcp_regs = rc_presubtract_src_reg_count( + inst->U.I.PreSub.Opcode); + for( i = 0; i < srcp_regs; i++) { + cb(userdata, inst, &inst->U.I.PreSub.SrcReg[i]); + } + } else { + cb(userdata, inst, &inst->U.I.SrcReg[src]); + } + } +} + +/** + * This function calls the callback function (cb) for each arg of the RGB and + * alpha components. + */ +void rc_pair_for_all_reads_arg( + struct rc_instruction * inst, + rc_pair_read_arg_fn cb, + void * userdata) +{ + /* This function only works with pair instructions. */ + if (inst->Type != RC_INSTRUCTION_PAIR) { + assert(0); + return; + } + + pair_sub_for_all_args(inst, &inst->U.P.RGB, cb, userdata); + pair_sub_for_all_args(inst, &inst->U.P.Alpha, cb, userdata); +} + +/** + * Calls a callback function for all register reads. + * + * This is conservative, i.e. if the same register is referenced multiple times, + * the callback may also be called multiple times. + * Also, the writemask of the instruction is not taken into account. + */ +void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata) +{ + if (inst->Type == RC_INSTRUCTION_NORMAL) { + struct read_write_mask_data cb_data; + cb_data.UserData = userdata; + cb_data.Cb = cb; + + rc_for_all_reads_src(inst, reads_normal_callback, &cb_data); + } else { + reads_pair(inst, cb, userdata); + } +} + + + +static void writes_normal(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) +{ + struct rc_sub_instruction * inst = &fullinst->U.I; + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + + if (opcode->HasDstReg && inst->DstReg.WriteMask) + cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, inst->DstReg.WriteMask); + + if (inst->WriteALUResult) + cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X); +} + +static void writes_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) +{ + struct rc_pair_instruction * inst = &fullinst->U.P; + + if (inst->RGB.WriteMask) + cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, inst->RGB.WriteMask); + + if (inst->Alpha.WriteMask) + cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, RC_MASK_W); + + if (inst->WriteALUResult) + cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X); +} + +/** + * Calls a callback function for all register writes in the instruction, + * reporting writemasks to the callback function. + * + * \warning Does not report output registers for paired instructions! + */ +void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata) +{ + if (inst->Type == RC_INSTRUCTION_NORMAL) { + writes_normal(inst, cb, userdata); + } else { + writes_pair(inst, cb, userdata); + } +} + + +struct mask_to_chan_data { + void * UserData; + rc_read_write_chan_fn Fn; +}; + +static void mask_to_chan_cb(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct mask_to_chan_data * d = data; + for(unsigned int chan = 0; chan < 4; ++chan) { + if (GET_BIT(mask, chan)) + d->Fn(d->UserData, inst, file, index, chan); + } +} + +/** + * Calls a callback function for all sourced register channels. + * + * This is conservative, i.e. channels may be called multiple times, + * and the writemask of the instruction is not taken into account. + */ +void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata) +{ + struct mask_to_chan_data d; + d.UserData = userdata; + d.Fn = cb; + rc_for_all_reads_mask(inst, &mask_to_chan_cb, &d); +} + +/** + * Calls a callback function for all written register channels. + * + * \warning Does not report output registers for paired instructions! + */ +void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata) +{ + struct mask_to_chan_data d; + d.UserData = userdata; + d.Fn = cb; + rc_for_all_writes_mask(inst, &mask_to_chan_cb, &d); +} + +static void remap_normal_instruction(struct rc_instruction * fullinst, + rc_remap_register_fn cb, void * userdata) +{ + struct rc_sub_instruction * inst = &fullinst->U.I; + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + unsigned int remapped_presub = 0; + + if (opcode->HasDstReg) { + rc_register_file file = inst->DstReg.File; + unsigned int index = inst->DstReg.Index; + + cb(userdata, fullinst, &file, &index); + + inst->DstReg.File = file; + inst->DstReg.Index = index; + } + + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + rc_register_file file = inst->SrcReg[src].File; + unsigned int index = inst->SrcReg[src].Index; + + if (file == RC_FILE_PRESUB) { + unsigned int i; + unsigned int srcp_srcs = rc_presubtract_src_reg_count( + inst->PreSub.Opcode); + /* Make sure we only remap presubtract sources once in + * case more than one source register reads the + * presubtract result. */ + if (remapped_presub) + continue; + + for(i = 0; i < srcp_srcs; i++) { + file = inst->PreSub.SrcReg[i].File; + index = inst->PreSub.SrcReg[i].Index; + cb(userdata, fullinst, &file, &index); + inst->PreSub.SrcReg[i].File = file; + inst->PreSub.SrcReg[i].Index = index; + } + remapped_presub = 1; + } + else { + cb(userdata, fullinst, &file, &index); + + inst->SrcReg[src].File = file; + inst->SrcReg[src].Index = index; + } + } +} + +static void remap_pair_instruction(struct rc_instruction * fullinst, + rc_remap_register_fn cb, void * userdata) +{ + struct rc_pair_instruction * inst = &fullinst->U.P; + + if (inst->RGB.WriteMask) { + rc_register_file file = RC_FILE_TEMPORARY; + unsigned int index = inst->RGB.DestIndex; + + cb(userdata, fullinst, &file, &index); + + inst->RGB.DestIndex = index; + } + + if (inst->Alpha.WriteMask) { + rc_register_file file = RC_FILE_TEMPORARY; + unsigned int index = inst->Alpha.DestIndex; + + cb(userdata, fullinst, &file, &index); + + inst->Alpha.DestIndex = index; + } + + for(unsigned int src = 0; src < 3; ++src) { + if (inst->RGB.Src[src].Used) { + rc_register_file file = inst->RGB.Src[src].File; + unsigned int index = inst->RGB.Src[src].Index; + + cb(userdata, fullinst, &file, &index); + + inst->RGB.Src[src].File = file; + inst->RGB.Src[src].Index = index; + } + + if (inst->Alpha.Src[src].Used) { + rc_register_file file = inst->Alpha.Src[src].File; + unsigned int index = inst->Alpha.Src[src].Index; + + cb(userdata, fullinst, &file, &index); + + inst->Alpha.Src[src].File = file; + inst->Alpha.Src[src].Index = index; + } + } +} + + +/** + * Remap all register accesses according to the given function. + * That is, call the function \p cb for each referenced register (both read and written) + * and update the given instruction \p inst accordingly + * if it modifies its \ref pfile and \ref pindex contents. + */ +void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata) +{ + if (inst->Type == RC_INSTRUCTION_NORMAL) + remap_normal_instruction(inst, cb, userdata); + else + remap_pair_instruction(inst, cb, userdata); +} + +struct branch_write_mask { + unsigned int IfWriteMask:4; + unsigned int ElseWriteMask:4; + unsigned int HasElse:1; +}; + +union get_readers_read_cb { + rc_read_src_fn I; + rc_pair_read_arg_fn P; +}; + +struct get_readers_callback_data { + struct radeon_compiler * C; + struct rc_reader_data * ReaderData; + rc_read_src_fn ReadNormalCB; + rc_pair_read_arg_fn ReadPairCB; + rc_read_write_mask_fn WriteCB; + rc_register_file DstFile; + unsigned int DstIndex; + unsigned int DstMask; + unsigned int AliveWriteMask; + /* For convenience, this is indexed starting at 1 */ + struct branch_write_mask BranchMasks[R500_PFS_MAX_BRANCH_DEPTH_FULL + 1]; +}; + +static struct rc_reader * add_reader( + struct memory_pool * pool, + struct rc_reader_data * data, + struct rc_instruction * inst, + unsigned int mask) +{ + struct rc_reader * new; + memory_pool_array_reserve(pool, struct rc_reader, data->Readers, + data->ReaderCount, data->ReadersReserved, 1); + new = &data->Readers[data->ReaderCount++]; + new->Inst = inst; + new->WriteMask = mask; + return new; +} + +static void add_reader_normal( + struct memory_pool * pool, + struct rc_reader_data * data, + struct rc_instruction * inst, + unsigned int mask, + struct rc_src_register * src) +{ + struct rc_reader * new = add_reader(pool, data, inst, mask); + new->U.I.Src = src; +} + + +static void add_reader_pair( + struct memory_pool * pool, + struct rc_reader_data * data, + struct rc_instruction * inst, + unsigned int mask, + struct rc_pair_instruction_arg * arg, + struct rc_pair_instruction_source * src) +{ + struct rc_reader * new = add_reader(pool, data, inst, mask); + new->U.P.Src = src; + new->U.P.Arg = arg; +} + +static unsigned int get_readers_read_callback( + struct get_readers_callback_data * cb_data, + unsigned int has_rel_addr, + rc_register_file file, + unsigned int index, + unsigned int swizzle) +{ + unsigned int shared_mask, read_mask; + + if (has_rel_addr) { + cb_data->ReaderData->Abort = 1; + return RC_MASK_NONE; + } + + shared_mask = rc_src_reads_dst_mask(file, index, swizzle, + cb_data->DstFile, cb_data->DstIndex, cb_data->AliveWriteMask); + + if (shared_mask == RC_MASK_NONE) + return shared_mask; + + /* If we make it this far, it means that this source reads from the + * same register written to by d->ReaderData->Writer. */ + + read_mask = rc_swizzle_to_writemask(swizzle); + if (cb_data->ReaderData->AbortOnRead & read_mask) { + cb_data->ReaderData->Abort = 1; + return shared_mask; + } + + if (cb_data->ReaderData->LoopDepth > 0) { + cb_data->ReaderData->AbortOnWrite |= + (read_mask & cb_data->AliveWriteMask); + } + + /* XXX The behavior in this case should be configurable. */ + if ((read_mask & cb_data->AliveWriteMask) != read_mask) { + cb_data->ReaderData->Abort = 1; + return shared_mask; + } + + return shared_mask; +} + +static void get_readers_pair_read_callback( + void * userdata, + struct rc_instruction * inst, + struct rc_pair_instruction_arg * arg, + struct rc_pair_instruction_source * src) +{ + unsigned int shared_mask; + struct get_readers_callback_data * d = userdata; + + shared_mask = get_readers_read_callback(d, + 0 /*Pair Instructions don't use RelAddr*/, + src->File, src->Index, arg->Swizzle); + + if (shared_mask == RC_MASK_NONE) + return; + + if (d->ReadPairCB) + d->ReadPairCB(d->ReaderData, inst, arg, src); + + if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort) + return; + + add_reader_pair(&d->C->Pool, d->ReaderData, inst, shared_mask, arg, src); +} + +/** + * This function is used by rc_get_readers_normal() to determine whether inst + * is a reader of userdata->ReaderData->Writer + */ +static void get_readers_normal_read_callback( + void * userdata, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct get_readers_callback_data * d = userdata; + unsigned int shared_mask; + + shared_mask = get_readers_read_callback(d, + src->RelAddr, src->File, src->Index, src->Swizzle); + + if (shared_mask == RC_MASK_NONE) + return; + /* The callback function could potentially clear d->ReaderData->Abort, + * so we need to call it before we return. */ + if (d->ReadNormalCB) + d->ReadNormalCB(d->ReaderData, inst, src); + + if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort) + return; + + add_reader_normal(&d->C->Pool, d->ReaderData, inst, shared_mask, src); +} + +/** + * This function is used by rc_get_readers_normal() to determine when + * userdata->ReaderData->Writer is dead (i. e. All compontents of its + * destination register have been overwritten by other instructions). + */ +static void get_readers_write_callback( + void *userdata, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) +{ + struct get_readers_callback_data * d = userdata; + + if (index == d->DstIndex && file == d->DstFile) { + unsigned int shared_mask = mask & d->DstMask; + d->ReaderData->AbortOnRead &= ~shared_mask; + d->AliveWriteMask &= ~shared_mask; + if (d->ReaderData->AbortOnWrite & shared_mask) { + d->ReaderData->Abort = 1; + } + } + + if(d->WriteCB) + d->WriteCB(d->ReaderData, inst, file, index, mask); +} + +static void push_branch_mask( + struct get_readers_callback_data * d, + unsigned int * branch_depth) +{ + (*branch_depth)++; + if (*branch_depth > R500_PFS_MAX_BRANCH_DEPTH_FULL) { + d->ReaderData->Abort = 1; + return; + } + d->BranchMasks[*branch_depth].IfWriteMask = + d->AliveWriteMask; +} + +static void pop_branch_mask( + struct get_readers_callback_data * d, + unsigned int * branch_depth) +{ + struct branch_write_mask * masks = &d->BranchMasks[*branch_depth]; + + if (masks->HasElse) { + /* Abort on read for components that were written in the IF + * block. */ + d->ReaderData->AbortOnRead |= + masks->IfWriteMask & ~masks->ElseWriteMask; + /* Abort on read for components that were written in the ELSE + * block. */ + d->ReaderData->AbortOnRead |= + masks->ElseWriteMask & ~d->AliveWriteMask; + + d->AliveWriteMask = masks->IfWriteMask + ^ ((masks->IfWriteMask ^ masks->ElseWriteMask) + & (masks->IfWriteMask ^ d->AliveWriteMask)); + } else { + d->ReaderData->AbortOnRead |= + masks->IfWriteMask & ~d->AliveWriteMask; + d->AliveWriteMask = masks->IfWriteMask; + + } + memset(masks, 0, sizeof(struct branch_write_mask)); + (*branch_depth)--; +} + +static void get_readers_for_single_write( + void * userdata, + struct rc_instruction * writer, + rc_register_file dst_file, + unsigned int dst_index, + unsigned int dst_mask) +{ + struct rc_instruction * tmp; + unsigned int branch_depth = 0; + struct rc_instruction * endloop = NULL; + unsigned int abort_on_read_at_endloop = 0; + struct get_readers_callback_data * d = userdata; + + d->ReaderData->Writer = writer; + d->ReaderData->AbortOnRead = 0; + d->ReaderData->AbortOnWrite = 0; + d->ReaderData->LoopDepth = 0; + d->ReaderData->InElse = 0; + d->DstFile = dst_file; + d->DstIndex = dst_index; + d->DstMask = dst_mask; + d->AliveWriteMask = dst_mask; + memset(d->BranchMasks, 0, sizeof(d->BranchMasks)); + + if (!dst_mask) + return; + + for(tmp = writer->Next; tmp != &d->C->Program.Instructions; + tmp = tmp->Next){ + rc_opcode opcode = rc_get_flow_control_inst(tmp); + switch(opcode) { + case RC_OPCODE_BGNLOOP: + d->ReaderData->LoopDepth++; + push_branch_mask(d, &branch_depth); + break; + case RC_OPCODE_ENDLOOP: + if (d->ReaderData->LoopDepth > 0) { + d->ReaderData->LoopDepth--; + if (d->ReaderData->LoopDepth == 0) { + d->ReaderData->AbortOnWrite = 0; + } + pop_branch_mask(d, &branch_depth); + } else { + /* Here we have reached an ENDLOOP without + * seeing its BGNLOOP. These means that + * the writer was written inside of a loop, + * so it could have readers that are above it + * (i.e. they have a lower IP). To find these + * readers we jump to the BGNLOOP instruction + * and check each instruction until we get + * back to the writer. + */ + endloop = tmp; + tmp = rc_match_endloop(tmp); + if (!tmp) { + rc_error(d->C, "Failed to match endloop.\n"); + d->ReaderData->Abort = 1; + return; + } + abort_on_read_at_endloop = d->ReaderData->AbortOnRead; + d->ReaderData->AbortOnRead |= d->AliveWriteMask; + continue; + } + break; + case RC_OPCODE_IF: + push_branch_mask(d, &branch_depth); + break; + case RC_OPCODE_ELSE: + if (branch_depth == 0) { + d->ReaderData->InElse = 1; + } else { + unsigned int temp_mask = d->AliveWriteMask; + d->AliveWriteMask = + d->BranchMasks[branch_depth].IfWriteMask; + d->BranchMasks[branch_depth].ElseWriteMask = + temp_mask; + d->BranchMasks[branch_depth].HasElse = 1; + } + break; + case RC_OPCODE_ENDIF: + if (branch_depth == 0) { + d->ReaderData->AbortOnRead = d->AliveWriteMask; + d->ReaderData->InElse = 0; + } + else { + pop_branch_mask(d, &branch_depth); + } + break; + default: + break; + } + + if (d->ReaderData->InElse) + continue; + + if (tmp->Type == RC_INSTRUCTION_NORMAL) { + rc_for_all_reads_src(tmp, + get_readers_normal_read_callback, d); + } else { + rc_pair_for_all_reads_arg(tmp, + get_readers_pair_read_callback, d); + } + + /* This can happen when we jump from an ENDLOOP to BGNLOOP */ + if (tmp == writer) { + tmp = endloop; + endloop = NULL; + d->ReaderData->AbortOnRead = abort_on_read_at_endloop; + continue; + } + rc_for_all_writes_mask(tmp, get_readers_write_callback, d); + + if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort) + return; + + if (branch_depth == 0 && !d->AliveWriteMask) + return; + } +} + +static void init_get_readers_callback_data( + struct get_readers_callback_data * d, + struct rc_reader_data * reader_data, + struct radeon_compiler * c, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, + rc_read_write_mask_fn write_cb) +{ + reader_data->Abort = 0; + reader_data->ReaderCount = 0; + reader_data->ReadersReserved = 0; + reader_data->Readers = NULL; + + d->C = c; + d->ReaderData = reader_data; + d->ReadNormalCB = read_normal_cb; + d->ReadPairCB = read_pair_cb; + d->WriteCB = write_cb; +} + +/** + * This function will create a list of readers via the rc_reader_data struct. + * This function will abort (set the flag data->Abort) and return if it + * encounters an instruction that reads from @param writer and also a different + * instruction. Here are some examples: + * + * writer = instruction 0; + * 0 MOV TEMP[0].xy, TEMP[1].xy + * 1 MOV TEMP[0].zw, TEMP[2].xy + * 2 MOV TEMP[3], TEMP[0] + * The Abort flag will be set on instruction 2, because it reads values written + * by instructions 0 and 1. + * + * writer = instruction 1; + * 0 IF TEMP[0].x + * 1 MOV TEMP[1], TEMP[2] + * 2 ELSE + * 3 MOV TEMP[1], TEMP[2] + * 4 ENDIF + * 5 MOV TEMP[3], TEMP[1] + * The Abort flag will be set on instruction 5, because it could read from the + * value written by either instruction 1 or 3, depending on the jump decision + * made at instruction 0. + * + * writer = instruction 0; + * 0 MOV TEMP[0], TEMP[1] + * 2 BGNLOOP + * 3 ADD TEMP[0], TEMP[0], none.1 + * 4 ENDLOOP + * The Abort flag will be set on instruction 3, because in the first iteration + * of the loop it reads the value written by instruction 0 and in all other + * iterations it reads the value written by instruction 3. + * + * @param read_cb This function will be called for for every instruction that + * has been determined to be a reader of writer. + * @param write_cb This function will be called for every instruction after + * writer. + */ +void rc_get_readers( + struct radeon_compiler * c, + struct rc_instruction * writer, + struct rc_reader_data * data, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, + rc_read_write_mask_fn write_cb) +{ + struct get_readers_callback_data d; + + init_get_readers_callback_data(&d, data, c, read_normal_cb, + read_pair_cb, write_cb); + + rc_for_all_writes_mask(writer, get_readers_for_single_write, &d); +} + +void rc_get_readers_sub( + struct radeon_compiler * c, + struct rc_instruction * writer, + struct rc_pair_sub_instruction * sub_writer, + struct rc_reader_data * data, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, + rc_read_write_mask_fn write_cb) +{ + struct get_readers_callback_data d; + + init_get_readers_callback_data(&d, data, c, read_normal_cb, + read_pair_cb, write_cb); + + if (sub_writer->WriteMask) { + get_readers_for_single_write(&d, writer, RC_FILE_TEMPORARY, + sub_writer->DestIndex, sub_writer->WriteMask); + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow.h b/src/gallium/drivers/r300/compiler/radeon_dataflow.h new file mode 100644 index 00000000000..d8a627258ea --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_dataflow.h @@ -0,0 +1,134 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * Copyright 2010 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_DATAFLOW_H +#define RADEON_DATAFLOW_H + +#include "radeon_program_constants.h" + +struct radeon_compiler; +struct rc_instruction; +struct rc_swizzle_caps; +struct rc_src_register; +struct rc_pair_instruction_arg; +struct rc_pair_instruction_source; +struct rc_pair_sub_instruction; +struct rc_compiler; + + +/** + * Help analyze and modify the register accesses of instructions. + */ +/*@{*/ +typedef void (*rc_read_write_chan_fn)(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int chan); +void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata); +void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata); + +typedef void (*rc_read_write_mask_fn)(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask); +void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata); +void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata); + +typedef void (*rc_read_src_fn)(void * userdata, struct rc_instruction * inst, + struct rc_src_register * src); +void rc_for_all_reads_src(struct rc_instruction * inst, rc_read_src_fn cb, + void * userdata); + +typedef void (*rc_pair_read_arg_fn)(void * userdata, + struct rc_instruction * inst, struct rc_pair_instruction_arg * arg, + struct rc_pair_instruction_source * src); +void rc_pair_for_all_reads_arg(struct rc_instruction * inst, + rc_pair_read_arg_fn cb, void * userdata); + +typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst, + rc_register_file * pfile, unsigned int * pindex); +void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata); +/*@}*/ + +struct rc_reader { + struct rc_instruction * Inst; + unsigned int WriteMask; + union { + struct { + struct rc_src_register * Src; + } I; + struct { + struct rc_pair_instruction_arg * Arg; + struct rc_pair_instruction_source * Src; + } P; + } U; +}; + +struct rc_reader_data { + unsigned int Abort; + unsigned int AbortOnRead; + unsigned int AbortOnWrite; + unsigned int LoopDepth; + unsigned int InElse; + struct rc_instruction * Writer; + + unsigned int ReaderCount; + unsigned int ReadersReserved; + struct rc_reader * Readers; + + /* If this flag is enabled, rc_get_readers will exit as soon possbile + * after the Abort flag is set.*/ + unsigned int ExitOnAbort; + void * CbData; +}; + +void rc_get_readers( + struct radeon_compiler * c, + struct rc_instruction * writer, + struct rc_reader_data * data, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, + rc_read_write_mask_fn write_cb); + +void rc_get_readers_sub( + struct radeon_compiler * c, + struct rc_instruction * writer, + struct rc_pair_sub_instruction * sub_writer, + struct rc_reader_data * data, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, + rc_read_write_mask_fn write_cb); +/** + * Compiler passes based on dataflow analysis. + */ +/*@{*/ +typedef void (*rc_dataflow_mark_outputs_fn)(void * userdata, void * data, + void (*mark_fn)(void * data, unsigned int index, unsigned int mask)); +void rc_dataflow_deadcode(struct radeon_compiler * c, void *user); +void rc_dataflow_swizzles(struct radeon_compiler * c, void *user); +/*@}*/ + +void rc_optimize(struct radeon_compiler * c, void *user); + +#endif /* RADEON_DATAFLOW_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c b/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c new file mode 100644 index 00000000000..678e1475883 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c @@ -0,0 +1,359 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" + + +struct updatemask_state { + unsigned char Output[RC_REGISTER_MAX_INDEX]; + unsigned char Temporary[RC_REGISTER_MAX_INDEX]; + unsigned char Address; + unsigned char Special[RC_NUM_SPECIAL_REGISTERS]; +}; + +struct instruction_state { + unsigned char WriteMask:4; + unsigned char WriteALUResult:1; + unsigned char SrcReg[3]; +}; + +struct loopinfo { + struct updatemask_state * Breaks; + unsigned int BreakCount; + unsigned int BreaksReserved; +}; + +struct branchinfo { + unsigned int HaveElse:1; + + struct updatemask_state StoreEndif; + struct updatemask_state StoreElse; +}; + +struct deadcode_state { + struct radeon_compiler * C; + struct instruction_state * Instructions; + + struct updatemask_state R; + + struct branchinfo * BranchStack; + unsigned int BranchStackSize; + unsigned int BranchStackReserved; + + struct loopinfo * LoopStack; + unsigned int LoopStackSize; + unsigned int LoopStackReserved; +}; + + +static void or_updatemasks( + struct updatemask_state * dst, + struct updatemask_state * a, + struct updatemask_state * b) +{ + for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) { + dst->Output[i] = a->Output[i] | b->Output[i]; + dst->Temporary[i] = a->Temporary[i] | b->Temporary[i]; + } + + for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i) + dst->Special[i] = a->Special[i] | b->Special[i]; + + dst->Address = a->Address | b->Address; +} + +static void push_break(struct deadcode_state *s) +{ + struct loopinfo * loop = &s->LoopStack[s->LoopStackSize - 1]; + memory_pool_array_reserve(&s->C->Pool, struct updatemask_state, + loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1); + + memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R)); +} + +static void push_loop(struct deadcode_state * s) +{ + memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack, + s->LoopStackSize, s->LoopStackReserved, 1); + memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo)); +} + +static void push_branch(struct deadcode_state * s) +{ + struct branchinfo * branch; + + memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack, + s->BranchStackSize, s->BranchStackReserved, 1); + + branch = &s->BranchStack[s->BranchStackSize++]; + branch->HaveElse = 0; + memcpy(&branch->StoreEndif, &s->R, sizeof(s->R)); +} + +static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index) +{ + if (file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) { + if (index >= RC_REGISTER_MAX_INDEX) { + rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file); + return 0; + } + + if (file == RC_FILE_OUTPUT) + return &s->R.Output[index]; + else + return &s->R.Temporary[index]; + } else if (file == RC_FILE_ADDRESS) { + return &s->R.Address; + } else if (file == RC_FILE_SPECIAL) { + if (index >= RC_NUM_SPECIAL_REGISTERS) { + rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index); + return 0; + } + + return &s->R.Special[index]; + } + + return 0; +} + +static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask) +{ + unsigned char * pused = get_used_ptr(s, file, index); + if (pused) + *pused |= mask; +} + +static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + struct instruction_state * insts = &s->Instructions[inst->IP]; + unsigned int usedmask = 0; + unsigned int srcmasks[3]; + + if (opcode->HasDstReg) { + unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index); + if (pused) { + usedmask = *pused & inst->U.I.DstReg.WriteMask; + *pused &= ~usedmask; + } + } + + insts->WriteMask |= usedmask; + + if (inst->U.I.WriteALUResult) { + unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT); + if (pused && *pused) { + if (inst->U.I.WriteALUResult == RC_ALURESULT_X) + usedmask |= RC_MASK_X; + else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) + usedmask |= RC_MASK_W; + + *pused = 0; + insts->WriteALUResult = 1; + } + } + + rc_compute_sources_for_writemask(inst, usedmask, srcmasks); + + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + unsigned int refmask = 0; + unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src]; + insts->SrcReg[src] |= newsrcmask; + + for(unsigned int chan = 0; chan < 4; ++chan) { + if (GET_BIT(newsrcmask, chan)) + refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan); + } + + /* get rid of spurious bits from ZERO, ONE, etc. swizzles */ + refmask &= RC_MASK_XYZW; + + if (!refmask) + continue; + + mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask); + + if (inst->U.I.SrcReg[src].RelAddr) + mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X); + } +} + +static void mark_output_use(void * data, unsigned int index, unsigned int mask) +{ + struct deadcode_state * s = data; + + mark_used(s, RC_FILE_OUTPUT, index, mask); +} + +void rc_dataflow_deadcode(struct radeon_compiler * c, void *user) +{ + struct deadcode_state s; + unsigned int nr_instructions; + rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user; + unsigned int ip; + + memset(&s, 0, sizeof(s)); + s.C = c; + + nr_instructions = rc_recompute_ips(c); + s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions); + memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions); + + dce(c, &s, &mark_output_use); + + for(struct rc_instruction * inst = c->Program.Instructions.Prev; + inst != &c->Program.Instructions; + inst = inst->Prev) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + switch(opcode->Opcode){ + /* Mark all sources in the loop body as used before doing + * normal deadcode analysis. This is probably not optimal. + */ + case RC_OPCODE_ENDLOOP: + { + int endloops = 1; + struct rc_instruction *ptr; + for(ptr = inst->Prev; endloops > 0; ptr = ptr->Prev){ + opcode = rc_get_opcode_info(ptr->U.I.Opcode); + if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ + endloops--; + continue; + } + if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){ + endloops++; + continue; + } + if(opcode->HasDstReg){ + int src = 0; + unsigned int srcmasks[3]; + rc_compute_sources_for_writemask(ptr, + ptr->U.I.DstReg.WriteMask, srcmasks); + for(src=0; src < opcode->NumSrcRegs; src++){ + mark_used(&s, + ptr->U.I.SrcReg[src].File, + ptr->U.I.SrcReg[src].Index, + srcmasks[src]); + } + } + } + push_loop(&s); + break; + } + case RC_OPCODE_BRK: + push_break(&s); + break; + case RC_OPCODE_BGNLOOP: + { + unsigned int i; + struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1]; + for(i = 0; i < loop->BreakCount; i++) { + or_updatemasks(&s.R, &s.R, &loop->Breaks[i]); + } + break; + } + case RC_OPCODE_CONT: + break; + case RC_OPCODE_ENDIF: + push_branch(&s); + break; + default: + if (opcode->IsFlowControl && s.BranchStackSize) { + struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1]; + if (opcode->Opcode == RC_OPCODE_IF) { + or_updatemasks(&s.R, + &s.R, + branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif); + + s.BranchStackSize--; + } else if (opcode->Opcode == RC_OPCODE_ELSE) { + if (branch->HaveElse) { + rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__); + } else { + memcpy(&branch->StoreElse, &s.R, sizeof(s.R)); + memcpy(&s.R, &branch->StoreEndif, sizeof(s.R)); + branch->HaveElse = 1; + } + } else { + rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name); + } + } + } + + update_instruction(&s, inst); + } + + ip = 0; + for(struct rc_instruction * inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next, ++ip) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + int dead = 1; + unsigned int srcmasks[3]; + unsigned int usemask; + + if (!opcode->HasDstReg) { + dead = 0; + } else { + inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask; + if (s.Instructions[ip].WriteMask) + dead = 0; + + if (s.Instructions[ip].WriteALUResult) + dead = 0; + else + inst->U.I.WriteALUResult = RC_ALURESULT_NONE; + } + + if (dead) { + struct rc_instruction * todelete = inst; + inst = inst->Prev; + rc_remove_instruction(todelete); + continue; + } + + usemask = s.Instructions[ip].WriteMask; + + if (inst->U.I.WriteALUResult == RC_ALURESULT_X) + usemask |= RC_MASK_X; + else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) + usemask |= RC_MASK_W; + + rc_compute_sources_for_writemask(inst, usemask, srcmasks); + + for(unsigned int src = 0; src < 3; ++src) { + for(unsigned int chan = 0; chan < 4; ++chan) { + if (!GET_BIT(srcmasks[src], chan)) + SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED); + } + } + } + + rc_calculate_inputs_outputs(c); +} diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c b/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c new file mode 100644 index 00000000000..133a9f72ec7 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c @@ -0,0 +1,103 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" +#include "radeon_swizzle.h" + + +static void rewrite_source(struct radeon_compiler * c, + struct rc_instruction * inst, unsigned src) +{ + struct rc_swizzle_split split; + unsigned int tempreg = rc_find_free_temporary(c); + unsigned int usemask; + + usemask = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { + if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED) + usemask |= 1 << chan; + } + + c->SwizzleCaps->Split(inst->U.I.SrcReg[src], usemask, &split); + + for(unsigned int phase = 0; phase < split.NumPhases; ++phase) { + struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev); + unsigned int phase_refmask; + unsigned int masked_negate; + + mov->U.I.Opcode = RC_OPCODE_MOV; + mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + mov->U.I.DstReg.Index = tempreg; + mov->U.I.DstReg.WriteMask = split.Phase[phase]; + mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src]; + mov->U.I.PreSub = inst->U.I.PreSub; + + phase_refmask = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { + if (!GET_BIT(split.Phase[phase], chan)) + SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED); + else + phase_refmask |= 1 << GET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan); + } + + phase_refmask &= RC_MASK_XYZW; + + masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate; + if (masked_negate == 0) + mov->U.I.SrcReg[0].Negate = 0; + else if (masked_negate == split.Phase[phase]) + mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW; + + } + + inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[src].Index = tempreg; + inst->U.I.SrcReg[src].Swizzle = 0; + inst->U.I.SrcReg[src].Negate = RC_MASK_NONE; + inst->U.I.SrcReg[src].Abs = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { + SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, + GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED); + } +} + +void rc_dataflow_swizzles(struct radeon_compiler * c, void *user) +{ + struct rc_instruction * inst; + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned int src; + + for(src = 0; src < opcode->NumSrcRegs; ++src) { + if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src])) + rewrite_source(c, inst, src); + } + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_branches.c b/src/gallium/drivers/r300/compiler/radeon_emulate_branches.c new file mode 100644 index 00000000000..7bede344f30 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_emulate_branches.c @@ -0,0 +1,342 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_emulate_branches.h" + +#include <stdio.h> + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + + +struct proxy_info { + unsigned int Proxied:1; + unsigned int Index:RC_REGISTER_INDEX_BITS; +}; + +struct register_proxies { + struct proxy_info Temporary[RC_REGISTER_MAX_INDEX]; +}; + +struct branch_info { + struct rc_instruction * If; + struct rc_instruction * Else; +}; + +struct emulate_branch_state { + struct radeon_compiler * C; + + struct branch_info * Branches; + unsigned int BranchCount; + unsigned int BranchReserved; +}; + + +static void handle_if(struct emulate_branch_state * s, struct rc_instruction * inst) +{ + struct branch_info * branch; + struct rc_instruction * inst_mov; + + memory_pool_array_reserve(&s->C->Pool, struct branch_info, + s->Branches, s->BranchCount, s->BranchReserved, 1); + + DBG("%s\n", __FUNCTION__); + + branch = &s->Branches[s->BranchCount++]; + memset(branch, 0, sizeof(struct branch_info)); + branch->If = inst; + + /* Make a safety copy of the decision register, because we will need + * it at ENDIF time and it might be overwritten in both branches. */ + inst_mov = rc_insert_new_instruction(s->C, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = rc_find_free_temporary(s->C); + inst_mov->U.I.DstReg.WriteMask = RC_MASK_X; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; + inst->U.I.SrcReg[0].Swizzle = 0; + inst->U.I.SrcReg[0].Abs = 0; + inst->U.I.SrcReg[0].Negate = 0; +} + +static void handle_else(struct emulate_branch_state * s, struct rc_instruction * inst) +{ + struct branch_info * branch; + + if (!s->BranchCount) { + rc_error(s->C, "Encountered ELSE outside of branches"); + return; + } + + DBG("%s\n", __FUNCTION__); + + branch = &s->Branches[s->BranchCount - 1]; + branch->Else = inst; +} + + +struct state_and_proxies { + struct emulate_branch_state * S; + struct register_proxies * Proxies; +}; + +static struct proxy_info * get_proxy_info(struct state_and_proxies * sap, + rc_register_file file, unsigned int index) +{ + if (file == RC_FILE_TEMPORARY) { + return &sap->Proxies->Temporary[index]; + } else { + return 0; + } +} + +static void scan_write(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int comp) +{ + struct state_and_proxies * sap = userdata; + struct proxy_info * proxy = get_proxy_info(sap, file, index); + + if (proxy && !proxy->Proxied) { + proxy->Proxied = 1; + proxy->Index = rc_find_free_temporary(sap->S->C); + } +} + +static void remap_proxy_function(void * userdata, struct rc_instruction * inst, + rc_register_file * pfile, unsigned int * pindex) +{ + struct state_and_proxies * sap = userdata; + struct proxy_info * proxy = get_proxy_info(sap, *pfile, *pindex); + + if (proxy && proxy->Proxied) { + *pfile = RC_FILE_TEMPORARY; + *pindex = proxy->Index; + } +} + +/** + * Redirect all writes in the instruction range [begin, end) to proxy + * temporary registers. + */ +static void allocate_and_insert_proxies(struct emulate_branch_state * s, + struct register_proxies * proxies, + struct rc_instruction * begin, + struct rc_instruction * end) +{ + struct state_and_proxies sap; + + sap.S = s; + sap.Proxies = proxies; + + for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { + rc_for_all_writes_mask(inst, scan_write, &sap); + rc_remap_registers(inst, remap_proxy_function, &sap); + } + + for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) { + if (proxies->Temporary[index].Proxied) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, begin->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = proxies->Temporary[index].Index; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mov->U.I.SrcReg[0].Index = index; + } + } +} + + +static void inject_cmp(struct emulate_branch_state * s, + struct rc_instruction * inst_if, + struct rc_instruction * inst_endif, + rc_register_file file, unsigned int index, + struct proxy_info ifproxy, + struct proxy_info elseproxy) +{ + struct rc_instruction * inst_cmp = rc_insert_new_instruction(s->C, inst_endif); + inst_cmp->U.I.Opcode = RC_OPCODE_CMP; + inst_cmp->U.I.DstReg.File = file; + inst_cmp->U.I.DstReg.Index = index; + inst_cmp->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst_cmp->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0]; + inst_cmp->U.I.SrcReg[0].Abs = 1; + inst_cmp->U.I.SrcReg[0].Negate = RC_MASK_XYZW; + inst_cmp->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_cmp->U.I.SrcReg[1].Index = ifproxy.Proxied ? ifproxy.Index : index; + inst_cmp->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; + inst_cmp->U.I.SrcReg[2].Index = elseproxy.Proxied ? elseproxy.Index : index; +} + +static void handle_endif(struct emulate_branch_state * s, struct rc_instruction * inst) +{ + struct branch_info * branch; + struct register_proxies IfProxies; + struct register_proxies ElseProxies; + + if (!s->BranchCount) { + rc_error(s->C, "Encountered ENDIF outside of branches"); + return; + } + + DBG("%s\n", __FUNCTION__); + + branch = &s->Branches[s->BranchCount - 1]; + + memset(&IfProxies, 0, sizeof(IfProxies)); + memset(&ElseProxies, 0, sizeof(ElseProxies)); + + allocate_and_insert_proxies(s, &IfProxies, branch->If->Next, branch->Else ? branch->Else : inst); + + if (branch->Else) + allocate_and_insert_proxies(s, &ElseProxies, branch->Else->Next, inst); + + /* Insert the CMP instructions at the end. */ + for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) { + if (IfProxies.Temporary[index].Proxied || ElseProxies.Temporary[index].Proxied) { + inject_cmp(s, branch->If, inst, RC_FILE_TEMPORARY, index, + IfProxies.Temporary[index], ElseProxies.Temporary[index]); + } + } + + /* Remove all traces of the branch instructions */ + rc_remove_instruction(branch->If); + if (branch->Else) + rc_remove_instruction(branch->Else); + rc_remove_instruction(inst); + + s->BranchCount--; + + if (VERBOSE) { + DBG("Program after ENDIF handling:\n"); + rc_print_program(&s->C->Program); + } +} + + +struct remap_output_data { + unsigned int Output:RC_REGISTER_INDEX_BITS; + unsigned int Temporary:RC_REGISTER_INDEX_BITS; +}; + +static void remap_output_function(void * userdata, struct rc_instruction * inst, + rc_register_file * pfile, unsigned int * pindex) +{ + struct remap_output_data * data = userdata; + + if (*pfile == RC_FILE_OUTPUT && *pindex == data->Output) { + *pfile = RC_FILE_TEMPORARY; + *pindex = data->Temporary; + } +} + + +/** + * Output registers cannot be read from and so cannot be dealt with like + * temporary registers. + * + * We do the simplest thing: If an output registers is written within + * a branch, then *all* writes to this register are proxied to a + * temporary register, and a final MOV is appended to the end of + * the program. + */ +static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruction * inst) +{ + const struct rc_opcode_info * opcode; + + if (!s->BranchCount) + return; + + opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (!opcode->HasDstReg) + return; + + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) { + struct remap_output_data remap; + struct rc_instruction * inst_mov; + + remap.Output = inst->U.I.DstReg.Index; + remap.Temporary = rc_find_free_temporary(s->C); + + for(struct rc_instruction * inst = s->C->Program.Instructions.Next; + inst != &s->C->Program.Instructions; + inst = inst->Next) { + rc_remap_registers(inst, &remap_output_function, &remap); + } + + inst_mov = rc_insert_new_instruction(s->C, s->C->Program.Instructions.Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_OUTPUT; + inst_mov->U.I.DstReg.Index = remap.Output; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mov->U.I.SrcReg[0].Index = remap.Temporary; + } +} + +/** + * Remove branch instructions; instead, execute both branches + * on different register sets and choose between their results + * using CMP instructions in place of the original ENDIF. + */ +void rc_emulate_branches(struct radeon_compiler *c, void *user) +{ + struct emulate_branch_state s; + struct rc_instruction * ptr; + + memset(&s, 0, sizeof(s)); + s.C = c; + + /* Untypical loop because we may remove the current instruction */ + ptr = c->Program.Instructions.Next; + while(ptr != &c->Program.Instructions) { + struct rc_instruction * inst = ptr; + ptr = ptr->Next; + + if (inst->Type == RC_INSTRUCTION_NORMAL) { + switch(inst->U.I.Opcode) { + case RC_OPCODE_IF: + handle_if(&s, inst); + break; + case RC_OPCODE_ELSE: + handle_else(&s, inst); + break; + case RC_OPCODE_ENDIF: + handle_endif(&s, inst); + break; + default: + fix_output_writes(&s, inst); + break; + } + } else { + rc_error(c, "%s: unhandled instruction type\n", __FUNCTION__); + } + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_branches.h b/src/gallium/drivers/r300/compiler/radeon_emulate_branches.h new file mode 100644 index 00000000000..818ab84d0cd --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_emulate_branches.h @@ -0,0 +1,30 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef RADEON_EMULATE_BRANCHES_H +#define RADEON_EMULATE_BRANCHES_H + +struct radeon_compiler; + +void rc_emulate_branches(struct radeon_compiler *c, void *user); + +#endif /* RADEON_EMULATE_BRANCHES_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c new file mode 100644 index 00000000000..205eecd1129 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c @@ -0,0 +1,522 @@ +/* + * Copyright 2010 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + */ + +#include "radeon_emulate_loops.h" + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + +struct const_value { + struct radeon_compiler * C; + struct rc_src_register * Src; + float Value; + int HasValue; +}; + +struct count_inst { + struct radeon_compiler * C; + int Index; + rc_swizzle Swz; + float Amount; + int Unknown; +}; + +static float get_constant_value(struct radeon_compiler * c, + struct rc_src_register * src, + int chan) +{ + float base = 1.0f; + int swz = GET_SWZ(src->Swizzle, chan); + if(swz >= 4 || src->Index >= c->Program.Constants.Count ){ + rc_error(c, "get_constant_value: Can't find a value.\n"); + return 0.0f; + } + if(GET_BIT(src->Negate, chan)){ + base = -1.0f; + } + return base * + c->Program.Constants.Constants[src->Index].u.Immediate[swz]; +} + +static int src_reg_is_immediate(struct rc_src_register * src, + struct radeon_compiler * c) +{ + return src->File == RC_FILE_CONSTANT && + c->Program.Constants.Constants[src->Index].Type==RC_CONSTANT_IMMEDIATE; +} + +static unsigned int loop_max_possible_iterations(struct radeon_compiler *c, + struct loop_info * loop) +{ + unsigned int total_i = rc_recompute_ips(c); + unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1; + /* +1 because the program already has one iteration of the loop. */ + return 1 + ((c->max_alu_insts - total_i) / loop_i); +} + +static void unroll_loop(struct radeon_compiler * c, struct loop_info * loop, + unsigned int iterations) +{ + unsigned int i; + struct rc_instruction * ptr; + struct rc_instruction * first = loop->BeginLoop->Next; + struct rc_instruction * last = loop->EndLoop->Prev; + struct rc_instruction * append_to = last; + rc_remove_instruction(loop->BeginLoop); + rc_remove_instruction(loop->EndLoop); + for( i = 1; i < iterations; i++){ + for(ptr = first; ptr != last->Next; ptr = ptr->Next){ + struct rc_instruction *new = rc_alloc_instruction(c); + memcpy(new, ptr, sizeof(struct rc_instruction)); + rc_insert_instruction(append_to, new); + append_to = new; + } + } +} + + +static void update_const_value(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct const_value * value = data; + if(value->Src->File != file || + value->Src->Index != index || + !(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){ + return; + } + switch(inst->U.I.Opcode){ + case RC_OPCODE_MOV: + if(!src_reg_is_immediate(&inst->U.I.SrcReg[0], value->C)){ + return; + } + value->HasValue = 1; + value->Value = + get_constant_value(value->C, &inst->U.I.SrcReg[0], 0); + break; + } +} + +static void get_incr_amount(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct count_inst * count_inst = data; + int amnt_src_index; + const struct rc_opcode_info * opcode; + float amount; + + if(file != RC_FILE_TEMPORARY || + count_inst->Index != index || + (1 << GET_SWZ(count_inst->Swz,0) != mask)){ + return; + } + /* Find the index of the counter register. */ + opcode = rc_get_opcode_info(inst->U.I.Opcode); + if(opcode->NumSrcRegs != 2){ + count_inst->Unknown = 1; + return; + } + if(inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[0].Index == count_inst->Index && + inst->U.I.SrcReg[0].Swizzle == count_inst->Swz){ + amnt_src_index = 1; + } else if( inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[1].Index == count_inst->Index && + inst->U.I.SrcReg[1].Swizzle == count_inst->Swz){ + amnt_src_index = 0; + } + else{ + count_inst->Unknown = 1; + return; + } + if(src_reg_is_immediate(&inst->U.I.SrcReg[amnt_src_index], + count_inst->C)){ + amount = get_constant_value(count_inst->C, + &inst->U.I.SrcReg[amnt_src_index], 0); + } + else{ + count_inst->Unknown = 1 ; + return; + } + switch(inst->U.I.Opcode){ + case RC_OPCODE_ADD: + count_inst->Amount += amount; + break; + case RC_OPCODE_SUB: + if(amnt_src_index == 0){ + count_inst->Unknown = 0; + return; + } + count_inst->Amount -= amount; + break; + default: + count_inst->Unknown = 1; + return; + } +} + +/** + * If c->max_alu_inst is -1, then all eligible loops will be unrolled regardless + * of how many iterations they have. + */ +static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop) +{ + int end_loops; + int iterations; + struct count_inst count_inst; + float limit_value; + struct rc_src_register * counter; + struct rc_src_register * limit; + struct const_value counter_value; + struct rc_instruction * inst; + + /* Find the counter and the upper limit */ + + if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], c)){ + limit = &loop->Cond->U.I.SrcReg[0]; + counter = &loop->Cond->U.I.SrcReg[1]; + } + else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], c)){ + limit = &loop->Cond->U.I.SrcReg[1]; + counter = &loop->Cond->U.I.SrcReg[0]; + } + else{ + DBG("No constant limit.\n"); + return 0; + } + + /* Find the initial value of the counter */ + counter_value.Src = counter; + counter_value.Value = 0.0f; + counter_value.HasValue = 0; + counter_value.C = c; + for(inst = c->Program.Instructions.Next; inst != loop->BeginLoop; + inst = inst->Next){ + rc_for_all_writes_mask(inst, update_const_value, &counter_value); + } + if(!counter_value.HasValue){ + DBG("Initial counter value cannot be determined.\n"); + return 0; + } + DBG("Initial counter value is %f\n", counter_value.Value); + /* Determine how the counter is modified each loop */ + count_inst.C = c; + count_inst.Index = counter->Index; + count_inst.Swz = counter->Swizzle; + count_inst.Amount = 0.0f; + count_inst.Unknown = 0; + end_loops = 1; + for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){ + switch(inst->U.I.Opcode){ + /* XXX In the future we might want to try to unroll nested + * loops here.*/ + case RC_OPCODE_BGNLOOP: + end_loops++; + break; + case RC_OPCODE_ENDLOOP: + loop->EndLoop = inst; + end_loops--; + break; + case RC_OPCODE_BRK: + /* Don't unroll loops if it has a BRK instruction + * other one used when testing the main conditional + * of the loop. */ + + /* Make sure we haven't entered a nested loops. */ + if(inst != loop->Brk && end_loops == 1) { + return 0; + } + break; + /* XXX Check if the counter is modified within an if statement. + */ + case RC_OPCODE_IF: + break; + default: + rc_for_all_writes_mask(inst, get_incr_amount, &count_inst); + if(count_inst.Unknown){ + return 0; + } + break; + } + } + /* Infinite loop */ + if(count_inst.Amount == 0.0f){ + return 0; + } + DBG("Counter is increased by %f each iteration.\n", count_inst.Amount); + /* Calculate the number of iterations of this loop. Keeping this + * simple, since we only support increment and decrement loops. + */ + limit_value = get_constant_value(c, limit, 0); + DBG("Limit is %f.\n", limit_value); + /* The iteration calculations are opposite of what you would expect. + * In a normal loop, if the condition is met, then loop continues, but + * with our loops, if the condition is met, the is exited. */ + switch(loop->Cond->U.I.Opcode){ + case RC_OPCODE_SGE: + case RC_OPCODE_SLE: + iterations = (int) ceilf((limit_value - counter_value.Value) / + count_inst.Amount); + break; + + case RC_OPCODE_SGT: + case RC_OPCODE_SLT: + iterations = (int) floorf((limit_value - counter_value.Value) / + count_inst.Amount) + 1; + break; + default: + return 0; + } + + if (c->max_alu_insts > 0 + && iterations > loop_max_possible_iterations(c, loop)) { + return 0; + } + + DBG("Loop will have %d iterations.\n", iterations); + + /* Prepare loop for unrolling */ + rc_remove_instruction(loop->Cond); + rc_remove_instruction(loop->If); + rc_remove_instruction(loop->Brk); + rc_remove_instruction(loop->EndIf); + + unroll_loop(c, loop, iterations); + loop->EndLoop = NULL; + return 1; +} + +/** + * @param c + * @param loop + * @param inst A pointer to a BGNLOOP instruction. + * @return 1 if all of the members of loop where set. + * @return 0 if there was an error and some members of loop are still NULL. + */ +static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop, + struct rc_instruction * inst) +{ + struct rc_instruction * ptr; + + if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){ + rc_error(c, "%s: expected BGNLOOP", __FUNCTION__); + return 0; + } + + memset(loop, 0, sizeof(struct loop_info)); + + loop->BeginLoop = inst; + + for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next) { + + if (ptr == &c->Program.Instructions) { + rc_error(c, "%s: BGNLOOP without an ENDLOOOP.\n", + __FUNCTION__); + return 0; + } + + switch(ptr->U.I.Opcode){ + case RC_OPCODE_BGNLOOP: + { + /* Nested loop, skip ahead to the end. */ + unsigned int loop_depth = 1; + for(ptr = ptr->Next; ptr != &c->Program.Instructions; + ptr = ptr->Next){ + if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) { + loop_depth++; + } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) { + if (!--loop_depth) { + break; + } + } + } + if (ptr == &c->Program.Instructions) { + rc_error(c, "%s: BGNLOOP without an ENDLOOOP\n", + __FUNCTION__); + return 0; + } + break; + } + case RC_OPCODE_BRK: + if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF + || ptr->Prev->U.I.Opcode != RC_OPCODE_IF + || loop->Brk){ + continue; + } + loop->Brk = ptr; + loop->If = ptr->Prev; + loop->EndIf = ptr->Next; + switch(loop->If->Prev->U.I.Opcode){ + case RC_OPCODE_SLT: + case RC_OPCODE_SGE: + case RC_OPCODE_SGT: + case RC_OPCODE_SLE: + case RC_OPCODE_SEQ: + case RC_OPCODE_SNE: + break; + default: + return 0; + } + loop->Cond = loop->If->Prev; + break; + + case RC_OPCODE_ENDLOOP: + loop->EndLoop = ptr; + break; + } + } + + if (loop->BeginLoop && loop->Brk && loop->If && loop->EndIf + && loop->Cond && loop->EndLoop) { + return 1; + } + return 0; +} + +/** + * This function prepares a loop to be unrolled by converting it into an if + * statement. Here is an outline of the conversion process: + * BGNLOOP; -> BGNLOOP; + * <Additional conditional code> -> <Additional conditional code> + * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2]; + * IF temp[0]; -> IF temp[0]; + * BRK; -> + * ENDIF; -> <Loop Body> + * <Loop Body> -> ENDIF; + * ENDLOOP; -> ENDLOOP + * + * @param inst A pointer to a BGNLOOP instruction. + * @return 1 for success, 0 for failure + */ +static int transform_loop(struct emulate_loop_state * s, + struct rc_instruction * inst) +{ + struct loop_info * loop; + + memory_pool_array_reserve(&s->C->Pool, struct loop_info, + s->Loops, s->LoopCount, s->LoopReserved, 1); + + loop = &s->Loops[s->LoopCount++]; + + if (!build_loop_info(s->C, loop, inst)) { + rc_error(s->C, "Failed to build loop info\n"); + return 0; + } + + if(try_unroll_loop(s->C, loop)){ + return 1; + } + + /* Reverse the conditional instruction */ + switch(loop->Cond->U.I.Opcode){ + case RC_OPCODE_SGE: + loop->Cond->U.I.Opcode = RC_OPCODE_SLT; + break; + case RC_OPCODE_SLT: + loop->Cond->U.I.Opcode = RC_OPCODE_SGE; + break; + case RC_OPCODE_SLE: + loop->Cond->U.I.Opcode = RC_OPCODE_SGT; + break; + case RC_OPCODE_SGT: + loop->Cond->U.I.Opcode = RC_OPCODE_SLE; + break; + case RC_OPCODE_SEQ: + loop->Cond->U.I.Opcode = RC_OPCODE_SNE; + break; + case RC_OPCODE_SNE: + loop->Cond->U.I.Opcode = RC_OPCODE_SEQ; + break; + default: + rc_error(s->C, "loop->Cond is not a conditional.\n"); + return 0; + } + + /* Prepare the loop to be emulated */ + rc_remove_instruction(loop->Brk); + rc_remove_instruction(loop->EndIf); + rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf); + return 1; +} + +void rc_transform_loops(struct radeon_compiler *c, void *user) +{ + struct emulate_loop_state * s = &c->loop_state; + struct rc_instruction * ptr; + + memset(s, 0, sizeof(struct emulate_loop_state)); + s->C = c; + for(ptr = s->C->Program.Instructions.Next; + ptr != &s->C->Program.Instructions; ptr = ptr->Next) { + if(ptr->Type == RC_INSTRUCTION_NORMAL && + ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ + if (!transform_loop(s, ptr)) + return; + } + } +} + +void rc_unroll_loops(struct radeon_compiler *c, void *user) +{ + struct rc_instruction * inst; + struct loop_info loop; + + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + + if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { + if (build_loop_info(c, &loop, inst)) { + try_unroll_loop(c, &loop); + } + } + } +} + +void rc_emulate_loops(struct radeon_compiler *c, void *user) +{ + struct emulate_loop_state * s = &c->loop_state; + int i; + /* Iterate backwards of the list of loops so that loops that nested + * loops are unrolled first. + */ + for( i = s->LoopCount - 1; i >= 0; i-- ){ + unsigned int iterations; + + if(!s->Loops[i].EndLoop){ + continue; + } + iterations = loop_max_possible_iterations(s->C, &s->Loops[i]); + unroll_loop(s->C, &s->Loops[i], iterations); + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_loops.h b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.h new file mode 100644 index 00000000000..cd800c059d9 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.h @@ -0,0 +1,32 @@ + + +#ifndef RADEON_EMULATE_LOOPS_H +#define RADEON_EMULATE_LOOPS_H + +#define MAX_ITERATIONS 8 + +struct radeon_compiler; + +struct loop_info { + struct rc_instruction * BeginLoop; + struct rc_instruction * Cond; + struct rc_instruction * If; + struct rc_instruction * Brk; + struct rc_instruction * EndIf; + struct rc_instruction * EndLoop; +}; + +struct emulate_loop_state { + struct radeon_compiler * C; + struct loop_info * Loops; + unsigned int LoopCount; + unsigned int LoopReserved; +}; + +void rc_transform_loops(struct radeon_compiler *c, void *user); + +void rc_unroll_loops(struct radeon_compiler * c, void *user); + +void rc_emulate_loops(struct radeon_compiler * c, void *user); + +#endif /* RADEON_EMULATE_LOOPS_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_list.c b/src/gallium/drivers/r300/compiler/radeon_list.c new file mode 100644 index 00000000000..811c908a81a --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_list.c @@ -0,0 +1,90 @@ +/* + * Copyright 2011 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_list.h" + +#include <stdlib.h> +#include <stdio.h> + +#include "memory_pool.h" + +struct rc_list * rc_list(struct memory_pool * pool, void * item) +{ + struct rc_list * new = memory_pool_malloc(pool, sizeof(struct rc_list)); + new->Item = item; + new->Next = NULL; + new->Prev = NULL; + + return new; +} + +void rc_list_add(struct rc_list ** list, struct rc_list * new_value) +{ + struct rc_list * temp; + + if (*list == NULL) { + *list = new_value; + return; + } + + for (temp = *list; temp->Next; temp = temp->Next); + + temp->Next = new_value; + new_value->Prev = temp; +} + +void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value) +{ + if (*list == rm_value) { + *list = rm_value->Next; + return; + } + + rm_value->Prev->Next = rm_value->Next; + if (rm_value->Next) { + rm_value->Next->Prev = rm_value->Prev; + } +} + +unsigned int rc_list_count(struct rc_list * list) +{ + unsigned int count = 0; + while (list) { + count++; + list = list->Next; + } + return count; +} + +void rc_list_print(struct rc_list * list) +{ + while(list) { + fprintf(stderr, "%p->", list->Item); + list = list->Next; + } + fprintf(stderr, "\n"); +} diff --git a/src/gallium/drivers/r300/compiler/radeon_list.h b/src/gallium/drivers/r300/compiler/radeon_list.h new file mode 100644 index 00000000000..b3c8f89cc68 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_list.h @@ -0,0 +1,46 @@ +/* + * Copyright 2011 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_LIST_H +#define RADEON_LIST_H + +struct memory_pool; + +struct rc_list { + void * Item; + struct rc_list * Prev; + struct rc_list * Next; +}; + +struct rc_list * rc_list(struct memory_pool * pool, void * item); +void rc_list_add(struct rc_list ** list, struct rc_list * new_value); +void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value); +unsigned int rc_list_count(struct rc_list * list); +void rc_list_print(struct rc_list * list); + +#endif /* RADEON_LIST_H */ + diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.c b/src/gallium/drivers/r300/compiler/radeon_opcodes.c new file mode 100644 index 00000000000..afd78ad79dd --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.c @@ -0,0 +1,546 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_opcodes.h" +#include "radeon_program.h" + +#include "radeon_program_constants.h" + +struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { + { + .Opcode = RC_OPCODE_NOP, + .Name = "NOP" + }, + { + .Opcode = RC_OPCODE_ILLEGAL_OPCODE, + .Name = "ILLEGAL OPCODE" + }, + { + .Opcode = RC_OPCODE_ABS, + .Name = "ABS", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_ADD, + .Name = "ADD", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_ARL, + .Name = "ARL", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_CEIL, + .Name = "CEIL", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_CLAMP, + .Name = "CLAMP", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_CMP, + .Name = "CMP", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_CND, + .Name = "CND", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_COS, + .Name = "COS", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_DDX, + .Name = "DDX", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_DDY, + .Name = "DDY", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_DP2, + .Name = "DP2", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DP3, + .Name = "DP3", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DP4, + .Name = "DP4", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DPH, + .Name = "DPH", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DST, + .Name = "DST", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_EX2, + .Name = "EX2", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_EXP, + .Name = "EXP", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_FLR, + .Name = "FLR", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_FRC, + .Name = "FRC", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_KIL, + .Name = "KIL", + .NumSrcRegs = 1 + }, + { + .Opcode = RC_OPCODE_LG2, + .Name = "LG2", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_LIT, + .Name = "LIT", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_LOG, + .Name = "LOG", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_LRP, + .Name = "LRP", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MAD, + .Name = "MAD", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MAX, + .Name = "MAX", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MIN, + .Name = "MIN", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MOV, + .Name = "MOV", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MUL, + .Name = "MUL", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_POW, + .Name = "POW", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_RCP, + .Name = "RCP", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_RSQ, + .Name = "RSQ", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_SCS, + .Name = "SCS", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_SEQ, + .Name = "SEQ", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SFL, + .Name = "SFL", + .NumSrcRegs = 0, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SGE, + .Name = "SGE", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SGT, + .Name = "SGT", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SIN, + .Name = "SIN", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_SLE, + .Name = "SLE", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SLT, + .Name = "SLT", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SNE, + .Name = "SNE", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SSG, + .Name = "SSG", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SUB, + .Name = "SUB", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SWZ, + .Name = "SWZ", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_XPD, + .Name = "XPD", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TEX, + .Name = "TEX", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXB, + .Name = "TXB", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXD, + .Name = "TXD", + .HasTexture = 1, + .NumSrcRegs = 3, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXL, + .Name = "TXL", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXP, + .Name = "TXP", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_IF, + .Name = "IF", + .IsFlowControl = 1, + .NumSrcRegs = 1 + }, + { + .Opcode = RC_OPCODE_ELSE, + .Name = "ELSE", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_ENDIF, + .Name = "ENDIF", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_BGNLOOP, + .Name = "BGNLOOP", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_BRK, + .Name = "BRK", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_ENDLOOP, + .Name = "ENDLOOP", + .IsFlowControl = 1, + .NumSrcRegs = 0, + }, + { + .Opcode = RC_OPCODE_CONT, + .Name = "CONT", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_REPL_ALPHA, + .Name = "REPL_ALPHA", + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_BEGIN_TEX, + .Name = "BEGIN_TEX" + }, + { + .Opcode = RC_OPCODE_KILP, + .Name = "KILP", + } +}; + +void rc_compute_sources_for_writemask( + const struct rc_instruction *inst, + unsigned int writemask, + unsigned int *srcmasks) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + srcmasks[0] = 0; + srcmasks[1] = 0; + srcmasks[2] = 0; + + if (opcode->Opcode == RC_OPCODE_KIL) + srcmasks[0] |= RC_MASK_XYZW; + else if (opcode->Opcode == RC_OPCODE_IF) + srcmasks[0] |= RC_MASK_X; + + if (!writemask) + return; + + if (opcode->IsComponentwise) { + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) + srcmasks[src] |= writemask; + } else if (opcode->IsStandardScalar) { + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) + srcmasks[src] |= RC_MASK_X; + } else { + switch(opcode->Opcode) { + case RC_OPCODE_ARL: + srcmasks[0] |= RC_MASK_X; + break; + case RC_OPCODE_DP2: + srcmasks[0] |= RC_MASK_XY; + srcmasks[1] |= RC_MASK_XY; + break; + case RC_OPCODE_DP3: + case RC_OPCODE_XPD: + srcmasks[0] |= RC_MASK_XYZ; + srcmasks[1] |= RC_MASK_XYZ; + break; + case RC_OPCODE_DP4: + srcmasks[0] |= RC_MASK_XYZW; + srcmasks[1] |= RC_MASK_XYZW; + break; + case RC_OPCODE_DPH: + srcmasks[0] |= RC_MASK_XYZ; + srcmasks[1] |= RC_MASK_XYZW; + break; + case RC_OPCODE_TXB: + case RC_OPCODE_TXP: + case RC_OPCODE_TXL: + srcmasks[0] |= RC_MASK_W; + /* Fall through */ + case RC_OPCODE_TEX: + switch (inst->U.I.TexSrcTarget) { + case RC_TEXTURE_1D: + srcmasks[0] |= RC_MASK_X; + break; + case RC_TEXTURE_2D: + case RC_TEXTURE_RECT: + case RC_TEXTURE_1D_ARRAY: + srcmasks[0] |= RC_MASK_XY; + break; + case RC_TEXTURE_3D: + case RC_TEXTURE_CUBE: + case RC_TEXTURE_2D_ARRAY: + srcmasks[0] |= RC_MASK_XYZ; + break; + } + break; + case RC_OPCODE_TXD: + switch (inst->U.I.TexSrcTarget) { + case RC_TEXTURE_1D_ARRAY: + srcmasks[0] |= RC_MASK_Y; + /* Fall through. */ + case RC_TEXTURE_1D: + srcmasks[0] |= RC_MASK_X; + srcmasks[1] |= RC_MASK_X; + srcmasks[2] |= RC_MASK_X; + break; + case RC_TEXTURE_2D_ARRAY: + srcmasks[0] |= RC_MASK_Z; + /* Fall through. */ + case RC_TEXTURE_2D: + case RC_TEXTURE_RECT: + srcmasks[0] |= RC_MASK_XY; + srcmasks[1] |= RC_MASK_XY; + srcmasks[2] |= RC_MASK_XY; + break; + case RC_TEXTURE_3D: + case RC_TEXTURE_CUBE: + srcmasks[0] |= RC_MASK_XYZ; + srcmasks[1] |= RC_MASK_XYZ; + srcmasks[2] |= RC_MASK_XYZ; + break; + } + break; + case RC_OPCODE_DST: + srcmasks[0] |= RC_MASK_Y | RC_MASK_Z; + srcmasks[1] |= RC_MASK_Y | RC_MASK_W; + break; + case RC_OPCODE_EXP: + case RC_OPCODE_LOG: + srcmasks[0] |= RC_MASK_XY; + break; + case RC_OPCODE_LIT: + srcmasks[0] |= RC_MASK_X | RC_MASK_Y | RC_MASK_W; + break; + default: + break; + } + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.h b/src/gallium/drivers/r300/compiler/radeon_opcodes.h new file mode 100644 index 00000000000..b5868820611 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.h @@ -0,0 +1,263 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_OPCODES_H +#define RADEON_OPCODES_H + +#include <assert.h> + +/** + * Opcodes understood by the Radeon compiler. + */ +typedef enum { + RC_OPCODE_NOP = 0, + RC_OPCODE_ILLEGAL_OPCODE, + + /** vec4 instruction: dst.c = abs(src0.c); */ + RC_OPCODE_ABS, + + /** vec4 instruction: dst.c = src0.c + src1.c; */ + RC_OPCODE_ADD, + + /** special instruction: load address register + * dst.x = floor(src.x), where dst must be an address register */ + RC_OPCODE_ARL, + + /** vec4 instruction: dst.c = ceil(src0.c) */ + RC_OPCODE_CEIL, + + /** vec4 instruction: dst.c = clamp(src0.c, src1.c, src2.c) */ + RC_OPCODE_CLAMP, + + /** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */ + RC_OPCODE_CMP, + + /** vec4 instruction: dst.c = src2.c > 0.5 ? src0.c : src1.c */ + RC_OPCODE_CND, + + /** scalar instruction: dst = cos(src0.x) */ + RC_OPCODE_COS, + + /** special instruction: take vec4 partial derivative in X direction + * dst.c = d src0.c / dx */ + RC_OPCODE_DDX, + + /** special instruction: take vec4 partial derivative in Y direction + * dst.c = d src0.c / dy */ + RC_OPCODE_DDY, + + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y */ + RC_OPCODE_DP2, + + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */ + RC_OPCODE_DP3, + + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src0.w*src1.w */ + RC_OPCODE_DP4, + + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src1.w */ + RC_OPCODE_DPH, + + /** special instruction, see ARB_fragment_program */ + RC_OPCODE_DST, + + /** scalar instruction: dst = 2**src0.x */ + RC_OPCODE_EX2, + + /** special instruction, see ARB_vertex_program */ + RC_OPCODE_EXP, + + /** vec4 instruction: dst.c = floor(src0.c) */ + RC_OPCODE_FLR, + + /** vec4 instruction: dst.c = src0.c - floor(src0.c) */ + RC_OPCODE_FRC, + + /** special instruction: stop execution if any component of src0 is negative */ + RC_OPCODE_KIL, + + /** scalar instruction: dst = log_2(src0.x) */ + RC_OPCODE_LG2, + + /** special instruction, see ARB_vertex_program */ + RC_OPCODE_LIT, + + /** special instruction, see ARB_vertex_program */ + RC_OPCODE_LOG, + + /** vec4 instruction: dst.c = src0.c*src1.c + (1 - src0.c)*src2.c */ + RC_OPCODE_LRP, + + /** vec4 instruction: dst.c = src0.c*src1.c + src2.c */ + RC_OPCODE_MAD, + + /** vec4 instruction: dst.c = max(src0.c, src1.c) */ + RC_OPCODE_MAX, + + /** vec4 instruction: dst.c = min(src0.c, src1.c) */ + RC_OPCODE_MIN, + + /** vec4 instruction: dst.c = src0.c */ + RC_OPCODE_MOV, + + /** vec4 instruction: dst.c = src0.c*src1.c */ + RC_OPCODE_MUL, + + /** scalar instruction: dst = src0.x ** src1.x */ + RC_OPCODE_POW, + + /** scalar instruction: dst = 1 / src0.x */ + RC_OPCODE_RCP, + + /** scalar instruction: dst = 1 / sqrt(src0.x) */ + RC_OPCODE_RSQ, + + /** special instruction, see ARB_fragment_program */ + RC_OPCODE_SCS, + + /** vec4 instruction: dst.c = (src0.c == src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SEQ, + + /** vec4 instruction: dst.c = 0.0 */ + RC_OPCODE_SFL, + + /** vec4 instruction: dst.c = (src0.c >= src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SGE, + + /** vec4 instruction: dst.c = (src0.c > src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SGT, + + /** scalar instruction: dst = sin(src0.x) */ + RC_OPCODE_SIN, + + /** vec4 instruction: dst.c = (src0.c <= src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SLE, + + /** vec4 instruction: dst.c = (src0.c < src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SLT, + + /** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SNE, + + /** vec4 instruction: dst.c = (src0.c < 0 ?) -1 : ((src0.c > 0) : 1 : 0) */ + RC_OPCODE_SSG, + + /** vec4 instruction: dst.c = src0.c - src1.c */ + RC_OPCODE_SUB, + + /** vec4 instruction: dst.c = src0.c */ + RC_OPCODE_SWZ, + + /** special instruction, see ARB_fragment_program */ + RC_OPCODE_XPD, + + RC_OPCODE_TEX, + RC_OPCODE_TXB, + RC_OPCODE_TXD, + RC_OPCODE_TXL, + RC_OPCODE_TXP, + + /** branch instruction: + * If src0.x != 0.0, continue with the next instruction; + * otherwise, jump to matching RC_OPCODE_ELSE or RC_OPCODE_ENDIF. + */ + RC_OPCODE_IF, + + /** branch instruction: jump to matching RC_OPCODE_ENDIF */ + RC_OPCODE_ELSE, + + /** branch instruction: has no effect */ + RC_OPCODE_ENDIF, + + RC_OPCODE_BGNLOOP, + + RC_OPCODE_BRK, + + RC_OPCODE_ENDLOOP, + + RC_OPCODE_CONT, + + /** special instruction, used in R300-R500 fragment program pair instructions + * indicates that the result of the alpha operation shall be replicated + * across all other channels */ + RC_OPCODE_REPL_ALPHA, + + /** special instruction, used in R300-R500 fragment programs + * to indicate the start of a block of texture instructions that + * can run simultaneously. */ + RC_OPCODE_BEGIN_TEX, + + /** Stop execution of the shader (GLSL discard) */ + RC_OPCODE_KILP, + + MAX_RC_OPCODE +} rc_opcode; + + +struct rc_opcode_info { + rc_opcode Opcode; + const char * Name; + + /** true if the instruction reads from a texture. + * + * \note This is false for the KIL instruction, even though KIL is + * a texture instruction from a hardware point of view. */ + unsigned int HasTexture:1; + + unsigned int NumSrcRegs:2; + unsigned int HasDstReg:1; + + /** true if this instruction affects control flow */ + unsigned int IsFlowControl:1; + + /** true if this is a vector instruction that operates on components in parallel + * without any cross-component interaction */ + unsigned int IsComponentwise:1; + + /** true if this instruction sources only its operands X components + * to compute one result which is smeared across all output channels */ + unsigned int IsStandardScalar:1; +}; + +extern struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE]; + +static inline const struct rc_opcode_info * rc_get_opcode_info(rc_opcode opcode) +{ + assert((unsigned int)opcode < MAX_RC_OPCODE); + assert(rc_opcodes[opcode].Opcode == opcode); + + return &rc_opcodes[opcode]; +} + +struct rc_instruction; + +void rc_compute_sources_for_writemask( + const struct rc_instruction *inst, + unsigned int writemask, + unsigned int *srcmasks); + +#endif /* RADEON_OPCODES_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_optimize.c b/src/gallium/drivers/r300/compiler/radeon_optimize.c new file mode 100644 index 00000000000..39dcb21d4f4 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_optimize.c @@ -0,0 +1,700 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * Copyright 2010 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" +#include "radeon_swizzle.h" + +struct src_clobbered_reads_cb_data { + rc_register_file File; + unsigned int Index; + unsigned int Mask; + struct rc_reader_data * ReaderData; +}; + +typedef void (*rc_presub_replace_fn)(struct rc_instruction *, + struct rc_instruction *, + unsigned int); + +static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner) +{ + struct rc_src_register combine; + combine.File = inner.File; + combine.Index = inner.Index; + combine.RelAddr = inner.RelAddr; + if (outer.Abs) { + combine.Abs = 1; + combine.Negate = outer.Negate; + } else { + combine.Abs = inner.Abs; + combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate); + combine.Negate ^= outer.Negate; + } + combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle); + return combine; +} + +static void copy_propagate_scan_read(void * data, struct rc_instruction * inst, + struct rc_src_register * src) +{ + rc_register_file file = src->File; + struct rc_reader_data * reader_data = data; + + if(!rc_inst_can_use_presub(inst, + reader_data->Writer->U.I.PreSub.Opcode, + rc_swizzle_to_writemask(src->Swizzle), + src, + &reader_data->Writer->U.I.PreSub.SrcReg[0], + &reader_data->Writer->U.I.PreSub.SrcReg[1])) { + reader_data->Abort = 1; + return; + } + + /* XXX This could probably be handled better. */ + if (file == RC_FILE_ADDRESS) { + reader_data->Abort = 1; + return; + } + + /* These instructions cannot read from the constants file. + * see radeonTransformTEX() + */ + if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && + reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT && + (inst->U.I.Opcode == RC_OPCODE_TEX || + inst->U.I.Opcode == RC_OPCODE_TXB || + inst->U.I.Opcode == RC_OPCODE_TXP || + inst->U.I.Opcode == RC_OPCODE_TXD || + inst->U.I.Opcode == RC_OPCODE_TXL || + inst->U.I.Opcode == RC_OPCODE_KIL)){ + reader_data->Abort = 1; + return; + } +} + +static void src_clobbered_reads_cb( + void * data, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct src_clobbered_reads_cb_data * sc_data = data; + + if (src->File == sc_data->File + && src->Index == sc_data->Index + && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) { + + sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; + } + + if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) { + sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; + } +} + +static void is_src_clobbered_scan_write( + void * data, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) +{ + struct src_clobbered_reads_cb_data sc_data; + struct rc_reader_data * reader_data = data; + sc_data.File = file; + sc_data.Index = index; + sc_data.Mask = mask; + sc_data.ReaderData = reader_data; + rc_for_all_reads_src(reader_data->Writer, + src_clobbered_reads_cb, &sc_data); +} + +static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov) +{ + struct rc_reader_data reader_data; + unsigned int i; + + if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || + inst_mov->U.I.WriteALUResult || + inst_mov->U.I.SaturateMode) + return; + + /* Get a list of all the readers of this MOV instruction. */ + reader_data.ExitOnAbort = 1; + rc_get_readers(c, inst_mov, &reader_data, + copy_propagate_scan_read, NULL, + is_src_clobbered_scan_write); + + if (reader_data.Abort || reader_data.ReaderCount == 0) + return; + + /* Propagate the MOV instruction. */ + for (i = 0; i < reader_data.ReaderCount; i++) { + struct rc_instruction * inst = reader_data.Readers[i].Inst; + *reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]); + + if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) + inst->U.I.PreSub = inst_mov->U.I.PreSub; + } + + /* Finally, remove the original MOV instruction */ + rc_remove_instruction(inst_mov); +} + +/** + * Check if a source register is actually always the same + * swizzle constant. + */ +static int is_src_uniform_constant(struct rc_src_register src, + rc_swizzle * pswz, unsigned int * pnegate) +{ + int have_used = 0; + + if (src.File != RC_FILE_NONE) { + *pswz = 0; + return 0; + } + + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(src.Swizzle, chan); + if (swz < 4) { + *pswz = 0; + return 0; + } + if (swz == RC_SWIZZLE_UNUSED) + continue; + + if (!have_used) { + *pswz = swz; + *pnegate = GET_BIT(src.Negate, chan); + have_used = 1; + } else { + if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) { + *pswz = 0; + return 0; + } + } + } + + return 1; +} + +static void constant_folding_mad(struct rc_instruction * inst) +{ + rc_swizzle swz = 0; + unsigned int negate= 0; + + if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MUL; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_ADD; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2]; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_ADD; + if (negate) + inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } + } +} + +static void constant_folding_mul(struct rc_instruction * inst) +{ + rc_swizzle swz = 0; + unsigned int negate = 0; + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_MOV; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; + return; + } + } +} + +static void constant_folding_add(struct rc_instruction * inst) +{ + rc_swizzle swz = 0; + unsigned int negate = 0; + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + return; + } + } +} + +/** + * Replace 0.0, 1.0 and 0.5 immediate constants by their + * respective swizzles. Simplify instructions like ADD dst, src, 0; + */ +static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned int i; + + /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */ + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + struct rc_constant * constant; + struct rc_src_register newsrc; + int have_real_reference; + unsigned int chan; + + /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */ + for (chan = 0; chan < 4; ++chan) + if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3) + break; + if (chan == 4) { + inst->U.I.SrcReg[src].File = RC_FILE_NONE; + continue; + } + + /* Convert immediates to swizzles. */ + if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT || + inst->U.I.SrcReg[src].RelAddr || + inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count) + continue; + + constant = + &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index]; + + if (constant->Type != RC_CONSTANT_IMMEDIATE) + continue; + + newsrc = inst->U.I.SrcReg[src]; + have_real_reference = 0; + for (chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(newsrc.Swizzle, chan); + unsigned int newswz; + float imm; + float baseimm; + + if (swz >= 4) + continue; + + imm = constant->u.Immediate[swz]; + baseimm = imm; + if (imm < 0.0) + baseimm = -baseimm; + + if (baseimm == 0.0) { + newswz = RC_SWIZZLE_ZERO; + } else if (baseimm == 1.0) { + newswz = RC_SWIZZLE_ONE; + } else if (baseimm == 0.5 && c->has_half_swizzles) { + newswz = RC_SWIZZLE_HALF; + } else { + have_real_reference = 1; + continue; + } + + SET_SWZ(newsrc.Swizzle, chan, newswz); + if (imm < 0.0 && !newsrc.Abs) + newsrc.Negate ^= 1 << chan; + } + + if (!have_real_reference) { + newsrc.File = RC_FILE_NONE; + newsrc.Index = 0; + } + + /* don't make the swizzle worse */ + if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) && + c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src])) + continue; + + inst->U.I.SrcReg[src] = newsrc; + } + + /* Simplify instructions based on constants */ + if (inst->U.I.Opcode == RC_OPCODE_MAD) + constant_folding_mad(inst); + + /* note: MAD can simplify to MUL or ADD */ + if (inst->U.I.Opcode == RC_OPCODE_MUL) + constant_folding_mul(inst); + else if (inst->U.I.Opcode == RC_OPCODE_ADD) + constant_folding_add(inst); + + /* In case this instruction has been converted, make sure all of the + * registers that are no longer used are empty. */ + opcode = rc_get_opcode_info(inst->U.I.Opcode); + for(i = opcode->NumSrcRegs; i < 3; i++) { + memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register)); + } +} + +/** + * If src and dst use the same register, this function returns a writemask that + * indicates wich components are read by src. Otherwise zero is returned. + */ +static unsigned int src_reads_dst_mask(struct rc_src_register src, + struct rc_dst_register dst) +{ + if (dst.File != src.File || dst.Index != src.Index) { + return 0; + } + return rc_swizzle_to_writemask(src.Swizzle); +} + +/* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0) + * in any of its channels. Return 0 otherwise. */ +static int src_has_const_swz(struct rc_src_register src) { + int chan; + for(chan = 0; chan < 4; chan++) { + unsigned int swz = GET_SWZ(src.Swizzle, chan); + if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF + || swz == RC_SWIZZLE_ONE) { + return 1; + } + } + return 0; +} + +static void presub_scan_read( + void * data, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct rc_reader_data * reader_data = data; + rc_presubtract_op * presub_opcode = reader_data->CbData; + + if (!rc_inst_can_use_presub(inst, *presub_opcode, + reader_data->Writer->U.I.DstReg.WriteMask, + src, + &reader_data->Writer->U.I.SrcReg[0], + &reader_data->Writer->U.I.SrcReg[1])) { + reader_data->Abort = 1; + return; + } +} + +static int presub_helper( + struct radeon_compiler * c, + struct rc_instruction * inst_add, + rc_presubtract_op presub_opcode, + rc_presub_replace_fn presub_replace) +{ + struct rc_reader_data reader_data; + unsigned int i; + rc_presubtract_op cb_op = presub_opcode; + + reader_data.CbData = &cb_op; + reader_data.ExitOnAbort = 1; + rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL, + is_src_clobbered_scan_write); + + if (reader_data.Abort || reader_data.ReaderCount == 0) + return 0; + + for(i = 0; i < reader_data.ReaderCount; i++) { + unsigned int src_index; + struct rc_reader reader = reader_data.Readers[i]; + const struct rc_opcode_info * info = + rc_get_opcode_info(reader.Inst->U.I.Opcode); + + for (src_index = 0; src_index < info->NumSrcRegs; src_index++) { + if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src) + presub_replace(inst_add, reader.Inst, src_index); + } + } + return 1; +} + +/* This function assumes that inst_add->U.I.SrcReg[0] and + * inst_add->U.I.SrcReg[1] aren't both negative. */ +static void presub_replace_add( + struct rc_instruction * inst_add, + struct rc_instruction * inst_reader, + unsigned int src_index) +{ + rc_presubtract_op presub_opcode; + if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate) + presub_opcode = RC_PRESUB_SUB; + else + presub_opcode = RC_PRESUB_ADD; + + if (inst_add->U.I.SrcReg[1].Negate) { + inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; + inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0]; + } else { + inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0]; + inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1]; + } + inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; + inst_reader->U.I.PreSub.SrcReg[1].Negate = 0; + inst_reader->U.I.PreSub.Opcode = presub_opcode; + inst_reader->U.I.SrcReg[src_index] = + chain_srcregs(inst_reader->U.I.SrcReg[src_index], + inst_reader->U.I.PreSub.SrcReg[0]); + inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; + inst_reader->U.I.SrcReg[src_index].Index = presub_opcode; +} + +static int is_presub_candidate( + struct radeon_compiler * c, + struct rc_instruction * inst) +{ + const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); + unsigned int i; + unsigned int is_constant[2] = {0, 0}; + + assert(inst->U.I.Opcode == RC_OPCODE_ADD); + + if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE + || inst->U.I.SaturateMode + || inst->U.I.WriteALUResult) { + return 0; + } + + /* If both sources use a constant swizzle, then we can't convert it to + * a presubtract operation. In fact for the ADD and SUB presubtract + * operations neither source can contain a constant swizzle. This + * specific case is checked in peephole_add_presub_add() when + * we make sure the swizzles for both sources are equal, so we + * don't need to worry about it here. */ + for (i = 0; i < 2; i++) { + int chan; + for (chan = 0; chan < 4; chan++) { + rc_swizzle swz = + get_swz(inst->U.I.SrcReg[i].Swizzle, chan); + if (swz == RC_SWIZZLE_ONE + || swz == RC_SWIZZLE_ZERO + || swz == RC_SWIZZLE_HALF) { + is_constant[i] = 1; + } + } + } + if (is_constant[0] && is_constant[1]) + return 0; + + for(i = 0; i < info->NumSrcRegs; i++) { + struct rc_src_register src = inst->U.I.SrcReg[i]; + if (src_reads_dst_mask(src, inst->U.I.DstReg)) + return 0; + + src.File = RC_FILE_PRESUB; + if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src)) + return 0; + } + return 1; +} + +static int peephole_add_presub_add( + struct radeon_compiler * c, + struct rc_instruction * inst_add) +{ + unsigned dstmask = inst_add->U.I.DstReg.WriteMask; + unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask; + unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask; + + if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) + return 0; + + /* src0 and src1 can't have absolute values */ + if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs) + return 0; + + /* presub_replace_add() assumes only one is negative */ + if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate) + return 0; + + /* if src0 is negative, at least all bits of dstmask have to be set */ + if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask) + return 0; + + /* if src1 is negative, at least all bits of dstmask have to be set */ + if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask) + return 0; + + if (!is_presub_candidate(c, inst_add)) + return 0; + + if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) { + rc_remove_instruction(inst_add); + return 1; + } + return 0; +} + +static void presub_replace_inv( + struct rc_instruction * inst_add, + struct rc_instruction * inst_reader, + unsigned int src_index) +{ + /* We must be careful not to modify inst_add, since it + * is possible it will remain part of the program.*/ + inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; + inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; + inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV; + inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index], + inst_reader->U.I.PreSub.SrcReg[0]); + + inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; + inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV; +} + +/** + * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1] + * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source + * of the add instruction must have the constatnt 1 swizzle. This function + * does not check const registers to see if their value is 1.0, so it should + * be called after the constant_folding optimization. + * @return + * 0 if the ADD instruction is still part of the program. + * 1 if the ADD instruction is no longer part of the program. + */ +static int peephole_add_presub_inv( + struct radeon_compiler * c, + struct rc_instruction * inst_add) +{ + unsigned int i, swz; + + if (!is_presub_candidate(c, inst_add)) + return 0; + + /* Check if src0 is 1. */ + /* XXX It would be nice to use is_src_uniform_constant here, but that + * function only works if the register's file is RC_FILE_NONE */ + for(i = 0; i < 4; i++ ) { + swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i); + if(((1 << i) & inst_add->U.I.DstReg.WriteMask) + && swz != RC_SWIZZLE_ONE) { + return 0; + } + } + + /* Check src1. */ + if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) != + inst_add->U.I.DstReg.WriteMask + || inst_add->U.I.SrcReg[1].Abs + || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY + && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT) + || src_has_const_swz(inst_add->U.I.SrcReg[1])) { + + return 0; + } + + if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) { + rc_remove_instruction(inst_add); + return 1; + } + return 0; +} + +/** + * @return + * 0 if inst is still part of the program. + * 1 if inst is no longer part of the program. + */ +static int peephole(struct radeon_compiler * c, struct rc_instruction * inst) +{ + switch(inst->U.I.Opcode){ + case RC_OPCODE_ADD: + if (c->has_presub) { + if(peephole_add_presub_inv(c, inst)) + return 1; + if(peephole_add_presub_add(c, inst)) + return 1; + } + break; + default: + break; + } + return 0; +} + +void rc_optimize(struct radeon_compiler * c, void *user) +{ + struct rc_instruction * inst = c->Program.Instructions.Next; + while(inst != &c->Program.Instructions) { + struct rc_instruction * cur = inst; + inst = inst->Next; + + constant_folding(c, cur); + + if(peephole(c, cur)) + continue; + + if (cur->U.I.Opcode == RC_OPCODE_MOV) { + copy_propagate(c, cur); + /* cur may no longer be part of the program */ + } + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_dead_sources.c b/src/gallium/drivers/r300/compiler/radeon_pair_dead_sources.c new file mode 100644 index 00000000000..1e9a2c09d44 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_pair_dead_sources.c @@ -0,0 +1,62 @@ + +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" +#include "radeon_opcodes.h" +#include "radeon_program_pair.h" + +static void mark_used_presub(struct rc_pair_sub_instruction * sub) +{ + if (sub->Src[RC_PAIR_PRESUB_SRC].Used) { + unsigned int presub_reg_count = rc_presubtract_src_reg_count( + sub->Src[RC_PAIR_PRESUB_SRC].Index); + unsigned int i; + for (i = 0; i < presub_reg_count; i++) { + sub->Src[i].Used = 1; + } + } +} + +static void mark_used( + struct rc_instruction * inst, + struct rc_pair_sub_instruction * sub) +{ + unsigned int i; + const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); + for (i = 0; i < info->NumSrcRegs; i++) { + unsigned int src_type = rc_source_type_swz(sub->Arg[i].Swizzle); + if (src_type & RC_SOURCE_RGB) { + inst->U.P.RGB.Src[sub->Arg[i].Source].Used = 1; + } + + if (src_type & RC_SOURCE_ALPHA) { + inst->U.P.Alpha.Src[sub->Arg[i].Source].Used = 1; + } + } +} + +/** + * This pass finds sources that are not used by their instruction and marks + * them as unused. + */ +void rc_pair_remove_dead_sources(struct radeon_compiler * c, void *user) +{ + struct rc_instruction * inst; + for (inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + unsigned int i; + if (inst->Type == RC_INSTRUCTION_NORMAL) + continue; + + /* Mark all sources as unused */ + for (i = 0; i < 4; i++) { + inst->U.P.RGB.Src[i].Used = 0; + inst->U.P.Alpha.Src[i].Used = 0; + } + mark_used(inst, &inst->U.P.RGB); + mark_used(inst, &inst->U.P.Alpha); + + mark_used_presub(&inst->U.P.RGB); + mark_used_presub(&inst->U.P.Alpha); + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c new file mode 100644 index 00000000000..49983d6ce75 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c @@ -0,0 +1,706 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * Copyright 2011 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_pair.h" + +#include <stdio.h> + +#include "main/glheader.h" +#include "program/register_allocate.h" +#include "ralloc.h" + +#include "r300_fragprog_swizzle.h" +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" +#include "radeon_dataflow.h" +#include "radeon_list.h" +#include "radeon_variable.h" + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + + + +struct register_info { + struct live_intervals Live[4]; + + unsigned int Used:1; + unsigned int Allocated:1; + unsigned int File:3; + unsigned int Index:RC_REGISTER_INDEX_BITS; + unsigned int Writemask; +}; + +struct regalloc_state { + struct radeon_compiler * C; + + struct register_info * Input; + unsigned int NumInputs; + + struct register_info * Temporary; + unsigned int NumTemporaries; + + unsigned int Simple; + int LoopEnd; +}; + +enum rc_reg_class { + RC_REG_CLASS_SINGLE, + RC_REG_CLASS_DOUBLE, + RC_REG_CLASS_TRIPLE, + RC_REG_CLASS_ALPHA, + RC_REG_CLASS_SINGLE_PLUS_ALPHA, + RC_REG_CLASS_DOUBLE_PLUS_ALPHA, + RC_REG_CLASS_TRIPLE_PLUS_ALPHA, + RC_REG_CLASS_X, + RC_REG_CLASS_Y, + RC_REG_CLASS_Z, + RC_REG_CLASS_XY, + RC_REG_CLASS_YZ, + RC_REG_CLASS_XZ, + RC_REG_CLASS_XW, + RC_REG_CLASS_YW, + RC_REG_CLASS_ZW, + RC_REG_CLASS_XYW, + RC_REG_CLASS_YZW, + RC_REG_CLASS_XZW, + RC_REG_CLASS_COUNT +}; + +struct rc_class { + enum rc_reg_class Class; + + unsigned int WritemaskCount; + + /** This is 1 if this class is being used by the register allocator + * and 0 otherwise */ + unsigned int Used; + + /** This is the ID number assigned to this class by ra. */ + unsigned int Id; + + /** List of writemasks that belong to this class */ + unsigned int Writemasks[3]; + + +}; + +static void print_live_intervals(struct live_intervals * src) +{ + if (!src || !src->Used) { + DBG("(null)"); + return; + } + + DBG("(%i,%i)", src->Start, src->End); +} + +static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b) +{ + if (VERBOSE) { + DBG("overlap_live_intervals: "); + print_live_intervals(a); + DBG(" to "); + print_live_intervals(b); + DBG("\n"); + } + + if (!a->Used || !b->Used) { + DBG(" unused interval\n"); + return 0; + } + + if (a->Start > b->Start) { + if (a->Start < b->End) { + DBG(" overlap\n"); + return 1; + } + } else if (b->Start > a->Start) { + if (b->Start < a->End) { + DBG(" overlap\n"); + return 1; + } + } else { /* a->Start == b->Start */ + if (a->Start != a->End && b->Start != b->End) { + DBG(" overlap\n"); + return 1; + } + } + + DBG(" no overlap\n"); + + return 0; +} + +static void scan_read_callback(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct regalloc_state * s = data; + struct register_info * reg; + unsigned int i; + + if (file != RC_FILE_INPUT) + return; + + s->Input[index].Used = 1; + reg = &s->Input[index]; + + for (i = 0; i < 4; i++) { + if (!((mask >> i) & 0x1)) { + continue; + } + reg->Live[i].Used = 1; + reg->Live[i].Start = 0; + reg->Live[i].End = + s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP; + } +} + +static void remap_register(void * data, struct rc_instruction * inst, + rc_register_file * file, unsigned int * index) +{ + struct regalloc_state * s = data; + const struct register_info * reg; + + if (*file == RC_FILE_TEMPORARY && s->Simple) + reg = &s->Temporary[*index]; + else if (*file == RC_FILE_INPUT) + reg = &s->Input[*index]; + else + return; + + if (reg->Allocated) { + *index = reg->Index; + } +} + +static void alloc_input_simple(void * data, unsigned int input, + unsigned int hwreg) +{ + struct regalloc_state * s = data; + + if (input >= s->NumInputs) + return; + + s->Input[input].Allocated = 1; + s->Input[input].File = RC_FILE_TEMPORARY; + s->Input[input].Index = hwreg; +} + +/* This functions offsets the temporary register indices by the number + * of input registers, because input registers are actually temporaries and + * should not occupy the same space. + * + * This pass is supposed to be used to maintain correct allocation of inputs + * if the standard register allocation is disabled. */ +static void do_regalloc_inputs_only(struct regalloc_state * s) +{ + for (unsigned i = 0; i < s->NumTemporaries; i++) { + s->Temporary[i].Allocated = 1; + s->Temporary[i].File = RC_FILE_TEMPORARY; + s->Temporary[i].Index = i + s->NumInputs; + } +} + +static unsigned int is_derivative(rc_opcode op) +{ + return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY); +} + +static int find_class( + struct rc_class * classes, + unsigned int writemask, + unsigned int max_writemask_count) +{ + unsigned int i; + for (i = 0; i < RC_REG_CLASS_COUNT; i++) { + unsigned int j; + if (classes[i].WritemaskCount > max_writemask_count) { + continue; + } + for (j = 0; j < 3; j++) { + if (classes[i].Writemasks[j] == writemask) { + return i; + } + } + } + return -1; +} + +static enum rc_reg_class variable_get_class( + struct rc_variable * variable, + struct rc_class * classes) +{ + unsigned int i; + unsigned int can_change_writemask= 1; + unsigned int writemask = rc_variable_writemask_sum(variable); + struct rc_list * readers = rc_variable_readers_union(variable); + int class_index; + + if (!variable->C->is_r500) { + struct rc_class c; + /* The assumption here is that if an instruction has type + * RC_INSTRUCTION_NORMAL then it is a TEX instruction. + * r300 and r400 can't swizzle the result of a TEX lookup. */ + if (variable->Inst->Type == RC_INSTRUCTION_NORMAL) { + writemask = RC_MASK_XYZW; + } + + /* Check if it is possible to do swizzle packing for r300/r400 + * without creating non-native swizzles. */ + class_index = find_class(classes, writemask, 3); + if (class_index < 0) { + goto error; + } + c = classes[class_index]; + for (i = 0; i < c.WritemaskCount; i++) { + int j; + unsigned int conversion_swizzle = + rc_make_conversion_swizzle( + writemask, c.Writemasks[i]); + for (j = 0; j < variable->ReaderCount; j++) { + unsigned int old_swizzle; + unsigned int new_swizzle; + struct rc_reader r = variable->Readers[j]; + if (r.Inst->Type == RC_INSTRUCTION_PAIR ) { + old_swizzle = r.U.P.Arg->Swizzle; + } else { + old_swizzle = r.U.I.Src->Swizzle; + } + new_swizzle = rc_adjust_channels( + old_swizzle, conversion_swizzle); + if (!r300_swizzle_is_native_basic(new_swizzle)) { + can_change_writemask = 0; + break; + } + } + if (!can_change_writemask) { + break; + } + } + } + + if (variable->Inst->Type == RC_INSTRUCTION_PAIR) { + /* DDX/DDY seem to always fail when their writemasks are + * changed.*/ + if (is_derivative(variable->Inst->U.P.RGB.Opcode) + || is_derivative(variable->Inst->U.P.Alpha.Opcode)) { + can_change_writemask = 0; + } + } + for ( ; readers; readers = readers->Next) { + struct rc_reader * r = readers->Item; + if (r->Inst->Type == RC_INSTRUCTION_PAIR) { + if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) { + can_change_writemask = 0; + break; + } + /* DDX/DDY also fail when their swizzles are changed. */ + if (is_derivative(r->Inst->U.P.RGB.Opcode) + || is_derivative(r->Inst->U.P.Alpha.Opcode)) { + can_change_writemask = 0; + break; + } + } + } + + class_index = find_class(classes, writemask, + can_change_writemask ? 3 : 1); + if (class_index > -1) { + return classes[class_index].Class; + } else { +error: + rc_error(variable->C, + "Could not find class for index=%u mask=%u\n", + variable->Dst.Index, writemask); + return 0; + } +} + +static unsigned int overlap_live_intervals_array( + struct live_intervals * a, + struct live_intervals * b) +{ + unsigned int a_chan, b_chan; + for (a_chan = 0; a_chan < 4; a_chan++) { + for (b_chan = 0; b_chan < 4; b_chan++) { + if (overlap_live_intervals(&a[a_chan], &b[b_chan])) { + return 1; + } + } + } + return 0; +} + +static unsigned int reg_get_index(int reg) +{ + return reg / RC_MASK_XYZW; +} + +static unsigned int reg_get_writemask(int reg) +{ + return (reg % RC_MASK_XYZW) + 1; +} + +static int get_reg_id(unsigned int index, unsigned int writemask) +{ + assert(writemask); + if (writemask == 0) { + return 0; + } + return (index * RC_MASK_XYZW) + (writemask - 1); +} + +#if VERBOSE +static void print_reg(int reg) +{ + unsigned int index = reg_get_index(reg); + unsigned int mask = reg_get_writemask(reg); + fprintf(stderr, "Temp[%u].%c%c%c%c", index, + mask & RC_MASK_X ? 'x' : '_', + mask & RC_MASK_Y ? 'y' : '_', + mask & RC_MASK_Z ? 'z' : '_', + mask & RC_MASK_W ? 'w' : '_'); +} +#endif + +static void add_register_conflicts( + struct ra_regs * regs, + unsigned int max_temp_regs) +{ + unsigned int index, a_mask, b_mask; + for (index = 0; index < max_temp_regs; index++) { + for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) { + for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW; + b_mask++) { + if (a_mask & b_mask) { + ra_add_reg_conflict(regs, + get_reg_id(index, a_mask), + get_reg_id(index, b_mask)); + } + } + } + } +} + +static void do_advanced_regalloc(struct regalloc_state * s) +{ + struct rc_class rc_class_list [] = { + {RC_REG_CLASS_SINGLE, 3, 0, 0, + {RC_MASK_X, + RC_MASK_Y, + RC_MASK_Z}}, + {RC_REG_CLASS_DOUBLE, 3, 0, 0, + {RC_MASK_X | RC_MASK_Y, + RC_MASK_X | RC_MASK_Z, + RC_MASK_Y | RC_MASK_Z}}, + {RC_REG_CLASS_TRIPLE, 1, 0, 0, + {RC_MASK_X | RC_MASK_Y | RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_ALPHA, 1, 0, 0, + {RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3, 0, 0, + {RC_MASK_X | RC_MASK_W, + RC_MASK_Y | RC_MASK_W, + RC_MASK_Z | RC_MASK_W}}, + {RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3, 0, 0, + {RC_MASK_X | RC_MASK_Y | RC_MASK_W, + RC_MASK_X | RC_MASK_Z | RC_MASK_W, + RC_MASK_Y | RC_MASK_Z | RC_MASK_W}}, + {RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1, 0, 0, + {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_X, 1, 0, 0, + {RC_MASK_X, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_Y, 1, 0, 0, + {RC_MASK_Y, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_Z, 1, 0, 0, + {RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XY, 1, 0, 0, + {RC_MASK_X | RC_MASK_Y, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_YZ, 1, 0, 0, + {RC_MASK_Y | RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XZ, 1, 0, 0, + {RC_MASK_X | RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XW, 1, 0, 0, + {RC_MASK_X | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_YW, 1, 0, 0, + {RC_MASK_Y | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_ZW, 1, 0, 0, + {RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XYW, 1, 0, 0, + {RC_MASK_X | RC_MASK_Y | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_YZW, 1, 0, 0, + {RC_MASK_Y | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XZW, 1, 0, 0, + {RC_MASK_X | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}} + }; + + unsigned int i, j, index, input_node, node_count, node_index; + unsigned int * node_classes; + unsigned int * input_classes; + struct rc_instruction * inst; + struct rc_list * var_ptr; + struct rc_list * variables; + struct ra_regs * regs; + struct ra_graph * graph; + + /* Allocate the main ra data structure */ + regs = ra_alloc_reg_set(s->C->max_temp_regs * RC_MASK_XYZW); + + /* Get list of program variables */ + variables = rc_get_variables(s->C); + node_count = rc_list_count(variables); + node_classes = memory_pool_malloc(&s->C->Pool, + node_count * sizeof(unsigned int)); + input_classes = memory_pool_malloc(&s->C->Pool, + s->NumInputs * sizeof(unsigned int)); + + for (var_ptr = variables, node_index = 0; var_ptr; + var_ptr = var_ptr->Next, node_index++) { + unsigned int class_index; + /* Compute the live intervals */ + rc_variable_compute_live_intervals(var_ptr->Item); + + class_index = variable_get_class(var_ptr->Item, rc_class_list); + + /* If we haven't used this register class yet, mark it + * as used and allocate space for it. */ + if (!rc_class_list[class_index].Used) { + rc_class_list[class_index].Used = 1; + rc_class_list[class_index].Id = ra_alloc_reg_class(regs); + } + + node_classes[node_index] = rc_class_list[class_index].Id; + } + + + /* Assign registers to the classes */ + for (i = 0; i < RC_REG_CLASS_COUNT; i++) { + struct rc_class class = rc_class_list[i]; + if (!class.Used) { + continue; + } + + for (index = 0; index < s->C->max_temp_regs; index++) { + for (j = 0; j < class.WritemaskCount; j++) { + int reg_id = get_reg_id(index, + class.Writemasks[j]); + ra_class_add_reg(regs, class.Id, reg_id); + } + } + } + + /* Add register conflicts */ + add_register_conflicts(regs, s->C->max_temp_regs); + + /* Calculate live intervals for input registers */ + for (inst = s->C->Program.Instructions.Next; + inst != &s->C->Program.Instructions; + inst = inst->Next) { + rc_opcode op = rc_get_flow_control_inst(inst); + if (op == RC_OPCODE_BGNLOOP) { + struct rc_instruction * endloop = + rc_match_bgnloop(inst); + if (endloop->IP > s->LoopEnd) { + s->LoopEnd = endloop->IP; + } + } + rc_for_all_reads_mask(inst, scan_read_callback, s); + } + + /* Create classes for input registers */ + for (i = 0; i < s->NumInputs; i++) { + unsigned int chan, class_id, writemask = 0; + for (chan = 0; chan < 4; chan++) { + if (s->Input[i].Live[chan].Used) { + writemask |= (1 << chan); + } + } + s->Input[i].Writemask = writemask; + if (!writemask) { + continue; + } + + class_id = ra_alloc_reg_class(regs); + input_classes[i] = class_id; + ra_class_add_reg(regs, class_id, + get_reg_id(s->Input[i].Index, writemask)); + } + + ra_set_finalize(regs); + + graph = ra_alloc_interference_graph(regs, node_count + s->NumInputs); + + /* Build the interference graph */ + for (var_ptr = variables, node_index = 0; var_ptr; + var_ptr = var_ptr->Next,node_index++) { + struct rc_list * a, * b; + unsigned int b_index; + + ra_set_node_class(graph, node_index, node_classes[node_index]); + + for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1; + b; b = b->Next, b_index++) { + struct rc_variable * var_a = a->Item; + while (var_a) { + struct rc_variable * var_b = b->Item; + while (var_b) { + if (overlap_live_intervals_array(var_a->Live, var_b->Live)) { + ra_add_node_interference(graph, + node_index, b_index); + } + var_b = var_b->Friend; + } + var_a = var_a->Friend; + } + } + } + + /* Add input registers to the interference graph */ + for (i = 0, input_node = 0; i< s->NumInputs; i++) { + if (!s->Input[i].Writemask) { + continue; + } + ra_set_node_class(graph, node_count + input_node, + input_classes[i]); + for (var_ptr = variables, node_index = 0; + var_ptr; var_ptr = var_ptr->Next, node_index++) { + struct rc_variable * var = var_ptr->Item; + if (overlap_live_intervals_array(s->Input[i].Live, + var->Live)) { + ra_add_node_interference(graph, node_index, + node_count + input_node); + } + } + /* Manually allocate a register for this input */ + ra_set_node_reg(graph, node_count + input_node, get_reg_id( + s->Input[i].Index, s->Input[i].Writemask)); + input_node++; + } + + if (!ra_allocate_no_spills(graph)) { + rc_error(s->C, "Ran out of hardware temporaries\n"); + return; + } + + /* Rewrite the registers */ + for (var_ptr = variables, node_index = 0; var_ptr; + var_ptr = var_ptr->Next, node_index++) { + int reg = ra_get_node_reg(graph, node_index); + unsigned int writemask = reg_get_writemask(reg); + unsigned int index = reg_get_index(reg); + struct rc_variable * var = var_ptr->Item; + + if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) { + writemask = rc_variable_writemask_sum(var); + } + + if (var->Dst.File == RC_FILE_INPUT) { + continue; + } + rc_variable_change_dst(var, index, writemask); + } + + ralloc_free(graph); + ralloc_free(regs); +} + +/** + * @param user This parameter should be a pointer to an integer value. If this + * integer value is zero, then a simple register allocator will be used that + * only allocates space for input registers (\sa do_regalloc_inputs_only). If + * user is non-zero, then the regular register allocator will be used + * (\sa do_regalloc). + */ +void rc_pair_regalloc(struct radeon_compiler *cc, void *user) +{ + struct r300_fragment_program_compiler *c = + (struct r300_fragment_program_compiler*)cc; + struct regalloc_state s; + int * do_full_regalloc = (int*)user; + + memset(&s, 0, sizeof(s)); + s.C = cc; + s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1; + s.Input = memory_pool_malloc(&cc->Pool, + s.NumInputs * sizeof(struct register_info)); + memset(s.Input, 0, s.NumInputs * sizeof(struct register_info)); + + s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1; + s.Temporary = memory_pool_malloc(&cc->Pool, + s.NumTemporaries * sizeof(struct register_info)); + memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info)); + + rc_recompute_ips(s.C); + + c->AllocateHwInputs(c, &alloc_input_simple, &s); + if (*do_full_regalloc) { + do_advanced_regalloc(&s); + } else { + s.Simple = 1; + do_regalloc_inputs_only(&s); + } + + /* Rewrite inputs and if we are doing the simple allocation, rewrite + * temporaries too. */ + for (struct rc_instruction *inst = s.C->Program.Instructions.Next; + inst != &s.C->Program.Instructions; + inst = inst->Next) { + rc_remap_registers(inst, &remap_register, &s); + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c b/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c new file mode 100644 index 00000000000..25cd52c9cd4 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c @@ -0,0 +1,1010 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_pair.h" + +#include <stdio.h> + +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" +#include "radeon_dataflow.h" + + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + +struct schedule_instruction { + struct rc_instruction * Instruction; + + /** Next instruction in the linked list of ready instructions. */ + struct schedule_instruction *NextReady; + + /** Values that this instruction reads and writes */ + struct reg_value * WriteValues[4]; + struct reg_value * ReadValues[12]; + unsigned int NumWriteValues:3; + unsigned int NumReadValues:4; + + /** + * Number of (read and write) dependencies that must be resolved before + * this instruction can be scheduled. + */ + unsigned int NumDependencies:5; + + /** List of all readers (see rc_get_readers() for the definition of + * "all readers"), even those outside the basic block this instruction + * lives in. */ + struct rc_reader_data GlobalReaders; +}; + + +/** + * Used to keep track of which instructions read a value. + */ +struct reg_value_reader { + struct schedule_instruction *Reader; + struct reg_value_reader *Next; +}; + +/** + * Used to keep track which values are stored in each component of a + * RC_FILE_TEMPORARY. + */ +struct reg_value { + struct schedule_instruction * Writer; + + /** + * Unordered linked list of instructions that read from this value. + * When this value becomes available, we increase all readers' + * dependency count. + */ + struct reg_value_reader *Readers; + + /** + * Number of readers of this value. This is decremented each time + * a reader of the value is committed. + * When the reader cound reaches zero, the dependency count + * of the instruction writing \ref Next is decremented. + */ + unsigned int NumReaders; + + struct reg_value *Next; /**< Pointer to the next value to be written to the same register */ +}; + +struct register_state { + struct reg_value * Values[4]; +}; + +struct remap_reg { + struct rc_instruciont * Inst; + unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1); + unsigned int OldSwizzle:3; + unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1); + unsigned int NewSwizzle:3; + unsigned int OnlyTexReads:1; + struct remap_reg * Next; +}; + +struct schedule_state { + struct radeon_compiler * C; + struct schedule_instruction * Current; + + struct register_state Temporary[RC_REGISTER_MAX_INDEX]; + + /** + * Linked lists of instructions that can be scheduled right now, + * based on which ALU/TEX resources they require. + */ + /*@{*/ + struct schedule_instruction *ReadyFullALU; + struct schedule_instruction *ReadyRGB; + struct schedule_instruction *ReadyAlpha; + struct schedule_instruction *ReadyTEX; + /*@}*/ +}; + +static struct reg_value ** get_reg_valuep(struct schedule_state * s, + rc_register_file file, unsigned int index, unsigned int chan) +{ + if (file != RC_FILE_TEMPORARY) + return 0; + + if (index >= RC_REGISTER_MAX_INDEX) { + rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index); + return 0; + } + + return &s->Temporary[index].Values[chan]; +} + +static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst) +{ + inst->NextReady = *list; + *list = inst; +} + +static void add_inst_to_list_end(struct schedule_instruction ** list, + struct schedule_instruction * inst) +{ + if(!*list){ + *list = inst; + }else{ + struct schedule_instruction * temp = *list; + while(temp->NextReady){ + temp = temp->NextReady; + } + temp->NextReady = inst; + } +} + +static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst) +{ + DBG("%i is now ready\n", sinst->Instruction->IP); + + /* Adding Ready TEX instructions to the end of the "Ready List" helps + * us emit TEX instructions in blocks without losing our place. */ + if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) + add_inst_to_list_end(&s->ReadyTEX, sinst); + else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP) + add_inst_to_list(&s->ReadyRGB, sinst); + else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP) + add_inst_to_list(&s->ReadyAlpha, sinst); + else + add_inst_to_list(&s->ReadyFullALU, sinst); +} + +static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst) +{ + assert(sinst->NumDependencies > 0); + sinst->NumDependencies--; + if (!sinst->NumDependencies) + instruction_ready(s, sinst); +} + +/** + * This function decreases the dependencies of the next instruction that + * wants to write to each of sinst's read values. + */ +static void commit_update_reads(struct schedule_state * s, + struct schedule_instruction * sinst){ + unsigned int i; + for(i = 0; i < sinst->NumReadValues; ++i) { + struct reg_value * v = sinst->ReadValues[i]; + assert(v->NumReaders > 0); + v->NumReaders--; + if (!v->NumReaders) { + if (v->Next) + decrease_dependencies(s, v->Next->Writer); + } + } +} + +static void commit_update_writes(struct schedule_state * s, + struct schedule_instruction * sinst){ + unsigned int i; + for(i = 0; i < sinst->NumWriteValues; ++i) { + struct reg_value * v = sinst->WriteValues[i]; + if (v->NumReaders) { + for(struct reg_value_reader * r = v->Readers; r; r = r->Next) { + decrease_dependencies(s, r->Reader); + } + } else { + /* This happens in instruction sequences of the type + * OP r.x, ...; + * OP r.x, r.x, ...; + * See also the subtlety in how instructions that both + * read and write the same register are scanned. + */ + if (v->Next) + decrease_dependencies(s, v->Next->Writer); + } + } +} + +static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst) +{ + DBG("%i: commit\n", sinst->Instruction->IP); + + commit_update_reads(s, sinst); + + commit_update_writes(s, sinst); +} + +/** + * Emit all ready texture instructions in a single block. + * + * Emit as a single block to (hopefully) sample many textures in parallel, + * and to avoid hardware indirections on R300. + */ +static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before) +{ + struct schedule_instruction *readytex; + struct rc_instruction * inst_begin; + + assert(s->ReadyTEX); + + /* Node marker for R300 */ + inst_begin = rc_insert_new_instruction(s->C, before->Prev); + inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX; + + /* Link texture instructions back in */ + readytex = s->ReadyTEX; + while(readytex) { + rc_insert_instruction(before->Prev, readytex->Instruction); + DBG("%i: commit TEX reads\n", readytex->Instruction->IP); + + /* All of the TEX instructions in the same TEX block have + * their source registers read from before any of the + * instructions in that block write to their destination + * registers. This means that when we commit a TEX + * instruction, any other TEX instruction that wants to write + * to one of the committed instruction's source register can be + * marked as ready and should be emitted in the same TEX + * block. This prevents the following sequence from being + * emitted in two different TEX blocks: + * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0]; + * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0]; + */ + commit_update_reads(s, readytex); + readytex = readytex->NextReady; + } + readytex = s->ReadyTEX; + s->ReadyTEX = 0; + while(readytex){ + DBG("%i: commit TEX writes\n", readytex->Instruction->IP); + commit_update_writes(s, readytex); + readytex = readytex->NextReady; + } +} + +/* This is a helper function for destructive_merge_instructions(). It helps + * merge presubtract sources from two instructions and makes sure the + * presubtract sources end up in the correct spot. This function assumes that + * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb) + * but no scalar instruction (alpha). + * @return 0 if merging the presubtract sources fails. + * @retrun 1 if merging the presubtract sources succeeds. + */ +static int merge_presub_sources( + struct rc_pair_instruction * dst_full, + struct rc_pair_sub_instruction src, + unsigned int type) +{ + unsigned int srcp_src, srcp_regs, is_rgb, is_alpha; + struct rc_pair_sub_instruction * dst_sub; + const struct rc_opcode_info * info; + + assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP); + + switch(type) { + case RC_SOURCE_RGB: + is_rgb = 1; + is_alpha = 0; + dst_sub = &dst_full->RGB; + break; + case RC_SOURCE_ALPHA: + is_rgb = 0; + is_alpha = 1; + dst_sub = &dst_full->Alpha; + break; + default: + assert(0); + return 0; + } + + info = rc_get_opcode_info(dst_full->RGB.Opcode); + + if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used) + return 0; + + srcp_regs = rc_presubtract_src_reg_count( + src.Src[RC_PAIR_PRESUB_SRC].Index); + for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) { + unsigned int arg; + int free_source; + unsigned int one_way = 0; + struct rc_pair_instruction_source srcp = src.Src[srcp_src]; + struct rc_pair_instruction_source temp; + + free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha, + srcp.File, srcp.Index); + + /* If free_source < 0 then there are no free source + * slots. */ + if (free_source < 0) + return 0; + + temp = dst_sub->Src[srcp_src]; + dst_sub->Src[srcp_src] = dst_sub->Src[free_source]; + + /* srcp needs src0 and src1 to be the same */ + if (free_source < srcp_src) { + if (!temp.Used) + continue; + free_source = rc_pair_alloc_source(dst_full, is_rgb, + is_alpha, temp.File, temp.Index); + if (free_source < 0) + return 0; + one_way = 1; + } else { + dst_sub->Src[free_source] = temp; + } + + /* If free_source == srcp_src, then the presubtract + * source is already in the correct place. */ + if (free_source == srcp_src) + continue; + + /* Shuffle the sources, so we can put the + * presubtract source in the correct place. */ + for(arg = 0; arg < info->NumSrcRegs; arg++) { + /*If this arg does not read from an rgb source, + * do nothing. */ + if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) + & type)) { + continue; + } + + if (dst_full->RGB.Arg[arg].Source == srcp_src) + dst_full->RGB.Arg[arg].Source = free_source; + /* We need to do this just in case register + * is one of the sources already, but in the + * wrong spot. */ + else if(dst_full->RGB.Arg[arg].Source == free_source + && !one_way) { + dst_full->RGB.Arg[arg].Source = srcp_src; + } + } + } + return 1; +} + + +/* This function assumes that rgb.Alpha and alpha.RGB are unused */ +static int destructive_merge_instructions( + struct rc_pair_instruction * rgb, + struct rc_pair_instruction * alpha) +{ + const struct rc_opcode_info * opcode; + + assert(rgb->Alpha.Opcode == RC_OPCODE_NOP); + assert(alpha->RGB.Opcode == RC_OPCODE_NOP); + + /* Presubtract registers need to be merged first so that registers + * needed by the presubtract operation can be placed in src0 and/or + * src1. */ + + /* Merge the rgb presubtract registers. */ + if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { + if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) { + return 0; + } + } + /* Merge the alpha presubtract registers */ + if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { + if(!merge_presub_sources(rgb, alpha->Alpha, RC_SOURCE_ALPHA)){ + return 0; + } + } + + /* Copy alpha args into rgb */ + opcode = rc_get_opcode_info(alpha->Alpha.Opcode); + + for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { + unsigned int srcrgb = 0; + unsigned int srcalpha = 0; + unsigned int oldsrc = alpha->Alpha.Arg[arg].Source; + rc_register_file file = 0; + unsigned int index = 0; + int source; + + if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) { + srcrgb = 1; + file = alpha->RGB.Src[oldsrc].File; + index = alpha->RGB.Src[oldsrc].Index; + } else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) { + srcalpha = 1; + file = alpha->Alpha.Src[oldsrc].File; + index = alpha->Alpha.Src[oldsrc].Index; + } + + source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index); + if (source < 0) + return 0; + + rgb->Alpha.Arg[arg].Source = source; + rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle; + rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs; + rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate; + } + + /* Copy alpha opcode into rgb */ + rgb->Alpha.Opcode = alpha->Alpha.Opcode; + rgb->Alpha.DestIndex = alpha->Alpha.DestIndex; + rgb->Alpha.WriteMask = alpha->Alpha.WriteMask; + rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask; + rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask; + rgb->Alpha.Saturate = alpha->Alpha.Saturate; + + /* Merge ALU result writing */ + if (alpha->WriteALUResult) { + if (rgb->WriteALUResult) + return 0; + + rgb->WriteALUResult = alpha->WriteALUResult; + rgb->ALUResultCompare = alpha->ALUResultCompare; + } + + return 1; +} + +/** + * Try to merge the given instructions into the rgb instructions. + * + * Return true on success; on failure, return false, and keep + * the instructions untouched. + */ +static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha) +{ + struct rc_pair_instruction backup; + + /*Instructions can't write output registers and ALU result at the + * same time. */ + if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask) + || (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) { + return 0; + } + memcpy(&backup, rgb, sizeof(struct rc_pair_instruction)); + + if (destructive_merge_instructions(rgb, alpha)) + return 1; + + memcpy(rgb, &backup, sizeof(struct rc_pair_instruction)); + return 0; +} + +static void presub_nop(struct rc_instruction * emitted) { + int prev_rgb_index, prev_alpha_index, i, num_src; + + /* We don't need a nop if the previous instruction is a TEX. */ + if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) { + return; + } + if (emitted->Prev->U.P.RGB.WriteMask) + prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex; + else + prev_rgb_index = -1; + if (emitted->Prev->U.P.Alpha.WriteMask) + prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex; + else + prev_alpha_index = 1; + + /* Check the previous rgb instruction */ + if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) { + num_src = rc_presubtract_src_reg_count( + emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index); + for (i = 0; i < num_src; i++) { + unsigned int index = emitted->U.P.RGB.Src[i].Index; + if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY + && (index == prev_rgb_index + || index == prev_alpha_index)) { + emitted->Prev->U.P.Nop = 1; + return; + } + } + } + + /* Check the previous alpha instruction. */ + if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) + return; + + num_src = rc_presubtract_src_reg_count( + emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index); + for (i = 0; i < num_src; i++) { + unsigned int index = emitted->U.P.Alpha.Src[i].Index; + if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY + && (index == prev_rgb_index || index == prev_alpha_index)) { + emitted->Prev->U.P.Nop = 1; + return; + } + } +} + +static void rgb_to_alpha_remap ( + struct rc_instruction * inst, + struct rc_pair_instruction_arg * arg, + rc_register_file old_file, + rc_swizzle old_swz, + unsigned int new_index) +{ + int new_src_index; + unsigned int i; + + for (i = 0; i < 3; i++) { + if (get_swz(arg->Swizzle, i) == old_swz) { + SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W); + } + } + new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1, + old_file, new_index); + /* This conversion is not possible, we must have made a mistake in + * is_rgb_to_alpha_possible. */ + if (new_src_index < 0) { + assert(0); + return; + } + + arg->Source = new_src_index; +} + +static int can_remap(unsigned int opcode) +{ + switch(opcode) { + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + return 0; + default: + return 1; + } +} + +static int can_convert_opcode_to_alpha(unsigned int opcode) +{ + switch(opcode) { + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + case RC_OPCODE_DP2: + case RC_OPCODE_DP3: + case RC_OPCODE_DP4: + case RC_OPCODE_DPH: + return 0; + default: + return 1; + } +} + +static void is_rgb_to_alpha_possible( + void * userdata, + struct rc_instruction * inst, + struct rc_pair_instruction_arg * arg, + struct rc_pair_instruction_source * src) +{ + unsigned int chan_count = 0; + unsigned int alpha_sources = 0; + unsigned int i; + struct rc_reader_data * reader_data = userdata; + + if (!can_remap(inst->U.P.RGB.Opcode) + || !can_remap(inst->U.P.Alpha.Opcode)) { + reader_data->Abort = 1; + return; + } + + if (!src) + return; + + /* XXX There are some cases where we can still do the conversion if + * a reader reads from a presubtract source, but for now we'll prevent + * it. */ + if (arg->Source == RC_PAIR_PRESUB_SRC) { + reader_data->Abort = 1; + return; + } + + /* Make sure the source only reads from one component. + * XXX We should allow the source to read from the same component twice. + * XXX If the index we will be converting to is the same as the + * current index, then it is OK to read from more than one component. + */ + for (i = 0; i < 3; i++) { + rc_swizzle swz = get_swz(arg->Swizzle, i); + switch(swz) { + case RC_SWIZZLE_X: + case RC_SWIZZLE_Y: + case RC_SWIZZLE_Z: + case RC_SWIZZLE_W: + chan_count++; + break; + default: + break; + } + } + if (chan_count > 1) { + reader_data->Abort = 1; + return; + } + + /* Make sure there are enough alpha sources. + * XXX If we know what register all the readers are going + * to be remapped to, then in some situations we can still do + * the subsitution, even if all 3 alpha sources are being used.*/ + for (i = 0; i < 3; i++) { + if (inst->U.P.Alpha.Src[i].Used) { + alpha_sources++; + } + } + if (alpha_sources > 2) { + reader_data->Abort = 1; + return; + } +} + +static int convert_rgb_to_alpha( + struct schedule_state * s, + struct schedule_instruction * sched_inst) +{ + struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P; + unsigned int old_mask = pair_inst->RGB.WriteMask; + unsigned int old_swz = rc_mask_to_swizzle(old_mask); + const struct rc_opcode_info * info = + rc_get_opcode_info(pair_inst->RGB.Opcode); + int new_index = -1; + unsigned int i; + + if (sched_inst->GlobalReaders.Abort) + return 0; + + if (!pair_inst->RGB.WriteMask) + return 0; + + if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode) + || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) { + return 0; + } + + assert(sched_inst->NumWriteValues == 1); + + if (!sched_inst->WriteValues[0]) { + assert(0); + return 0; + } + + /* We start at the old index, because if we can reuse the same + * register and just change the swizzle then it is more likely we + * will be able to convert all the readers. */ + for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) { + struct reg_value ** new_regvalp = get_reg_valuep( + s, RC_FILE_TEMPORARY, i, 3); + if (!*new_regvalp) { + struct reg_value ** old_regvalp = + get_reg_valuep(s, + RC_FILE_TEMPORARY, + pair_inst->RGB.DestIndex, + rc_mask_to_swizzle(old_mask)); + new_index = i; + *new_regvalp = *old_regvalp; + *old_regvalp = NULL; + new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3); + break; + } + } + if (new_index < 0) { + return 0; + } + + pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode; + pair_inst->Alpha.DestIndex = new_index; + pair_inst->Alpha.WriteMask = RC_MASK_W; + pair_inst->Alpha.Target = pair_inst->RGB.Target; + pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask; + pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask; + pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate; + memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg, + sizeof(pair_inst->Alpha.Arg)); + /* Move the swizzles into the first chan */ + for (i = 0; i < info->NumSrcRegs; i++) { + unsigned int j; + for (j = 0; j < 3; j++) { + unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j); + if (swz != RC_SWIZZLE_UNUSED) { + pair_inst->Alpha.Arg[i].Swizzle = + rc_init_swizzle(swz, 1); + break; + } + } + } + pair_inst->RGB.Opcode = RC_OPCODE_NOP; + pair_inst->RGB.DestIndex = 0; + pair_inst->RGB.WriteMask = 0; + pair_inst->RGB.Target = 0; + pair_inst->RGB.OutputWriteMask = 0; + pair_inst->RGB.DepthWriteMask = 0; + pair_inst->RGB.Saturate = 0; + memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg)); + + for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) { + struct rc_reader reader = sched_inst->GlobalReaders.Readers[i]; + rgb_to_alpha_remap(reader.Inst, reader.U.P.Arg, + RC_FILE_TEMPORARY, old_swz, new_index); + } + return 1; +} + +/** + * Find a good ALU instruction or pair of ALU instruction and emit it. + * + * Prefer emitting full ALU instructions, so that when we reach a point + * where no full ALU instruction can be emitted, we have more candidates + * for RGB/Alpha pairing. + */ +static void emit_one_alu(struct schedule_state *s, struct rc_instruction * before) +{ + struct schedule_instruction * sinst; + + if (s->ReadyFullALU) { + sinst = s->ReadyFullALU; + s->ReadyFullALU = s->ReadyFullALU->NextReady; + rc_insert_instruction(before->Prev, sinst->Instruction); + commit_alu_instruction(s, sinst); + } else { + struct schedule_instruction **prgb; + struct schedule_instruction **palpha; + struct schedule_instruction *prev; +pair: + /* Some pairings might fail because they require too + * many source slots; try all possible pairings if necessary */ + for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { + for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) { + struct schedule_instruction * psirgb = *prgb; + struct schedule_instruction * psialpha = *palpha; + + if (!merge_instructions(&psirgb->Instruction->U.P, &psialpha->Instruction->U.P)) + continue; + + *prgb = (*prgb)->NextReady; + *palpha = (*palpha)->NextReady; + rc_insert_instruction(before->Prev, psirgb->Instruction); + commit_alu_instruction(s, psirgb); + commit_alu_instruction(s, psialpha); + goto success; + } + } + prev = NULL; + /* No success in pairing, now try to convert one of the RGB + * instructions to an Alpha so we can pair it with another RGB. + */ + if (s->ReadyRGB && s->ReadyRGB->NextReady) { + for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { + if ((*prgb)->NumWriteValues == 1) { + struct schedule_instruction * prgb_next; + if (!convert_rgb_to_alpha(s, *prgb)) + goto cont_loop; + prgb_next = (*prgb)->NextReady; + /* Add instruction to the Alpha ready list. */ + (*prgb)->NextReady = s->ReadyAlpha; + s->ReadyAlpha = *prgb; + /* Remove instruction from the RGB ready list.*/ + if (prev) + prev->NextReady = prgb_next; + else + s->ReadyRGB = prgb_next; + goto pair; + } +cont_loop: + prev = *prgb; + } + } + /* Still no success in pairing, just take the first RGB + * or alpha instruction. */ + if (s->ReadyRGB) { + sinst = s->ReadyRGB; + s->ReadyRGB = s->ReadyRGB->NextReady; + } else if (s->ReadyAlpha) { + sinst = s->ReadyAlpha; + s->ReadyAlpha = s->ReadyAlpha->NextReady; + } else { + /*XXX Something real bad has happened. */ + assert(0); + } + + rc_insert_instruction(before->Prev, sinst->Instruction); + commit_alu_instruction(s, sinst); + success: ; + } + /* If the instruction we just emitted uses a presubtract value, and + * the presubtract sources were written by the previous intstruction, + * the previous instruction needs a nop. */ + presub_nop(before->Prev); +} + +static void scan_read(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int chan) +{ + struct schedule_state * s = data; + struct reg_value ** v = get_reg_valuep(s, file, index, chan); + struct reg_value_reader * reader; + + if (!v) + return; + + if (*v && (*v)->Writer == s->Current) { + /* The instruction reads and writes to a register component. + * In this case, we only want to increment dependencies by one. + */ + return; + } + + DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); + + reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader)); + reader->Reader = s->Current; + if (!*v) { + /* In this situation, the instruction reads from a register + * that hasn't been written to or read from in the current + * block. */ + *v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value)); + memset(*v, 0, sizeof(struct reg_value)); + (*v)->Readers = reader; + } else { + reader->Next = (*v)->Readers; + (*v)->Readers = reader; + /* Only update the current instruction's dependencies if the + * register it reads from has been written to in this block. */ + if ((*v)->Writer) { + s->Current->NumDependencies++; + } + } + (*v)->NumReaders++; + + if (s->Current->NumReadValues >= 12) { + rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__); + } else { + s->Current->ReadValues[s->Current->NumReadValues++] = *v; + } +} + +static void scan_write(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int chan) +{ + struct schedule_state * s = data; + struct reg_value ** pv = get_reg_valuep(s, file, index, chan); + struct reg_value * newv; + + if (!pv) + return; + + DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); + + newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv)); + memset(newv, 0, sizeof(*newv)); + + newv->Writer = s->Current; + + if (*pv) { + (*pv)->Next = newv; + s->Current->NumDependencies++; + } + + *pv = newv; + + if (s->Current->NumWriteValues >= 4) { + rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__); + } else { + s->Current->WriteValues[s->Current->NumWriteValues++] = newv; + } +} + +static void is_rgb_to_alpha_possible_normal( + void * userdata, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct rc_reader_data * reader_data = userdata; + reader_data->Abort = 1; + +} + +static void schedule_block(struct r300_fragment_program_compiler * c, + struct rc_instruction * begin, struct rc_instruction * end) +{ + struct schedule_state s; + unsigned int ip; + + memset(&s, 0, sizeof(s)); + s.C = &c->Base; + + /* Scan instructions for data dependencies */ + ip = 0; + for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { + s.Current = memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current)); + memset(s.Current, 0, sizeof(struct schedule_instruction)); + + s.Current->Instruction = inst; + inst->IP = ip++; + + DBG("%i: Scanning\n", inst->IP); + + /* The order of things here is subtle and maybe slightly + * counter-intuitive, to account for the case where an + * instruction writes to the same register as it reads + * from. */ + rc_for_all_writes_chan(inst, &scan_write, &s); + rc_for_all_reads_chan(inst, &scan_read, &s); + + DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies); + + if (!s.Current->NumDependencies) + instruction_ready(&s, s.Current); + + /* Get global readers for possible RGB->Alpha conversion. */ + s.Current->GlobalReaders.ExitOnAbort = 1; + rc_get_readers(s.C, inst, &s.Current->GlobalReaders, + is_rgb_to_alpha_possible_normal, + is_rgb_to_alpha_possible, NULL); + } + + /* Temporarily unlink all instructions */ + begin->Prev->Next = end; + end->Prev = begin->Prev; + + /* Schedule instructions back */ + while(!s.C->Error && + (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) { + if (s.ReadyTEX) + emit_all_tex(&s, end); + + while(!s.C->Error && (s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha)) + emit_one_alu(&s, end); + } +} + +static int is_controlflow(struct rc_instruction * inst) +{ + if (inst->Type == RC_INSTRUCTION_NORMAL) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + return opcode->IsFlowControl; + } + return 0; +} + +void rc_pair_schedule(struct radeon_compiler *cc, void *user) +{ + struct schedule_state s; + + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; + struct rc_instruction * inst = c->Base.Program.Instructions.Next; + + memset(&s, 0, sizeof(s)); + s.C = &c->Base; + while(inst != &c->Base.Program.Instructions) { + struct rc_instruction * first; + + if (is_controlflow(inst)) { + inst = inst->Next; + continue; + } + + first = inst; + + while(inst != &c->Base.Program.Instructions && !is_controlflow(inst)) + inst = inst->Next; + + DBG("Schedule one block\n"); + schedule_block(c, first, inst); + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_translate.c b/src/gallium/drivers/r300/compiler/radeon_pair_translate.c new file mode 100644 index 00000000000..2dae56a2428 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_pair_translate.c @@ -0,0 +1,359 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_pair.h" + +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" + + +/** + * Finally rewrite ADD, MOV, MUL as the appropriate native instruction + * and reverse the order of arguments for CMP. + */ +static void final_rewrite(struct rc_sub_instruction *inst) +{ + struct rc_src_register tmp; + + switch(inst->Opcode) { + case RC_OPCODE_ADD: + inst->SrcReg[2] = inst->SrcReg[1]; + inst->SrcReg[1].File = RC_FILE_NONE; + inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111; + inst->SrcReg[1].Negate = RC_MASK_NONE; + inst->Opcode = RC_OPCODE_MAD; + break; + case RC_OPCODE_CMP: + tmp = inst->SrcReg[2]; + inst->SrcReg[2] = inst->SrcReg[0]; + inst->SrcReg[0] = tmp; + break; + case RC_OPCODE_MOV: + /* AMD say we should use CMP. + * However, when we transform + * KIL -r0; + * into + * CMP tmp, -r0, -r0, 0; + * KIL tmp; + * we get incorrect behaviour on R500 when r0 == 0.0. + * It appears that the R500 KIL hardware treats -0.0 as less + * than zero. + */ + inst->SrcReg[1].File = RC_FILE_NONE; + inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111; + inst->SrcReg[2].File = RC_FILE_NONE; + inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000; + inst->Opcode = RC_OPCODE_MAD; + break; + case RC_OPCODE_MUL: + inst->SrcReg[2].File = RC_FILE_NONE; + inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000; + inst->Opcode = RC_OPCODE_MAD; + break; + default: + /* nothing to do */ + break; + } +} + + +/** + * Classify an instruction according to which ALUs etc. it needs + */ +static void classify_instruction(struct rc_sub_instruction * inst, + int * needrgb, int * needalpha, int * istranscendent) +{ + *needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0; + *needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0; + *istranscendent = 0; + + if (inst->WriteALUResult == RC_ALURESULT_X) + *needrgb = 1; + else if (inst->WriteALUResult == RC_ALURESULT_W) + *needalpha = 1; + + switch(inst->Opcode) { + case RC_OPCODE_ADD: + case RC_OPCODE_CMP: + case RC_OPCODE_CND: + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + case RC_OPCODE_FRC: + case RC_OPCODE_MAD: + case RC_OPCODE_MAX: + case RC_OPCODE_MIN: + case RC_OPCODE_MOV: + case RC_OPCODE_MUL: + break; + case RC_OPCODE_COS: + case RC_OPCODE_EX2: + case RC_OPCODE_LG2: + case RC_OPCODE_RCP: + case RC_OPCODE_RSQ: + case RC_OPCODE_SIN: + *istranscendent = 1; + *needalpha = 1; + break; + case RC_OPCODE_DP4: + *needalpha = 1; + /* fall through */ + case RC_OPCODE_DP3: + *needrgb = 1; + break; + default: + break; + } +} + +static void src_uses(struct rc_src_register src, unsigned int * rgb, + unsigned int * alpha) +{ + int j; + for(j = 0; j < 4; ++j) { + unsigned int swz = GET_SWZ(src.Swizzle, j); + if (swz < 3) + *rgb = 1; + else if (swz < 4) + *alpha = 1; + } +} + +/** + * Fill the given ALU instruction's opcodes and source operands into the given pair, + * if possible. + */ +static void set_pair_instruction(struct r300_fragment_program_compiler *c, + struct rc_pair_instruction * pair, + struct rc_sub_instruction * inst) +{ + int needrgb, needalpha, istranscendent; + const struct rc_opcode_info * opcode; + int i; + + memset(pair, 0, sizeof(struct rc_pair_instruction)); + + classify_instruction(inst, &needrgb, &needalpha, &istranscendent); + + if (needrgb) { + if (istranscendent) + pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA; + else + pair->RGB.Opcode = inst->Opcode; + if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) + pair->RGB.Saturate = 1; + } + if (needalpha) { + pair->Alpha.Opcode = inst->Opcode; + if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) + pair->Alpha.Saturate = 1; + } + + opcode = rc_get_opcode_info(inst->Opcode); + + /* Presubtract handling: + * We need to make sure that the values used by the presubtract + * operation end up in src0 or src1. */ + if(inst->PreSub.Opcode != RC_PRESUB_NONE) { + /* rc_pair_alloc_source() will fill in data for + * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */ + int j; + for(j = 0; j < 3; j++) { + int src_regs; + if(inst->SrcReg[j].File != RC_FILE_PRESUB) + continue; + + src_regs = rc_presubtract_src_reg_count( + inst->PreSub.Opcode); + for(i = 0; i < src_regs; i++) { + unsigned int rgb = 0; + unsigned int alpha = 0; + src_uses(inst->SrcReg[j], &rgb, &alpha); + if(rgb) { + pair->RGB.Src[i].File = + inst->PreSub.SrcReg[i].File; + pair->RGB.Src[i].Index = + inst->PreSub.SrcReg[i].Index; + pair->RGB.Src[i].Used = 1; + } + if(alpha) { + pair->Alpha.Src[i].File = + inst->PreSub.SrcReg[i].File; + pair->Alpha.Src[i].Index = + inst->PreSub.SrcReg[i].Index; + pair->Alpha.Src[i].Used = 1; + } + } + } + } + + for(i = 0; i < opcode->NumSrcRegs; ++i) { + int source; + if (needrgb && !istranscendent) { + unsigned int srcrgb = 0; + unsigned int srcalpha = 0; + unsigned int srcmask = 0; + int j; + /* We don't care about the alpha channel here. We only + * want the part of the swizzle that writes to rgb, + * since we are creating an rgb instruction. */ + for(j = 0; j < 3; ++j) { + unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); + + if (swz < RC_SWIZZLE_W) + srcrgb = 1; + else if (swz == RC_SWIZZLE_W) + srcalpha = 1; + + if (swz < RC_SWIZZLE_UNUSED) + srcmask |= 1 << j; + } + source = rc_pair_alloc_source(pair, srcrgb, srcalpha, + inst->SrcReg[i].File, inst->SrcReg[i].Index); + if (source < 0) { + rc_error(&c->Base, "Failed to translate " + "rgb instruction.\n"); + return; + } + pair->RGB.Arg[i].Source = source; + pair->RGB.Arg[i].Swizzle = + rc_init_swizzle(inst->SrcReg[i].Swizzle, 3); + pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; + pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z)); + } + if (needalpha) { + unsigned int srcrgb = 0; + unsigned int srcalpha = 0; + unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, istranscendent ? 0 : 3); + if (swz < 3) + srcrgb = 1; + else if (swz < 4) + srcalpha = 1; + source = rc_pair_alloc_source(pair, srcrgb, srcalpha, + inst->SrcReg[i].File, inst->SrcReg[i].Index); + if (source < 0) { + rc_error(&c->Base, "Failed to translate " + "alpha instruction.\n"); + return; + } + pair->Alpha.Arg[i].Source = source; + pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1); + pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; + pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W); + } + } + + /* Destination handling */ + if (inst->DstReg.File == RC_FILE_OUTPUT) { + if (inst->DstReg.Index == c->OutputDepth) { + pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + } else { + for (i = 0; i < 4; i++) { + if (inst->DstReg.Index == c->OutputColor[i]) { + pair->RGB.Target = i; + pair->Alpha.Target = i; + pair->RGB.OutputWriteMask |= + inst->DstReg.WriteMask & RC_MASK_XYZ; + pair->Alpha.OutputWriteMask |= + GET_BIT(inst->DstReg.WriteMask, 3); + break; + } + } + } + } else { + if (needrgb) { + pair->RGB.DestIndex = inst->DstReg.Index; + pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ; + } + + if (needalpha) { + pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3); + if (pair->Alpha.WriteMask) { + pair->Alpha.DestIndex = inst->DstReg.Index; + } + } + } + + if (inst->WriteALUResult) { + pair->WriteALUResult = inst->WriteALUResult; + pair->ALUResultCompare = inst->ALUResultCompare; + } +} + + +static void check_opcode_support(struct r300_fragment_program_compiler *c, + struct rc_sub_instruction *inst) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + + if (opcode->HasDstReg) { + if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) { + rc_error(&c->Base, "Fragment program does not support signed Saturate.\n"); + return; + } + } + + for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->SrcReg[i].RelAddr) { + rc_error(&c->Base, "Fragment program does not support relative addressing " + " of source operands.\n"); + return; + } + } +} + + +/** + * Translate all ALU instructions into corresponding pair instructions, + * performing no other changes. + */ +void rc_pair_translate(struct radeon_compiler *cc, void *user) +{ + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; + + for(struct rc_instruction * inst = c->Base.Program.Instructions.Next; + inst != &c->Base.Program.Instructions; + inst = inst->Next) { + const struct rc_opcode_info * opcode; + struct rc_sub_instruction copy; + + if (inst->Type != RC_INSTRUCTION_NORMAL) + continue; + + opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL) + continue; + + copy = inst->U.I; + + check_opcode_support(c, ©); + + final_rewrite(©); + inst->Type = RC_INSTRUCTION_PAIR; + set_pair_instruction(c, &inst->U.P, ©); + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_program.c b/src/gallium/drivers/r300/compiler/radeon_program.c new file mode 100644 index 00000000000..fe5756ebc45 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program.c @@ -0,0 +1,225 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program.h" + +#include <stdio.h> + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" + + +/** + * Transform the given clause in the following way: + * 1. Replace it with an empty clause + * 2. For every instruction in the original clause, try the given + * transformations in order. + * 3. If one of the transformations returns GL_TRUE, assume that it + * has emitted the appropriate instruction(s) into the new clause; + * otherwise, copy the instruction verbatim. + * + * \note The transformation is currently not recursive; in other words, + * instructions emitted by transformations are not transformed. + * + * \note The transform is called 'local' because it can only look at + * one instruction at a time. + */ +void rc_local_transform( + struct radeon_compiler * c, + void *user) +{ + struct radeon_program_transformation *transformations = + (struct radeon_program_transformation*)user; + struct rc_instruction * inst = c->Program.Instructions.Next; + + while(inst != &c->Program.Instructions) { + struct rc_instruction * current = inst; + int i; + + inst = inst->Next; + + for(i = 0; transformations[i].function; ++i) { + struct radeon_program_transformation* t = transformations + i; + + if (t->function(c, current, t->userData)) + break; + } + } +} + +struct get_used_temporaries_data { + unsigned char * Used; + unsigned int UsedLength; +}; + +static void get_used_temporaries_cb( + void * userdata, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) +{ + struct get_used_temporaries_data * d = userdata; + + if (file != RC_FILE_TEMPORARY) + return; + + if (index >= d->UsedLength) + return; + + d->Used[index] |= mask; +} + +/** + * This function fills in the parameter 'used' with a writemask that + * represent which components of each temporary register are used by the + * program. This is meant to be combined with rc_find_free_temporary_list as a + * more efficient version of rc_find_free_temporary. + * @param used The function does not initialize this parameter. + */ +void rc_get_used_temporaries( + struct radeon_compiler * c, + unsigned char * used, + unsigned int used_length) +{ + struct rc_instruction * inst; + struct get_used_temporaries_data d; + d.Used = used; + d.UsedLength = used_length; + + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + + rc_for_all_reads_mask(inst, get_used_temporaries_cb, &d); + rc_for_all_writes_mask(inst, get_used_temporaries_cb, &d); + } +} + +/* Search a list of used temporaries for a free one + * \sa rc_get_used_temporaries + * @note If this functions finds a free temporary, it will mark it as used + * in the used temporary list (param 'used') + * @param used list of used temporaries + * @param used_length number of items in param 'used' + * @param mask which components must be free in the temporary index that is + * returned. + * @return -1 If there are no more free temporaries, otherwise the index of + * a temporary register where the components specified in param 'mask' are + * not being used. + */ +int rc_find_free_temporary_list( + struct radeon_compiler * c, + unsigned char * used, + unsigned int used_length, + unsigned int mask) +{ + int i; + for(i = 0; i < used_length; i++) { + if ((~used[i] & mask) == mask) { + used[i] |= mask; + return i; + } + } + return -1; +} + +unsigned int rc_find_free_temporary(struct radeon_compiler * c) +{ + unsigned char used[RC_REGISTER_MAX_INDEX]; + int free; + + memset(used, 0, sizeof(used)); + + rc_get_used_temporaries(c, used, RC_REGISTER_MAX_INDEX); + + free = rc_find_free_temporary_list(c, used, RC_REGISTER_MAX_INDEX, + RC_MASK_XYZW); + if (free < 0) { + rc_error(c, "Ran out of temporary registers\n"); + return 0; + } + return free; +} + + +struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c) +{ + struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(struct rc_instruction)); + + memset(inst, 0, sizeof(struct rc_instruction)); + + inst->U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZW; + inst->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZW; + + return inst; +} + +void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst) +{ + inst->Prev = after; + inst->Next = after->Next; + + inst->Prev->Next = inst; + inst->Next->Prev = inst; +} + +struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after) +{ + struct rc_instruction * inst = rc_alloc_instruction(c); + + rc_insert_instruction(after, inst); + + return inst; +} + +void rc_remove_instruction(struct rc_instruction * inst) +{ + inst->Prev->Next = inst->Next; + inst->Next->Prev = inst->Prev; +} + +/** + * Return the number of instructions in the program. + */ +unsigned int rc_recompute_ips(struct radeon_compiler * c) +{ + unsigned int ip = 0; + struct rc_instruction * inst; + + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + inst->IP = ip++; + } + + c->Program.Instructions.IP = 0xcafedead; + + return ip; +} diff --git a/src/gallium/drivers/r300/compiler/radeon_program.h b/src/gallium/drivers/r300/compiler/radeon_program.h new file mode 100644 index 00000000000..b899eccbf53 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program.h @@ -0,0 +1,206 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_H_ +#define __RADEON_PROGRAM_H_ + +#include <stdint.h> +#include <string.h> + +#include "radeon_opcodes.h" +#include "radeon_code.h" +#include "radeon_program_constants.h" +#include "radeon_program_pair.h" + +struct radeon_compiler; + +struct rc_src_register { + unsigned int File:4; + + /** Negative values may be used for relative addressing. */ + signed int Index:(RC_REGISTER_INDEX_BITS+1); + unsigned int RelAddr:1; + + unsigned int Swizzle:12; + + /** Take the component-wise absolute value */ + unsigned int Abs:1; + + /** Post-Abs negation. */ + unsigned int Negate:4; +}; + +struct rc_dst_register { + unsigned int File:3; + unsigned int Index:RC_REGISTER_INDEX_BITS; + unsigned int WriteMask:4; +}; + +struct rc_presub_instruction { + rc_presubtract_op Opcode; + struct rc_src_register SrcReg[2]; +}; + +/** + * Instructions are maintained by the compiler in a doubly linked list + * of these structures. + * + * This instruction format is intended to be expanded for hardware-specific + * trickery. At different stages of compilation, a different set of + * instruction types may be valid. + */ +struct rc_sub_instruction { + struct rc_src_register SrcReg[3]; + struct rc_dst_register DstReg; + + /** + * Opcode of this instruction, according to \ref rc_opcode enums. + */ + unsigned int Opcode:8; + + /** + * Saturate each value of the result to the range [0,1] or [-1,1], + * according to \ref rc_saturate_mode enums. + */ + unsigned int SaturateMode:2; + + /** + * Writing to the special register RC_SPECIAL_ALU_RESULT + */ + /*@{*/ + unsigned int WriteALUResult:2; + unsigned int ALUResultCompare:3; + /*@}*/ + + /** + * \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions. + */ + /*@{*/ + /** Source texture unit. */ + unsigned int TexSrcUnit:5; + + /** Source texture target, one of the \ref rc_texture_target enums */ + unsigned int TexSrcTarget:3; + + /** True if tex instruction should do shadow comparison */ + unsigned int TexShadow:1; + + /**R500 Only. How to swizzle the result of a TEX lookup*/ + unsigned int TexSwizzle:12; + /*@}*/ + + /** This holds information about the presubtract operation used by + * this instruction. */ + struct rc_presub_instruction PreSub; +}; + +typedef enum { + RC_INSTRUCTION_NORMAL = 0, + RC_INSTRUCTION_PAIR +} rc_instruction_type; + +struct rc_instruction { + struct rc_instruction * Prev; + struct rc_instruction * Next; + + rc_instruction_type Type; + union { + struct rc_sub_instruction I; + struct rc_pair_instruction P; + } U; + + /** + * Warning: IPs are not stable. If you want to use them, + * you need to recompute them at the beginning of each pass + * using \ref rc_recompute_ips + */ + unsigned int IP; +}; + +struct rc_program { + /** + * Instructions.Next points to the first instruction, + * Instructions.Prev points to the last instruction. + */ + struct rc_instruction Instructions; + + /* Long term, we should probably remove InputsRead & OutputsWritten, + * since updating dependent state can be fragile, and they aren't + * actually used very often. */ + uint32_t InputsRead; + uint32_t OutputsWritten; + uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */ + + struct rc_constant_list Constants; +}; + +/** + * A transformation that can be passed to \ref rc_local_transform. + * + * The function will be called once for each instruction. + * It has to either emit the appropriate transformed code for the instruction + * and return true, or return false if it doesn't understand the + * instruction. + * + * The function gets passed the userData as last parameter. + */ +struct radeon_program_transformation { + int (*function)( + struct radeon_compiler*, + struct rc_instruction*, + void*); + void *userData; +}; + +void rc_local_transform( + struct radeon_compiler *c, + void *user); + +void rc_get_used_temporaries( + struct radeon_compiler * c, + unsigned char * used, + unsigned int used_length); + +int rc_find_free_temporary_list( + struct radeon_compiler * c, + unsigned char * used, + unsigned int used_length, + unsigned int mask); + +unsigned int rc_find_free_temporary(struct radeon_compiler * c); + +struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c); +struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after); +void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst); +void rc_remove_instruction(struct rc_instruction * inst); + +unsigned int rc_recompute_ips(struct radeon_compiler * c); + +void rc_print_program(const struct rc_program *prog); + +rc_swizzle rc_mask_to_swizzle(unsigned int mask); +#endif diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.c b/src/gallium/drivers/r300/compiler/radeon_program_alu.c new file mode 100644 index 00000000000..9fc991166a3 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.c @@ -0,0 +1,1154 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * + * Shareable transformations that transform "special" ALU instructions + * into ALU instructions that are supported by hardware. + * + */ + +#include "radeon_program_alu.h" + +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" + + +static struct rc_instruction *emit1( + struct radeon_compiler * c, struct rc_instruction * after, + rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, + struct rc_src_register SrcReg) +{ + struct rc_instruction *fpi = rc_insert_new_instruction(c, after); + + fpi->U.I.Opcode = Opcode; + fpi->U.I.SaturateMode = Saturate; + fpi->U.I.DstReg = DstReg; + fpi->U.I.SrcReg[0] = SrcReg; + return fpi; +} + +static struct rc_instruction *emit2( + struct radeon_compiler * c, struct rc_instruction * after, + rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, + struct rc_src_register SrcReg0, struct rc_src_register SrcReg1) +{ + struct rc_instruction *fpi = rc_insert_new_instruction(c, after); + + fpi->U.I.Opcode = Opcode; + fpi->U.I.SaturateMode = Saturate; + fpi->U.I.DstReg = DstReg; + fpi->U.I.SrcReg[0] = SrcReg0; + fpi->U.I.SrcReg[1] = SrcReg1; + return fpi; +} + +static struct rc_instruction *emit3( + struct radeon_compiler * c, struct rc_instruction * after, + rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, + struct rc_src_register SrcReg0, struct rc_src_register SrcReg1, + struct rc_src_register SrcReg2) +{ + struct rc_instruction *fpi = rc_insert_new_instruction(c, after); + + fpi->U.I.Opcode = Opcode; + fpi->U.I.SaturateMode = Saturate; + fpi->U.I.DstReg = DstReg; + fpi->U.I.SrcReg[0] = SrcReg0; + fpi->U.I.SrcReg[1] = SrcReg1; + fpi->U.I.SrcReg[2] = SrcReg2; + return fpi; +} + +static struct rc_dst_register dstregtmpmask(int index, int mask) +{ + struct rc_dst_register dst = {0}; + dst.File = RC_FILE_TEMPORARY; + dst.Index = index; + dst.WriteMask = mask; + return dst; +} + +static const struct rc_src_register builtin_zero = { + .File = RC_FILE_NONE, + .Index = 0, + .Swizzle = RC_SWIZZLE_0000 +}; +static const struct rc_src_register builtin_one = { + .File = RC_FILE_NONE, + .Index = 0, + .Swizzle = RC_SWIZZLE_1111 +}; +static const struct rc_src_register srcreg_undefined = { + .File = RC_FILE_NONE, + .Index = 0, + .Swizzle = RC_SWIZZLE_XYZW +}; + +static struct rc_src_register srcreg(int file, int index) +{ + struct rc_src_register src = srcreg_undefined; + src.File = file; + src.Index = index; + return src; +} + +static struct rc_src_register srcregswz(int file, int index, int swz) +{ + struct rc_src_register src = srcreg_undefined; + src.File = file; + src.Index = index; + src.Swizzle = swz; + return src; +} + +static struct rc_src_register absolute(struct rc_src_register reg) +{ + struct rc_src_register newreg = reg; + newreg.Abs = 1; + newreg.Negate = RC_MASK_NONE; + return newreg; +} + +static struct rc_src_register negate(struct rc_src_register reg) +{ + struct rc_src_register newreg = reg; + newreg.Negate = newreg.Negate ^ RC_MASK_XYZW; + return newreg; +} + +static struct rc_src_register swizzle(struct rc_src_register reg, + rc_swizzle x, rc_swizzle y, rc_swizzle z, rc_swizzle w) +{ + struct rc_src_register swizzled = reg; + swizzled.Swizzle = combine_swizzles4(reg.Swizzle, x, y, z, w); + return swizzled; +} + +static struct rc_src_register swizzle_smear(struct rc_src_register reg, + rc_swizzle x) +{ + return swizzle(reg, x, x, x, x); +} + +static struct rc_src_register swizzle_xxxx(struct rc_src_register reg) +{ + return swizzle_smear(reg, RC_SWIZZLE_X); +} + +static struct rc_src_register swizzle_yyyy(struct rc_src_register reg) +{ + return swizzle_smear(reg, RC_SWIZZLE_Y); +} + +static struct rc_src_register swizzle_zzzz(struct rc_src_register reg) +{ + return swizzle_smear(reg, RC_SWIZZLE_Z); +} + +static struct rc_src_register swizzle_wwww(struct rc_src_register reg) +{ + return swizzle_smear(reg, RC_SWIZZLE_W); +} + +static int is_dst_safe_to_reuse(struct rc_instruction *inst) +{ + const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + assert(info->HasDstReg); + + if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY) + return 0; + + for (i = 0; i < info->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[i].Index == inst->U.I.DstReg.Index) + return 0; + } + + return 1; +} + +static struct rc_dst_register try_to_reuse_dst(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + unsigned tmp; + + if (is_dst_safe_to_reuse(inst)) + tmp = inst->U.I.DstReg.Index; + else + tmp = rc_find_free_temporary(c); + + return dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask); +} + +static void transform_ABS(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_src_register src = inst->U.I.SrcReg[0]; + src.Abs = 1; + src.Negate = RC_MASK_NONE; + emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, src); + rc_remove_instruction(inst); +} + +static void transform_CEIL(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* Assuming: + * ceil(x) = -floor(-x) + * + * After inlining floor: + * ceil(x) = -(-x-frac(-x)) + * + * After simplification: + * ceil(x) = x+frac(-x) + */ + + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, negate(inst->U.I.SrcReg[0])); + emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, + inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index)); + rc_remove_instruction(inst); +} + +static void transform_CLAMP(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + /* CLAMP dst, src, min, max + * into: + * MIN tmp, src, max + * MAX dst, tmp, min + */ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dst, + inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]); + emit2(c, inst->Prev, RC_OPCODE_MAX, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1]); + rc_remove_instruction(inst); +} + +static void transform_DP2(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_src_register src0 = inst->U.I.SrcReg[0]; + struct rc_src_register src1 = inst->U.I.SrcReg[1]; + src0.Negate &= ~(RC_MASK_Z | RC_MASK_W); + src0.Swizzle &= ~(63 << (3 * 2)); + src0.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3)); + src1.Negate &= ~(RC_MASK_Z | RC_MASK_W); + src1.Swizzle &= ~(63 << (3 * 2)); + src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3)); + emit2(c, inst->Prev, RC_OPCODE_DP3, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1); + rc_remove_instruction(inst); +} + +static void transform_DPH(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_src_register src0 = inst->U.I.SrcReg[0]; + src0.Negate &= ~RC_MASK_W; + src0.Swizzle &= ~(7 << (3 * 3)); + src0.Swizzle |= RC_SWIZZLE_ONE << (3 * 3); + emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]); + rc_remove_instruction(inst); +} + +/** + * [1, src0.y*src1.y, src0.z, src1.w] + * So basically MUL with lotsa swizzling. + */ +static void transform_DST(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + emit2(c, inst->Prev, RC_OPCODE_MUL, inst->U.I.SaturateMode, inst->U.I.DstReg, + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE), + swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W)); + rc_remove_instruction(inst); +} + +static void transform_FLR(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, inst->U.I.SrcReg[0]); + emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, + inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); + rc_remove_instruction(inst); +} + +/** + * Definition of LIT (from ARB_fragment_program): + * + * tmp = VectorLoad(op0); + * if (tmp.x < 0) tmp.x = 0; + * if (tmp.y < 0) tmp.y = 0; + * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); + * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; + * result.x = 1.0; + * result.y = tmp.x; + * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; + * result.w = 1.0; + * + * The longest path of computation is the one leading to result.z, + * consisting of 5 operations. This implementation of LIT takes + * 5 slots, if the subsequent optimization passes are clever enough + * to pair instructions correctly. + */ +static void transform_LIT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + unsigned int constant; + unsigned int constant_swizzle; + unsigned int temp; + struct rc_src_register srctemp; + + constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle); + + if (inst->U.I.DstReg.WriteMask != RC_MASK_XYZW || inst->U.I.DstReg.File != RC_FILE_TEMPORARY) { + struct rc_instruction * inst_mov; + + inst_mov = emit1(c, inst, + RC_OPCODE_MOV, 0, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, rc_find_free_temporary(c))); + + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + } + + temp = inst->U.I.DstReg.Index; + srctemp = srcreg(RC_FILE_TEMPORARY, temp); + + /* tmp.x = max(0.0, Src.x); */ + /* tmp.y = max(0.0, Src.y); */ + /* tmp.w = clamp(Src.z, -128+eps, 128-eps); */ + emit2(c, inst->Prev, RC_OPCODE_MAX, 0, + dstregtmpmask(temp, RC_MASK_XYW), + inst->U.I.SrcReg[0], + swizzle(srcreg(RC_FILE_CONSTANT, constant), + RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, constant_swizzle&3)); + emit2(c, inst->Prev, RC_OPCODE_MIN, 0, + dstregtmpmask(temp, RC_MASK_Z), + swizzle_wwww(srctemp), + negate(srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle))); + + /* tmp.w = Pow(tmp.y, tmp.w) */ + emit1(c, inst->Prev, RC_OPCODE_LG2, 0, + dstregtmpmask(temp, RC_MASK_W), + swizzle_yyyy(srctemp)); + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, + dstregtmpmask(temp, RC_MASK_W), + swizzle_wwww(srctemp), + swizzle_zzzz(srctemp)); + emit1(c, inst->Prev, RC_OPCODE_EX2, 0, + dstregtmpmask(temp, RC_MASK_W), + swizzle_wwww(srctemp)); + + /* tmp.z = (tmp.x > 0) ? tmp.w : 0.0 */ + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, + dstregtmpmask(temp, RC_MASK_Z), + negate(swizzle_xxxx(srctemp)), + swizzle_wwww(srctemp), + builtin_zero); + + /* tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 */ + emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, + dstregtmpmask(temp, RC_MASK_XYW), + swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE)); + + rc_remove_instruction(inst); +} + +static void transform_LRP(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, + dst, + inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2])); + emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, + inst->U.I.DstReg, + inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[2]); + + rc_remove_instruction(inst); +} + +static void transform_POW(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register tempdst = try_to_reuse_dst(c, inst); + struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempdst.Index); + tempdst.WriteMask = RC_MASK_W; + tempsrc.Swizzle = RC_SWIZZLE_WWWW; + + emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, swizzle_xxxx(inst->U.I.SrcReg[0])); + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, swizzle_xxxx(inst->U.I.SrcReg[1])); + emit1(c, inst->Prev, RC_OPCODE_EX2, inst->U.I.SaturateMode, inst->U.I.DstReg, tempsrc); + + rc_remove_instruction(inst); +} + +static void transform_RSQ(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + inst->U.I.SrcReg[0] = absolute(inst->U.I.SrcReg[0]); +} + +static void transform_SEQ(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_zero, builtin_one); + + rc_remove_instruction(inst); +} + +static void transform_SFL(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, builtin_zero); + rc_remove_instruction(inst); +} + +static void transform_SGE(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); + + rc_remove_instruction(inst); +} + +static void transform_SGT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero); + + rc_remove_instruction(inst); +} + +static void transform_SLE(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); + + rc_remove_instruction(inst); +} + +static void transform_SLT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero); + + rc_remove_instruction(inst); +} + +static void transform_SNE(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_one, builtin_zero); + + rc_remove_instruction(inst); +} + +static void transform_SSG(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* result = sign(x) + * + * CMP tmp0, -x, 1, 0 + * CMP tmp1, x, 1, 0 + * ADD result, tmp0, -tmp1; + */ + struct rc_dst_register dst0; + unsigned tmp1; + + /* 0 < x */ + dst0 = try_to_reuse_dst(c, inst); + emit3(c, inst->Prev, RC_OPCODE_CMP, 0, + dst0, + negate(inst->U.I.SrcReg[0]), + builtin_one, + builtin_zero); + + /* x < 0 */ + tmp1 = rc_find_free_temporary(c); + emit3(c, inst->Prev, RC_OPCODE_CMP, 0, + dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask), + inst->U.I.SrcReg[0], + builtin_one, + builtin_zero); + + /* Either both are zero, or one of them is one and the other is zero. */ + /* result = tmp0 - tmp1 */ + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst0.Index), + negate(srcreg(RC_FILE_TEMPORARY, tmp1))); + + rc_remove_instruction(inst); +} + +static void transform_SUB(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + inst->U.I.Opcode = RC_OPCODE_ADD; + inst->U.I.SrcReg[1] = negate(inst->U.I.SrcReg[1]); +} + +static void transform_SWZ(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + inst->U.I.Opcode = RC_OPCODE_MOV; +} + +static void transform_XPD(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dst, + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), + swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W)); + emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg, + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W), + swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), + negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); + + rc_remove_instruction(inst); +} + + +/** + * Can be used as a transformation for @ref radeonClauseLocalTransform, + * no userData necessary. + * + * Eliminates the following ALU instructions: + * ABS, CEIL, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD + * using: + * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP + * + * Transforms RSQ to Radeon's native RSQ by explicitly setting + * absolute value. + * + * @note should be applicable to R300 and R500 fragment programs. + */ +int radeonTransformALU( + struct radeon_compiler * c, + struct rc_instruction* inst, + void* unused) +{ + switch(inst->U.I.Opcode) { + case RC_OPCODE_ABS: transform_ABS(c, inst); return 1; + case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; + case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; + case RC_OPCODE_DP2: transform_DP2(c, inst); return 1; + case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; + case RC_OPCODE_DST: transform_DST(c, inst); return 1; + case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; + case RC_OPCODE_LIT: transform_LIT(c, inst); return 1; + case RC_OPCODE_LRP: transform_LRP(c, inst); return 1; + case RC_OPCODE_POW: transform_POW(c, inst); return 1; + case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1; + case RC_OPCODE_SEQ: transform_SEQ(c, inst); return 1; + case RC_OPCODE_SFL: transform_SFL(c, inst); return 1; + case RC_OPCODE_SGE: transform_SGE(c, inst); return 1; + case RC_OPCODE_SGT: transform_SGT(c, inst); return 1; + case RC_OPCODE_SLE: transform_SLE(c, inst); return 1; + case RC_OPCODE_SLT: transform_SLT(c, inst); return 1; + case RC_OPCODE_SNE: transform_SNE(c, inst); return 1; + case RC_OPCODE_SSG: transform_SSG(c, inst); return 1; + case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; + case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; + case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; + default: + return 0; + } +} + + +static void transform_r300_vertex_ABS(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* Note: r500 can take absolute values, but r300 cannot. */ + inst->U.I.Opcode = RC_OPCODE_MAX; + inst->U.I.SrcReg[1] = inst->U.I.SrcReg[0]; + inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; +} + +static void transform_r300_vertex_CMP(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* There is no decent CMP available, so let's rig one up. + * CMP is defined as dst = src0 < 0.0 ? src1 : src2 + * The following sequence consumes zero to two temps and two extra slots + * (the second temp and the second slot is consumed by transform_LRP), + * but should be equivalent: + * + * SLT tmp0, src0, 0.0 + * LRP dst, tmp0, src1, src2 + * + * Yes, I know, I'm a mad scientist. ~ C. & M. */ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + /* SLT tmp0, src0, 0.0 */ + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + dst, + inst->U.I.SrcReg[0], builtin_zero); + + /* LRP dst, tmp0, src1, src2 */ + transform_LRP(c, + emit3(c, inst->Prev, RC_OPCODE_LRP, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])); + + rc_remove_instruction(inst); +} + +static void transform_r300_vertex_DP2(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_instruction *next_inst = inst->Next; + transform_DP2(c, inst); + next_inst->Prev->U.I.Opcode = RC_OPCODE_DP4; +} + +static void transform_r300_vertex_DP3(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_src_register src0 = inst->U.I.SrcReg[0]; + struct rc_src_register src1 = inst->U.I.SrcReg[1]; + src0.Negate &= ~RC_MASK_W; + src0.Swizzle &= ~(7 << (3 * 3)); + src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); + src1.Negate &= ~RC_MASK_W; + src1.Swizzle &= ~(7 << (3 * 3)); + src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); + emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1); + rc_remove_instruction(inst); +} + +static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + unsigned constant_swizzle; + int constant = rc_constants_add_immediate_scalar(&c->Program.Constants, + 0.0000000000000000001, + &constant_swizzle); + + /* MOV dst, src */ + dst.WriteMask = RC_MASK_XYZW; + emit1(c, inst->Prev, RC_OPCODE_MOV, 0, + dst, + inst->U.I.SrcReg[0]); + + /* MAX dst.y, src, 0.00...001 */ + emit2(c, inst->Prev, RC_OPCODE_MAX, 0, + dstregtmpmask(dst.Index, RC_MASK_Y), + srcreg(RC_FILE_TEMPORARY, dst.Index), + srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); + + inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, dst.Index); +} + +static void transform_r300_vertex_SEQ(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + /* x = y <==> x >= y && y >= x */ + int tmp = rc_find_free_temporary(c); + + /* x <= y */ + emit2(c, inst->Prev, RC_OPCODE_SGE, 0, + dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask), + inst->U.I.SrcReg[0], + inst->U.I.SrcReg[1]); + + /* y <= x */ + emit2(c, inst->Prev, RC_OPCODE_SGE, 0, + inst->U.I.DstReg, + inst->U.I.SrcReg[1], + inst->U.I.SrcReg[0]); + + /* x && y = x * y */ + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tmp), + srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index)); + + rc_remove_instruction(inst); +} + +static void transform_r300_vertex_SNE(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + /* x != y <==> x < y || y < x */ + int tmp = rc_find_free_temporary(c); + + /* x < y */ + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask), + inst->U.I.SrcReg[0], + inst->U.I.SrcReg[1]); + + /* y < x */ + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + inst->U.I.DstReg, + inst->U.I.SrcReg[1], + inst->U.I.SrcReg[0]); + + /* x || y = max(x, y) */ + emit2(c, inst->Prev, RC_OPCODE_MAX, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tmp), + srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index)); + + rc_remove_instruction(inst); +} + +static void transform_r300_vertex_SGT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* x > y <==> -x < -y */ + inst->U.I.Opcode = RC_OPCODE_SLT; + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; +} + +static void transform_r300_vertex_SLE(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* x <= y <==> -x >= -y */ + inst->U.I.Opcode = RC_OPCODE_SGE; + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; +} + +static void transform_r300_vertex_SSG(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* result = sign(x) + * + * SLT tmp0, 0, x; + * SLT tmp1, x, 0; + * ADD result, tmp0, -tmp1; + */ + struct rc_dst_register dst0 = try_to_reuse_dst(c, inst); + unsigned tmp1; + + /* 0 < x */ + dst0 = try_to_reuse_dst(c, inst); + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + dst0, + builtin_zero, + inst->U.I.SrcReg[0]); + + /* x < 0 */ + tmp1 = rc_find_free_temporary(c); + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask), + inst->U.I.SrcReg[0], + builtin_zero); + + /* Either both are zero, or one of them is one and the other is zero. */ + /* result = tmp0 - tmp1 */ + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst0.Index), + negate(srcreg(RC_FILE_TEMPORARY, tmp1))); + + rc_remove_instruction(inst); +} + +/** + * For use with rc_local_transform, this transforms non-native ALU + * instructions of the r300 up to r500 vertex engine. + */ +int r300_transform_vertex_alu( + struct radeon_compiler * c, + struct rc_instruction* inst, + void* unused) +{ + switch(inst->U.I.Opcode) { + case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1; + case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; + case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; + case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1; + case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1; + case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1; + case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; + case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; + case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1; + case RC_OPCODE_LRP: transform_LRP(c, inst); return 1; + case RC_OPCODE_SEQ: + if (!c->is_r500) { + transform_r300_vertex_SEQ(c, inst); + return 1; + } + return 0; + case RC_OPCODE_SFL: transform_SFL(c, inst); return 1; + case RC_OPCODE_SGT: transform_r300_vertex_SGT(c, inst); return 1; + case RC_OPCODE_SLE: transform_r300_vertex_SLE(c, inst); return 1; + case RC_OPCODE_SNE: + if (!c->is_r500) { + transform_r300_vertex_SNE(c, inst); + return 1; + } + return 0; + case RC_OPCODE_SSG: transform_r300_vertex_SSG(c, inst); return 1; + case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; + case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; + case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; + default: + return 0; + } +} + +static void sincos_constants(struct radeon_compiler* c, unsigned int *constants) +{ + static const float SinCosConsts[2][4] = { + { + 1.273239545, /* 4/PI */ + -0.405284735, /* -4/(PI*PI) */ + 3.141592654, /* PI */ + 0.2225 /* weight */ + }, + { + 0.75, + 0.5, + 0.159154943, /* 1/(2*PI) */ + 6.283185307 /* 2*PI */ + } + }; + int i; + + for(i = 0; i < 2; ++i) + constants[i] = rc_constants_add_immediate_vec4(&c->Program.Constants, SinCosConsts[i]); +} + +/** + * Approximate sin(x), where x is clamped to (-pi/2, pi/2). + * + * MUL tmp.xy, src, { 4/PI, -4/(PI^2) } + * MAD tmp.x, tmp.y, |src|, tmp.x + * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x + * MAD dest, tmp.y, weight, tmp.x + */ +static void sin_approx( + struct radeon_compiler* c, struct rc_instruction * inst, + struct rc_dst_register dst, struct rc_src_register src, const unsigned int* constants) +{ + unsigned int tempreg = rc_find_free_temporary(c); + + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(tempreg, RC_MASK_XY), + swizzle_xxxx(src), + srcreg(RC_FILE_CONSTANT, constants[0])); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_X), + swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), + absolute(swizzle_xxxx(src)), + swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_Y), + swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)), + absolute(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))), + negate(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)))); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dst, + swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), + swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[0])), + swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))); +} + +/** + * Translate the trigonometric functions COS, SIN, and SCS + * using only the basic instructions + * MOV, ADD, MUL, MAD, FRC + */ +int r300_transform_trig_simple(struct radeon_compiler* c, + struct rc_instruction* inst, + void* unused) +{ + unsigned int constants[2]; + unsigned int tempreg; + + if (inst->U.I.Opcode != RC_OPCODE_COS && + inst->U.I.Opcode != RC_OPCODE_SIN && + inst->U.I.Opcode != RC_OPCODE_SCS) + return 0; + + tempreg = rc_find_free_temporary(c); + + sincos_constants(c, constants); + + if (inst->U.I.Opcode == RC_OPCODE_COS) { + /* MAD tmp.x, src, 1/(2*PI), 0.75 */ + /* FRC tmp.x, tmp.x */ + /* MAD tmp.z, tmp.x, 2*PI, -PI */ + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_xxxx(inst->U.I.SrcReg[0]), + swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), + swizzle_xxxx(srcreg(RC_FILE_CONSTANT, constants[1]))); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg))); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), + swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), + negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); + + sin_approx(c, inst, inst->U.I.DstReg, + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), + constants); + } else if (inst->U.I.Opcode == RC_OPCODE_SIN) { + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_xxxx(inst->U.I.SrcReg[0]), + swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), + swizzle_yyyy(srcreg(RC_FILE_CONSTANT, constants[1]))); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg))); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), + swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), + negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); + + sin_approx(c, inst, inst->U.I.DstReg, + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), + constants); + } else { + struct rc_dst_register dst; + + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY), + swizzle_xxxx(inst->U.I.SrcReg[0]), + swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_XY), + srcreg(RC_FILE_TEMPORARY, tempreg)); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY), + srcreg(RC_FILE_TEMPORARY, tempreg), + swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), + negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); + + dst = inst->U.I.DstReg; + + dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_X; + sin_approx(c, inst, dst, + swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)), + constants); + + dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_Y; + sin_approx(c, inst, dst, + swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), + constants); + } + + rc_remove_instruction(inst); + + return 1; +} + +static void r300_transform_SIN_COS_SCS(struct radeon_compiler *c, + struct rc_instruction *inst, + unsigned srctmp) +{ + if (inst->U.I.Opcode == RC_OPCODE_COS) { + emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); + } else if (inst->U.I.Opcode == RC_OPCODE_SIN) { + emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, + inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); + } else if (inst->U.I.Opcode == RC_OPCODE_SCS) { + struct rc_dst_register moddst = inst->U.I.DstReg; + + if (inst->U.I.DstReg.WriteMask & RC_MASK_X) { + moddst.WriteMask = RC_MASK_X; + emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst, + srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); + } + if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) { + moddst.WriteMask = RC_MASK_Y; + emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst, + srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); + } + } + + rc_remove_instruction(inst); +} + + +/** + * Transform the trigonometric functions COS, SIN, and SCS + * to include pre-scaling by 1/(2*PI) and taking the fractional + * part, so that the input to COS and SIN is always in the range [0,1). + * SCS is replaced by one COS and one SIN instruction. + * + * @warning This transformation implicitly changes the semantics of SIN and COS! + */ +int radeonTransformTrigScale(struct radeon_compiler* c, + struct rc_instruction* inst, + void* unused) +{ + static const float RCP_2PI = 0.15915494309189535; + unsigned int temp; + unsigned int constant; + unsigned int constant_swizzle; + + if (inst->U.I.Opcode != RC_OPCODE_COS && + inst->U.I.Opcode != RC_OPCODE_SIN && + inst->U.I.Opcode != RC_OPCODE_SCS) + return 0; + + temp = rc_find_free_temporary(c); + constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle); + + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(temp, RC_MASK_W), + swizzle_xxxx(inst->U.I.SrcReg[0]), + srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W), + srcreg(RC_FILE_TEMPORARY, temp)); + + r300_transform_SIN_COS_SCS(c, inst, temp); + return 1; +} + +/** + * Transform the trigonometric functions COS, SIN, and SCS + * so that the input to COS and SIN is always in the range [-PI, PI]. + * SCS is replaced by one COS and one SIN instruction. + */ +int r300_transform_trig_scale_vertex(struct radeon_compiler *c, + struct rc_instruction *inst, + void *unused) +{ + static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979}; + unsigned int temp; + unsigned int constant; + + if (inst->U.I.Opcode != RC_OPCODE_COS && + inst->U.I.Opcode != RC_OPCODE_SIN && + inst->U.I.Opcode != RC_OPCODE_SCS) + return 0; + + /* Repeat x in the range [-PI, PI]: + * + * repeat(x) = frac(x / 2PI + 0.5) * 2PI - PI + */ + + temp = rc_find_free_temporary(c); + constant = rc_constants_add_immediate_vec4(&c->Program.Constants, cons); + + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W), + swizzle_xxxx(inst->U.I.SrcReg[0]), + srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_XXXX), + srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_YYYY)); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W), + srcreg(RC_FILE_TEMPORARY, temp)); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W), + srcreg(RC_FILE_TEMPORARY, temp), + srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_ZZZZ), + srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_WWWW)); + + r300_transform_SIN_COS_SCS(c, inst, temp); + return 1; +} + +/** + * Rewrite DDX/DDY instructions to properly work with r5xx shaders. + * The r5xx MDH/MDV instruction provides per-quad partial derivatives. + * It takes the form A*B+C. A and C are set by setting src0. B should be -1. + * + * @warning This explicitly changes the form of DDX and DDY! + */ + +int radeonTransformDeriv(struct radeon_compiler* c, + struct rc_instruction* inst, + void* unused) +{ + if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY) + return 0; + + inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_1111; + inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW; + + return 1; +} + +/** + * IF Temp[0].x -\ + * KILP - > KIL -abs(Temp[0].x) + * ENDIF -/ + * + * This needs to be done in its own pass, because it modifies the instructions + * before and after KILP. + */ +void rc_transform_KILP(struct radeon_compiler * c, void *user) +{ + struct rc_instruction * inst; + for (inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + + if (inst->U.I.Opcode != RC_OPCODE_KILP) + continue; + + inst->U.I.Opcode = RC_OPCODE_KIL; + + if (inst->Prev->U.I.Opcode != RC_OPCODE_IF + || inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) { + inst->U.I.SrcReg[0] = negate(builtin_one); + } else { + + inst->U.I.SrcReg[0] = + negate(absolute(inst->Prev->U.I.SrcReg[0])); + /* Remove IF */ + rc_remove_instruction(inst->Prev); + /* Remove ENDIF */ + rc_remove_instruction(inst->Next); + } + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.h b/src/gallium/drivers/r300/compiler/radeon_program_alu.h new file mode 100644 index 00000000000..b5f361e624f --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.h @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_ALU_H_ +#define __RADEON_PROGRAM_ALU_H_ + +#include "radeon_program.h" + +int radeonTransformALU( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +int r300_transform_vertex_alu( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +int r300_transform_trig_simple( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +int radeonTransformTrigScale( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +int r300_transform_trig_scale_vertex( + struct radeon_compiler *c, + struct rc_instruction *inst, + void*); + +int radeonTransformDeriv( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +void rc_transform_KILP(struct radeon_compiler * c, + void *user); + +#endif /* __RADEON_PROGRAM_ALU_H_ */ diff --git a/src/gallium/drivers/r300/compiler/radeon_program_constants.h b/src/gallium/drivers/r300/compiler/radeon_program_constants.h new file mode 100644 index 00000000000..24577333450 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program_constants.h @@ -0,0 +1,190 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_PROGRAM_CONSTANTS_H +#define RADEON_PROGRAM_CONSTANTS_H + +typedef enum { + RC_SATURATE_NONE = 0, + RC_SATURATE_ZERO_ONE, + RC_SATURATE_MINUS_PLUS_ONE +} rc_saturate_mode; + +typedef enum { + RC_TEXTURE_2D_ARRAY, + RC_TEXTURE_1D_ARRAY, + RC_TEXTURE_CUBE, + RC_TEXTURE_3D, + RC_TEXTURE_RECT, + RC_TEXTURE_2D, + RC_TEXTURE_1D +} rc_texture_target; + +typedef enum { + /** + * Used to indicate unused register descriptions and + * source register that use a constant swizzle. + */ + RC_FILE_NONE = 0, + RC_FILE_TEMPORARY, + + /** + * Input register. + * + * \note The compiler attaches no implicit semantics to input registers. + * Fragment/vertex program specific semantics must be defined explicitly + * using the appropriate compiler interfaces. + */ + RC_FILE_INPUT, + + /** + * Output register. + * + * \note The compiler attaches no implicit semantics to input registers. + * Fragment/vertex program specific semantics must be defined explicitly + * using the appropriate compiler interfaces. + */ + RC_FILE_OUTPUT, + RC_FILE_ADDRESS, + + /** + * Indicates a constant from the \ref rc_constant_list . + */ + RC_FILE_CONSTANT, + + /** + * Indicates a special register, see RC_SPECIAL_xxx. + */ + RC_FILE_SPECIAL, + + /** + * Indicates this register should use the result of the presubtract + * operation. + */ + RC_FILE_PRESUB +} rc_register_file; + +enum { + /** R500 fragment program ALU result "register" */ + RC_SPECIAL_ALU_RESULT = 0, + + /** Must be last */ + RC_NUM_SPECIAL_REGISTERS +}; + +#define RC_REGISTER_INDEX_BITS 10 +#define RC_REGISTER_MAX_INDEX (1 << RC_REGISTER_INDEX_BITS) + +typedef enum { + RC_SWIZZLE_X = 0, + RC_SWIZZLE_Y, + RC_SWIZZLE_Z, + RC_SWIZZLE_W, + RC_SWIZZLE_ZERO, + RC_SWIZZLE_ONE, + RC_SWIZZLE_HALF, + RC_SWIZZLE_UNUSED +} rc_swizzle; + +#define RC_MAKE_SWIZZLE(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9)) +#define RC_MAKE_SWIZZLE_SMEAR(a) RC_MAKE_SWIZZLE((a),(a),(a),(a)) +#define GET_SWZ(swz, idx) (((swz) >> ((idx)*3)) & 0x7) +#define GET_BIT(msk, idx) (((msk) >> (idx)) & 0x1) +#define SET_SWZ(swz, idx, newv) \ + do { \ + (swz) = ((swz) & ~(7 << ((idx)*3))) | ((newv) << ((idx)*3)); \ + } while(0) + +#define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W) +#define RC_SWIZZLE_XYZ0 RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO) +#define RC_SWIZZLE_XYZZ RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z) +#define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X) +#define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y) +#define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z) +#define RC_SWIZZLE_WWWW RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_W) +#define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO) +#define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE) +#define RC_SWIZZLE_HHHH RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_HALF) +#define RC_SWIZZLE_UUUU RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_UNUSED) + +/** + * \name Bitmasks for components of vectors. + * + * Used for write masks, negation masks, etc. + */ +/*@{*/ +#define RC_MASK_NONE 0 +#define RC_MASK_X 1 +#define RC_MASK_Y 2 +#define RC_MASK_Z 4 +#define RC_MASK_W 8 +#define RC_MASK_XY (RC_MASK_X|RC_MASK_Y) +#define RC_MASK_XYZ (RC_MASK_X|RC_MASK_Y|RC_MASK_Z) +#define RC_MASK_XYW (RC_MASK_X|RC_MASK_Y|RC_MASK_W) +#define RC_MASK_XYZW (RC_MASK_X|RC_MASK_Y|RC_MASK_Z|RC_MASK_W) +/*@}*/ + +typedef enum { + RC_ALURESULT_NONE = 0, + RC_ALURESULT_X, + RC_ALURESULT_W +} rc_write_aluresult; + +typedef enum { + RC_PRESUB_NONE = 0, + + /** 1 - 2 * src0 */ + RC_PRESUB_BIAS, + + /** src1 - src0 */ + RC_PRESUB_SUB, + + /** src1 + src0 */ + RC_PRESUB_ADD, + + /** 1 - src0 */ + RC_PRESUB_INV +} rc_presubtract_op; + +static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){ + switch(op){ + case RC_PRESUB_BIAS: + case RC_PRESUB_INV: + return 1; + case RC_PRESUB_ADD: + case RC_PRESUB_SUB: + return 2; + default: + return 0; + } +} + +#define RC_SOURCE_NONE 0x0 +#define RC_SOURCE_RGB 0x1 +#define RC_SOURCE_ALPHA 0x2 + +#endif /* RADEON_PROGRAM_CONSTANTS_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_program_pair.c b/src/gallium/drivers/r300/compiler/radeon_program_pair.c new file mode 100644 index 00000000000..52315957520 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program_pair.c @@ -0,0 +1,239 @@ +/* + * Copyright (C) 2008-2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_pair.h" + +#include "radeon_compiler_util.h" + +#include <stdlib.h> + +/** + * Return the source slot where we installed the given register access, + * or -1 if no slot was free anymore. + */ +int rc_pair_alloc_source(struct rc_pair_instruction *pair, + unsigned int rgb, unsigned int alpha, + rc_register_file file, unsigned int index) +{ + int candidate = -1; + int candidate_quality = -1; + unsigned int alpha_used = 0; + unsigned int rgb_used = 0; + int i; + + if ((!rgb && !alpha) || file == RC_FILE_NONE) + return 0; + + /* Make sure only one presubtract operation is used per instruction. */ + if (file == RC_FILE_PRESUB) { + if (rgb && pair->RGB.Src[RC_PAIR_PRESUB_SRC].Used + && index != pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { + return -1; + } + + if (alpha && pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Used + && index != pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { + return -1; + } + } + + for(i = 0; i < 3; ++i) { + int q = 0; + if (rgb) { + if (pair->RGB.Src[i].Used) { + if (pair->RGB.Src[i].File != file || + pair->RGB.Src[i].Index != index) { + rgb_used++; + continue; + } + q++; + } + } + if (alpha) { + if (pair->Alpha.Src[i].Used) { + if (pair->Alpha.Src[i].File != file || + pair->Alpha.Src[i].Index != index) { + alpha_used++; + continue; + } + q++; + } + } + if (q > candidate_quality) { + candidate_quality = q; + candidate = i; + } + } + + if (file == RC_FILE_PRESUB) { + candidate = RC_PAIR_PRESUB_SRC; + } else if (candidate < 0 || (rgb && rgb_used > 2) + || (alpha && alpha_used > 2)) { + return -1; + } + + /* candidate >= 0 */ + + if (rgb) { + pair->RGB.Src[candidate].Used = 1; + pair->RGB.Src[candidate].File = file; + pair->RGB.Src[candidate].Index = index; + if (candidate == RC_PAIR_PRESUB_SRC) { + /* For registers with the RC_FILE_PRESUB file, + * the index stores the presubtract op. */ + int src_regs = rc_presubtract_src_reg_count(index); + for(i = 0; i < src_regs; i++) { + pair->RGB.Src[i].Used = 1; + } + } + } + if (alpha) { + pair->Alpha.Src[candidate].Used = 1; + pair->Alpha.Src[candidate].File = file; + pair->Alpha.Src[candidate].Index = index; + if (candidate == RC_PAIR_PRESUB_SRC) { + /* For registers with the RC_FILE_PRESUB file, + * the index stores the presubtract op. */ + int src_regs = rc_presubtract_src_reg_count(index); + for(i=0; i < src_regs; i++) { + pair->Alpha.Src[i].Used = 1; + } + } + } + + return candidate; +} + +static void pair_foreach_source_callback( + struct rc_pair_instruction * pair, + void * data, + rc_pair_foreach_src_fn cb, + unsigned int swz, + unsigned int src) +{ + /* swz > 3 means that the swizzle is either not used, or a constant + * swizzle (e.g. 0, 1, 0.5). */ + if(swz > 3) + return; + + if(swz == RC_SWIZZLE_W) { + if (src == RC_PAIR_PRESUB_SRC) { + unsigned int i; + unsigned int src_count = rc_presubtract_src_reg_count( + pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index); + for(i = 0; i < src_count; i++) { + cb(data, &pair->Alpha.Src[i]); + } + } else { + cb(data, &pair->Alpha.Src[src]); + } + } else { + if (src == RC_PAIR_PRESUB_SRC) { + unsigned int i; + unsigned int src_count = rc_presubtract_src_reg_count( + pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index); + for(i = 0; i < src_count; i++) { + cb(data, &pair->RGB.Src[i]); + } + } + else { + cb(data, &pair->RGB.Src[src]); + } + } +} + +void rc_pair_foreach_source_that_alpha_reads( + struct rc_pair_instruction * pair, + void * data, + rc_pair_foreach_src_fn cb) +{ + unsigned int i; + const struct rc_opcode_info * info = + rc_get_opcode_info(pair->Alpha.Opcode); + for(i = 0; i < info->NumSrcRegs; i++) { + pair_foreach_source_callback(pair, data, cb, + GET_SWZ(pair->Alpha.Arg[i].Swizzle, 0), + pair->Alpha.Arg[i].Source); + } +} + +void rc_pair_foreach_source_that_rgb_reads( + struct rc_pair_instruction * pair, + void * data, + rc_pair_foreach_src_fn cb) +{ + unsigned int i; + const struct rc_opcode_info * info = + rc_get_opcode_info(pair->RGB.Opcode); + for(i = 0; i < info->NumSrcRegs; i++) { + unsigned int chan; + unsigned int swz = RC_SWIZZLE_UNUSED; + /* Find a swizzle that is either X,Y,Z,or W. We assume here + * that if one channel swizzles X,Y, or Z, then none of the + * other channels swizzle W, and vice-versa. */ + for(chan = 0; chan < 4; chan++) { + swz = GET_SWZ(pair->RGB.Arg[i].Swizzle, chan); + if(swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y + || swz == RC_SWIZZLE_Z || swz == RC_SWIZZLE_W) + continue; + } + pair_foreach_source_callback(pair, data, cb, + swz, + pair->RGB.Arg[i].Source); + } +} + +struct rc_pair_instruction_source * rc_pair_get_src( + struct rc_pair_instruction * pair_inst, + struct rc_pair_instruction_arg * arg) +{ + unsigned int type; + + type = rc_source_type_swz(arg->Swizzle); + + if (type & RC_SOURCE_RGB) { + return &pair_inst->RGB.Src[arg->Source]; + } else if (type & RC_SOURCE_ALPHA) { + return &pair_inst->Alpha.Src[arg->Source]; + } else { + return NULL; + } +} + +int rc_pair_get_src_index( + struct rc_pair_instruction * pair_inst, + struct rc_pair_instruction_source * src) +{ + int i; + for (i = 0; i < 3; i++) { + if (&pair_inst->RGB.Src[i] == src + || &pair_inst->Alpha.Src[i] == src) { + return i; + } + } + return -1; +} diff --git a/src/gallium/drivers/r300/compiler/radeon_program_pair.h b/src/gallium/drivers/r300/compiler/radeon_program_pair.h new file mode 100644 index 00000000000..a957ea9f7a0 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program_pair.h @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_PAIR_H_ +#define __RADEON_PROGRAM_PAIR_H_ + +#include "radeon_code.h" +#include "radeon_opcodes.h" +#include "radeon_program_constants.h" + +struct radeon_compiler; + + +/** + * \file + * Represents a paired ALU instruction, as found in R300 and R500 + * fragment programs. + * + * Note that this representation is taking some liberties as far + * as register files are concerned, to allow separate register + * allocation. + * + * Also note that there are some subtleties in that the semantics + * of certain opcodes are implicitly changed in this representation; + * see \ref rc_pair_translate + */ + +/* For rgb and alpha instructions when arg[n].Source = RC_PAIR_PRESUB_SRC, then + * the presubtract value will be used, and + * {RGB,Alpha}.Src[RC_PAIR_PRESUB_SRC].File will be set to RC_FILE_PRESUB. + */ +#define RC_PAIR_PRESUB_SRC 3 + +struct rc_pair_instruction_source { + unsigned int Used:1; + unsigned int File:3; + unsigned int Index:RC_REGISTER_INDEX_BITS; +}; + +struct rc_pair_instruction_arg { + unsigned int Source:2; + unsigned int Swizzle:12; + unsigned int Abs:1; + unsigned int Negate:1; +}; + +struct rc_pair_sub_instruction { + unsigned int Opcode:8; + unsigned int DestIndex:RC_REGISTER_INDEX_BITS; + unsigned int WriteMask:4; + unsigned int Target:2; + unsigned int OutputWriteMask:3; + unsigned int DepthWriteMask:1; + unsigned int Saturate:1; + + struct rc_pair_instruction_source Src[4]; + struct rc_pair_instruction_arg Arg[3]; +}; + +struct rc_pair_instruction { + struct rc_pair_sub_instruction RGB; + struct rc_pair_sub_instruction Alpha; + + unsigned int WriteALUResult:2; + unsigned int ALUResultCompare:3; + unsigned int Nop:1; +}; + +typedef void (*rc_pair_foreach_src_fn) + (void *, struct rc_pair_instruction_source *); + +/** + * General helper functions for dealing with the paired instruction format. + */ +/*@{*/ +int rc_pair_alloc_source(struct rc_pair_instruction *pair, + unsigned int rgb, unsigned int alpha, + rc_register_file file, unsigned int index); + +void rc_pair_foreach_source_that_alpha_reads( + struct rc_pair_instruction * pair, + void * data, + rc_pair_foreach_src_fn cb); + +void rc_pair_foreach_source_that_rgb_reads( + struct rc_pair_instruction * pair, + void * data, + rc_pair_foreach_src_fn cb); + +struct rc_pair_instruction_source * rc_pair_get_src( + struct rc_pair_instruction * pair_inst, + struct rc_pair_instruction_arg * arg); + +int rc_pair_get_src_index( + struct rc_pair_instruction * pair_inst, + struct rc_pair_instruction_source * src); +/*@}*/ + + +/** + * Compiler passes that operate with the paired format. + */ +/*@{*/ +struct radeon_pair_handler; + +void rc_pair_translate(struct radeon_compiler *cc, void *user); +void rc_pair_schedule(struct radeon_compiler *cc, void *user); +void rc_pair_regalloc(struct radeon_compiler *cc, void *user); +void rc_pair_regalloc_inputs_only(struct radeon_compiler *cc, void *user); +void rc_pair_remove_dead_sources(struct radeon_compiler *c, void *user); +/*@}*/ + +#endif /* __RADEON_PROGRAM_PAIR_H_ */ diff --git a/src/gallium/drivers/r300/compiler/radeon_program_print.c b/src/gallium/drivers/r300/compiler/radeon_program_print.c new file mode 100644 index 00000000000..390d1319460 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program_print.c @@ -0,0 +1,418 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_program.h" + +#include <stdio.h> + +static const char * textarget_to_string(rc_texture_target target) +{ + switch(target) { + case RC_TEXTURE_2D_ARRAY: return "2D_ARRAY"; + case RC_TEXTURE_1D_ARRAY: return "1D_ARRAY"; + case RC_TEXTURE_CUBE: return "CUBE"; + case RC_TEXTURE_3D: return "3D"; + case RC_TEXTURE_RECT: return "RECT"; + case RC_TEXTURE_2D: return "2D"; + case RC_TEXTURE_1D: return "1D"; + default: return "BAD_TEXTURE_TARGET"; + } +} + +static const char * presubtract_op_to_string(rc_presubtract_op op) +{ + switch(op) { + case RC_PRESUB_NONE: + return "NONE"; + case RC_PRESUB_BIAS: + return "(1 - 2 * src0)"; + case RC_PRESUB_SUB: + return "(src1 - src0)"; + case RC_PRESUB_ADD: + return "(src1 + src0)"; + case RC_PRESUB_INV: + return "(1 - src0)"; + default: + return "BAD_PRESUBTRACT_OP"; + } +} + +static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func func, const char * rhs) +{ + if (func == RC_COMPARE_FUNC_NEVER) { + fprintf(f, "false"); + } else if (func == RC_COMPARE_FUNC_ALWAYS) { + fprintf(f, "true"); + } else { + const char * op; + switch(func) { + case RC_COMPARE_FUNC_LESS: op = "<"; break; + case RC_COMPARE_FUNC_EQUAL: op = "=="; break; + case RC_COMPARE_FUNC_LEQUAL: op = "<="; break; + case RC_COMPARE_FUNC_GREATER: op = ">"; break; + case RC_COMPARE_FUNC_NOTEQUAL: op = "!="; break; + case RC_COMPARE_FUNC_GEQUAL: op = ">="; break; + default: op = "???"; break; + } + fprintf(f, "%s %s %s", lhs, op, rhs); + } +} + +static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr) +{ + if (file == RC_FILE_NONE) { + fprintf(f, "none"); + } else if (file == RC_FILE_SPECIAL) { + switch(index) { + case RC_SPECIAL_ALU_RESULT: fprintf(f, "aluresult"); break; + default: fprintf(f, "special[%i]", index); break; + } + } else { + const char * filename; + switch(file) { + case RC_FILE_TEMPORARY: filename = "temp"; break; + case RC_FILE_INPUT: filename = "input"; break; + case RC_FILE_OUTPUT: filename = "output"; break; + case RC_FILE_ADDRESS: filename = "addr"; break; + case RC_FILE_CONSTANT: filename = "const"; break; + default: filename = "BAD FILE"; break; + } + fprintf(f, "%s[%i%s]", filename, index, reladdr ? " + addr[0]" : ""); + } +} + +static void rc_print_mask(FILE * f, unsigned int mask) +{ + if (mask & RC_MASK_X) fprintf(f, "x"); + if (mask & RC_MASK_Y) fprintf(f, "y"); + if (mask & RC_MASK_Z) fprintf(f, "z"); + if (mask & RC_MASK_W) fprintf(f, "w"); +} + +static void rc_print_dst_register(FILE * f, struct rc_dst_register dst) +{ + rc_print_register(f, dst.File, dst.Index, 0); + if (dst.WriteMask != RC_MASK_XYZW) { + fprintf(f, "."); + rc_print_mask(f, dst.WriteMask); + } +} + +static char rc_swizzle_char(unsigned int swz) +{ + switch(swz) { + case RC_SWIZZLE_X: return 'x'; + case RC_SWIZZLE_Y: return 'y'; + case RC_SWIZZLE_Z: return 'z'; + case RC_SWIZZLE_W: return 'w'; + case RC_SWIZZLE_ZERO: return '0'; + case RC_SWIZZLE_ONE: return '1'; + case RC_SWIZZLE_HALF: return 'H'; + case RC_SWIZZLE_UNUSED: return '_'; + } + fprintf(stderr, "bad swz: %u\n", swz); + return '?'; +} + +static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate) +{ + unsigned int comp; + for(comp = 0; comp < 4; ++comp) { + rc_swizzle swz = GET_SWZ(swizzle, comp); + if (GET_BIT(negate, comp)) + fprintf(f, "-"); + fprintf(f, "%c", rc_swizzle_char(swz)); + } +} + +static void rc_print_presub_instruction(FILE * f, + struct rc_presub_instruction inst) +{ + fprintf(f,"("); + switch(inst.Opcode){ + case RC_PRESUB_BIAS: + fprintf(f, "1 - 2 * "); + rc_print_register(f, inst.SrcReg[0].File, + inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr); + break; + case RC_PRESUB_SUB: + rc_print_register(f, inst.SrcReg[1].File, + inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr); + fprintf(f, " - "); + rc_print_register(f, inst.SrcReg[0].File, + inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr); + break; + case RC_PRESUB_ADD: + rc_print_register(f, inst.SrcReg[1].File, + inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr); + fprintf(f, " + "); + rc_print_register(f, inst.SrcReg[0].File, + inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr); + break; + case RC_PRESUB_INV: + fprintf(f, "1 - "); + rc_print_register(f, inst.SrcReg[0].File, + inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr); + break; + default: + break; + } + fprintf(f, ")"); +} + +static void rc_print_src_register(FILE * f, struct rc_instruction * inst, + struct rc_src_register src) +{ + int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW); + + if (src.Negate == RC_MASK_XYZW) + fprintf(f, "-"); + if (src.Abs) + fprintf(f, "|"); + + if(src.File == RC_FILE_PRESUB) + rc_print_presub_instruction(f, inst->U.I.PreSub); + else + rc_print_register(f, src.File, src.Index, src.RelAddr); + + if (src.Abs && !trivial_negate) + fprintf(f, "|"); + + if (src.Swizzle != RC_SWIZZLE_XYZW || !trivial_negate) { + fprintf(f, "."); + rc_print_swizzle(f, src.Swizzle, trivial_negate ? 0 : src.Negate); + } + + if (src.Abs && trivial_negate) + fprintf(f, "|"); +} + +static unsigned update_branch_depth(rc_opcode opcode, unsigned *branch_depth) +{ + switch (opcode) { + case RC_OPCODE_IF: + case RC_OPCODE_BGNLOOP: + return (*branch_depth)++ * 2; + + case RC_OPCODE_ENDIF: + case RC_OPCODE_ENDLOOP: + assert(*branch_depth > 0); + return --(*branch_depth) * 2; + + case RC_OPCODE_ELSE: + assert(*branch_depth > 0); + return (*branch_depth - 1) * 2; + + default: + return *branch_depth * 2; + } +} + +static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst, unsigned *branch_depth) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned int reg; + unsigned spaces = update_branch_depth(inst->U.I.Opcode, branch_depth); + + for (unsigned i = 0; i < spaces; i++) + fprintf(f, " "); + + fprintf(f, "%s", opcode->Name); + + switch(inst->U.I.SaturateMode) { + case RC_SATURATE_NONE: break; + case RC_SATURATE_ZERO_ONE: fprintf(f, "_SAT"); break; + case RC_SATURATE_MINUS_PLUS_ONE: fprintf(f, "_SAT2"); break; + default: fprintf(f, "_BAD_SAT"); break; + } + + if (opcode->HasDstReg) { + fprintf(f, " "); + rc_print_dst_register(f, inst->U.I.DstReg); + if (opcode->NumSrcRegs) + fprintf(f, ","); + } + + for(reg = 0; reg < opcode->NumSrcRegs; ++reg) { + if (reg > 0) + fprintf(f, ","); + fprintf(f, " "); + rc_print_src_register(f, inst, inst->U.I.SrcReg[reg]); + } + + if (opcode->HasTexture) { + fprintf(f, ", %s%s[%u]", + textarget_to_string(inst->U.I.TexSrcTarget), + inst->U.I.TexShadow ? "SHADOW" : "", + inst->U.I.TexSrcUnit); + } + + fprintf(f, ";"); + + if (inst->U.I.WriteALUResult) { + fprintf(f, " [aluresult = ("); + rc_print_comparefunc(f, + (inst->U.I.WriteALUResult == RC_ALURESULT_X) ? "x" : "w", + inst->U.I.ALUResultCompare, "0"); + fprintf(f, ")]"); + } + + fprintf(f, "\n"); +} + +static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst, unsigned *branch_depth) +{ + struct rc_pair_instruction * inst = &fullinst->U.P; + int printedsrc = 0; + unsigned spaces = update_branch_depth(inst->RGB.Opcode != RC_OPCODE_NOP ? + inst->RGB.Opcode : inst->Alpha.Opcode, branch_depth); + + for (unsigned i = 0; i < spaces; i++) + fprintf(f, " "); + + for(unsigned int src = 0; src < 3; ++src) { + if (inst->RGB.Src[src].Used) { + if (printedsrc) + fprintf(f, ", "); + fprintf(f, "src%i.xyz = ", src); + rc_print_register(f, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, 0); + printedsrc = 1; + } + if (inst->Alpha.Src[src].Used) { + if (printedsrc) + fprintf(f, ", "); + fprintf(f, "src%i.w = ", src); + rc_print_register(f, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 0); + printedsrc = 1; + } + } + if(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { + fprintf(f, ", srcp.xyz = %s", + presubtract_op_to_string( + inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index)); + } + if(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { + fprintf(f, ", srcp.w = %s", + presubtract_op_to_string( + inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index)); + } + fprintf(f, "\n"); + + if (inst->RGB.Opcode != RC_OPCODE_NOP) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode); + + for (unsigned i = 0; i < spaces; i++) + fprintf(f, " "); + + fprintf(f, " %s%s", opcode->Name, inst->RGB.Saturate ? "_SAT" : ""); + if (inst->RGB.WriteMask) + fprintf(f, " temp[%i].%s%s%s", inst->RGB.DestIndex, + (inst->RGB.WriteMask & 1) ? "x" : "", + (inst->RGB.WriteMask & 2) ? "y" : "", + (inst->RGB.WriteMask & 4) ? "z" : ""); + if (inst->RGB.OutputWriteMask) + fprintf(f, " color[%i].%s%s%s", inst->RGB.Target, + (inst->RGB.OutputWriteMask & 1) ? "x" : "", + (inst->RGB.OutputWriteMask & 2) ? "y" : "", + (inst->RGB.OutputWriteMask & 4) ? "z" : ""); + if (inst->WriteALUResult == RC_ALURESULT_X) + fprintf(f, " aluresult"); + + for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { + const char* abs = inst->RGB.Arg[arg].Abs ? "|" : ""; + const char* neg = inst->RGB.Arg[arg].Negate ? "-" : ""; + fprintf(f, ", %s%ssrc", neg, abs); + if(inst->RGB.Arg[arg].Source == RC_PAIR_PRESUB_SRC) + fprintf(f,"p"); + else + fprintf(f,"%d", inst->RGB.Arg[arg].Source); + fprintf(f,".%c%c%c%s", + rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)), + rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)), + rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)), + abs); + } + fprintf(f, "\n"); + } + + if (inst->Alpha.Opcode != RC_OPCODE_NOP) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode); + + for (unsigned i = 0; i < spaces; i++) + fprintf(f, " "); + + fprintf(f, " %s%s", opcode->Name, inst->Alpha.Saturate ? "_SAT" : ""); + if (inst->Alpha.WriteMask) + fprintf(f, " temp[%i].w", inst->Alpha.DestIndex); + if (inst->Alpha.OutputWriteMask) + fprintf(f, " color[%i].w", inst->Alpha.Target); + if (inst->Alpha.DepthWriteMask) + fprintf(f, " depth.w"); + if (inst->WriteALUResult == RC_ALURESULT_W) + fprintf(f, " aluresult"); + + for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { + const char* abs = inst->Alpha.Arg[arg].Abs ? "|" : ""; + const char* neg = inst->Alpha.Arg[arg].Negate ? "-" : ""; + fprintf(f, ", %s%ssrc", neg, abs); + if(inst->Alpha.Arg[arg].Source == RC_PAIR_PRESUB_SRC) + fprintf(f,"p"); + else + fprintf(f,"%d", inst->Alpha.Arg[arg].Source); + fprintf(f,".%c%s", + rc_swizzle_char(GET_SWZ(inst->Alpha.Arg[arg].Swizzle, 0)), abs); + } + fprintf(f, "\n"); + } + + if (inst->WriteALUResult) { + for (unsigned i = 0; i < spaces; i++) + fprintf(f, " "); + + fprintf(f, " [aluresult = ("); + rc_print_comparefunc(f, "result", inst->ALUResultCompare, "0"); + fprintf(f, ")]\n"); + } +} + +/** + * Print program to stderr, default options. + */ +void rc_print_program(const struct rc_program *prog) +{ + unsigned int linenum = 0; + unsigned branch_depth = 0; + struct rc_instruction *inst; + + fprintf(stderr, "# Radeon Compiler Program\n"); + + for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) { + fprintf(stderr, "%3d: ", linenum); + + if (inst->Type == RC_INSTRUCTION_PAIR) + rc_print_pair_instruction(stderr, inst, &branch_depth); + else + rc_print_normal_instruction(stderr, inst, &branch_depth); + + linenum++; + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_program_tex.c b/src/gallium/drivers/r300/compiler/radeon_program_tex.c new file mode 100644 index 00000000000..8d16b2cf9ec --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program_tex.c @@ -0,0 +1,528 @@ +/* + * Copyright (C) 2010 Corbin Simpson + * Copyright (C) 2010 Marek Olšák <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_tex.h" + +#include "radeon_compiler_util.h" + +/* Series of transformations to be done on textures. */ + +static struct rc_src_register shadow_fail_value(struct r300_fragment_program_compiler *compiler, + int tmu) +{ + struct rc_src_register reg = { 0, }; + + if (compiler->enable_shadow_ambient) { + reg.File = RC_FILE_CONSTANT; + reg.Index = rc_constants_add_state(&compiler->Base.Program.Constants, + RC_STATE_SHADOW_AMBIENT, tmu); + reg.Swizzle = RC_SWIZZLE_WWWW; + } else { + reg.File = RC_FILE_NONE; + reg.Swizzle = RC_SWIZZLE_0000; + } + + reg.Swizzle = combine_swizzles(reg.Swizzle, + compiler->state.unit[tmu].texture_swizzle); + return reg; +} + +static struct rc_src_register shadow_pass_value(struct r300_fragment_program_compiler *compiler, + int tmu) +{ + struct rc_src_register reg = { 0, }; + + reg.File = RC_FILE_NONE; + reg.Swizzle = combine_swizzles(RC_SWIZZLE_1111, + compiler->state.unit[tmu].texture_swizzle); + return reg; +} + +static void scale_texcoords(struct r300_fragment_program_compiler *compiler, + struct rc_instruction *inst, + unsigned state_constant) +{ + struct rc_instruction *inst_mov; + + unsigned temp = rc_find_free_temporary(&compiler->Base); + + inst_mov = rc_insert_new_instruction(&compiler->Base, inst->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MUL; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mov->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + inst_mov->U.I.SrcReg[1].Index = + rc_constants_add_state(&compiler->Base.Program.Constants, + state_constant, inst->U.I.TexSrcUnit); + + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = temp; +} + +static void projective_divide(struct r300_fragment_program_compiler *compiler, + struct rc_instruction *inst) +{ + struct rc_instruction *inst_mul, *inst_rcp; + + unsigned temp = rc_find_free_temporary(&compiler->Base); + + inst_rcp = rc_insert_new_instruction(&compiler->Base, inst->Prev); + inst_rcp->U.I.Opcode = RC_OPCODE_RCP; + inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rcp->U.I.DstReg.Index = temp; + inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; + inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + /* Because the input can be arbitrarily swizzled, + * read the component mapped to W. */ + inst_rcp->U.I.SrcReg[0].Swizzle = + RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3)); + + inst_mul = rc_insert_new_instruction(&compiler->Base, inst->Prev); + inst_mul->U.I.Opcode = RC_OPCODE_MUL; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = temp; + inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mul->U.I.SrcReg[1].Index = temp; + inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; + + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.Opcode = RC_OPCODE_TEX; + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = temp; +} + +/** + * Transform TEX, TXP, TXB, and KIL instructions in the following ways: + * - implement texture compare (shadow extensions) + * - extract non-native source / destination operands + * - premultiply texture coordinates for RECT + * - extract operand swizzles + * - introduce a temporary register when write masks are needed + */ +int radeonTransformTEX( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data) +{ + struct r300_fragment_program_compiler *compiler = + (struct r300_fragment_program_compiler*)data; + rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode; + int is_rect = inst->U.I.TexSrcTarget == RC_TEXTURE_RECT || + compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords; + + if (inst->U.I.Opcode != RC_OPCODE_TEX && + inst->U.I.Opcode != RC_OPCODE_TXB && + inst->U.I.Opcode != RC_OPCODE_TXP && + inst->U.I.Opcode != RC_OPCODE_TXD && + inst->U.I.Opcode != RC_OPCODE_TXL && + inst->U.I.Opcode != RC_OPCODE_KIL) + return 0; + + /* ARB_shadow & EXT_shadow_funcs */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + ((c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) || + (compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) { + rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; + + if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) { + inst->U.I.Opcode = RC_OPCODE_MOV; + + if (comparefunc == RC_COMPARE_FUNC_ALWAYS) { + inst->U.I.SrcReg[0] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit); + } else { + inst->U.I.SrcReg[0] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit); + } + + return 1; + } else { + struct rc_instruction * inst_rcp = NULL; + struct rc_instruction *inst_mul, *inst_add, *inst_cmp; + unsigned tmp_texsample; + unsigned tmp_sum; + int pass, fail; + + /* Save the output register. */ + struct rc_dst_register output_reg = inst->U.I.DstReg; + unsigned saturate_mode = inst->U.I.SaturateMode; + + /* Redirect TEX to a new temp. */ + tmp_texsample = rc_find_free_temporary(c); + inst->U.I.SaturateMode = 0; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = tmp_texsample; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + + tmp_sum = rc_find_free_temporary(c); + + if (inst->U.I.Opcode == RC_OPCODE_TXP) { + /* Compute 1/W. */ + inst_rcp = rc_insert_new_instruction(c, inst); + inst_rcp->U.I.Opcode = RC_OPCODE_RCP; + inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rcp->U.I.DstReg.Index = tmp_sum; + inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; + inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_rcp->U.I.SrcReg[0].Swizzle = + RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3)); + } + + /* Divide Z by W (if it's TXP) and saturate. */ + inst_mul = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst); + inst_mul->U.I.Opcode = inst->U.I.Opcode == RC_OPCODE_TXP ? RC_OPCODE_MUL : RC_OPCODE_MOV; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = tmp_sum; + inst_mul->U.I.DstReg.WriteMask = RC_MASK_W; + inst_mul->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; + inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mul->U.I.SrcReg[0].Swizzle = + RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2)); + if (inst->U.I.Opcode == RC_OPCODE_TXP) { + inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mul->U.I.SrcReg[1].Index = tmp_sum; + inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; + } + + /* Add the depth texture value. */ + inst_add = rc_insert_new_instruction(c, inst_mul); + inst_add->U.I.Opcode = RC_OPCODE_ADD; + inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_add->U.I.DstReg.Index = tmp_sum; + inst_add->U.I.DstReg.WriteMask = RC_MASK_W; + inst_add->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_add->U.I.SrcReg[0].Index = tmp_sum; + inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; + inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_add->U.I.SrcReg[1].Index = tmp_texsample; + inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; + + /* Note that SrcReg[0] is r, SrcReg[1] is tex and: + * LESS: r < tex <=> -tex+r < 0 + * GEQUAL: r >= tex <=> not (-tex+r < 0) + * GREATER: r > tex <=> tex-r < 0 + * LEQUAL: r <= tex <=> not ( tex-r < 0) + * EQUAL: GEQUAL + * NOTEQUAL:LESS + */ + + /* This negates either r or tex: */ + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL || + comparefunc == RC_COMPARE_FUNC_EQUAL || comparefunc == RC_COMPARE_FUNC_NOTEQUAL) + inst_add->U.I.SrcReg[1].Negate = inst_add->U.I.SrcReg[1].Negate ^ RC_MASK_XYZW; + else + inst_add->U.I.SrcReg[0].Negate = inst_add->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; + + /* This negates the whole expresion: */ + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER || + comparefunc == RC_COMPARE_FUNC_NOTEQUAL) { + pass = 1; + fail = 2; + } else { + pass = 2; + fail = 1; + } + + inst_cmp = rc_insert_new_instruction(c, inst_add); + inst_cmp->U.I.Opcode = RC_OPCODE_CMP; + inst_cmp->U.I.SaturateMode = saturate_mode; + inst_cmp->U.I.DstReg = output_reg; + inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_cmp->U.I.SrcReg[0].Index = tmp_sum; + inst_cmp->U.I.SrcReg[0].Swizzle = + combine_swizzles(RC_SWIZZLE_WWWW, + compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle); + inst_cmp->U.I.SrcReg[pass] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit); + inst_cmp->U.I.SrcReg[fail] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit); + + assert(tmp_texsample != tmp_sum); + } + } + + /* R300 cannot sample from rectangles and the wrap mode fallback needs + * normalized coordinates anyway. */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + is_rect && (!c->is_r500 || wrapmode != RC_WRAP_NONE)) { + scale_texcoords(compiler, inst, RC_STATE_R300_TEXRECT_FACTOR); + inst->U.I.TexSrcTarget = RC_TEXTURE_2D; + } + + /* Divide by W if needed. */ + if (inst->U.I.Opcode == RC_OPCODE_TXP && + (wrapmode == RC_WRAP_REPEAT || wrapmode == RC_WRAP_MIRRORED_REPEAT || + compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch)) { + projective_divide(compiler, inst); + } + + /* Texture wrap modes don't work on NPOT textures. + * + * Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and + * mirroring are not. If we need to repeat, we do: + * + * MUL temp, texcoord, <scaling factor constant> + * FRC temp, temp ; Discard integer portion of coords + * + * This gives us coords in [0, 1]. + * + * Mirroring is trickier. We're going to start out like repeat: + * + * MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes + * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2] + * ; so scale to [0, 1] + * FRC temp, temp ; Make the pattern repeat + * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1] + * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern. + * ; The pattern is backwards, so reverse it (1-x). + * + * This gives us coords in [0, 1]. + * + * ~ C & M. ;) + */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + wrapmode != RC_WRAP_NONE) { + struct rc_instruction *inst_mov; + unsigned temp = rc_find_free_temporary(c); + + if (wrapmode == RC_WRAP_REPEAT) { + /* Both instructions will be paired up. */ + struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev); + + inst_frc->U.I.Opcode = RC_OPCODE_FRC; + inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_frc->U.I.DstReg.Index = temp; + inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + } else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) { + /* + * Function: + * f(v) = 1 - abs(frac(v * 0.5) * 2 - 1) + * + * Code: + * MUL temp, src0, 0.5 + * FRC temp, temp + * MAD temp, temp, 2, -1 + * ADD temp, 1, -abs(temp) + */ + + struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add; + unsigned two, two_swizzle; + + inst_mul = rc_insert_new_instruction(c, inst->Prev); + + inst_mul->U.I.Opcode = RC_OPCODE_MUL; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = temp; + inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH; + + inst_frc = rc_insert_new_instruction(c, inst->Prev); + + inst_frc->U.I.Opcode = RC_OPCODE_FRC; + inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_frc->U.I.DstReg.Index = temp; + inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_frc->U.I.SrcReg[0].Index = temp; + inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; + + two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle); + inst_mad = rc_insert_new_instruction(c, inst->Prev); + + inst_mad->U.I.Opcode = RC_OPCODE_MAD; + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = temp; + inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[0].Index = temp; + inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; + inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + inst_mad->U.I.SrcReg[1].Index = two; + inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle; + inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111; + inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ; + + inst_add = rc_insert_new_instruction(c, inst->Prev); + + inst_add->U.I.Opcode = RC_OPCODE_ADD; + inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_add->U.I.DstReg.Index = temp; + inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; + inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_add->U.I.SrcReg[1].Index = temp; + inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; + inst_add->U.I.SrcReg[1].Abs = 1; + inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ; + } else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) { + /* + * Mirrored clamp modes are bloody simple, we just use abs + * to mirror [0, 1] into [-1, 0]. This works for + * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER. + */ + struct rc_instruction *inst_mov; + + inst_mov = rc_insert_new_instruction(c, inst->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mov->U.I.SrcReg[0].Abs = 1; + } + + /* Preserve W for TXP/TXB. */ + inst_mov = rc_insert_new_instruction(c, inst->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_W; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = temp; + } + + /* NPOT -> POT conversion for 3D textures. */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch) { + struct rc_instruction *inst_mov; + unsigned temp = rc_find_free_temporary(c); + + /* Saturate XYZ. */ + inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + + /* Copy W. */ + inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_W; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = temp; + + scale_texcoords(compiler, inst, RC_STATE_R300_TEXSCALE_FACTOR); + } + + /* Convert SNORM-encoded ATI1N sampled as UNORM to SNORM. + * Formula: dst = tex > 0.5 ? tex*2-2 : tex*2 + */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + compiler->state.unit[inst->U.I.TexSrcUnit].convert_unorm_to_snorm) { + unsigned two, two_swizzle; + struct rc_instruction *inst_mul, *inst_mad, *inst_cnd; + + two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2.35, &two_swizzle); + + inst_mul = rc_insert_new_instruction(c, inst); + inst_mul->U.I.Opcode = RC_OPCODE_MUL; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mul->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mul->U.I.SrcReg[0].Index = rc_find_free_temporary(c); /* redirected TEX output */ + inst_mul->U.I.SrcReg[1].File = RC_FILE_CONSTANT; /* 2 */ + inst_mul->U.I.SrcReg[1].Index = two; + inst_mul->U.I.SrcReg[1].Swizzle = two_swizzle; + + inst_mad = rc_insert_new_instruction(c, inst_mul); + inst_mad->U.I.Opcode = RC_OPCODE_MAD; + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mad->U.I.SrcReg[0] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */ + inst_mad->U.I.SrcReg[1] = inst_mul->U.I.SrcReg[1]; /* 2 */ + inst_mad->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[1]; /* 2 */ + inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZW; + + inst_cnd = rc_insert_new_instruction(c, inst_mad); + inst_cnd->U.I.Opcode = RC_OPCODE_CND; + inst_cnd->U.I.SaturateMode = inst->U.I.SaturateMode; + inst_cnd->U.I.DstReg = inst->U.I.DstReg; + inst_cnd->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_cnd->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index; + inst_cnd->U.I.SrcReg[0].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle; + inst_cnd->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_cnd->U.I.SrcReg[1].Index = inst_mul->U.I.DstReg.Index; + inst_cnd->U.I.SrcReg[1].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle; + inst_cnd->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */ + + inst->U.I.SaturateMode = 0; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = inst_mul->U.I.SrcReg[0].Index; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + } + + /* Cannot write texture to output registers or with saturate (all chips), + * or with masks (non-r500). */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + (inst->U.I.DstReg.File != RC_FILE_TEMPORARY || + inst->U.I.SaturateMode || + (!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.SaturateMode = inst->U.I.SaturateMode; + inst_mov->U.I.DstReg = inst->U.I.DstReg; + inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c); + + inst->U.I.SaturateMode = 0; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + } + + /* Cannot read texture coordinate from constants file */ + if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; + } + + return 1; +} diff --git a/src/gallium/drivers/r300/compiler/radeon_program_tex.h b/src/gallium/drivers/r300/compiler/radeon_program_tex.h new file mode 100644 index 00000000000..a0105051ac4 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program_tex.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2010 Corbin Simpson + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_TEX_H_ +#define __RADEON_PROGRAM_TEX_H_ + +#include "radeon_compiler.h" +#include "radeon_program.h" + +int radeonTransformTEX( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data); + +#endif /* __RADEON_PROGRAM_TEX_H_ */ diff --git a/src/gallium/drivers/r300/compiler/radeon_remove_constants.c b/src/gallium/drivers/r300/compiler/radeon_remove_constants.c new file mode 100644 index 00000000000..7d76585a593 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_remove_constants.c @@ -0,0 +1,150 @@ +/* + * Copyright (C) 2010 Marek Olšák <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_remove_constants.h" +#include "radeon_dataflow.h" + +struct mark_used_data { + unsigned char * const_used; + unsigned * has_rel_addr; +}; + +static void remap_regs(void * userdata, struct rc_instruction * inst, + rc_register_file * pfile, unsigned int * pindex) +{ + unsigned *inv_remap_table = userdata; + + if (*pfile == RC_FILE_CONSTANT) { + *pindex = inv_remap_table[*pindex]; + } +} + +static void mark_used(void * userdata, struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct mark_used_data * d = userdata; + + if (src->File == RC_FILE_CONSTANT) { + if (src->RelAddr) { + *d->has_rel_addr = 1; + } else { + d->const_used[src->Index] = 1; + } + } +} + +void rc_remove_unused_constants(struct radeon_compiler *c, void *user) +{ + unsigned **out_remap_table = (unsigned**)user; + unsigned char *const_used; + unsigned *remap_table; + unsigned *inv_remap_table; + unsigned has_rel_addr = 0; + unsigned is_identity = 1; + unsigned are_externals_remapped = 0; + struct rc_constant *constants = c->Program.Constants.Constants; + struct mark_used_data d; + unsigned new_count; + + if (!c->Program.Constants.Count) { + *out_remap_table = NULL; + return; + } + + const_used = malloc(c->Program.Constants.Count); + memset(const_used, 0, c->Program.Constants.Count); + + d.const_used = const_used; + d.has_rel_addr = &has_rel_addr; + + /* Pass 1: Mark used constants. */ + for (struct rc_instruction *inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + rc_for_all_reads_src(inst, mark_used, &d); + } + + /* Pass 2: If there is relative addressing or dead constant elimination + * is disabled, mark all externals as used. */ + if (has_rel_addr || !c->remove_unused_constants) { + for (unsigned i = 0; i < c->Program.Constants.Count; i++) + if (constants[i].Type == RC_CONSTANT_EXTERNAL) + const_used[i] = 1; + } + + /* Pass 3: Make the remapping table and remap constants. + * This pass removes unused constants simply by overwriting them by other constants. */ + remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned)); + inv_remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned)); + new_count = 0; + + for (unsigned i = 0; i < c->Program.Constants.Count; i++) { + if (const_used[i]) { + remap_table[new_count] = i; + inv_remap_table[i] = new_count; + + if (i != new_count) { + if (constants[i].Type == RC_CONSTANT_EXTERNAL) + are_externals_remapped = 1; + + constants[new_count] = constants[i]; + is_identity = 0; + } + new_count++; + } + } + + /* is_identity ==> new_count == old_count + * !is_identity ==> new_count < old_count */ + assert( is_identity || new_count < c->Program.Constants.Count); + assert(!((has_rel_addr || !c->remove_unused_constants) && are_externals_remapped)); + + /* Pass 4: Redirect reads of all constants to their new locations. */ + if (!is_identity) { + for (struct rc_instruction *inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + rc_remap_registers(inst, remap_regs, inv_remap_table); + } + } + + /* Set the new constant count. Note that new_count may be less than + * Count even though the remapping function is identity. In that case, + * the constants have been removed at the end of the array. */ + c->Program.Constants.Count = new_count; + + if (are_externals_remapped) { + *out_remap_table = remap_table; + } else { + *out_remap_table = NULL; + free(remap_table); + } + + free(const_used); + free(inv_remap_table); + + if (c->Debug & RC_DBG_LOG) + rc_constants_print(&c->Program.Constants); +} diff --git a/src/gallium/drivers/r300/compiler/radeon_remove_constants.h b/src/gallium/drivers/r300/compiler/radeon_remove_constants.h new file mode 100644 index 00000000000..f29113b922b --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_remove_constants.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2010 Marek Olšák <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_REMOVE_CONSTANTS_H +#define RADEON_REMOVE_CONSTANTS_H + +#include "radeon_compiler.h" + +void rc_remove_unused_constants(struct radeon_compiler *c, void *user); + +#endif diff --git a/src/gallium/drivers/r300/compiler/radeon_rename_regs.c b/src/gallium/drivers/r300/compiler/radeon_rename_regs.c new file mode 100644 index 00000000000..cafa0579734 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_rename_regs.c @@ -0,0 +1,92 @@ +/* + * Copyright 2010 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + */ + +#include "radeon_rename_regs.h" + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" +#include "radeon_program.h" + +/** + * This function renames registers in an attempt to get the code close to + * SSA form. After this function has completed, most of the register are only + * written to one time, with a few exceptions. + * + * This function assumes all the instructions are still of type + * RC_INSTRUCTION_NORMAL. + */ +void rc_rename_regs(struct radeon_compiler *c, void *user) +{ + unsigned int i, used_length; + int new_index; + struct rc_instruction * inst; + struct rc_reader_data reader_data; + unsigned char * used; + + /* XXX Remove this once the register allocation works with flow control. */ + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) + return; + } + + used_length = 2 * rc_recompute_ips(c); + used = memory_pool_malloc(&c->Pool, sizeof(unsigned char) * used_length); + memset(used, 0, sizeof(unsigned char) * used_length); + + rc_get_used_temporaries(c, used, used_length); + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + + if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY) + continue; + + reader_data.ExitOnAbort = 1; + rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL); + + if (reader_data.Abort || reader_data.ReaderCount == 0) + continue; + + new_index = rc_find_free_temporary_list(c, used, used_length, + RC_MASK_XYZW); + if (new_index < 0) { + rc_error(c, "Ran out of temporary registers\n"); + return; + } + + reader_data.Writer->U.I.DstReg.Index = new_index; + for(i = 0; i < reader_data.ReaderCount; i++) { + reader_data.Readers[i].U.I.Src->Index = new_index; + } + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_rename_regs.h b/src/gallium/drivers/r300/compiler/radeon_rename_regs.h new file mode 100644 index 00000000000..3baf29f6120 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_rename_regs.h @@ -0,0 +1,9 @@ + +#ifndef RADEON_RENAME_REGS_H +#define RADEON_RENAME_REGS_H + +struct radeon_compiler; + +void rc_rename_regs(struct radeon_compiler *c, void *user); + +#endif /* RADEON_RENAME_REGS_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_swizzle.h b/src/gallium/drivers/r300/compiler/radeon_swizzle.h new file mode 100644 index 00000000000..c81d5f7a5e9 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_swizzle.h @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_SWIZZLE_H +#define RADEON_SWIZZLE_H + +#include "radeon_program.h" + +struct rc_swizzle_split { + unsigned char NumPhases; + unsigned char Phase[4]; +}; + +/** + * Describe the swizzling capability of target hardware. + */ +struct rc_swizzle_caps { + /** + * Check whether the given swizzle, absolute and negate combination + * can be implemented natively by the hardware for this opcode. + * + * \return 1 if the swizzle is native for the given opcode + */ + int (*IsNative)(rc_opcode opcode, struct rc_src_register reg); + + /** + * Determine how to split access to the masked channels of the + * given source register to obtain ALU-native swizzles. + */ + void (*Split)(struct rc_src_register reg, unsigned int mask, struct rc_swizzle_split * split); +}; + +#endif /* RADEON_SWIZZLE_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_variable.c b/src/gallium/drivers/r300/compiler/radeon_variable.c new file mode 100644 index 00000000000..938fb8421f2 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_variable.c @@ -0,0 +1,517 @@ +/* + * Copyright 2011 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_variable.h" + +#include "memory_pool.h" +#include "radeon_compiler_util.h" +#include "radeon_dataflow.h" +#include "radeon_list.h" +#include "radeon_opcodes.h" +#include "radeon_program.h" + +/** + * Rewrite the index and writemask for the destination register of var + * and its friends to new_index and new_writemask. This function also takes + * care of rewriting the swizzles for the sources of var. + */ +void rc_variable_change_dst( + struct rc_variable * var, + unsigned int new_index, + unsigned int new_writemask) +{ + struct rc_variable * var_ptr; + struct rc_list * readers; + unsigned int old_mask = rc_variable_writemask_sum(var); + unsigned int conversion_swizzle = + rc_make_conversion_swizzle(old_mask, new_writemask); + + for (var_ptr = var; var_ptr; var_ptr = var_ptr->Friend) { + if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) { + rc_normal_rewrite_writemask(var_ptr->Inst, + conversion_swizzle); + var_ptr->Inst->U.I.DstReg.Index = new_index; + } else { + struct rc_pair_sub_instruction * sub; + if (var_ptr->Dst.WriteMask == RC_MASK_W) { + assert(new_writemask & RC_MASK_W); + sub = &var_ptr->Inst->U.P.Alpha; + } else { + sub = &var_ptr->Inst->U.P.RGB; + rc_pair_rewrite_writemask(sub, + conversion_swizzle); + } + sub->DestIndex = new_index; + } + } + + readers = rc_variable_readers_union(var); + + for ( ; readers; readers = readers->Next) { + struct rc_reader * reader = readers->Item; + if (reader->Inst->Type == RC_INSTRUCTION_NORMAL) { + reader->U.I.Src->Index = new_index; + reader->U.I.Src->Swizzle = rc_rewrite_swizzle( + reader->U.I.Src->Swizzle, conversion_swizzle); + } else { + struct rc_pair_instruction * pair_inst = + &reader->Inst->U.P; + unsigned int src_type = rc_source_type_swz( + reader->U.P.Arg->Swizzle); + + int src_index = reader->U.P.Arg->Source; + if (src_index == RC_PAIR_PRESUB_SRC) { + src_index = rc_pair_get_src_index( + pair_inst, reader->U.P.Src); + } + /* Try to delete the old src, it is OK if this fails, + * because rc_pair_alloc_source might be able to + * find a source the ca be reused. + */ + if (rc_pair_remove_src(reader->Inst, src_type, + src_index, old_mask)) { + /* Reuse the source index of the source that + * was just deleted and set its register + * index. We can't use rc_pair_alloc_source + * for this becuase it might return a source + * index that is already being used. */ + if (src_type & RC_SOURCE_RGB) { + pair_inst->RGB.Src[src_index] + .Used = 1; + pair_inst->RGB.Src[src_index] + .Index = new_index; + pair_inst->RGB.Src[src_index] + .File = RC_FILE_TEMPORARY; + } + if (src_type & RC_SOURCE_ALPHA) { + pair_inst->Alpha.Src[src_index] + .Used = 1; + pair_inst->Alpha.Src[src_index] + .Index = new_index; + pair_inst->Alpha.Src[src_index] + .File = RC_FILE_TEMPORARY; + } + } else { + src_index = rc_pair_alloc_source( + &reader->Inst->U.P, + src_type & RC_SOURCE_RGB, + src_type & RC_SOURCE_ALPHA, + RC_FILE_TEMPORARY, + new_index); + if (src_index < 0) { + rc_error(var->C, "Rewrite of inst %u failed " + "Can't allocate source for " + "Inst %u src_type=%x " + "new_index=%u new_mask=%u\n", + var->Inst->IP, reader->Inst->IP, src_type, new_index, new_writemask); + continue; + } + } + reader->U.P.Arg->Swizzle = rc_rewrite_swizzle( + reader->U.P.Arg->Swizzle, conversion_swizzle); + if (reader->U.P.Arg->Source != RC_PAIR_PRESUB_SRC) { + reader->U.P.Arg->Source = src_index; + } + } + } +} + +/** + * Compute the live intervals for var and its friends. + */ +void rc_variable_compute_live_intervals(struct rc_variable * var) +{ + while(var) { + unsigned int i; + unsigned int start = var->Inst->IP; + + for (i = 0; i < var->ReaderCount; i++) { + unsigned int chan; + unsigned int chan_start = start; + unsigned int chan_end = var->Readers[i].Inst->IP; + unsigned int mask = var->Readers[i].WriteMask; + struct rc_instruction * inst; + + /* Extend the live interval of T0 to the start of the + * loop for sequences like: + * BGNLOOP + * read T0 + * ... + * write T0 + * ENDLOOP + */ + if (var->Readers[i].Inst->IP < start) { + struct rc_instruction * bgnloop = + rc_match_endloop(var->Readers[i].Inst); + chan_start = bgnloop->IP; + } + + /* Extend the live interval of T0 to the start of the + * loop in case there is a BRK instruction in the loop + * (we don't actually check for a BRK instruction we + * assume there is one somewhere in the loop, which + * there usually is) for sequences like: + * BGNLOOP + * ... + * conditional BRK + * ... + * write T0 + * ENDLOOP + * read T0 + *************************************************** + * Extend the live interval of T0 to the end of the + * loop for sequences like: + * write T0 + * BGNLOOP + * ... + * read T0 + * ENDLOOP + */ + for (inst = var->Inst; inst != var->Readers[i].Inst; + inst = inst->Next) { + rc_opcode op = rc_get_flow_control_inst(inst); + if (op == RC_OPCODE_ENDLOOP) { + struct rc_instruction * bgnloop = + rc_match_endloop(inst); + if (bgnloop->IP < chan_start) { + chan_start = bgnloop->IP; + } + } else if (op == RC_OPCODE_BGNLOOP) { + struct rc_instruction * endloop = + rc_match_bgnloop(inst); + if (endloop->IP > chan_end) { + chan_end = endloop->IP; + } + } + } + + for (chan = 0; chan < 4; chan++) { + if ((mask >> chan) & 0x1) { + if (!var->Live[chan].Used + || chan_start < var->Live[chan].Start) { + var->Live[chan].Start = + chan_start; + } + if (!var->Live[chan].Used + || chan_end > var->Live[chan].End) { + var->Live[chan].End = chan_end; + } + var->Live[chan].Used = 1; + } + } + } + var = var->Friend; + } +} + +/** + * @return 1 if a and b share a reader + * @return 0 if they do not + */ +static unsigned int readers_intersect( + struct rc_variable * a, + struct rc_variable * b) +{ + unsigned int a_index, b_index; + for (a_index = 0; a_index < a->ReaderCount; a_index++) { + struct rc_reader reader_a = a->Readers[a_index]; + for (b_index = 0; b_index < b->ReaderCount; b_index++) { + struct rc_reader reader_b = b->Readers[b_index]; + if (reader_a.Inst->Type == RC_INSTRUCTION_NORMAL + && reader_b.Inst->Type == RC_INSTRUCTION_NORMAL + && reader_a.U.I.Src == reader_b.U.I.Src) { + + return 1; + } + if (reader_a.Inst->Type == RC_INSTRUCTION_PAIR + && reader_b.Inst->Type == RC_INSTRUCTION_PAIR + && reader_a.U.P.Src == reader_b.U.P.Src) { + + return 1; + } + } + } + return 0; +} + +void rc_variable_add_friend( + struct rc_variable * var, + struct rc_variable * friend) +{ + assert(var->Dst.Index == friend->Dst.Index); + while(var->Friend) { + var = var->Friend; + } + var->Friend = friend; +} + +struct rc_variable * rc_variable( + struct radeon_compiler * c, + unsigned int DstFile, + unsigned int DstIndex, + unsigned int DstWriteMask, + struct rc_reader_data * reader_data) +{ + struct rc_variable * new = + memory_pool_malloc(&c->Pool, sizeof(struct rc_variable)); + memset(new, 0, sizeof(struct rc_variable)); + new->C = c; + new->Dst.File = DstFile; + new->Dst.Index = DstIndex; + new->Dst.WriteMask = DstWriteMask; + if (reader_data) { + new->Inst = reader_data->Writer; + new->ReaderCount = reader_data->ReaderCount; + new->Readers = reader_data->Readers; + } + return new; +} + +static void get_variable_helper( + struct rc_list ** variable_list, + struct rc_variable * variable) +{ + struct rc_list * list_ptr; + for (list_ptr = *variable_list; list_ptr; list_ptr = list_ptr->Next) { + if (readers_intersect(variable, list_ptr->Item)) { + rc_variable_add_friend(list_ptr->Item, variable); + return; + } + } + rc_list_add(variable_list, rc_list(&variable->C->Pool, variable)); +} + +static void get_variable_pair_helper( + struct rc_list ** variable_list, + struct radeon_compiler * c, + struct rc_instruction * inst, + struct rc_pair_sub_instruction * sub_inst) +{ + struct rc_reader_data reader_data; + struct rc_variable * new_var; + rc_register_file file; + unsigned int writemask; + + if (sub_inst->Opcode == RC_OPCODE_NOP) { + return; + } + memset(&reader_data, 0, sizeof(struct rc_reader_data)); + rc_get_readers_sub(c, inst, sub_inst, &reader_data, NULL, NULL, NULL); + + if (reader_data.ReaderCount == 0) { + return; + } + + if (sub_inst->WriteMask) { + file = RC_FILE_TEMPORARY; + writemask = sub_inst->WriteMask; + } else if (sub_inst->OutputWriteMask) { + file = RC_FILE_OUTPUT; + writemask = sub_inst->OutputWriteMask; + } else { + writemask = 0; + file = RC_FILE_NONE; + } + new_var = rc_variable(c, file, sub_inst->DestIndex, writemask, + &reader_data); + get_variable_helper(variable_list, new_var); +} + +/** + * Generate a list of variables used by the shader program. Each instruction + * that writes to a register is considered a variable. The struct rc_variable + * data structure includes a list of readers and is essentially a + * definition-use chain. Any two variables that share a reader are considered + * "friends" and they are linked together via the Friend attribute. + */ +struct rc_list * rc_get_variables(struct radeon_compiler * c) +{ + struct rc_instruction * inst; + struct rc_list * variable_list = NULL; + + for (inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + struct rc_reader_data reader_data; + struct rc_variable * new_var; + memset(&reader_data, 0, sizeof(reader_data)); + + if (inst->Type == RC_INSTRUCTION_NORMAL) { + rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL); + if (reader_data.ReaderCount == 0) { + continue; + } + new_var = rc_variable(c, inst->U.I.DstReg.File, + inst->U.I.DstReg.Index, + inst->U.I.DstReg.WriteMask, &reader_data); + get_variable_helper(&variable_list, new_var); + } else { + get_variable_pair_helper(&variable_list, c, inst, + &inst->U.P.RGB); + get_variable_pair_helper(&variable_list, c, inst, + &inst->U.P.Alpha); + } + } + + return variable_list; +} + +/** + * @return The bitwise or of the writemasks of a variable and all of its + * friends. + */ +unsigned int rc_variable_writemask_sum(struct rc_variable * var) +{ + unsigned int writemask = 0; + while(var) { + writemask |= var->Dst.WriteMask; + var = var->Friend; + } + return writemask; +} + +/* + * @return A list of readers for a variable and its friends. Readers + * that read from two different variable friends are only included once in + * this list. + */ +struct rc_list * rc_variable_readers_union(struct rc_variable * var) +{ + struct rc_list * list = NULL; + while (var) { + unsigned int i; + for (i = 0; i < var->ReaderCount; i++) { + struct rc_list * temp; + struct rc_reader * a = &var->Readers[i]; + unsigned int match = 0; + for (temp = list; temp; temp = temp->Next) { + struct rc_reader * b = temp->Item; + if (a->Inst->Type != b->Inst->Type) { + continue; + } + if (a->Inst->Type == RC_INSTRUCTION_NORMAL) { + if (a->U.I.Src == b->U.I.Src) { + match = 1; + break; + } + } + if (a->Inst->Type == RC_INSTRUCTION_PAIR) { + if (a->U.P.Arg == b->U.P.Arg + && a->U.P.Src == b->U.P.Src) { + match = 1; + break; + } + } + } + if (match) { + continue; + } + rc_list_add(&list, rc_list(&var->C->Pool, a)); + } + var = var->Friend; + } + return list; +} + +static unsigned int reader_equals_src( + struct rc_reader reader, + unsigned int src_type, + void * src) +{ + if (reader.Inst->Type != src_type) { + return 0; + } + if (src_type == RC_INSTRUCTION_NORMAL) { + return reader.U.I.Src == src; + } else { + return reader.U.P.Src == src; + } +} + +static unsigned int variable_writes_src( + struct rc_variable * var, + unsigned int src_type, + void * src) +{ + unsigned int i; + for (i = 0; i < var->ReaderCount; i++) { + if (reader_equals_src(var->Readers[i], src_type, src)) { + return 1; + } + } + return 0; +} + + +struct rc_list * rc_variable_list_get_writers( + struct rc_list * var_list, + unsigned int src_type, + void * src) +{ + struct rc_list * list_ptr; + struct rc_list * writer_list = NULL; + for (list_ptr = var_list; list_ptr; list_ptr = list_ptr->Next) { + struct rc_variable * var = list_ptr->Item; + if (variable_writes_src(var, src_type, src)) { + struct rc_variable * friend; + rc_list_add(&writer_list, rc_list(&var->C->Pool, var)); + for (friend = var->Friend; friend; + friend = friend->Friend) { + if (variable_writes_src(friend, src_type, src)) { + rc_list_add(&writer_list, + rc_list(&var->C->Pool, friend)); + } + } + /* Once we have indentifed the variable and its + * friends that write this source, we can stop + * stop searching, because we know know of the + * other variables in the list will write this source. + * If they did they would be friends of var. + */ + break; + } + } + return writer_list; +} + +void rc_variable_print(struct rc_variable * var) +{ + unsigned int i; + while (var) { + fprintf(stderr, "%u: TEMP[%u].%u: ", + var->Inst->IP, var->Dst.Index, var->Dst.WriteMask); + for (i = 0; i < 4; i++) { + fprintf(stderr, "chan %u: start=%u end=%u ", i, + var->Live[i].Start, var->Live[i].End); + } + fprintf(stderr, "%u readers\n", var->ReaderCount); + if (var->Friend) { + fprintf(stderr, "Friend: \n\t"); + } + var = var->Friend; + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_variable.h b/src/gallium/drivers/r300/compiler/radeon_variable.h new file mode 100644 index 00000000000..9427bee18a7 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_variable.h @@ -0,0 +1,89 @@ +/* + * Copyright 2011 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_VARIABLE_H +#define RADEON_VARIABLE_H + +#include "radeon_compiler.h" + +struct radeon_compiler; +struct rc_list; +struct rc_reader_data; +struct rc_readers; + +struct live_intervals { + int Start; + int End; + int Used; +}; + +struct rc_variable { + struct radeon_compiler * C; + struct rc_dst_register Dst; + + struct rc_instruction * Inst; + unsigned int ReaderCount; + struct rc_reader * Readers; + struct live_intervals Live[4]; + + /* A friend is a variable that shares a reader with another variable. + */ + struct rc_variable * Friend; +}; + +void rc_variable_change_dst( + struct rc_variable * var, + unsigned int new_index, + unsigned int new_writemask); + +void rc_variable_compute_live_intervals(struct rc_variable * var); + +void rc_variable_add_friend( + struct rc_variable * var, + struct rc_variable * friend); + +struct rc_variable * rc_variable( + struct radeon_compiler * c, + unsigned int DstFile, + unsigned int DstIndex, + unsigned int DstWriteMask, + struct rc_reader_data * reader_data); + +struct rc_list * rc_get_variables(struct radeon_compiler * c); + +unsigned int rc_variable_writemask_sum(struct rc_variable * var); + +struct rc_list * rc_variable_readers_union(struct rc_variable * var); + +struct rc_list * rc_variable_list_get_writers( + struct rc_list * var_list, + unsigned int src_type, + void * src); + +void rc_variable_print(struct rc_variable * var); + +#endif /* RADEON_VARIABLE_H */ diff --git a/src/gallium/drivers/r300/compiler/tests/.gitignore b/src/gallium/drivers/r300/compiler/tests/.gitignore new file mode 100644 index 00000000000..85672fed777 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/tests/.gitignore @@ -0,0 +1 @@ +radeon_compiler_util_tests diff --git a/src/gallium/drivers/r300/compiler/tests/Makefile b/src/gallium/drivers/r300/compiler/tests/Makefile new file mode 100644 index 00000000000..6eda34a2c00 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/tests/Makefile @@ -0,0 +1,53 @@ +TOP = ../../../../../.. +include $(TOP)/configs/current + +CFLAGS += -Wall -Werror + +### Basic defines ### +TESTS = radeon_compiler_util_tests + +TEST_SOURCES := $(TESTS:=.c) + +SHARED_SOURCES = \ + rc_test_helpers.c \ + unit_test.c + +C_SOURCES = $(SHARED_SOURCES) $(TEST_SOURCES) + +INCLUDES = \ + -I. \ + -I.. + +COMPILER_LIB = ../../libr300.a + +##### TARGETS ##### + +default: depend run_tests + +depend: $(C_SOURCES) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $^ 2> /dev/null + +# Remove .o and backup files +clean: + rm -f $(TESTS) depend depend.bak + +$(TESTS): $(TESTS:=.o) $(SHARED_SOURCES:.c=.o) $(COMPILER_LIB) + $(APP_CC) -o $@ $^ + +run_tests: $(TESTS) + @echo "RUNNING TESTS:" + @echo "" + $(foreach test, $^, @./$(test)) + +.PHONY: $(COMPILER_LIB) +$(COMPILER_LIB): + $(MAKE) -C ../.. + +##### RULES ##### +.c.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ + + +sinclude depend diff --git a/src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c b/src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c new file mode 100644 index 00000000000..a2e3f2ab2e5 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c @@ -0,0 +1,76 @@ +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> + +#include "radeon_compiler_util.h" +#include "radeon_program.h" + +#include "rc_test_helpers.h" +#include "unit_test.h" + +static void test_rc_inst_can_use_presub( + struct test_result * result, + int expected, + const char * add_str, + const char * replace_str) +{ + struct rc_instruction add_inst, replace_inst; + int ret; + + test_begin(result); + init_rc_normal_instruction(&add_inst, add_str); + init_rc_normal_instruction(&replace_inst, replace_str); + + ret = rc_inst_can_use_presub(&replace_inst, RC_PRESUB_ADD, 0, + &replace_inst.U.I.SrcReg[0], + &add_inst.U.I.SrcReg[0], &add_inst.U.I.SrcReg[1]); + + test_check(result, ret == expected); +} + +static void test_runner_rc_inst_can_use_presub(struct test_result * result) +{ + + /* This tests the case where the source being replace has the same + * register file and register index as another source register in the + * CMP instruction. A previous version of this function was ignoring + * all registers that shared the same file and index as the replacement + * register when counting the number of source selects. + * + * https://bugs.freedesktop.org/show_bug.cgi?id=36527 + */ + test_rc_inst_can_use_presub(result, 0, + "ADD temp[0].z, temp[6].__x_, const[1].__x_;", + "CMP temp[0].y, temp[0]._z__, const[0]._z__, temp[0]._y__;"); + + + /* Testing a random case that should fail + * + * https://bugs.freedesktop.org/show_bug.cgi?id=36527 + */ + test_rc_inst_can_use_presub(result, 0, + "ADD temp[3], temp[1], temp[2];", + "MAD temp[1], temp[0], const[0].xxxx, -temp[3];"); + + /* This tests the case where the arguments of the ADD + * instruction share the same register file and index. Normally, we + * would need only one source select for these two arguments, but since + * they will be part of a presubtract operation we need to use the two + * source selects that the presubtract instruction expects + * (src0 and src1). + * + * https://bugs.freedesktop.org/show_bug.cgi?id=36527 + */ + test_rc_inst_can_use_presub(result, 0, + "ADD temp[3].x, temp[0].x___, temp[0].x___;", + "MAD temp[0].xyz, temp[2].xyz_, -temp[3].xxx_, input[5].xyz_;"); +} + +int main(int argc, char ** argv) +{ + struct test tests[] = { + {"rc_inst_can_use_presub()", test_runner_rc_inst_can_use_presub}, + {NULL, NULL} + }; + run_tests(tests); +} diff --git a/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c new file mode 100644 index 00000000000..ca4738af54d --- /dev/null +++ b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c @@ -0,0 +1,380 @@ +#include <errno.h> +#include <regex.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <sys/types.h> + +#include "../radeon_compiler_util.h" +#include "../radeon_opcodes.h" +#include "../radeon_program.h" + +#include "rc_test_helpers.h" + +/* This file contains some helper functions for filling out the rc_instruction + * data structures. These functions take a string as input based on the format + * output by rc_program_print(). + */ + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + +#define REGEX_ERR_BUF_SIZE 50 + +struct match_info { + const char * String; + int Length; +}; + +static int match_length(regmatch_t * matches, int index) +{ + return matches[index].rm_eo - matches[index].rm_so; +} + +static int regex_helper( + const char * regex_str, + const char * search_str, + regmatch_t * matches, + int num_matches) +{ + char err_buf[REGEX_ERR_BUF_SIZE]; + regex_t regex; + int err_code; + unsigned int i; + + err_code = regcomp(®ex, regex_str, REG_EXTENDED); + if (err_code) { + regerror(err_code, ®ex, err_buf, REGEX_ERR_BUF_SIZE); + fprintf(stderr, "Failed to compile regex: %s\n", err_buf); + return 0; + } + + err_code = regexec(®ex, search_str, num_matches, matches, 0); + DBG("Search string: '%s'\n", search_str); + for (i = 0; i < num_matches; i++) { + DBG("Match %u start = %d end = %d\n", i, + matches[i].rm_so, matches[i].rm_eo); + } + if (err_code) { + regerror(err_code, ®ex, err_buf, REGEX_ERR_BUF_SIZE); + fprintf(stderr, "Failed to match regex: %s\n", err_buf); + return 0; + } + return 1; +} + +#define REGEX_SRC_MATCHES 6 + +struct src_tokens { + struct match_info Negate; + struct match_info Abs; + struct match_info File; + struct match_info Index; + struct match_info Swizzle; +}; + +/** + * Initialize the source register at index src_index for the instruction based + * on src_str. + * + * NOTE: Warning in init_rc_normal_instruction() applies to this function as + * well. + * + * @param src_str A string that represents the source register. The format for + * this string is the same that is output by rc_program_print. + * @return 1 On success, 0 on failure + */ +int init_rc_normal_src( + struct rc_instruction * inst, + unsigned int src_index, + const char * src_str) +{ + const char * regex_str = "(-*)(\\|*)([[:lower:]]*)\\[([[:digit:]])\\](\\.*[[:lower:]-]*)"; + regmatch_t matches[REGEX_SRC_MATCHES]; + struct src_tokens tokens; + struct rc_src_register * src_reg = &inst->U.I.SrcReg[src_index]; + unsigned int i; + + /* Execute the regex */ + if (!regex_helper(regex_str, src_str, matches, REGEX_SRC_MATCHES)) { + fprintf(stderr, "Failed to execute regex for src register.\n"); + return 0; + } + + /* Create Tokens */ + tokens.Negate.String = src_str + matches[1].rm_so; + tokens.Negate.Length = match_length(matches, 1); + tokens.Abs.String = src_str + matches[2].rm_so; + tokens.Abs.Length = match_length(matches, 2); + tokens.File.String = src_str + matches[3].rm_so; + tokens.File.Length = match_length(matches, 3); + tokens.Index.String = src_str + matches[4].rm_so; + tokens.Index.Length = match_length(matches, 4); + tokens.Swizzle.String = src_str + matches[5].rm_so; + tokens.Swizzle.Length = match_length(matches, 5); + + /* Negate */ + if (tokens.Negate.Length > 0) { + src_reg->Negate = RC_MASK_XYZW; + } + + /* Abs */ + if (tokens.Abs.Length > 0) { + src_reg->Abs = 1; + } + + /* File */ + if (!strncmp(tokens.File.String, "temp", tokens.File.Length)) { + src_reg->File = RC_FILE_TEMPORARY; + } else if (!strncmp(tokens.File.String, "input", tokens.File.Length)) { + src_reg->File = RC_FILE_INPUT; + } else if (!strncmp(tokens.File.String, "const", tokens.File.Length)) { + src_reg->File = RC_FILE_CONSTANT; + } else if (!strncmp(tokens.File.String, "none", tokens.File.Length)) { + src_reg->File = RC_FILE_NONE; + } + + /* Index */ + errno = 0; + src_reg->Index = strtol(tokens.Index.String, NULL, 10); + if (errno > 0) { + fprintf(stderr, "Could not convert src register index.\n"); + return 0; + } + + /* Swizzle */ + if (tokens.Swizzle.Length == 0) { + src_reg->Swizzle = RC_SWIZZLE_XYZW; + } else { + int str_index = 1; + src_reg->Swizzle = RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_UNUSED); + if (tokens.Swizzle.String[0] != '.') { + fprintf(stderr, "First char of swizzle is not valid.\n"); + return 0; + } + for (i = 0; i < 4; i++, str_index++) { + if (tokens.Swizzle.String[str_index] == '-') { + src_reg->Negate |= (1 << i); + str_index++; + } + switch(tokens.Swizzle.String[str_index]) { + case 'x': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_X); + break; + case 'y': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_Y); + break; + case 'z': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_Z); + break; + case 'w': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_W); + break; + case '1': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_ONE); + break; + case '0': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_ZERO); + break; + case 'H': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_HALF); + break; + case '_': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_UNUSED); + break; + default: + fprintf(stderr, "Unknown src register swizzle.\n"); + return 0; + } + } + } + DBG("File=%u index=%u swizzle=%x negate=%u abs=%u\n", + src_reg->File, src_reg->Index, src_reg->Swizzle, + src_reg->Negate, src_reg->Abs); + return 1; +} + +#define REGEX_DST_MATCHES 4 + +struct dst_tokens { + struct match_info File; + struct match_info Index; + struct match_info WriteMask; +}; + +/** + * Initialize the destination for the instruction based on dst_str. + * + * NOTE: Warning in init_rc_normal_instruction() applies to this function as + * well. + * + * @param dst_str A string that represents the destination register. The format + * for this string is the same that is output by rc_program_print. + * @return 1 On success, 0 on failure + */ +int init_rc_normal_dst( + struct rc_instruction * inst, + const char * dst_str) +{ + const char * regex_str = "([[:lower:]]*)\\[([[:digit:]]*)\\](\\.*[[:lower:]]*)"; + regmatch_t matches[REGEX_DST_MATCHES]; + struct dst_tokens tokens; + unsigned int i; + + /* Execute the regex */ + if (!regex_helper(regex_str, dst_str, matches, REGEX_DST_MATCHES)) { + fprintf(stderr, "Failed to execute regex for dst register.\n"); + return 0; + } + + /* Create Tokens */ + tokens.File.String = dst_str + matches[1].rm_so; + tokens.File.Length = match_length(matches, 1); + tokens.Index.String = dst_str + matches[2].rm_so; + tokens.Index.Length = match_length(matches, 2); + tokens.WriteMask.String = dst_str + matches[3].rm_so; + tokens.WriteMask.Length = match_length(matches, 3); + + /* File Type */ + if (!strncmp(tokens.File.String, "temp", tokens.File.Length)) { + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + } else if (!strncmp(tokens.File.String, "output", tokens.File.Length)) { + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + } else { + fprintf(stderr, "Unknown dst register file type.\n"); + return 0; + } + + /* File Index */ + errno = 0; + inst->U.I.DstReg.Index = strtol(tokens.Index.String, NULL, 10); + + if (errno > 0) { + fprintf(stderr, "Could not convert dst register index\n"); + return 0; + } + + /* WriteMask */ + if (tokens.WriteMask.Length == 0) { + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + } else { + /* The first character should be '.' */ + if (tokens.WriteMask.String[0] != '.') { + fprintf(stderr, "1st char of writemask is not valid.\n"); + return 0; + } + for (i = 1; i < tokens.WriteMask.Length; i++) { + switch(tokens.WriteMask.String[i]) { + case 'x': + inst->U.I.DstReg.WriteMask |= RC_MASK_X; + break; + case 'y': + inst->U.I.DstReg.WriteMask |= RC_MASK_Y; + break; + case 'z': + inst->U.I.DstReg.WriteMask |= RC_MASK_Z; + break; + case 'w': + inst->U.I.DstReg.WriteMask |= RC_MASK_W; + break; + default: + fprintf(stderr, "Unknown swizzle in writemask.\n"); + return 0; + } + } + } + DBG("Dst Reg File=%u Index=%d Writemask=%d\n", + inst->U.I.DstReg.File, + inst->U.I.DstReg.Index, + inst->U.I.DstReg.WriteMask); + return 1; +} + +#define REGEX_INST_MATCHES 7 + +struct inst_tokens { + struct match_info Opcode; + struct match_info Sat; + struct match_info Dst; + struct match_info Srcs[3]; +}; + +/** + * Initialize a normal instruction based on inst_str. + * + * WARNING: This function might not be able to handle every kind of format that + * rc_program_print() can output. If you are having problems with a + * particular string, you may need to add support for it to this functions. + * + * @param inst_str A string that represents the source register. The format for + * this string is the same that is output by rc_program_print. + * @return 1 On success, 0 on failure + */ +int init_rc_normal_instruction( + struct rc_instruction * inst, + const char * inst_str) +{ + const char * regex_str = "([[:upper:]]+)(_SAT)* ([^,]*)[, ]*([^,]*)[, ]*([^,]*)[, ]*([^;]*)"; + int i; + regmatch_t matches[REGEX_INST_MATCHES]; + struct inst_tokens tokens; + + /* Initialize inst */ + memset(inst, 0, sizeof(struct rc_instruction)); + inst->Type = RC_INSTRUCTION_NORMAL; + + /* Execute the regex */ + if (!regex_helper(regex_str, inst_str, matches, REGEX_INST_MATCHES)) { + return 0; + } + memset(&tokens, 0, sizeof(tokens)); + + /* Create Tokens */ + tokens.Opcode.String = inst_str + matches[1].rm_so; + tokens.Opcode.Length = match_length(matches, 1); + if (matches[2].rm_so > -1) { + tokens.Sat.String = inst_str + matches[2].rm_so; + tokens.Sat.Length = match_length(matches, 2); + } + + + /* Fill out the rest of the instruction. */ + for (i = 0; i < MAX_RC_OPCODE; i++) { + const struct rc_opcode_info * info = rc_get_opcode_info(i); + unsigned int first_src = 3; + unsigned int j; + if (strncmp(tokens.Opcode.String, info->Name, tokens.Opcode.Length)) { + continue; + } + inst->U.I.Opcode = info->Opcode; + if (info->HasDstReg) { + char * dst_str; + tokens.Dst.String = inst_str + matches[3].rm_so; + tokens.Dst.Length = match_length(matches, 3); + first_src++; + + dst_str = malloc(sizeof(char) * (tokens.Dst.Length + 1)); + strncpy(dst_str, tokens.Dst.String, tokens.Dst.Length); + dst_str[tokens.Dst.Length] = '\0'; + init_rc_normal_dst(inst, dst_str); + free(dst_str); + } + for (j = 0; j < info->NumSrcRegs; j++) { + char * src_str; + tokens.Srcs[j].String = + inst_str + matches[first_src + j].rm_so; + tokens.Srcs[j].Length = + match_length(matches, first_src + j); + + src_str = malloc(sizeof(char) * + (tokens.Srcs[j].Length + 1)); + strncpy(src_str, tokens.Srcs[j].String, + tokens.Srcs[j].Length); + src_str[tokens.Srcs[j].Length] = '\0'; + init_rc_normal_src(inst, j, src_str); + } + break; + } + return 1; +} diff --git a/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.h b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.h new file mode 100644 index 00000000000..1a6bf9699ba --- /dev/null +++ b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.h @@ -0,0 +1,13 @@ + +int init_rc_normal_src( + struct rc_instruction * inst, + unsigned int src_index, + const char * src_str); + +int init_rc_normal_dst( + struct rc_instruction * inst, + const char * dst_str); + +int init_rc_normal_instruction( + struct rc_instruction * inst, + const char * inst_str); diff --git a/src/gallium/drivers/r300/compiler/tests/unit_test.c b/src/gallium/drivers/r300/compiler/tests/unit_test.c new file mode 100644 index 00000000000..266f3365c58 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/tests/unit_test.c @@ -0,0 +1,35 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "unit_test.h" + +void run_tests(struct test tests[]) +{ + int i; + for (i = 0; tests[i].name; i++) { + printf("Test %s\n", tests[i].name); + memset(&tests[i].result, 0, sizeof(tests[i].result)); + tests[i].test_func(&tests[i].result); + printf("Test %s (%d/%d) pass\n", tests[i].name, + tests[i].result.pass, tests[i].result.test_count); + } +} + +void test_begin(struct test_result * result) +{ + result->test_count++; +} + +void test_check(struct test_result * result, int cond) +{ + printf("Subtest %u -> ", result->test_count); + if (cond) { + result->pass++; + printf("Pass"); + } else { + result->fail++; + printf("Fail"); + } + printf("\n"); +} diff --git a/src/gallium/drivers/r300/compiler/tests/unit_test.h b/src/gallium/drivers/r300/compiler/tests/unit_test.h new file mode 100644 index 00000000000..441e8b655a5 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/tests/unit_test.h @@ -0,0 +1,17 @@ + +struct test_result { + unsigned int test_count; + unsigned int pass; + unsigned int fail; +}; + +struct test { + const char * name; + void (*test_func)(struct test_result * result); + struct test_result result; +}; + +void run_tests(struct test tests[]); + +void test_begin(struct test_result * result); +void test_check(struct test_result * result, int cond); diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 388ebcdbf32..db97e496e19 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -247,7 +247,7 @@ static void r300_clear(struct pipe_context* pipe, if (!r300->hyperz_enabled) { r300->hyperz_enabled = r300->rws->cs_request_feature(r300->cs, - RADEON_FID_HYPERZ_RAM_ACCESS, + RADEON_FID_R300_HYPERZ_ACCESS, TRUE); if (r300->hyperz_enabled) { /* Need to emit HyperZ buffer regs for the first time. */ diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 571986c3011..80148b80afb 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -31,9 +31,9 @@ * Radeons. */ /* Parse a PCI ID and fill an r300_capabilities struct with information. */ -void r300_parse_chipset(struct r300_capabilities* caps) +void r300_parse_chipset(uint32_t pci_id, struct r300_capabilities* caps) { - switch (caps->pci_id) { + switch (pci_id) { #define CHIPSET(pci_id, name, chipfamily) \ case pci_id: \ caps->family = CHIP_FAMILY_##chipfamily; \ @@ -43,7 +43,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) default: fprintf(stderr, "r300: Warning: Unknown chipset 0x%x\nAborting...", - caps->pci_id); + pci_id); abort(); } diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index 4df6b5b6292..f96cdaf2580 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -43,16 +43,10 @@ enum r300_zmask_compression { /* Structure containing all the possible information about a specific Radeon * in the R3xx, R4xx, and R5xx families. */ struct r300_capabilities { - /* PCI ID */ - uint32_t pci_id; /* Chipset family */ int family; /* The number of vertex floating-point units */ unsigned num_vert_fpus; - /* The number of fragment pipes */ - unsigned num_frag_pipes; - /* The number of z pipes */ - unsigned num_z_pipes; /* The number of texture units. */ unsigned num_tex_units; /* Whether or not TCL is physically present */ @@ -121,6 +115,6 @@ enum { CHIP_FAMILY_RV570 }; -void r300_parse_chipset(struct r300_capabilities* caps); +void r300_parse_chipset(uint32_t pci_id, struct r300_capabilities* caps); #endif /* R300_CHIPSET_H */ diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 0554c40eef0..5c222588e47 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -27,6 +27,8 @@ #include "util/u_simple_list.h" #include "util/u_upload_mgr.h" #include "os/os_time.h" +#include "vl/vl_decoder.h" +#include "vl/vl_video_buffer.h" #include "r300_cb.h" #include "r300_context.h" @@ -97,7 +99,7 @@ static void r300_destroy_context(struct pipe_context* context) struct r300_context* r300 = r300_context(context); if (r300->cs && r300->hyperz_enabled) { - r300->rws->cs_request_feature(r300->cs, RADEON_FID_HYPERZ_RAM_ACCESS, FALSE); + r300->rws->cs_request_feature(r300->cs, RADEON_FID_R300_HYPERZ_ACCESS, FALSE); } if (r300->blitter) @@ -171,7 +173,7 @@ static boolean r300_setup_atoms(struct r300_context* r300) boolean is_rv350 = r300->screen->caps.is_rv350; boolean is_r500 = r300->screen->caps.is_r500; boolean has_tcl = r300->screen->caps.has_tcl; - boolean drm_2_6_0 = r300->rws->get_value(r300->rws, RADEON_VID_DRM_2_6_0); + boolean drm_2_6_0 = r300->screen->info.drm_minor >= 6; /* Create the actual atom list. * @@ -378,7 +380,7 @@ static void r300_init_states(struct pipe_context *pipe) if (r300->screen->caps.is_r500 || (r300->screen->caps.is_rv350 && - r300->rws->get_value(r300->rws, RADEON_VID_DRM_2_6_0))) { + r300->screen->info.drm_minor >= 6)) { OUT_CB_REG(R300_GB_Z_PEQ_CONFIG, 0); } END_CB; @@ -436,6 +438,9 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_init_query_functions(r300); r300_init_state_functions(r300); r300_init_resource_functions(r300); + + r300->context.create_video_decoder = vl_create_decoder; + r300->context.create_video_buffer = vl_video_buffer_create; r300->vbuf_mgr = u_vbuf_mgr_create(&r300->context, 1024 * 1024, 16, PIPE_BIND_VERTEX_BUFFER | @@ -515,15 +520,15 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, "r300: DRM version: %d.%d.%d, Name: %s, ID: 0x%04x, GB: %d, Z: %d\n" "r300: GART size: %d MB, VRAM size: %d MB\n" "r300: AA compression RAM: %s, Z compression RAM: %s, HiZ RAM: %s\n", - rws->get_value(rws, RADEON_VID_DRM_MAJOR), - rws->get_value(rws, RADEON_VID_DRM_MINOR), - rws->get_value(rws, RADEON_VID_DRM_PATCHLEVEL), + r300->screen->info.drm_major, + r300->screen->info.drm_minor, + r300->screen->info.drm_patchlevel, screen->get_name(screen), - rws->get_value(rws, RADEON_VID_PCI_ID), - rws->get_value(rws, RADEON_VID_R300_GB_PIPES), - rws->get_value(rws, RADEON_VID_R300_Z_PIPES), - rws->get_value(rws, RADEON_VID_GART_SIZE) >> 20, - rws->get_value(rws, RADEON_VID_VRAM_SIZE) >> 20, + r300->screen->info.pci_id, + r300->screen->info.r300_num_gb_pipes, + r300->screen->info.r300_num_z_pipes, + r300->screen->info.gart_size >> 20, + r300->screen->info.vram_size >> 20, "YES", /* XXX really? */ r300->screen->caps.zmask_ram ? "YES" : "NO", r300->screen->caps.hiz_ram ? "YES" : "NO"); diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index d214af4cd5b..502aed3a20c 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -574,11 +574,12 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, struct r300_query *query) { struct r300_capabilities* caps = &r300->screen->caps; + uint32_t gb_pipes = r300->screen->info.r300_num_gb_pipes; CS_LOCALS(r300); - assert(caps->num_frag_pipes); + assert(gb_pipes); - BEGIN_CS(6 * caps->num_frag_pipes + 2); + BEGIN_CS(6 * gb_pipes + 2); /* I'm not so sure I like this switch, but it's hard to be elegant * when there's so many special cases... * @@ -587,7 +588,7 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, * 4-byte offset for each pipe. RV380 and older are special; they have * only two pipes, and the second pipe's enable is on bit 3, not bit 1, * so there's a chipset cap for that. */ - switch (caps->num_frag_pipes) { + switch (gb_pipes) { case 4: /* pipe 3 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 3); @@ -613,7 +614,7 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, break; default: fprintf(stderr, "r300: Implementation error: Chipset reports %d" - " pixel pipes!\n", caps->num_frag_pipes); + " pixel pipes!\n", gb_pipes); abort(); } @@ -663,7 +664,7 @@ void r300_emit_query_end(struct r300_context* r300) return; if (caps->family == CHIP_FAMILY_RV530) { - if (caps->num_z_pipes == 2) + if (r300->screen->info.r300_num_z_pipes == 2) rv530_emit_query_end_double_z(r300, query); else rv530_emit_query_end_single_z(r300, query); diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 6c1c9d2fb13..234e043b071 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -24,7 +24,6 @@ #define R300_EMIT_H #include "r300_context.h" -#include "radeon_code.h" struct rX00_fragment_program_code; struct r300_vertex_program_code; diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 34f5419a864..dc596c4122a 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -76,7 +76,6 @@ void r300_flush(struct pipe_context *pipe, /* Create a fence, which is a dummy BO. */ *rfence = r300->rws->buffer_create(r300->rws, 1, 1, PIPE_BIND_VERTEX_BUFFER, - PIPE_USAGE_STATIC, RADEON_DOMAIN_GTT); /* Add the fence as a dummy relocation. */ r300->rws->cs_add_reloc(r300->cs, @@ -121,7 +120,7 @@ void r300_flush(struct pipe_context *pipe, } /* Release HyperZ. */ - r300->rws->cs_request_feature(r300->cs, RADEON_FID_HYPERZ_RAM_ACCESS, + r300->rws->cs_request_feature(r300->cs, RADEON_FID_R300_HYPERZ_ACCESS, FALSE); } r300->num_z_clears = 0; diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index e3a1bc4a0f4..6f21125f70a 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -38,8 +38,7 @@ #include "r300_texture.h" #include "r300_tgsi_to_rc.h" -#include "radeon_code.h" -#include "radeon_compiler.h" +#include "compiler/radeon_compiler.h" /* Convert info about FS input semantics to r300_shader_semantics. */ void r300_shader_read_fs_inputs(struct tgsi_shader_info* info, @@ -181,9 +180,10 @@ static void get_external_state( v->base.format == PIPE_FORMAT_LATC1_SNORM) { unsigned char swizzle[4]; - util_format_combine_swizzles(swizzle, + util_format_compose_swizzles( util_format_description(v->base.format)->swizzle, - v->swizzle); + v->swizzle, + swizzle); state->unit[i].texture_swizzle = RC_MAKE_SWIZZLE(swizzle[0], swizzle[1], diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h index c86a90b85ae..45c9e8801c3 100644 --- a/src/gallium/drivers/r300/r300_fs.h +++ b/src/gallium/drivers/r300/r300_fs.h @@ -27,7 +27,7 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" -#include "radeon_code.h" +#include "compiler/radeon_code.h" #include "r300_shader_semantics.h" struct r300_fragment_shader_code { diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 782f041e926..c0357f9d035 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -49,16 +49,15 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe, q->buffer_size = 4096; if (r300screen->caps.family == CHIP_FAMILY_RV530) - q->num_pipes = r300screen->caps.num_z_pipes; + q->num_pipes = r300screen->info.r300_num_z_pipes; else - q->num_pipes = r300screen->caps.num_frag_pipes; + q->num_pipes = r300screen->info.r300_num_gb_pipes; insert_at_tail(&r300->query_list, q); /* Open up the occlusion query buffer. */ q->buf = r300->rws->buffer_create(r300->rws, q->buffer_size, 4096, - PIPE_BIND_CUSTOM, PIPE_USAGE_STREAM, - q->domain); + PIPE_BIND_CUSTOM, q->domain); q->cs_buf = r300->rws->buffer_get_cs_handle(q->buf); return (struct pipe_query*)q; diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index bb30b1ab0be..5edbb22a743 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -2078,7 +2078,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_ALU_OUTC_D2A (3 << 23) # define R300_ALU_OUTC_MIN (4 << 23) # define R300_ALU_OUTC_MAX (5 << 23) -# define R300_ALU_OUTC_CMPH (7 << 23) +# define R300_ALU_OUTC_CND (7 << 23) # define R300_ALU_OUTC_CMP (8 << 23) # define R300_ALU_OUTC_FRC (9 << 23) # define R300_ALU_OUTC_REPL_ALPHA (10 << 23) @@ -2944,6 +2944,23 @@ enum { /*\}*/ +#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class) \ + (((opcode & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT) \ + | ((math_inst & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT) \ + | ((macro_inst & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT) \ + | ((reg_index & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT) \ + | ((reg_writemask & 0xf) << PVS_DST_WE_X_SHIFT) /* X Y Z W */ \ + | ((reg_class & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT)) + +#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate) \ + (((in_reg_index & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT) \ + | ((comp_x & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT) \ + | ((comp_y & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT) \ + | ((comp_z & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT) \ + | ((comp_w & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT) \ + | ((negate & 0xf) << PVS_SRC_MODIFIER_X_SHIFT) /* X Y Z W */ \ + | ((reg_class & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT)) + /* BEGIN: Packet 3 commands */ /* A primitive emission dword. */ @@ -3249,6 +3266,8 @@ enum { # define R500_INST_RGB_CLAMP (1 << 19) # define R500_INST_ALPHA_CLAMP (1 << 20) # define R500_INST_ALU_RESULT_SEL (1 << 21) +# define R500_INST_ALU_RESULT_SEL_RED (0 << 21) +# define R500_INST_ALU_RESULT_SEL_ALPHA (1 << 21) # define R500_INST_ALPHA_PRED_INV (1 << 22) # define R500_INST_ALU_RESULT_OP_EQ (0 << 23) # define R500_INST_ALU_RESULT_OP_LT (1 << 23) diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index d9399d78ef9..b31141a518e 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -175,8 +175,8 @@ static void r300_split_index_bias(struct r300_context *r300, int index_bias, enum r300_prepare_flags { PREP_EMIT_STATES = (1 << 0), /* call emit_dirty_state and friends? */ PREP_VALIDATE_VBOS = (1 << 1), /* validate VBOs? */ - PREP_EMIT_AOS = (1 << 2), /* call emit_vertex_arrays? */ - PREP_EMIT_AOS_SWTCL = (1 << 3), /* call emit_vertex_arrays_swtcl? */ + PREP_EMIT_VARRAYS = (1 << 2), /* call emit_vertex_arrays? */ + PREP_EMIT_VARRAYS_SWTCL = (1 << 3), /* call emit_vertex_arrays_swtcl? */ PREP_INDEXED = (1 << 4) /* is this draw_elements? */ }; @@ -193,23 +193,22 @@ static boolean r300_reserve_cs_dwords(struct r300_context *r300, unsigned cs_dwords) { boolean flushed = FALSE; - boolean first_draw = flags & PREP_EMIT_STATES; - boolean emit_vertex_arrays = flags & PREP_EMIT_AOS; - boolean emit_vertex_arrays_swtcl = flags & PREP_EMIT_AOS_SWTCL; + boolean emit_states = flags & PREP_EMIT_STATES; + boolean emit_vertex_arrays = flags & PREP_EMIT_VARRAYS; + boolean emit_vertex_arrays_swtcl = flags & PREP_EMIT_VARRAYS_SWTCL; /* Add dirty state, index offset, and AOS. */ - if (first_draw) { + if (emit_states) cs_dwords += r300_get_num_dirty_dwords(r300); - if (r300->screen->caps.is_r500) - cs_dwords += 2; /* emit_index_offset */ + if (r300->screen->caps.is_r500) + cs_dwords += 2; /* emit_index_offset */ - if (emit_vertex_arrays) - cs_dwords += 55; /* emit_vertex_arrays */ + if (emit_vertex_arrays) + cs_dwords += 55; /* emit_vertex_arrays */ - if (emit_vertex_arrays_swtcl) - cs_dwords += 7; /* emit_vertex_arrays_swtcl */ - } + if (emit_vertex_arrays_swtcl) + cs_dwords += 7; /* emit_vertex_arrays_swtcl */ cs_dwords += r300_get_num_cs_end_dwords(r300); @@ -238,46 +237,48 @@ static boolean r300_emit_states(struct r300_context *r300, int buffer_offset, int index_bias, int instance_id) { - boolean first_draw = flags & PREP_EMIT_STATES; - boolean emit_vertex_arrays = flags & PREP_EMIT_AOS; - boolean emit_vertex_arrays_swtcl = flags & PREP_EMIT_AOS_SWTCL; + boolean emit_states = flags & PREP_EMIT_STATES; + boolean emit_vertex_arrays = flags & PREP_EMIT_VARRAYS; + boolean emit_vertex_arrays_swtcl = flags & PREP_EMIT_VARRAYS_SWTCL; boolean indexed = flags & PREP_INDEXED; boolean validate_vbos = flags & PREP_VALIDATE_VBOS; /* Validate buffers and emit dirty state if needed. */ - if (first_draw) { + if (emit_states || (emit_vertex_arrays && validate_vbos)) { if (!r300_emit_buffer_validate(r300, validate_vbos, index_buffer)) { fprintf(stderr, "r300: CS space validation failed. " "(not enough memory?) Skipping rendering.\n"); return FALSE; } + } + if (emit_states) r300_emit_dirty_state(r300); - if (r300->screen->caps.is_r500) { - if (r300->screen->caps.has_tcl) - r500_emit_index_bias(r300, index_bias); - else - r500_emit_index_bias(r300, 0); - } - if (emit_vertex_arrays && - (r300->vertex_arrays_dirty || - r300->vertex_arrays_indexed != indexed || - r300->vertex_arrays_offset != buffer_offset || - r300->vertex_arrays_instance_id != instance_id)) { - r300_emit_vertex_arrays(r300, buffer_offset, indexed, instance_id); - - r300->vertex_arrays_dirty = FALSE; - r300->vertex_arrays_indexed = indexed; - r300->vertex_arrays_offset = buffer_offset; - r300->vertex_arrays_instance_id = instance_id; - } + if (r300->screen->caps.is_r500) { + if (r300->screen->caps.has_tcl) + r500_emit_index_bias(r300, index_bias); + else + r500_emit_index_bias(r300, 0); + } - if (emit_vertex_arrays_swtcl) - r300_emit_vertex_arrays_swtcl(r300, indexed); + if (emit_vertex_arrays && + (r300->vertex_arrays_dirty || + r300->vertex_arrays_indexed != indexed || + r300->vertex_arrays_offset != buffer_offset || + r300->vertex_arrays_instance_id != instance_id)) { + r300_emit_vertex_arrays(r300, buffer_offset, indexed, instance_id); + + r300->vertex_arrays_dirty = FALSE; + r300->vertex_arrays_indexed = indexed; + r300->vertex_arrays_offset = buffer_offset; + r300->vertex_arrays_instance_id = instance_id; } + if (emit_vertex_arrays_swtcl) + r300_emit_vertex_arrays_swtcl(r300, indexed); + return TRUE; } @@ -540,7 +541,7 @@ static void r300_draw_elements_immediate(struct r300_context *r300, /* 19 dwords for r300_draw_elements_immediate. Give up if the function fails. */ if (!r300_prepare_for_rendering(r300, - PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | + PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_VARRAYS | PREP_INDEXED, NULL, 2+count_dwords, 0, info->index_bias, -1)) return; @@ -662,7 +663,7 @@ static void r300_draw_elements(struct r300_context *r300, /* 19 dwords for emit_draw_elements. Give up if the function fails. */ if (!r300_prepare_for_rendering(r300, - PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | + PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_VARRAYS | PREP_INDEXED, indexBuffer, 19, buffer_offset, info->index_bias, instance_id)) goto done; @@ -689,7 +690,7 @@ static void r300_draw_elements(struct r300_context *r300, /* 15 dwords for emit_draw_elements */ if (count) { if (!r300_prepare_for_rendering(r300, - PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED, + PREP_VALIDATE_VBOS | PREP_EMIT_VARRAYS | PREP_INDEXED, indexBuffer, 19, buffer_offset, info->index_bias, instance_id)) goto done; @@ -715,7 +716,7 @@ static void r300_draw_arrays(struct r300_context *r300, /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */ if (!r300_prepare_for_rendering(r300, - PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_AOS, + PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_VARRAYS, NULL, 9, start, 0, instance_id)) return; @@ -736,7 +737,7 @@ static void r300_draw_arrays(struct r300_context *r300, /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */ if (count) { if (!r300_prepare_for_rendering(r300, - PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9, + PREP_VALIDATE_VBOS | PREP_EMIT_VARRAYS, NULL, 9, start, 0, instance_id)) return; } @@ -767,7 +768,6 @@ static void r300_draw_vbo(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); struct pipe_draw_info info = *dinfo; - boolean buffers_updated, uploader_flushed; info.indexed = info.indexed && r300->index_buffer.buffer; @@ -779,9 +779,7 @@ static void r300_draw_vbo(struct pipe_context* pipe, r300_update_derived_state(r300); /* Start the vbuf manager and update buffers if needed. */ - u_vbuf_mgr_draw_begin(r300->vbuf_mgr, &info, - &buffers_updated, &uploader_flushed); - if (buffers_updated) { + if (u_vbuf_mgr_draw_begin(r300->vbuf_mgr, &info) & U_VBUF_BUFFERS_UPDATED) { r300->vertex_arrays_dirty = TRUE; } @@ -843,7 +841,7 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, r300_update_derived_state(r300); r300_reserve_cs_dwords(r300, - PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL | + PREP_EMIT_STATES | PREP_EMIT_VARRAYS_SWTCL | (indexed ? PREP_INDEXED : 0), indexed ? 256 : 6); @@ -1025,12 +1023,12 @@ static void r300_render_draw_arrays(struct vbuf_render* render, if (r300->draw_first_emitted) { if (!r300_prepare_for_rendering(r300, - PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL, + PREP_EMIT_STATES | PREP_EMIT_VARRAYS_SWTCL, NULL, dwords, 0, 0, -1)) return; } else { if (!r300_emit_states(r300, - PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL, + PREP_EMIT_STATES | PREP_EMIT_VARRAYS_SWTCL, NULL, 0, 0, -1)) return; } @@ -1065,12 +1063,12 @@ static void r300_render_draw_elements(struct vbuf_render* render, if (r300->draw_first_emitted) { if (!r300_prepare_for_rendering(r300, - PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL | PREP_INDEXED, + PREP_EMIT_STATES | PREP_EMIT_VARRAYS_SWTCL | PREP_INDEXED, NULL, 256, 0, 0, -1)) return; } else { if (!r300_emit_states(r300, - PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL | PREP_INDEXED, + PREP_EMIT_STATES | PREP_EMIT_VARRAYS_SWTCL | PREP_INDEXED, NULL, 0, 0, -1)) return; } @@ -1107,7 +1105,7 @@ static void r300_render_draw_elements(struct vbuf_render* render, if (count) { if (!r300_prepare_for_rendering(r300, - PREP_EMIT_AOS_SWTCL | PREP_INDEXED, + PREP_EMIT_VARRAYS_SWTCL | PREP_INDEXED, NULL, 256, 0, 0, -1)) return; diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 93baba68150..47de4005c37 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -25,6 +25,8 @@ #include "util/u_format_s3tc.h" #include "util/u_memory.h" #include "os/os_time.h" +#include "vl/vl_decoder.h" +#include "vl/vl_video_buffer.h" #include "r300_context.h" #include "r300_texture.h" @@ -304,15 +306,31 @@ static float r300_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param) } } +static int r300_get_video_param(struct pipe_screen *screen, + enum pipe_video_profile profile, + enum pipe_video_cap param) +{ + switch (param) { + case PIPE_VIDEO_CAP_SUPPORTED: + return vl_profile_supported(screen, profile); + case PIPE_VIDEO_CAP_NPOT_TEXTURES: + return 0; + case PIPE_VIDEO_CAP_MAX_WIDTH: + case PIPE_VIDEO_CAP_MAX_HEIGHT: + return vl_video_buffer_max_size(screen); + default: + return 0; + } +} + static boolean r300_is_format_supported(struct pipe_screen* screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, unsigned usage) { - struct radeon_winsys *rws = r300_screen(screen)->rws; uint32_t retval = 0; - boolean drm_2_8_0 = rws->get_value(rws, RADEON_VID_DRM_2_8_0); + boolean drm_2_8_0 = r300_screen(screen)->info.drm_minor >= 8; boolean is_r500 = r300_screen(screen)->caps.is_r500; boolean is_r400 = r300_screen(screen)->caps.is_r400; boolean is_color2101010 = format == PIPE_FORMAT_R10G10B10A2_UNORM || @@ -480,19 +498,17 @@ struct pipe_screen* r300_screen_create(struct radeon_winsys *rws) return NULL; } - r300screen->caps.pci_id = rws->get_value(rws, RADEON_VID_PCI_ID); - r300screen->caps.num_frag_pipes = rws->get_value(rws, RADEON_VID_R300_GB_PIPES); - r300screen->caps.num_z_pipes = rws->get_value(rws, RADEON_VID_R300_Z_PIPES); + rws->query_info(rws, &r300screen->info); r300_init_debug(r300screen); - r300_parse_chipset(&r300screen->caps); + r300_parse_chipset(r300screen->info.pci_id, &r300screen->caps); if (SCREEN_DBG_ON(r300screen, DBG_NO_ZMASK)) r300screen->caps.zmask_ram = 0; if (SCREEN_DBG_ON(r300screen, DBG_NO_HIZ)) r300screen->caps.hiz_ram = 0; - if (!rws->get_value(rws, RADEON_VID_DRM_2_8_0)) + if (r300screen->info.drm_minor < 8) r300screen->caps.has_us_format = FALSE; pipe_mutex_init(r300screen->num_contexts_mutex); @@ -509,9 +525,10 @@ struct pipe_screen* r300_screen_create(struct radeon_winsys *rws) r300screen->screen.get_param = r300_get_param; r300screen->screen.get_shader_param = r300_get_shader_param; r300screen->screen.get_paramf = r300_get_paramf; + r300screen->screen.get_video_param = r300_get_video_param; r300screen->screen.is_format_supported = r300_is_format_supported; + r300screen->screen.is_video_format_supported = vl_video_buffer_is_format_supported; r300screen->screen.context_create = r300_create_context; - r300screen->screen.fence_reference = r300_fence_reference; r300screen->screen.fence_signalled = r300_fence_signalled; r300screen->screen.fence_finish = r300_fence_finish; diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index e5c53bf3500..82b2068e7a0 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -24,23 +24,20 @@ #ifndef R300_SCREEN_H #define R300_SCREEN_H -#include "pipe/p_screen.h" - #include "r300_chipset.h" - +#include "../../winsys/radeon/drm/radeon_winsys.h" +#include "pipe/p_screen.h" #include "util/u_slab.h" - #include <stdio.h> -struct radeon_winsys; - struct r300_screen { /* Parent class */ struct pipe_screen screen; struct radeon_winsys *rws; - /* Chipset capabilities */ + /* Chipset info and capabilities. */ + struct radeon_info info; struct r300_capabilities caps; /* Memory pools. */ diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index 4154c81512e..c751a943b96 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -201,8 +201,7 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, rbuf->buf = r300screen->rws->buffer_create(r300screen->rws, rbuf->b.b.b.width0, alignment, - rbuf->b.b.b.bind, rbuf->b.b.b.usage, - rbuf->domain); + rbuf->b.b.b.bind, rbuf->domain); if (!rbuf->buf) { util_slab_free(&r300screen->pool_buffers, rbuf); return NULL; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index f63114e7eb7..45c11fce1fe 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -605,7 +605,6 @@ static uint32_t r300_get_border_color(enum pipe_format format, { const struct util_format_description *desc; float border_swizzled[4] = {0}; - unsigned i; union util_color uc = {0}; desc = util_format_description(format); @@ -629,22 +628,7 @@ static uint32_t r300_get_border_color(enum pipe_format format, } /* Apply inverse swizzle of the format. */ - for (i = 0; i < 4; i++) { - switch (desc->swizzle[i]) { - case UTIL_FORMAT_SWIZZLE_X: - border_swizzled[0] = border[i]; - break; - case UTIL_FORMAT_SWIZZLE_Y: - border_swizzled[1] = border[i]; - break; - case UTIL_FORMAT_SWIZZLE_Z: - border_swizzled[2] = border[i]; - break; - case UTIL_FORMAT_SWIZZLE_W: - border_swizzled[3] = border[i]; - break; - } - } + util_format_unswizzle_4f(border_swizzled, border, desc->swizzle); /* Compressed formats. */ if (util_format_is_compressed(format)) { diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 38ca9a24e45..fc84004fb97 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -38,18 +38,6 @@ #include "pipe/p_screen.h" -void util_format_combine_swizzles(unsigned char *dst, - const unsigned char *swz1, - const unsigned char *swz2) -{ - unsigned i; - - for (i = 0; i < 4; i++) { - dst[i] = swz2[i] <= UTIL_FORMAT_SWIZZLE_W ? - swz1[swz2[i]] : swz2[i]; - } -} - unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, const unsigned char *swizzle_view, boolean dxtc_swizzle) @@ -72,7 +60,7 @@ unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, if (swizzle_view) { /* Combine two sets of swizzles. */ - util_format_combine_swizzles(swizzle, swizzle_format, swizzle_view); + util_format_compose_swizzles(swizzle_format, swizzle_view, swizzle); } else { memcpy(swizzle, swizzle_format, 4); } @@ -447,16 +435,8 @@ static uint32_t r300_translate_colorformat(enum pipe_format format) /*case PIPE_FORMAT_B8G8R8A8_SNORM:*/ case PIPE_FORMAT_B8G8R8X8_UNORM: /*case PIPE_FORMAT_B8G8R8X8_SNORM:*/ - case PIPE_FORMAT_A8R8G8B8_UNORM: - /*case PIPE_FORMAT_A8R8G8B8_SNORM:*/ - case PIPE_FORMAT_X8R8G8B8_UNORM: - /*case PIPE_FORMAT_X8R8G8B8_SNORM:*/ - case PIPE_FORMAT_A8B8G8R8_UNORM: - /*case PIPE_FORMAT_A8B8G8R8_SNORM:*/ case PIPE_FORMAT_R8G8B8A8_UNORM: case PIPE_FORMAT_R8G8B8A8_SNORM: - case PIPE_FORMAT_X8B8G8R8_UNORM: - /*case PIPE_FORMAT_X8B8G8R8_SNORM:*/ case PIPE_FORMAT_R8G8B8X8_UNORM: /*case PIPE_FORMAT_R8G8B8X8_SNORM:*/ /* These formats work fine with ARGB8888 if US_OUT_FMT is set @@ -662,10 +642,6 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) R300_C2_SEL_R | R300_C3_SEL_A; /* ARGB outputs. */ - case PIPE_FORMAT_A8R8G8B8_UNORM: - /*case PIPE_FORMAT_A8R8G8B8_SNORM:*/ - case PIPE_FORMAT_X8R8G8B8_UNORM: - /*case PIPE_FORMAT_X8R8G8B8_SNORM:*/ case PIPE_FORMAT_A16_UNORM: case PIPE_FORMAT_A16_SNORM: case PIPE_FORMAT_A16_FLOAT: @@ -674,15 +650,6 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) R300_C0_SEL_A | R300_C1_SEL_R | R300_C2_SEL_G | R300_C3_SEL_B; - /* ABGR outputs. */ - case PIPE_FORMAT_A8B8G8R8_UNORM: - /*case PIPE_FORMAT_A8B8G8R8_SNORM:*/ - case PIPE_FORMAT_X8B8G8R8_UNORM: - /*case PIPE_FORMAT_X8B8G8R8_SNORM:*/ - return modifier | - R300_C0_SEL_A | R300_C1_SEL_B | - R300_C2_SEL_G | R300_C3_SEL_R; - /* RGBA outputs. */ case PIPE_FORMAT_R8G8B8X8_UNORM: /*case PIPE_FORMAT_R8G8B8X8_SNORM:*/ @@ -947,7 +914,7 @@ r300_texture_create_object(struct r300_screen *rscreen, if (!buffer) { tex->buf_size = tex->tex.size_in_bytes; tex->buf = rws->buffer_create(rws, tex->tex.size_in_bytes, 2048, - base->bind, base->usage, tex->domain); + base->bind, tex->domain); if (!tex->buf) { FREE(tex); diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 4586bb2e4dc..158a387478f 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -35,10 +35,6 @@ struct r300_texture_desc; struct r300_resource; struct r300_screen; -void util_format_combine_swizzles(unsigned char *dst, - const unsigned char *swz1, - const unsigned char *swz2); - unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, const unsigned char *swizzle_view, boolean dxtc_swizzle); diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index da5778be65e..fe4f8dd5679 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -360,9 +360,9 @@ static void r300_setup_hyperz_properties(struct r300_screen *screen, unsigned i, pipes; if (screen->caps.family == CHIP_FAMILY_RV530) { - pipes = screen->caps.num_z_pipes; + pipes = screen->info.r300_num_z_pipes; } else { - pipes = screen->caps.num_frag_pipes; + pipes = screen->info.r300_num_gb_pipes; } for (i = 0; i <= tex->b.b.b.last_level; i++) { diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 0561ab9bfa4..07a3f3caee7 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -22,8 +22,7 @@ #include "r300_tgsi_to_rc.h" -#include "radeon_compiler.h" -#include "radeon_program.h" +#include "compiler/radeon_compiler.h" #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index b319890157f..a5e8fd680ff 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -32,7 +32,7 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_ureg.h" -#include "radeon_compiler.h" +#include "compiler/radeon_compiler.h" /* Convert info about VS output semantics into r300_shader_semantics. */ static void r300_shader_read_vs_outputs( diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h index 170de6c79db..a482ddce9c9 100644 --- a/src/gallium/drivers/r300/r300_vs.h +++ b/src/gallium/drivers/r300/r300_vs.h @@ -26,7 +26,7 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" -#include "radeon_code.h" +#include "compiler/radeon_code.h" #include "r300_context.h" #include "r300_shader_semantics.h" diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript index 0135808f10a..19f07b2bef8 100644 --- a/src/gallium/drivers/r600/SConscript +++ b/src/gallium/drivers/r600/SConscript @@ -2,11 +2,7 @@ Import('*') env = env.Clone() -try: - env.ParseConfig('pkg-config --cflags libdrm_radeon') -except OSError: - print 'warning: not building r600' - Return() +env.PkgUseModules('DRM_RADEON') env.Append(CPPPATH = [ '#/include', diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index fb0b0f104bf..c95872b0809 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -69,7 +69,7 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) | S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) | S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst); - if (bc->chiprev == CHIPREV_EVERGREEN) /* no EOP on cayman */ + if (bc->chip_class == EVERGREEN) /* no EOP on cayman */ bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program); id++; diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h deleted file mode 100644 index f8891781be8..00000000000 --- a/src/gallium/drivers/r600/eg_state_inlines.h +++ /dev/null @@ -1,582 +0,0 @@ -/* - * Copyright 2010 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#ifndef EG_STATE_INLINES_H -#define EG_STATE_INLINES_H - -#include "util/u_format.h" -#include "evergreend.h" -#include "r600_formats.h" - -static INLINE uint32_t r600_translate_blend_function(int blend_func) -{ - switch (blend_func) { - case PIPE_BLEND_ADD: - return V_028780_COMB_DST_PLUS_SRC; - case PIPE_BLEND_SUBTRACT: - return V_028780_COMB_SRC_MINUS_DST; - case PIPE_BLEND_REVERSE_SUBTRACT: - return V_028780_COMB_DST_MINUS_SRC; - case PIPE_BLEND_MIN: - return V_028780_COMB_MIN_DST_SRC; - case PIPE_BLEND_MAX: - return V_028780_COMB_MAX_DST_SRC; - default: - R600_ERR("Unknown blend function %d\n", blend_func); - assert(0); - break; - } - return 0; -} - -static INLINE uint32_t r600_translate_blend_factor(int blend_fact) -{ - switch (blend_fact) { - case PIPE_BLENDFACTOR_ONE: - return V_028780_BLEND_ONE; - case PIPE_BLENDFACTOR_SRC_COLOR: - return V_028780_BLEND_SRC_COLOR; - case PIPE_BLENDFACTOR_SRC_ALPHA: - return V_028780_BLEND_SRC_ALPHA; - case PIPE_BLENDFACTOR_DST_ALPHA: - return V_028780_BLEND_DST_ALPHA; - case PIPE_BLENDFACTOR_DST_COLOR: - return V_028780_BLEND_DST_COLOR; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - return V_028780_BLEND_SRC_ALPHA_SATURATE; - case PIPE_BLENDFACTOR_CONST_COLOR: - return V_028780_BLEND_CONST_COLOR; - case PIPE_BLENDFACTOR_CONST_ALPHA: - return V_028780_BLEND_CONST_ALPHA; - case PIPE_BLENDFACTOR_ZERO: - return V_028780_BLEND_ZERO; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - return V_028780_BLEND_ONE_MINUS_SRC_COLOR; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - return V_028780_BLEND_ONE_MINUS_SRC_ALPHA; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - return V_028780_BLEND_ONE_MINUS_DST_ALPHA; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - return V_028780_BLEND_ONE_MINUS_DST_COLOR; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - return V_028780_BLEND_ONE_MINUS_CONST_COLOR; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - return V_028780_BLEND_ONE_MINUS_CONST_ALPHA; - case PIPE_BLENDFACTOR_SRC1_COLOR: - return V_028780_BLEND_SRC1_COLOR; - case PIPE_BLENDFACTOR_SRC1_ALPHA: - return V_028780_BLEND_SRC1_ALPHA; - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - return V_028780_BLEND_INV_SRC1_COLOR; - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - return V_028780_BLEND_INV_SRC1_ALPHA; - default: - R600_ERR("Bad blend factor %d not supported!\n", blend_fact); - assert(0); - break; - } - return 0; -} - -static INLINE uint32_t r600_translate_stencil_op(int s_op) -{ - switch (s_op) { - case PIPE_STENCIL_OP_KEEP: - return V_028800_STENCIL_KEEP; - case PIPE_STENCIL_OP_ZERO: - return V_028800_STENCIL_ZERO; - case PIPE_STENCIL_OP_REPLACE: - return V_028800_STENCIL_REPLACE; - case PIPE_STENCIL_OP_INCR: - return V_028800_STENCIL_INCR; - case PIPE_STENCIL_OP_DECR: - return V_028800_STENCIL_DECR; - case PIPE_STENCIL_OP_INCR_WRAP: - return V_028800_STENCIL_INCR_WRAP; - case PIPE_STENCIL_OP_DECR_WRAP: - return V_028800_STENCIL_DECR_WRAP; - case PIPE_STENCIL_OP_INVERT: - return V_028800_STENCIL_INVERT; - default: - R600_ERR("Unknown stencil op %d", s_op); - assert(0); - break; - } - return 0; -} - -static INLINE uint32_t r600_translate_fill(uint32_t func) -{ - switch(func) { - case PIPE_POLYGON_MODE_FILL: - return 2; - case PIPE_POLYGON_MODE_LINE: - return 1; - case PIPE_POLYGON_MODE_POINT: - return 0; - default: - assert(0); - return 0; - } -} - -/* translates straight */ -static INLINE uint32_t r600_translate_ds_func(int func) -{ - return func; -} - -static inline unsigned r600_tex_wrap(unsigned wrap) -{ - switch (wrap) { - default: - case PIPE_TEX_WRAP_REPEAT: - return V_03C000_SQ_TEX_WRAP; - case PIPE_TEX_WRAP_CLAMP: - return V_03C000_SQ_TEX_CLAMP_HALF_BORDER; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - return V_03C000_SQ_TEX_CLAMP_LAST_TEXEL; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - return V_03C000_SQ_TEX_CLAMP_BORDER; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - return V_03C000_SQ_TEX_MIRROR; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - return V_03C000_SQ_TEX_MIRROR_ONCE_HALF_BORDER; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - return V_03C000_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - return V_03C000_SQ_TEX_MIRROR_ONCE_BORDER; - } -} - -static inline unsigned r600_tex_filter(unsigned filter) -{ - switch (filter) { - default: - case PIPE_TEX_FILTER_NEAREST: - return V_03C000_SQ_TEX_XY_FILTER_POINT; - case PIPE_TEX_FILTER_LINEAR: - return V_03C000_SQ_TEX_XY_FILTER_BILINEAR; - } -} - -static inline unsigned r600_tex_mipfilter(unsigned filter) -{ - switch (filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - return V_03C000_SQ_TEX_Z_FILTER_POINT; - case PIPE_TEX_MIPFILTER_LINEAR: - return V_03C000_SQ_TEX_Z_FILTER_LINEAR; - default: - case PIPE_TEX_MIPFILTER_NONE: - return V_03C000_SQ_TEX_Z_FILTER_NONE; - } -} - -static inline unsigned r600_tex_compare(unsigned compare) -{ - switch (compare) { - default: - case PIPE_FUNC_NEVER: - return V_03C000_SQ_TEX_DEPTH_COMPARE_NEVER; - case PIPE_FUNC_LESS: - return V_03C000_SQ_TEX_DEPTH_COMPARE_LESS; - case PIPE_FUNC_EQUAL: - return V_03C000_SQ_TEX_DEPTH_COMPARE_EQUAL; - case PIPE_FUNC_LEQUAL: - return V_03C000_SQ_TEX_DEPTH_COMPARE_LESSEQUAL; - case PIPE_FUNC_GREATER: - return V_03C000_SQ_TEX_DEPTH_COMPARE_GREATER; - case PIPE_FUNC_NOTEQUAL: - return V_03C000_SQ_TEX_DEPTH_COMPARE_NOTEQUAL; - case PIPE_FUNC_GEQUAL: - return V_03C000_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; - case PIPE_FUNC_ALWAYS: - return V_03C000_SQ_TEX_DEPTH_COMPARE_ALWAYS; - } -} - -static inline unsigned r600_tex_swizzle(unsigned swizzle) -{ - switch (swizzle) { - case PIPE_SWIZZLE_RED: - return V_030010_SQ_SEL_X; - case PIPE_SWIZZLE_GREEN: - return V_030010_SQ_SEL_Y; - case PIPE_SWIZZLE_BLUE: - return V_030010_SQ_SEL_Z; - case PIPE_SWIZZLE_ALPHA: - return V_030010_SQ_SEL_W; - case PIPE_SWIZZLE_ZERO: - return V_030010_SQ_SEL_0; - default: - case PIPE_SWIZZLE_ONE: - return V_030010_SQ_SEL_1; - } -} - -static inline unsigned r600_format_type(unsigned format_type) -{ - switch (format_type) { - default: - case UTIL_FORMAT_TYPE_UNSIGNED: - return V_030010_SQ_FORMAT_COMP_UNSIGNED; - case UTIL_FORMAT_TYPE_SIGNED: - return V_030010_SQ_FORMAT_COMP_SIGNED; - case UTIL_FORMAT_TYPE_FIXED: - return V_030010_SQ_FORMAT_COMP_UNSIGNED_BIASED; - } -} - -static inline unsigned r600_tex_dim(unsigned dim) -{ - switch (dim) { - default: - case PIPE_TEXTURE_1D: - return V_030000_SQ_TEX_DIM_1D; - case PIPE_TEXTURE_1D_ARRAY: - return V_030000_SQ_TEX_DIM_1D_ARRAY; - case PIPE_TEXTURE_2D: - case PIPE_TEXTURE_RECT: - return V_030000_SQ_TEX_DIM_2D; - case PIPE_TEXTURE_2D_ARRAY: - return V_030000_SQ_TEX_DIM_2D_ARRAY; - case PIPE_TEXTURE_3D: - return V_030000_SQ_TEX_DIM_3D; - case PIPE_TEXTURE_CUBE: - return V_030000_SQ_TEX_DIM_CUBEMAP; - } -} - -static inline uint32_t r600_translate_dbformat(enum pipe_format format) -{ - switch (format) { - case PIPE_FORMAT_Z16_UNORM: - return V_028040_Z_16; - case PIPE_FORMAT_Z24X8_UNORM: - return V_028040_Z_24; - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - return V_028040_Z_24; - default: - return ~0; - } -} - -static inline uint32_t r600_translate_stencilformat(enum pipe_format format) -{ - if (format == PIPE_FORMAT_Z24_UNORM_S8_USCALED) - return 1; - else - return 0; -} - -static inline uint32_t r600_translate_colorswap(enum pipe_format format) -{ - switch (format) { - /* 8-bit buffers. */ - case PIPE_FORMAT_L4A4_UNORM: - return V_028C70_SWAP_ALT; - - case PIPE_FORMAT_A8_UNORM: - return V_028C70_SWAP_ALT_REV; - case PIPE_FORMAT_I8_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_L8_SRGB: - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R8_SNORM: - return V_028C70_SWAP_STD; - - /* 16-bit buffers. */ - case PIPE_FORMAT_B5G6R5_UNORM: - return V_028C70_SWAP_STD_REV; - - case PIPE_FORMAT_B5G5R5A1_UNORM: - case PIPE_FORMAT_B5G5R5X1_UNORM: - return V_028C70_SWAP_ALT; - - case PIPE_FORMAT_B4G4R4A4_UNORM: - case PIPE_FORMAT_B4G4R4X4_UNORM: - return V_028C70_SWAP_ALT; - - case PIPE_FORMAT_Z16_UNORM: - return V_028C70_SWAP_STD; - - case PIPE_FORMAT_L8A8_UNORM: - case PIPE_FORMAT_L8A8_SRGB: - return V_028C70_SWAP_ALT; - case PIPE_FORMAT_R8G8_UNORM: - return V_028C70_SWAP_STD; - - case PIPE_FORMAT_R16_UNORM: - return V_028C70_SWAP_STD; - - /* 32-bit buffers. */ - case PIPE_FORMAT_A8B8G8R8_SRGB: - return V_028C70_SWAP_STD_REV; - case PIPE_FORMAT_B8G8R8A8_SRGB: - return V_028C70_SWAP_ALT; - - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - return V_028C70_SWAP_ALT; - - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_X8R8G8B8_UNORM: - return V_028C70_SWAP_ALT_REV; - case PIPE_FORMAT_R8G8B8A8_SNORM: - case PIPE_FORMAT_R8G8B8X8_UNORM: - return V_028C70_SWAP_STD; - - case PIPE_FORMAT_A8B8G8R8_UNORM: - case PIPE_FORMAT_X8B8G8R8_UNORM: - /* case PIPE_FORMAT_R8SG8SB8UX8U_NORM: */ - return V_028C70_SWAP_STD_REV; - - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - return V_028C70_SWAP_STD; - - case PIPE_FORMAT_X8Z24_UNORM: - case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - return V_028C70_SWAP_STD; - - case PIPE_FORMAT_R10G10B10A2_UNORM: - case PIPE_FORMAT_R10G10B10X2_SNORM: - case PIPE_FORMAT_R10SG10SB10SA2U_NORM: - return V_028C70_SWAP_STD; - - case PIPE_FORMAT_B10G10R10A2_UNORM: - return V_028C70_SWAP_ALT; - - case PIPE_FORMAT_R11G11B10_FLOAT: - case PIPE_FORMAT_R32_FLOAT: - case PIPE_FORMAT_R16G16_FLOAT: - case PIPE_FORMAT_R16G16_UNORM: - return V_028C70_SWAP_STD; - - /* 64-bit buffers. */ - case PIPE_FORMAT_R32G32_FLOAT: - case PIPE_FORMAT_R16G16B16A16_UNORM: - case PIPE_FORMAT_R16G16B16A16_SNORM: - case PIPE_FORMAT_R16G16B16A16_FLOAT: - - /* 128-bit buffers. */ - case PIPE_FORMAT_R32G32B32A32_FLOAT: - case PIPE_FORMAT_R32G32B32A32_SNORM: - case PIPE_FORMAT_R32G32B32A32_UNORM: - return V_028C70_SWAP_STD; - default: - R600_ERR("unsupported colorswap format %d\n", format); - return ~0; - } - return ~0; -} - -static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) -{ - switch (format) { - /* 8-bit buffers. */ - case PIPE_FORMAT_L4A4_UNORM: - return V_028C70_COLOR_4_4; - - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_I8_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_L8_SRGB: - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R8_SNORM: - return V_028C70_COLOR_8; - - /* 16-bit buffers. */ - case PIPE_FORMAT_B5G6R5_UNORM: - return V_028C70_COLOR_5_6_5; - - case PIPE_FORMAT_B5G5R5A1_UNORM: - case PIPE_FORMAT_B5G5R5X1_UNORM: - return V_028C70_COLOR_1_5_5_5; - - case PIPE_FORMAT_B4G4R4A4_UNORM: - case PIPE_FORMAT_B4G4R4X4_UNORM: - return V_028C70_COLOR_4_4_4_4; - - case PIPE_FORMAT_Z16_UNORM: - return V_028C70_COLOR_16; - - case PIPE_FORMAT_L8A8_UNORM: - case PIPE_FORMAT_L8A8_SRGB: - case PIPE_FORMAT_R8G8_UNORM: - return V_028C70_COLOR_8_8; - - case PIPE_FORMAT_R16_UNORM: - return V_028C70_COLOR_16; - - /* 32-bit buffers. */ - case PIPE_FORMAT_A8B8G8R8_SRGB: - case PIPE_FORMAT_A8B8G8R8_UNORM: - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_B8G8R8A8_SRGB: - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - case PIPE_FORMAT_R8G8B8A8_SNORM: - case PIPE_FORMAT_R8G8B8A8_UNORM: - case PIPE_FORMAT_R8G8B8X8_UNORM: - case PIPE_FORMAT_R8SG8SB8UX8U_NORM: - case PIPE_FORMAT_X8B8G8R8_UNORM: - case PIPE_FORMAT_X8R8G8B8_UNORM: - case PIPE_FORMAT_R8G8B8_UNORM: - return V_028C70_COLOR_8_8_8_8; - - case PIPE_FORMAT_R10G10B10A2_UNORM: - case PIPE_FORMAT_R10G10B10X2_SNORM: - case PIPE_FORMAT_B10G10R10A2_UNORM: - case PIPE_FORMAT_R10SG10SB10SA2U_NORM: - return V_028C70_COLOR_2_10_10_10; - - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - return V_028C70_COLOR_8_24; - - case PIPE_FORMAT_X8Z24_UNORM: - case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - return V_028C70_COLOR_24_8; - - case PIPE_FORMAT_R32_FLOAT: - return V_028C70_COLOR_32_FLOAT; - - case PIPE_FORMAT_R16G16_FLOAT: - return V_028C70_COLOR_16_16_FLOAT; - - case PIPE_FORMAT_R16G16_SSCALED: - case PIPE_FORMAT_R16G16_UNORM: - return V_028C70_COLOR_16_16; - - case PIPE_FORMAT_R11G11B10_FLOAT: - return V_028C70_COLOR_10_11_11_FLOAT; - - /* 64-bit buffers. */ - case PIPE_FORMAT_R16G16B16_USCALED: - case PIPE_FORMAT_R16G16B16A16_USCALED: - case PIPE_FORMAT_R16G16B16_SSCALED: - case PIPE_FORMAT_R16G16B16A16_SSCALED: - case PIPE_FORMAT_R16G16B16A16_UNORM: - case PIPE_FORMAT_R16G16B16A16_SNORM: - return V_028C70_COLOR_16_16_16_16; - - case PIPE_FORMAT_R16G16B16_FLOAT: - case PIPE_FORMAT_R16G16B16A16_FLOAT: - return V_028C70_COLOR_16_16_16_16_FLOAT; - - case PIPE_FORMAT_R32G32_FLOAT: - return V_028C70_COLOR_32_32_FLOAT; - - case PIPE_FORMAT_R32G32_USCALED: - case PIPE_FORMAT_R32G32_SSCALED: - return V_028C70_COLOR_32_32; - - /* 96-bit buffers. */ - case PIPE_FORMAT_R32G32B32_FLOAT: - return V_028C70_COLOR_32_32_32_FLOAT; - - /* 128-bit buffers. */ - case PIPE_FORMAT_R32G32B32A32_SNORM: - case PIPE_FORMAT_R32G32B32A32_UNORM: - return V_028C70_COLOR_32_32_32_32; - case PIPE_FORMAT_R32G32B32A32_FLOAT: - return V_028C70_COLOR_32_32_32_32_FLOAT; - - /* YUV buffers. */ - case PIPE_FORMAT_UYVY: - case PIPE_FORMAT_YUYV: - default: - return ~0; /* Unsupported. */ - } -} - -static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat) -{ - if (R600_BIG_ENDIAN) { - switch(colorformat) { - case V_028C70_COLOR_4_4: - return(ENDIAN_NONE); - - /* 8-bit buffers. */ - case V_028C70_COLOR_8: - return(ENDIAN_NONE); - - /* 16-bit buffers. */ - case V_028C70_COLOR_5_6_5: - case V_028C70_COLOR_1_5_5_5: - case V_028C70_COLOR_4_4_4_4: - case V_028C70_COLOR_16: - case V_028C70_COLOR_8_8: - return(ENDIAN_8IN16); - - /* 32-bit buffers. */ - case V_028C70_COLOR_8_8_8_8: - case V_028C70_COLOR_2_10_10_10: - case V_028C70_COLOR_8_24: - case V_028C70_COLOR_24_8: - case V_028C70_COLOR_32_FLOAT: - case V_028C70_COLOR_16_16_FLOAT: - case V_028C70_COLOR_16_16: - return(ENDIAN_8IN32); - - /* 64-bit buffers. */ - case V_028C70_COLOR_16_16_16_16: - case V_028C70_COLOR_16_16_16_16_FLOAT: - return(ENDIAN_8IN16); - - case V_028C70_COLOR_32_32_FLOAT: - case V_028C70_COLOR_32_32: - return(ENDIAN_8IN32); - - /* 96-bit buffers. */ - case V_028C70_COLOR_32_32_32_FLOAT: - /* 128-bit buffers. */ - case V_028C70_COLOR_32_32_32_32_FLOAT: - case V_028C70_COLOR_32_32_32_32: - return(ENDIAN_8IN32); - default: - return ENDIAN_NONE; /* Unsupported. */ - } - } else { - return ENDIAN_NONE; - } -} - -static INLINE boolean r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) -{ - return r600_translate_texformat(screen, format, NULL, NULL, NULL) != ~0; -} - -static INLINE boolean r600_is_colorbuffer_format_supported(enum pipe_format format) -{ - return r600_translate_colorformat(format) != ~0 && - r600_translate_colorswap(format) != ~0; -} - -static INLINE boolean r600_is_zs_format_supported(enum pipe_format format) -{ - return r600_translate_dbformat(format) != ~0; -} - -#endif diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index f86e4d4e3af..c9eaf94a2ae 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -46,7 +46,587 @@ #include "r600_resource.h" #include "r600_shader.h" #include "r600_pipe.h" -#include "eg_state_inlines.h" +#include "r600_formats.h" + +static uint32_t r600_translate_blend_function(int blend_func) +{ + switch (blend_func) { + case PIPE_BLEND_ADD: + return V_028780_COMB_DST_PLUS_SRC; + case PIPE_BLEND_SUBTRACT: + return V_028780_COMB_SRC_MINUS_DST; + case PIPE_BLEND_REVERSE_SUBTRACT: + return V_028780_COMB_DST_MINUS_SRC; + case PIPE_BLEND_MIN: + return V_028780_COMB_MIN_DST_SRC; + case PIPE_BLEND_MAX: + return V_028780_COMB_MAX_DST_SRC; + default: + R600_ERR("Unknown blend function %d\n", blend_func); + assert(0); + break; + } + return 0; +} + +static uint32_t r600_translate_blend_factor(int blend_fact) +{ + switch (blend_fact) { + case PIPE_BLENDFACTOR_ONE: + return V_028780_BLEND_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: + return V_028780_BLEND_SRC_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return V_028780_BLEND_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return V_028780_BLEND_DST_ALPHA; + case PIPE_BLENDFACTOR_DST_COLOR: + return V_028780_BLEND_DST_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return V_028780_BLEND_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: + return V_028780_BLEND_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return V_028780_BLEND_CONST_ALPHA; + case PIPE_BLENDFACTOR_ZERO: + return V_028780_BLEND_ZERO; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return V_028780_BLEND_ONE_MINUS_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return V_028780_BLEND_ONE_MINUS_SRC_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return V_028780_BLEND_ONE_MINUS_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return V_028780_BLEND_ONE_MINUS_DST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return V_028780_BLEND_ONE_MINUS_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return V_028780_BLEND_ONE_MINUS_CONST_ALPHA; + case PIPE_BLENDFACTOR_SRC1_COLOR: + return V_028780_BLEND_SRC1_COLOR; + case PIPE_BLENDFACTOR_SRC1_ALPHA: + return V_028780_BLEND_SRC1_ALPHA; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + return V_028780_BLEND_INV_SRC1_COLOR; + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + return V_028780_BLEND_INV_SRC1_ALPHA; + default: + R600_ERR("Bad blend factor %d not supported!\n", blend_fact); + assert(0); + break; + } + return 0; +} + +static uint32_t r600_translate_stencil_op(int s_op) +{ + switch (s_op) { + case PIPE_STENCIL_OP_KEEP: + return V_028800_STENCIL_KEEP; + case PIPE_STENCIL_OP_ZERO: + return V_028800_STENCIL_ZERO; + case PIPE_STENCIL_OP_REPLACE: + return V_028800_STENCIL_REPLACE; + case PIPE_STENCIL_OP_INCR: + return V_028800_STENCIL_INCR; + case PIPE_STENCIL_OP_DECR: + return V_028800_STENCIL_DECR; + case PIPE_STENCIL_OP_INCR_WRAP: + return V_028800_STENCIL_INCR_WRAP; + case PIPE_STENCIL_OP_DECR_WRAP: + return V_028800_STENCIL_DECR_WRAP; + case PIPE_STENCIL_OP_INVERT: + return V_028800_STENCIL_INVERT; + default: + R600_ERR("Unknown stencil op %d", s_op); + assert(0); + break; + } + return 0; +} + +static uint32_t r600_translate_fill(uint32_t func) +{ + switch(func) { + case PIPE_POLYGON_MODE_FILL: + return 2; + case PIPE_POLYGON_MODE_LINE: + return 1; + case PIPE_POLYGON_MODE_POINT: + return 0; + default: + assert(0); + return 0; + } +} + +/* translates straight */ +static uint32_t r600_translate_ds_func(int func) +{ + return func; +} + +static unsigned r600_tex_wrap(unsigned wrap) +{ + switch (wrap) { + default: + case PIPE_TEX_WRAP_REPEAT: + return V_03C000_SQ_TEX_WRAP; + case PIPE_TEX_WRAP_CLAMP: + return V_03C000_SQ_TEX_CLAMP_HALF_BORDER; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return V_03C000_SQ_TEX_CLAMP_LAST_TEXEL; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return V_03C000_SQ_TEX_CLAMP_BORDER; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return V_03C000_SQ_TEX_MIRROR; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + return V_03C000_SQ_TEX_MIRROR_ONCE_HALF_BORDER; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + return V_03C000_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + return V_03C000_SQ_TEX_MIRROR_ONCE_BORDER; + } +} + +static unsigned r600_tex_filter(unsigned filter) +{ + switch (filter) { + default: + case PIPE_TEX_FILTER_NEAREST: + return V_03C000_SQ_TEX_XY_FILTER_POINT; + case PIPE_TEX_FILTER_LINEAR: + return V_03C000_SQ_TEX_XY_FILTER_BILINEAR; + } +} + +static unsigned r600_tex_mipfilter(unsigned filter) +{ + switch (filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + return V_03C000_SQ_TEX_Z_FILTER_POINT; + case PIPE_TEX_MIPFILTER_LINEAR: + return V_03C000_SQ_TEX_Z_FILTER_LINEAR; + default: + case PIPE_TEX_MIPFILTER_NONE: + return V_03C000_SQ_TEX_Z_FILTER_NONE; + } +} + +static unsigned r600_tex_compare(unsigned compare) +{ + switch (compare) { + default: + case PIPE_FUNC_NEVER: + return V_03C000_SQ_TEX_DEPTH_COMPARE_NEVER; + case PIPE_FUNC_LESS: + return V_03C000_SQ_TEX_DEPTH_COMPARE_LESS; + case PIPE_FUNC_EQUAL: + return V_03C000_SQ_TEX_DEPTH_COMPARE_EQUAL; + case PIPE_FUNC_LEQUAL: + return V_03C000_SQ_TEX_DEPTH_COMPARE_LESSEQUAL; + case PIPE_FUNC_GREATER: + return V_03C000_SQ_TEX_DEPTH_COMPARE_GREATER; + case PIPE_FUNC_NOTEQUAL: + return V_03C000_SQ_TEX_DEPTH_COMPARE_NOTEQUAL; + case PIPE_FUNC_GEQUAL: + return V_03C000_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; + case PIPE_FUNC_ALWAYS: + return V_03C000_SQ_TEX_DEPTH_COMPARE_ALWAYS; + } +} + +static unsigned r600_tex_dim(unsigned dim) +{ + switch (dim) { + default: + case PIPE_TEXTURE_1D: + return V_030000_SQ_TEX_DIM_1D; + case PIPE_TEXTURE_1D_ARRAY: + return V_030000_SQ_TEX_DIM_1D_ARRAY; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + return V_030000_SQ_TEX_DIM_2D; + case PIPE_TEXTURE_2D_ARRAY: + return V_030000_SQ_TEX_DIM_2D_ARRAY; + case PIPE_TEXTURE_3D: + return V_030000_SQ_TEX_DIM_3D; + case PIPE_TEXTURE_CUBE: + return V_030000_SQ_TEX_DIM_CUBEMAP; + } +} + +static uint32_t r600_translate_dbformat(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + return V_028040_Z_16; + case PIPE_FORMAT_Z24X8_UNORM: + return V_028040_Z_24; + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + return V_028040_Z_24; + default: + return ~0U; + } +} + +static uint32_t r600_translate_stencilformat(enum pipe_format format) +{ + if (format == PIPE_FORMAT_Z24_UNORM_S8_USCALED) + return 1; + else + return 0; +} + +static uint32_t r600_translate_colorswap(enum pipe_format format) +{ + switch (format) { + /* 8-bit buffers. */ + case PIPE_FORMAT_L4A4_UNORM: + return V_028C70_SWAP_ALT; + + case PIPE_FORMAT_A8_UNORM: + return V_028C70_SWAP_ALT_REV; + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_L8_SRGB: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8_SNORM: + return V_028C70_SWAP_STD; + + /* 16-bit buffers. */ + case PIPE_FORMAT_B5G6R5_UNORM: + return V_028C70_SWAP_STD_REV; + + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B5G5R5X1_UNORM: + return V_028C70_SWAP_ALT; + + case PIPE_FORMAT_B4G4R4A4_UNORM: + case PIPE_FORMAT_B4G4R4X4_UNORM: + return V_028C70_SWAP_ALT; + + case PIPE_FORMAT_Z16_UNORM: + return V_028C70_SWAP_STD; + + case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_L8A8_SRGB: + return V_028C70_SWAP_ALT; + case PIPE_FORMAT_R8G8_UNORM: + return V_028C70_SWAP_STD; + + case PIPE_FORMAT_R16_UNORM: + case PIPE_FORMAT_R16_FLOAT: + return V_028C70_SWAP_STD; + + /* 32-bit buffers. */ + case PIPE_FORMAT_A8B8G8R8_SRGB: + return V_028C70_SWAP_STD_REV; + case PIPE_FORMAT_B8G8R8A8_SRGB: + return V_028C70_SWAP_ALT; + + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + return V_028C70_SWAP_ALT; + + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + return V_028C70_SWAP_ALT_REV; + case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: + return V_028C70_SWAP_STD; + + case PIPE_FORMAT_A8B8G8R8_UNORM: + case PIPE_FORMAT_X8B8G8R8_UNORM: + /* case PIPE_FORMAT_R8SG8SB8UX8U_NORM: */ + return V_028C70_SWAP_STD_REV; + + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + return V_028C70_SWAP_STD; + + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return V_028C70_SWAP_STD; + + case PIPE_FORMAT_R10G10B10A2_UNORM: + case PIPE_FORMAT_R10G10B10X2_SNORM: + case PIPE_FORMAT_R10SG10SB10SA2U_NORM: + return V_028C70_SWAP_STD; + + case PIPE_FORMAT_B10G10R10A2_UNORM: + return V_028C70_SWAP_ALT; + + case PIPE_FORMAT_R11G11B10_FLOAT: + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R16G16_FLOAT: + case PIPE_FORMAT_R16G16_UNORM: + return V_028C70_SWAP_STD; + + /* 64-bit buffers. */ + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: + case PIPE_FORMAT_R16G16B16A16_FLOAT: + + /* 128-bit buffers. */ + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32A32_UNORM: + return V_028C70_SWAP_STD; + default: + R600_ERR("unsupported colorswap format %d\n", format); + return ~0U; + } + return ~0U; +} + +static uint32_t r600_translate_colorformat(enum pipe_format format) +{ + switch (format) { + /* 8-bit buffers. */ + case PIPE_FORMAT_L4A4_UNORM: + return V_028C70_COLOR_4_4; + + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_L8_SRGB: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8_SNORM: + return V_028C70_COLOR_8; + + /* 16-bit buffers. */ + case PIPE_FORMAT_B5G6R5_UNORM: + return V_028C70_COLOR_5_6_5; + + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B5G5R5X1_UNORM: + return V_028C70_COLOR_1_5_5_5; + + case PIPE_FORMAT_B4G4R4A4_UNORM: + case PIPE_FORMAT_B4G4R4X4_UNORM: + return V_028C70_COLOR_4_4_4_4; + + case PIPE_FORMAT_Z16_UNORM: + return V_028C70_COLOR_16; + + case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_L8A8_SRGB: + case PIPE_FORMAT_R8G8_UNORM: + return V_028C70_COLOR_8_8; + + case PIPE_FORMAT_R16_UNORM: + return V_028C70_COLOR_16; + + case PIPE_FORMAT_R16_FLOAT: + return V_028C70_COLOR_16_FLOAT; + + /* 32-bit buffers. */ + case PIPE_FORMAT_A8B8G8R8_SRGB: + case PIPE_FORMAT_A8B8G8R8_UNORM: + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_SRGB: + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: + case PIPE_FORMAT_R8SG8SB8UX8U_NORM: + case PIPE_FORMAT_X8B8G8R8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_R8G8B8_UNORM: + return V_028C70_COLOR_8_8_8_8; + + case PIPE_FORMAT_R10G10B10A2_UNORM: + case PIPE_FORMAT_R10G10B10X2_SNORM: + case PIPE_FORMAT_B10G10R10A2_UNORM: + case PIPE_FORMAT_R10SG10SB10SA2U_NORM: + return V_028C70_COLOR_2_10_10_10; + + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + return V_028C70_COLOR_8_24; + + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return V_028C70_COLOR_24_8; + + case PIPE_FORMAT_R32_FLOAT: + return V_028C70_COLOR_32_FLOAT; + + case PIPE_FORMAT_R16G16_FLOAT: + return V_028C70_COLOR_16_16_FLOAT; + + case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16G16_UNORM: + return V_028C70_COLOR_16_16; + + case PIPE_FORMAT_R11G11B10_FLOAT: + return V_028C70_COLOR_10_11_11_FLOAT; + + /* 64-bit buffers. */ + case PIPE_FORMAT_R16G16B16_USCALED: + case PIPE_FORMAT_R16G16B16A16_USCALED: + case PIPE_FORMAT_R16G16B16_SSCALED: + case PIPE_FORMAT_R16G16B16A16_SSCALED: + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: + return V_028C70_COLOR_16_16_16_16; + + case PIPE_FORMAT_R16G16B16_FLOAT: + case PIPE_FORMAT_R16G16B16A16_FLOAT: + return V_028C70_COLOR_16_16_16_16_FLOAT; + + case PIPE_FORMAT_R32G32_FLOAT: + return V_028C70_COLOR_32_32_FLOAT; + + case PIPE_FORMAT_R32G32_USCALED: + case PIPE_FORMAT_R32G32_SSCALED: + return V_028C70_COLOR_32_32; + + /* 96-bit buffers. */ + case PIPE_FORMAT_R32G32B32_FLOAT: + return V_028C70_COLOR_32_32_32_FLOAT; + + /* 128-bit buffers. */ + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32A32_UNORM: + return V_028C70_COLOR_32_32_32_32; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return V_028C70_COLOR_32_32_32_32_FLOAT; + + /* YUV buffers. */ + case PIPE_FORMAT_UYVY: + case PIPE_FORMAT_YUYV: + default: + return ~0U; /* Unsupported. */ + } +} + +static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) +{ + if (R600_BIG_ENDIAN) { + switch(colorformat) { + case V_028C70_COLOR_4_4: + return ENDIAN_NONE; + + /* 8-bit buffers. */ + case V_028C70_COLOR_8: + return ENDIAN_NONE; + + /* 16-bit buffers. */ + case V_028C70_COLOR_5_6_5: + case V_028C70_COLOR_1_5_5_5: + case V_028C70_COLOR_4_4_4_4: + case V_028C70_COLOR_16: + case V_028C70_COLOR_8_8: + return ENDIAN_8IN16; + + /* 32-bit buffers. */ + case V_028C70_COLOR_8_8_8_8: + case V_028C70_COLOR_2_10_10_10: + case V_028C70_COLOR_8_24: + case V_028C70_COLOR_24_8: + case V_028C70_COLOR_32_FLOAT: + case V_028C70_COLOR_16_16_FLOAT: + case V_028C70_COLOR_16_16: + return ENDIAN_8IN32; + + /* 64-bit buffers. */ + case V_028C70_COLOR_16_16_16_16: + case V_028C70_COLOR_16_16_16_16_FLOAT: + return ENDIAN_8IN16; + + case V_028C70_COLOR_32_32_FLOAT: + case V_028C70_COLOR_32_32: + return ENDIAN_8IN32; + + /* 96-bit buffers. */ + case V_028C70_COLOR_32_32_32_FLOAT: + /* 128-bit buffers. */ + case V_028C70_COLOR_32_32_32_32_FLOAT: + case V_028C70_COLOR_32_32_32_32: + return ENDIAN_8IN32; + default: + return ENDIAN_NONE; /* Unsupported. */ + } + } else { + return ENDIAN_NONE; + } +} + +static bool r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) +{ + return r600_translate_texformat(screen, format, NULL, NULL, NULL) != ~0U; +} + +static bool r600_is_colorbuffer_format_supported(enum pipe_format format) +{ + return r600_translate_colorformat(format) != ~0U && + r600_translate_colorswap(format) != ~0U; +} + +static bool r600_is_zs_format_supported(enum pipe_format format) +{ + return r600_translate_dbformat(format) != ~0U; +} + +boolean evergreen_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned usage) +{ + unsigned retval = 0; + + if (target >= PIPE_MAX_TEXTURE_TYPES) { + R600_ERR("r600: unsupported texture type %d\n", target); + return FALSE; + } + + if (!util_format_is_supported(format, usage)) + return FALSE; + + /* Multisample */ + if (sample_count > 1) + return FALSE; + + if ((usage & PIPE_BIND_SAMPLER_VIEW) && + r600_is_sampler_format_supported(screen, format)) { + retval |= PIPE_BIND_SAMPLER_VIEW; + } + + if ((usage & (PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED)) && + r600_is_colorbuffer_format_supported(format)) { + retval |= usage & + (PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED); + } + + if ((usage & PIPE_BIND_DEPTH_STENCIL) && + r600_is_zs_format_supported(format)) { + retval |= PIPE_BIND_DEPTH_STENCIL; + } + + if ((usage & PIPE_BIND_VERTEX_BUFFER) && + r600_is_vertex_format_supported(format)) { + retval |= PIPE_BIND_VERTEX_BUFFER; + } + + if (usage & PIPE_BIND_TRANSFER_READ) + retval |= PIPE_BIND_TRANSFER_READ; + if (usage & PIPE_BIND_TRANSFER_WRITE) + retval |= PIPE_BIND_TRANSFER_WRITE; + + return retval == usage; +} static void evergreen_set_blend_color(struct pipe_context *ctx, const struct pipe_blend_color *state) @@ -77,13 +657,11 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx, u32 color_control, target_mask; /* FIXME there is more then 8 framebuffer */ unsigned blend_cntl[8]; - enum radeon_family family; if (blend == NULL) { return NULL; } - family = r600_get_family(rctx->radeon); rstate = &blend->rstate; rstate->id = R600_PIPE_STATE_BLEND; @@ -110,7 +688,7 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, color_control, 0xFFFFFFFD, NULL); - if (family != CHIP_CAYMAN) + if (rctx->chip_class != CAYMAN) r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL); else { r600_pipe_state_add_reg(rstate, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 0xFFFFFFFF, 0xFFFFFFFF, NULL); @@ -247,15 +825,14 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, unsigned tmp; unsigned prov_vtx = 1, polygon_dual_mode; unsigned clip_rule; - enum radeon_family family; - - family = r600_get_family(rctx->radeon); if (rs == NULL) { return NULL; } rstate = &rs->rstate; + rs->clamp_vertex_color = state->clamp_vertex_color; + rs->clamp_fragment_color = state->clamp_fragment_color; rs->flatshade = state->flatshade; rs->sprite_coord_enable = state->sprite_coord_enable; @@ -306,7 +883,7 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, tmp = (unsigned)state->line_width * 8; r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL); - if (family == CHIP_CAYMAN) { + if (rctx->chip_class == CAYMAN) { r600_pipe_state_add_reg(rstate, CM_R_028BDC_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, CM_R_028BE4_PA_SU_VTX_CNTL, S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules), @@ -446,8 +1023,8 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte rstate->val[1] = (S_030004_TEX_HEIGHT(texture->height0 - 1) | S_030004_TEX_DEPTH(texture->depth0 - 1) | S_030004_ARRAY_MODE(array_mode)); - rstate->val[2] = (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8; - rstate->val[3] = (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8; + rstate->val[2] = tmp->offset[0] >> 8; + rstate->val[3] = tmp->offset[1] >> 8; rstate->val[4] = (word4 | S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) | S_030010_ENDIAN_SWAP(endian) | @@ -777,7 +1354,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state /* FIXME handle enabling of CB beyond BASE8 which has different offset */ r600_pipe_state_add_reg(rstate, R_028C60_CB_COLOR0_BASE + cb * 0x3C, - (offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); + offset >> 8, 0xFFFFFFFF, bo[0]); r600_pipe_state_add_reg(rstate, R_028C78_CB_COLOR0_DIM + cb * 0x3C, 0x0, 0xFFFFFFFF, NULL); @@ -830,18 +1407,18 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state stencil_format = r600_translate_stencilformat(state->zsbuf->texture->format); r600_pipe_state_add_reg(rstate, R_028048_DB_Z_READ_BASE, - (offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + offset >> 8, 0xFFFFFFFF, rbuffer->bo); r600_pipe_state_add_reg(rstate, R_028050_DB_Z_WRITE_BASE, - (offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + offset >> 8, 0xFFFFFFFF, rbuffer->bo); if (stencil_format) { uint32_t stencil_offset; stencil_offset = ((surf->aligned_height * rtex->pitch_in_bytes[level]) + 255) & ~255; r600_pipe_state_add_reg(rstate, R_02804C_DB_STENCIL_READ_BASE, - (offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + (offset + stencil_offset) >> 8, 0xFFFFFFFF, rbuffer->bo); r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE, - (offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + (offset + stencil_offset) >> 8, 0xFFFFFFFF, rbuffer->bo); } r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL); @@ -865,14 +1442,11 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); u32 shader_mask, tl, br, target_mask; - enum radeon_family family; int tl_x, tl_y, br_x, br_y; if (rstate == NULL) return; - family = r600_get_family(rctx->radeon); - evergreen_context_flush_dest_caches(&rctx->ctx); rctx->ctx.num_dest_buffers = state->nr_cbufs; @@ -883,6 +1457,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, /* build states */ rctx->have_depth_fb = 0; + rctx->nr_cbufs = state->nr_cbufs; for (int i = 0; i < state->nr_cbufs; i++) { evergreen_cb(rctx, rstate, state, i); } @@ -908,7 +1483,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, if (br_y == 0) tl_y = 1; /* cayman hw workaround */ - if (family == CHIP_CAYMAN) { + if (rctx->chip_class == CAYMAN) { if (br_x == 1 && br_y == 1) br_x = 2; } @@ -952,7 +1527,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, shader_mask, 0xFFFFFFFF, NULL); - if (family == CHIP_CAYMAN) { + if (rctx->chip_class == CAYMAN) { r600_pipe_state_add_reg(rstate, CM_R_028BE0_PA_SC_AA_CONFIG, 0x00000000, 0xFFFFFFFF, NULL); } else { @@ -1139,9 +1714,9 @@ void evergreen_init_config(struct r600_pipe_context *rctx) enum radeon_family family; unsigned tmp; - family = r600_get_family(rctx->radeon); + family = rctx->family; - if (family == CHIP_CAYMAN) { + if (rctx->chip_class == CAYMAN) { cayman_init_config(rctx); return; } @@ -1451,6 +2026,11 @@ void evergreen_init_config(struct r600_pipe_context *rctx) tmp |= S_008C28_NUM_LS_STACK_ENTRIES(num_ls_stack_entries); r600_pipe_state_add_reg(rstate, R_008C28_SQ_STACK_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL); + tmp = 0; + tmp |= S_008E2C_NUM_PS_LDS(0x1000); + tmp |= S_008E2C_NUM_LS_LDS(0x1000); + r600_pipe_state_add_reg(rstate, R_008E2C_SQ_LDS_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009100_SPI_CONFIG_CNTL, 0x0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4), 0xFFFFFFFF, NULL); @@ -1629,7 +2209,10 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader rshader->output[i].name == TGSI_SEMANTIC_STENCIL) exports_ps |= 1; else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { - num_cout++; + if (rshader->fs_write_all) + num_cout = rshader->nr_cbufs; + else + num_cout++; } } exports_ps |= S_02884C_EXPORT_COLORS(num_cout); @@ -1682,7 +2265,7 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader r600_pipe_state_add_reg(rstate, R_028840_SQ_PGM_START_PS, - (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo); + 0, 0xFFFFFFFF, shader->bo); r600_pipe_state_add_reg(rstate, R_028844_SQ_PGM_RESOURCES_PS, S_028844_NUM_GPRS(rshader->bc.ngpr) | @@ -1715,7 +2298,7 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader struct r600_pipe_state *rstate = &shader->rstate; struct r600_shader *rshader = &shader->shader; unsigned spi_vs_out_id[10]; - unsigned i, tmp; + unsigned i, tmp, nparams; /* clear previous register */ rstate->nregs = 0; @@ -1734,9 +2317,17 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader spi_vs_out_id[i], 0xFFFFFFFF, NULL); } + /* Certain attributes (position, psize, etc.) don't count as params. + * VS is required to export at least one param and r600_shader_from_tgsi() + * takes care of adding a dummy export. + */ + nparams = rshader->noutput - rshader->npos; + if (nparams < 1) + nparams = 1; + r600_pipe_state_add_reg(rstate, R_0286C4_SPI_VS_OUT_CONFIG, - S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), + S_0286C4_VS_EXPORT_COUNT(nparams - 1), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028860_SQ_PGM_RESOURCES_VS, @@ -1748,7 +2339,7 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader 0x0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_02885C_SQ_PGM_START_VS, - (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo); + 0, 0xFFFFFFFF, shader->bo); r600_pipe_state_add_reg(rstate, R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, @@ -1765,7 +2356,7 @@ void evergreen_fetch_shader(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS, - (r600_bo_offset(ve->fetch_shader)) >> 8, + 0, 0xFFFFFFFF, ve->fetch_shader); } diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index d795f5757ed..96dbd4da91b 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -216,6 +216,13 @@ #define S_008C28_NUM_LS_STACK_ENTRIES(x) (((x) & 0xFFF) << 16) #define G_008C28_NUM_LS_STACK_ENTRIES(x) (((x) >> 16) & 0xFFF) #define C_008C28_NUM_LS_STACK_ENTRIES(x) 0xF000FFFF +#define R_008E2C_SQ_LDS_RESOURCE_MGMT 0x00008E2C +#define S_008E2C_NUM_PS_LDS(x) (((x) & 0xFFFF) << 0) +#define G_008E2C_NUM_PS_LDS(x) (((x) >> 0) & 0xFFFF) +#define C_008E2C_NUM_PS_LDS(x) 0x0000FFFF +#define S_008E2C_NUM_LS_LDS(x) (((x) & 0xFFFF) << 16) +#define G_008E2C_NUM_LS_LDS(x) (((x) >> 16) & 0xFFFF) +#define C_008E2C_NUM_LS_LDS(x) 0xFFFF0000 #define R_008CF0_SQ_MS_FIFO_SIZES 0x00008CF0 #define S_008CF0_CACHE_FIFO_SIZE(x) (((x) & 0xFF) << 0) diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 225c17c2540..2ac5ed465c1 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -94,6 +94,8 @@ struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon); unsigned r600_get_clock_crystal_freq(struct radeon *radeon); unsigned r600_get_minor_version(struct radeon *radeon); unsigned r600_get_num_backends(struct radeon *radeon); +unsigned r600_get_num_tile_pipes(struct radeon *radeon); +unsigned r600_get_backend_map(struct radeon *radeon); /* r600_bo.c */ struct r600_bo; @@ -105,11 +107,8 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx); void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo); boolean r600_bo_get_winsys_handle(struct radeon *radeon, struct r600_bo *pb_bo, - unsigned stride, struct winsys_handle *whandle); -static INLINE unsigned r600_bo_offset(struct r600_bo *bo) -{ - return 0; -} + unsigned stride, struct winsys_handle *whandle); + void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo); /* this relies on the pipe_reference being the first member of r600_bo */ @@ -211,14 +210,21 @@ struct r600_reloc { */ struct r600_query { u64 result; - /* The kind of query. Currently only OQ is supported. */ + /* The kind of query */ unsigned type; - /* How many results have been written, in dwords. It's incremented - * after end_query and flush. */ - unsigned num_results; - /* if we've flushed the query */ + /* Offset of the first result for current query */ + unsigned results_start; + /* Offset of the next free result after current query data */ + unsigned results_end; + /* Size of the result */ + unsigned result_size; + /* Count of new queries started in one stream without flushing */ + unsigned queries_emitted; + /* State flags */ unsigned state; - /* The buffer where query results are stored. */ + /* The buffer where query results are stored. It's used as a ring, + * data blocks for current query are stored sequentially from + * results_start to results_end, with wrapping on the buffer end */ struct r600_bo *buffer; unsigned buffer_size; /* linked list of queries */ @@ -228,6 +234,7 @@ struct r600_query { #define R600_QUERY_STATE_STARTED (1 << 0) #define R600_QUERY_STATE_ENDED (1 << 1) #define R600_QUERY_STATE_SUSPENDED (1 << 2) +#define R600_QUERY_STATE_FLUSHED (1 << 3) #define R600_CONTEXT_DRAW_PENDING (1 << 0) #define R600_CONTEXT_DST_CACHES_DIRTY (1 << 1) @@ -245,6 +252,7 @@ struct r600_context { unsigned pm4_cdwords; unsigned pm4_dirty_cdwords; unsigned ctx_pm4_ndwords; + unsigned init_dwords; unsigned nreloc; unsigned creloc; struct r600_reloc *reloc; @@ -252,6 +260,7 @@ struct r600_context { u32 *pm4; struct list_head query_list; unsigned num_query_running; + unsigned backend_mask; struct list_head fenced_bo; unsigned max_db; /* for OQ */ unsigned num_dest_buffers; @@ -273,6 +282,7 @@ struct r600_draw { struct r600_bo *indices; }; +void r600_get_backend_mask(struct r600_context *ctx); int r600_context_init(struct r600_context *ctx, struct radeon *radeon); void r600_context_fini(struct r600_context *ctx); void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_state *state); @@ -282,7 +292,6 @@ void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r6 void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); void r600_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); void r600_context_flush(struct r600_context *ctx); -void r600_context_dump_bof(struct r600_context *ctx, const char *file); void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw); struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type); @@ -293,7 +302,7 @@ boolean r600_context_query_result(struct r600_context *ctx, void r600_query_begin(struct r600_context *ctx, struct r600_query *query); void r600_query_end(struct r600_context *ctx, struct r600_query *query); void r600_context_queries_suspend(struct r600_context *ctx); -void r600_context_queries_resume(struct r600_context *ctx); +void r600_context_queries_resume(struct r600_context *ctx, boolean flushed); void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation, int flag_wait); void r600_context_emit_fence(struct r600_context *ctx, struct r600_bo *fence, diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index c447a031063..5fae2b00c8b 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -41,9 +41,9 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r if(alu->is_op3) return 3; - switch (bc->chiprev) { - case CHIPREV_R600: - case CHIPREV_R700: + switch (bc->chip_class) { + case R600: + case R700: switch (alu->inst) { case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP: return 0; @@ -93,8 +93,8 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r "Need instruction operand number for 0x%x.\n", alu->inst); } break; - case CHIPREV_EVERGREEN: - case CHIPREV_CAYMAN: + case EVERGREEN: + case CAYMAN: switch (alu->inst) { case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP: return 0; @@ -195,48 +195,10 @@ static struct r600_bc_tex *r600_bc_tex(void) return tex; } -int r600_bc_init(struct r600_bc *bc, enum radeon_family family) +void r600_bc_init(struct r600_bc *bc, enum chip_class chip_class) { LIST_INITHEAD(&bc->cf); - bc->family = family; - switch (bc->family) { - case CHIP_R600: - case CHIP_RV610: - case CHIP_RV630: - case CHIP_RV670: - case CHIP_RV620: - case CHIP_RV635: - case CHIP_RS780: - case CHIP_RS880: - bc->chiprev = CHIPREV_R600; - break; - case CHIP_RV770: - case CHIP_RV730: - case CHIP_RV710: - case CHIP_RV740: - bc->chiprev = CHIPREV_R700; - break; - case CHIP_CEDAR: - case CHIP_REDWOOD: - case CHIP_JUNIPER: - case CHIP_CYPRESS: - case CHIP_HEMLOCK: - case CHIP_PALM: - case CHIP_SUMO: - case CHIP_SUMO2: - case CHIP_BARTS: - case CHIP_TURKS: - case CHIP_CAICOS: - bc->chiprev = CHIPREV_EVERGREEN; - break; - case CHIP_CAYMAN: - bc->chiprev = CHIPREV_CAYMAN; - break; - default: - R600_ERR("unknown family %d\n", bc->family); - return -EINVAL; - } - return 0; + bc->chip_class = chip_class; } static int r600_bc_add_cf(struct r600_bc *bc) @@ -301,9 +263,9 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output) /* alu instructions that can ony exits once per group */ static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu) { - switch (bc->chiprev) { - case CHIPREV_R600: - case CHIPREV_R700: + switch (bc->chip_class) { + case R600: + case R700: return !alu->is_op3 && ( alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT || @@ -339,8 +301,8 @@ static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu) alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT); - case CHIPREV_EVERGREEN: - case CHIPREV_CAYMAN: + case EVERGREEN: + case CAYMAN: default: return !alu->is_op3 && ( alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE || @@ -382,16 +344,16 @@ static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu) static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu) { - switch (bc->chiprev) { - case CHIPREV_R600: - case CHIPREV_R700: + switch (bc->chip_class) { + case R600: + case R700: return !alu->is_op3 && ( alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4); - case CHIPREV_EVERGREEN: - case CHIPREV_CAYMAN: + case EVERGREEN: + case CAYMAN: default: return !alu->is_op3 && ( alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE || @@ -403,13 +365,13 @@ static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu) static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu) { - switch (bc->chiprev) { - case CHIPREV_R600: - case CHIPREV_R700: + switch (bc->chip_class) { + case R600: + case R700: return !alu->is_op3 && alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE; - case CHIPREV_EVERGREEN: - case CHIPREV_CAYMAN: + case EVERGREEN: + case CAYMAN: default: return !alu->is_op3 && alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE; @@ -418,15 +380,15 @@ static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu) static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu) { - switch (bc->chiprev) { - case CHIPREV_R600: - case CHIPREV_R700: + switch (bc->chip_class) { + case R600: + case R700: return !alu->is_op3 && ( alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT); - case CHIPREV_EVERGREEN: - case CHIPREV_CAYMAN: + case EVERGREEN: + case CAYMAN: default: return !alu->is_op3 && ( alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT); @@ -438,16 +400,16 @@ static int is_alu_vec_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) { return is_alu_reduction_inst(bc, alu) || is_alu_mova_inst(bc, alu) || - (bc->chiprev == CHIPREV_EVERGREEN && + (bc->chip_class == EVERGREEN && alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR); } /* alu instructions that can only execute on the trans unit */ static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) { - switch (bc->chiprev) { - case CHIPREV_R600: - case CHIPREV_R700: + switch (bc->chip_class) { + case R600: + case R700: if (!alu->is_op3) return alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT || @@ -478,8 +440,8 @@ static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_D2 || alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M2 || alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M4; - case CHIPREV_EVERGREEN: - case CHIPREV_CAYMAN: + case EVERGREEN: + case CAYMAN: default: if (!alu->is_op3) /* Note that FLT_TO_INT_* instructions are vector-only instructions @@ -525,7 +487,7 @@ static int assign_alu_units(struct r600_bc *bc, struct r600_bc_alu *alu_first, { struct r600_bc_alu *alu; unsigned i, chan, trans; - int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5; + int max_slots = bc->chip_class == CAYMAN ? 4 : 5; for (i = 0; i < max_slots; i++) assignment[i] = NULL; @@ -612,7 +574,7 @@ static int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, static int reserve_cfile(struct r600_bc *bc, struct alu_bank_swizzle *bs, unsigned sel, unsigned chan) { int res, num_res = 4; - if (bc->chiprev >= CHIPREV_R700) { + if (bc->chip_class >= R700) { num_res = 2; chan /= 2; } @@ -733,8 +695,8 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc, struct alu_bank_swizzle bs; int bank_swizzle[5]; int i, r = 0, forced = 0; - boolean scalar_only = bc->chiprev == CHIPREV_CAYMAN ? false : true; - int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5; + boolean scalar_only = bc->chip_class == CAYMAN ? false : true; + int max_slots = bc->chip_class == CAYMAN ? 4 : 5; for (i = 0; i < max_slots; i++) { if (slots[i] && slots[i]->bank_swizzle_force) { @@ -806,7 +768,7 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc, struct r600_bc_alu *prev[5]; int gpr[5], chan[5]; int i, j, r, src, num_src; - int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5; + int max_slots = bc->chip_class == CAYMAN ? 4 : 5; r = assign_alu_units(bc, alu_prev, prev); if (r) @@ -834,7 +796,7 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc, if (!is_gpr(alu->src[src].sel) || alu->src[src].rel) continue; - if (bc->chiprev < CHIPREV_CAYMAN) { + if (bc->chip_class < CAYMAN) { if (alu->src[src].sel == gpr[4] && alu->src[src].chan == chan[4]) { alu->src[src].sel = V_SQ_ALU_SRC_PS; @@ -948,7 +910,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], int i, j, r, src, num_src; int num_once_inst = 0; int have_mova = 0, have_rel = 0; - int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5; + int max_slots = bc->chip_class == CAYMAN ? 4 : 5; r = assign_alu_units(bc, alu_prev, prev); if (r) @@ -1252,7 +1214,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int uint32_t literal[4]; unsigned nliteral; struct r600_bc_alu *slots[5]; - int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5; + int max_slots = bc->chip_class == CAYMAN ? 4 : 5; r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots); if (r) return r; @@ -1302,26 +1264,26 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) static unsigned r600_bc_num_tex_and_vtx_instructions(const struct r600_bc *bc) { - switch (bc->chiprev) { - case CHIPREV_R600: + switch (bc->chip_class) { + case R600: return 8; - case CHIPREV_R700: + case R700: return 16; - case CHIPREV_EVERGREEN: - case CHIPREV_CAYMAN: + case EVERGREEN: + case CAYMAN: return 64; default: - R600_ERR("Unknown chiprev %d.\n", bc->chiprev); + R600_ERR("Unknown chip class %d.\n", bc->chip_class); return 8; } } static inline boolean last_inst_was_vtx_fetch(struct r600_bc *bc) { - if (bc->chiprev == CHIPREV_CAYMAN) { + if (bc->chip_class == CAYMAN) { if (bc->cf_last->inst != CM_V_SQ_CF_WORD1_SQ_CF_INST_TC) return TRUE; } else { @@ -1350,7 +1312,7 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) free(nvtx); return r; } - if (bc->chiprev == CHIPREV_CAYMAN) + if (bc->chip_class == CAYMAN) bc->cf_last->inst = CM_V_SQ_CF_WORD1_SQ_CF_INST_TC; else bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_VTX; @@ -1438,7 +1400,7 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) | S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) | S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x); - if (bc->chiprev < CHIPREV_CAYMAN) + if (bc->chip_class < CAYMAN) bc->bytecode[id] |= S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count); id++; bc->bytecode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx->dst_sel_x) | @@ -1453,7 +1415,7 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr); bc->bytecode[id] = S_SQ_VTX_WORD2_OFFSET(vtx->offset)| S_SQ_VTX_WORD2_ENDIAN_SWAP(vtx->endian); - if (bc->chiprev < CHIPREV_CAYMAN) + if (bc->chip_class < CAYMAN) bc->bytecode[id] |= S_SQ_VTX_WORD2_MEGA_FETCH(1); id++; bc->bytecode[id++] = 0; @@ -1560,13 +1522,13 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) | S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) | S_SQ_CF_ALU_WORD1_BARRIER(1) | - S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == CHIPREV_R600 ? cf->r6xx_uses_waterfall : 0) | + S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chip_class == R600 ? cf->r6xx_uses_waterfall : 0) | S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); break; case V_SQ_CF_WORD1_SQ_CF_INST_TEX: case V_SQ_CF_WORD1_SQ_CF_INST_VTX: case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: - if (bc->chiprev == CHIPREV_R700) + if (bc->chip_class == R700) r700_bc_cf_vtx_build(&bc->bytecode[id], cf); else r600_bc_cf_vtx_build(&bc->bytecode[id], cf); @@ -1673,7 +1635,7 @@ int r600_bc_build(struct r600_bc *bc) return -ENOMEM; LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { addr = cf->addr; - if (bc->chiprev >= CHIPREV_EVERGREEN) + if (bc->chip_class >= EVERGREEN) r = eg_bc_cf_build(bc, cf); else r = r600_bc_cf_build(bc, cf); @@ -1691,17 +1653,17 @@ int r600_bc_build(struct r600_bc *bc) if (r) return r; r600_bc_alu_adjust_literals(bc, alu, literal, nliteral); - switch(bc->chiprev) { - case CHIPREV_R600: + switch(bc->chip_class) { + case R600: r = r600_bc_alu_build(bc, alu, addr); break; - case CHIPREV_R700: - case CHIPREV_EVERGREEN: /* eg alu is same encoding as r700 */ - case CHIPREV_CAYMAN: /* eg alu is same encoding as r700 */ + case R700: + case EVERGREEN: /* eg alu is same encoding as r700 */ + case CAYMAN: /* eg alu is same encoding as r700 */ r = r700_bc_alu_build(bc, alu, addr); break; default: - R600_ERR("unknown family %d\n", bc->family); + R600_ERR("unknown chip class %d.\n", bc->chip_class); return -EINVAL; } if (r) @@ -1726,7 +1688,7 @@ int r600_bc_build(struct r600_bc *bc) } break; case V_SQ_CF_WORD1_SQ_CF_INST_TEX: - if (bc->chiprev == CHIPREV_CAYMAN) { + if (bc->chip_class == CAYMAN) { LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { r = r600_bc_vtx_build(bc, vtx, addr); if (r) @@ -1812,17 +1774,17 @@ void r600_bc_dump(struct r600_bc *bc) unsigned nliteral; char chip = '6'; - switch (bc->chiprev) { - case 1: + switch (bc->chip_class) { + case R700: chip = '7'; break; - case 2: + case EVERGREEN: chip = 'E'; break; - case 3: + case CAYMAN: chip = 'C'; break; - case 0: + case R600: default: chip = '6'; break; @@ -1863,6 +1825,8 @@ void r600_bc_dump(struct r600_bc *bc) break; case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: + case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: + case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]); fprintf(stderr, "GPR:%X ", cf->output.gpr); fprintf(stderr, "ELEM_SIZE:%X ", cf->output.elem_size); @@ -1991,7 +1955,7 @@ void r600_bc_dump(struct r600_bc *bc) fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); fprintf(stderr, "SRC(GPR:%d ", vtx->src_gpr); fprintf(stderr, "SEL_X:%d) ", vtx->src_sel_x); - if (bc->chiprev < CHIPREV_CAYMAN) + if (bc->chip_class < CAYMAN) fprintf(stderr, "MEGA_FETCH_COUNT:%d ", vtx->mega_fetch_count); else fprintf(stderr, "SEL_Y:%d) ", 0); @@ -2160,7 +2124,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru struct r600_bc_vtx vtx; struct pipe_vertex_element *elements = ve->elements; const struct util_format_description *desc; - unsigned fetch_resource_start = rctx->family >= CHIP_CEDAR ? 0 : 160; + unsigned fetch_resource_start = rctx->chip_class >= EVERGREEN ? 0 : 160; unsigned format, num_format, format_comp, endian; u32 *bytecode; int i, r; @@ -2178,9 +2142,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru } memset(&bc, 0, sizeof(bc)); - r = r600_bc_init(&bc, r600_get_family(rctx->radeon)); - if (r) - return r; + r600_bc_init(&bc, rctx->chip_class); for (i = 0; i < ve->count; i++) { if (elements[i].instance_divisor > 1) { @@ -2285,7 +2247,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru r600_bo_unmap(rctx->radeon, ve->fetch_shader); r600_bc_clear(&bc); - if (rctx->family >= CHIP_CEDAR) + if (rctx->chip_class >= EVERGREEN) evergreen_fetch_shader(&rctx->context, ve); else r600_fetch_shader(&rctx->context, ve); diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 540f45bbd06..cbdaacf7178 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -171,8 +171,7 @@ struct r600_cf_callstack { }; struct r600_bc { - enum radeon_family family; - int chiprev; /* 0 - r600, 1 - r700, 2 - evergreen */ + enum chip_class chip_class; int type; struct list_head cf; struct r600_bc_cf *cf_last; @@ -193,7 +192,7 @@ struct r600_bc { int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf); /* r600_asm.c */ -int r600_bc_init(struct r600_bc *bc, enum radeon_family family); +void r600_bc_init(struct r600_bc *bc, enum chip_class chip_class); void r600_bc_clear(struct r600_bc *bc); int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu); int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx); diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 043c8759f56..35e68b6e222 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -97,7 +97,7 @@ static void r600_blitter_end(struct pipe_context *ctx) rctx->saved_render_cond_mode); rctx->saved_render_cond = NULL; } - r600_context_queries_resume(&rctx->ctx); + r600_context_queries_resume(&rctx->ctx, FALSE); rctx->blit = false; } @@ -294,6 +294,8 @@ static void r600_resource_copy_region(struct pipe_context *ctx, { struct r600_resource_texture *rsrc = (struct r600_resource_texture*)src; struct texture_orig_info orig_info[2]; + struct pipe_box sbox; + const struct pipe_box *psbox; boolean restore_orig[2]; /* Fallback for buffers. */ @@ -311,7 +313,15 @@ static void r600_resource_copy_region(struct pipe_context *ctx, if (util_format_is_compressed(src->format)) { r600_compressed_to_blittable(src, src_level, &orig_info[0]); restore_orig[0] = TRUE; - } + sbox.x = util_format_get_nblocksx(orig_info[0].format, src_box->x); + sbox.y = util_format_get_nblocksy(orig_info[0].format, src_box->y); + sbox.z = src_box->z; + sbox.width = util_format_get_nblocksx(orig_info[0].format, src_box->width); + sbox.height = util_format_get_nblocksy(orig_info[0].format, src_box->height); + sbox.depth = src_box->depth; + psbox=&sbox; + } else + psbox=src_box; if (util_format_is_compressed(dst->format)) { r600_compressed_to_blittable(dst, dst_level, &orig_info[1]); @@ -322,7 +332,7 @@ static void r600_resource_copy_region(struct pipe_context *ctx, } r600_hw_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, - src, src_level, src_box); + src, src_level, psbox); if (restore_orig[0]) r600_reset_blittable_to_compressed(src, src_level, &orig_info[0]); diff --git a/src/gallium/drivers/r600/r600_formats.h b/src/gallium/drivers/r600/r600_formats.h index ae0bc432ad2..1c1089d89d2 100644 --- a/src/gallium/drivers/r600/r600_formats.h +++ b/src/gallium/drivers/r600/r600_formats.h @@ -81,4 +81,36 @@ static INLINE unsigned r600_endian_swap(unsigned size) } } +static INLINE bool r600_is_vertex_format_supported(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + unsigned i; + + if (!desc) + return false; + + /* Find the first non-VOID channel. */ + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) + break; + } + if (i == 4) + return false; + + /* No fixed, no double. */ + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || + desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED || + (desc->channel[i].size == 64 && + desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)) + return false; + + /* No scaled/norm formats with 32 bits per channel. */ + if (desc->channel[i].size == 32 && + (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED || + desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED)) + return false; + + return true; +} + #endif diff --git a/src/gallium/drivers/r600/r600_opcodes.h b/src/gallium/drivers/r600/r600_opcodes.h index 184f32c9960..7ae091ea5cd 100644 --- a/src/gallium/drivers/r600/r600_opcodes.h +++ b/src/gallium/drivers/r600/r600_opcodes.h @@ -409,14 +409,8 @@ #define EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_EXPORT_COMBINED 0x0000005B #define EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_RAT_COMBINED_CACHELESS 0x0000005C +#define BC_INST(bc, x) ((bc)->chip_class >= EVERGREEN ? EG_##x : x) -#define CHIPREV_R600 0 -#define CHIPREV_R700 1 -#define CHIPREV_EVERGREEN 2 -#define CHIPREV_CAYMAN 3 - -#define BC_INST(bc, x) ((bc)->chiprev >= CHIPREV_EVERGREEN ? EG_##x : x) - -#define CTX_INST(x) (ctx->bc->chiprev >= CHIPREV_EVERGREEN ? EG_##x : x) +#define CTX_INST(x) (ctx->bc->chip_class >= EVERGREEN ? EG_##x : x) #endif diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 2d744137522..461f59439e8 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -38,6 +38,8 @@ #include <util/u_memory.h> #include <util/u_inlines.h> #include "util/u_upload_mgr.h" +#include <vl/vl_decoder.h> +#include <vl/vl_video_buffer.h> #include "os/os_time.h" #include <pipebuffer/pb_buffer.h> #include "r600.h" @@ -45,7 +47,6 @@ #include "r600_resource.h" #include "r600_shader.h" #include "r600_pipe.h" -#include "r600_state_inlines.h" /* * pipe_context @@ -61,7 +62,7 @@ static struct r600_fence *r600_create_fence(struct r600_pipe_context *ctx) R600_ERR("r600: failed to create bo for fence objects\n"); return NULL; } - ctx->fences.data = r600_bo_map(ctx->radeon, ctx->fences.bo, PB_USAGE_UNSYNCHRONIZED, NULL); + ctx->fences.data = r600_bo_map(ctx->radeon, ctx->fences.bo, PIPE_TRANSFER_UNSYNCHRONIZED, NULL); } if (!LIST_IS_EMPTY(&ctx->fences.pool)) { @@ -118,25 +119,9 @@ static void r600_flush(struct pipe_context *ctx, struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_fence **rfence = (struct r600_fence**)fence; -#if 0 - static int dc = 0; - char dname[256]; -#endif - if (rfence) *rfence = r600_create_fence(rctx); - if (!rctx->ctx.pm4_cdwords) - return; - -#if 0 - sprintf(dname, "gallium-%08d.bof", dc); - if (dc < 20) { - r600_context_dump_bof(&rctx->ctx, dname); - R600_ERR("dumped %s\n", dname); - } - dc++; -#endif r600_context_flush(&rctx->ctx); } @@ -198,7 +183,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void { struct r600_pipe_context *rctx = CALLOC_STRUCT(r600_pipe_context); struct r600_screen* rscreen = (struct r600_screen *)screen; - enum chip_class class; if (rctx == NULL) return NULL; @@ -215,6 +199,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void rctx->screen = rscreen; rctx->radeon = rscreen->radeon; rctx->family = r600_get_family(rctx->radeon); + rctx->chip_class = r600_get_family_class(rctx->radeon); rctx->fences.bo = NULL; rctx->fences.data = NULL; @@ -228,47 +213,32 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void r600_init_surface_functions(rctx); rctx->context.draw_vbo = r600_draw_vbo; - switch (r600_get_family(rctx->radeon)) { - case CHIP_R600: - case CHIP_RV610: - case CHIP_RV630: - case CHIP_RV670: - case CHIP_RV620: - case CHIP_RV635: - case CHIP_RS780: - case CHIP_RS880: - case CHIP_RV770: - case CHIP_RV730: - case CHIP_RV710: - case CHIP_RV740: + rctx->context.create_video_decoder = vl_create_decoder; + rctx->context.create_video_buffer = vl_video_buffer_create; + + switch (rctx->chip_class) { + case R600: + case R700: r600_init_state_functions(rctx); if (r600_context_init(&rctx->ctx, rctx->radeon)) { r600_destroy_context(&rctx->context); return NULL; } r600_init_config(rctx); + rctx->custom_dsa_flush = r600_create_db_flush_dsa(rctx); break; - case CHIP_CEDAR: - case CHIP_REDWOOD: - case CHIP_JUNIPER: - case CHIP_CYPRESS: - case CHIP_HEMLOCK: - case CHIP_PALM: - case CHIP_SUMO: - case CHIP_SUMO2: - case CHIP_BARTS: - case CHIP_TURKS: - case CHIP_CAICOS: - case CHIP_CAYMAN: + case EVERGREEN: + case CAYMAN: evergreen_init_state_functions(rctx); if (evergreen_context_init(&rctx->ctx, rctx->radeon)) { r600_destroy_context(&rctx->context); return NULL; } evergreen_init_config(rctx); + rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx); break; default: - R600_ERR("unsupported family %d\n", r600_get_family(rctx->radeon)); + R600_ERR("Unsupported chip class %d.\n", rctx->chip_class); r600_destroy_context(&rctx->context); return NULL; } @@ -293,12 +263,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void return NULL; } - class = r600_get_family_class(rctx->radeon); - if (class == R600 || class == R700) - rctx->custom_dsa_flush = r600_create_db_flush_dsa(rctx); - else - rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx); - return &rctx->context; } @@ -376,6 +340,8 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: case PIPE_CAP_SM3: + case PIPE_CAP_SEAMLESS_CUBE_MAP: + case PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL: return 1; /* Supported except the original R600. */ @@ -385,14 +351,12 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) return family == CHIP_R600 ? 0 : 1; /* Supported on Evergreen. */ - case PIPE_CAP_SEAMLESS_CUBE_MAP: case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: return family >= CHIP_CEDAR ? 1 : 0; /* Unsupported features. */ case PIPE_CAP_STREAM_OUTPUT: case PIPE_CAP_PRIMITIVE_RESTART: - case PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL: case PIPE_CAP_TGSI_INSTANCEID: case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: @@ -481,9 +445,9 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e return 8; /* FIXME */ case PIPE_SHADER_CAP_MAX_INPUTS: if(shader == PIPE_SHADER_FRAGMENT) - return 10; + return 34; else - return 16; + return 32; case PIPE_SHADER_CAP_MAX_TEMPS: return 256; /* Max native temporaries. */ case PIPE_SHADER_CAP_MAX_ADDRS: @@ -511,62 +475,21 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e } } -static boolean r600_is_format_supported(struct pipe_screen* screen, - enum pipe_format format, - enum pipe_texture_target target, - unsigned sample_count, - unsigned usage) +static int r600_get_video_param(struct pipe_screen *screen, + enum pipe_video_profile profile, + enum pipe_video_cap param) { - unsigned retval = 0; - if (target >= PIPE_MAX_TEXTURE_TYPES) { - R600_ERR("r600: unsupported texture type %d\n", target); - return FALSE; - } - - if (!util_format_is_supported(format, usage)) - return FALSE; - - /* Multisample */ - if (sample_count > 1) - return FALSE; - - if ((usage & PIPE_BIND_SAMPLER_VIEW) && - r600_is_sampler_format_supported(screen, format)) { - retval |= PIPE_BIND_SAMPLER_VIEW; - } - - if ((usage & (PIPE_BIND_RENDER_TARGET | - PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED)) && - r600_is_colorbuffer_format_supported(format)) { - retval |= usage & - (PIPE_BIND_RENDER_TARGET | - PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED); - } - - if ((usage & PIPE_BIND_DEPTH_STENCIL) && - r600_is_zs_format_supported(format)) { - retval |= PIPE_BIND_DEPTH_STENCIL; - } - - if (usage & PIPE_BIND_VERTEX_BUFFER) { - struct r600_screen *rscreen = (struct r600_screen *)screen; - enum radeon_family family = r600_get_family(rscreen->radeon); - - if (r600_is_vertex_format_supported(format, family)) { - retval |= PIPE_BIND_VERTEX_BUFFER; - } + switch (param) { + case PIPE_VIDEO_CAP_SUPPORTED: + return vl_profile_supported(screen, profile); + case PIPE_VIDEO_CAP_NPOT_TEXTURES: + return 1; + case PIPE_VIDEO_CAP_MAX_WIDTH: + case PIPE_VIDEO_CAP_MAX_HEIGHT: + return vl_video_buffer_max_size(screen); + default: + return 0; } - - if (usage & PIPE_BIND_TRANSFER_READ) - retval |= PIPE_BIND_TRANSFER_READ; - if (usage & PIPE_BIND_TRANSFER_WRITE) - retval |= PIPE_BIND_TRANSFER_WRITE; - - return retval == usage; } static void r600_destroy_screen(struct pipe_screen* pscreen) @@ -657,7 +580,13 @@ struct pipe_screen *r600_screen_create(struct radeon *radeon) rscreen->screen.get_param = r600_get_param; rscreen->screen.get_shader_param = r600_get_shader_param; rscreen->screen.get_paramf = r600_get_paramf; - rscreen->screen.is_format_supported = r600_is_format_supported; + rscreen->screen.get_video_param = r600_get_video_param; + if (r600_get_family_class(radeon) >= EVERGREEN) { + rscreen->screen.is_format_supported = evergreen_is_format_supported; + } else { + rscreen->screen.is_format_supported = r600_is_format_supported; + } + rscreen->screen.is_video_format_supported = vl_video_buffer_is_format_supported; rscreen->screen.context_create = r600_create_context; rscreen->screen.fence_reference = r600_fence_reference; rscreen->screen.fence_signalled = r600_fence_signalled; diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 84a45bec05d..6f399ed43b0 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -50,6 +50,7 @@ enum r600_pipe_state_id { R600_PIPE_STATE_BLEND = 0, R600_PIPE_STATE_BLEND_COLOR, R600_PIPE_STATE_CONFIG, + R600_PIPE_STATE_SEAMLESS_CUBEMAP, R600_PIPE_STATE_CLIP, R600_PIPE_STATE_SCISSOR, R600_PIPE_STATE_VIEWPORT, @@ -87,6 +88,8 @@ struct r600_pipe_sampler_view { struct r600_pipe_rasterizer { struct r600_pipe_state rstate; + boolean clamp_vertex_color; + boolean clamp_fragment_color; boolean flatshade; unsigned sprite_coord_enable; float offset_units; @@ -124,6 +127,12 @@ struct r600_pipe_shader { struct r600_bo *bo; struct r600_bo *bo_fetch; struct r600_vertex_element vertex_elements; + struct tgsi_token *tokens; +}; + +struct r600_pipe_sampler_state { + struct r600_pipe_state rstate; + boolean seamless_cube_map; }; /* needed for blitter save */ @@ -166,7 +175,8 @@ struct r600_pipe_fences { struct r600_pipe_context { struct pipe_context context; struct blitter_context *blitter; - unsigned family; + enum radeon_family family; + enum chip_class chip_class; void *custom_dsa_flush; struct r600_screen *screen; struct radeon *radeon; @@ -196,11 +206,15 @@ struct r600_pipe_context { struct pipe_query *saved_render_cond; unsigned saved_render_cond_mode; /* shader information */ + boolean clamp_vertex_color; + boolean clamp_fragment_color; + boolean spi_dirty; unsigned sprite_coord_enable; boolean flatshade; boolean export_16bpc; unsigned alpha_ref; boolean alpha_ref_dirty; + unsigned nr_cbufs; struct r600_textures_info ps_samplers; struct r600_pipe_fences fences; @@ -234,6 +248,11 @@ void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx, void evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, unsigned offset, unsigned stride); +boolean evergreen_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned usage); /* r600_blit.c */ void r600_init_blit_functions(struct r600_pipe_context *rctx); @@ -258,7 +277,7 @@ void r600_init_query_functions(struct r600_pipe_context *rctx); void r600_init_context_resource_functions(struct r600_pipe_context *r600); /* r600_shader.c */ -int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens); +int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader); void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader); int r600_find_vs_semantic_index(struct r600_shader *vs, struct r600_shader *ps, int id); @@ -277,6 +296,11 @@ void r600_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, unsigned offset, unsigned stride); void r600_adjust_gprs(struct r600_pipe_context *rctx); +boolean r600_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned usage); /* r600_texture.c */ void r600_init_screen_texture_functions(struct pipe_screen *screen); diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c index bedb48b6031..de1f5d05f4e 100644 --- a/src/gallium/drivers/r600/r600_query.c +++ b/src/gallium/drivers/r600/r600_query.c @@ -43,7 +43,7 @@ static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query) struct r600_query *rquery = (struct r600_query *)query; rquery->result = 0; - rquery->num_results = 0; + rquery->results_start = rquery->results_end; r600_query_begin(&rctx->ctx, (struct r600_query *)query); } @@ -61,10 +61,7 @@ static boolean r600_get_query_result(struct pipe_context *ctx, struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_query *rquery = (struct r600_query *)query; - if (rquery->num_results) { - ctx->flush(ctx, NULL); - } - return r600_context_query_result(&rctx->ctx, (struct r600_query *)query, wait, vresult); + return r600_context_query_result(&rctx->ctx, rquery, wait, vresult); } static void r600_render_condition(struct pipe_context *ctx, @@ -75,12 +72,18 @@ static void r600_render_condition(struct pipe_context *ctx, struct r600_query *rquery = (struct r600_query *)query; int wait_flag = 0; + /* If we already have nonzero result, render unconditionally */ + if (query != NULL && rquery->result != 0) + return; + rctx->current_render_cond = query; rctx->current_render_cond_mode = mode; - if (!query) { - rctx->ctx.predicate_drawing = false; - r600_query_predication(&rctx->ctx, NULL, PREDICATION_OP_CLEAR, 1); + if (query == NULL) { + if (rctx->ctx.predicate_drawing) { + rctx->ctx.predicate_drawing = false; + r600_query_predication(&rctx->ctx, NULL, PREDICATION_OP_CLEAR, 1); + } return; } @@ -91,7 +94,6 @@ static void r600_render_condition(struct pipe_context *ctx, rctx->ctx.predicate_drawing = true; r600_query_predication(&rctx->ctx, rquery, PREDICATION_OP_ZPASS, wait_flag); - } void r600_init_query_functions(struct r600_pipe_context *rctx) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index b8a86b03143..c55cdd707eb 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -99,14 +99,14 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s /* build state */ switch (rshader->processor_type) { case TGSI_PROCESSOR_VERTEX: - if (rshader->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_shader_vs(ctx, shader); } else { r600_pipe_shader_vs(ctx, shader); } break; case TGSI_PROCESSOR_FRAGMENT: - if (rshader->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_shader_ps(ctx, shader); } else { r600_pipe_shader_ps(ctx, shader); @@ -118,9 +118,9 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s return 0; } -static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); +static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader); -int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens) +int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader) { static int dump_shaders = -1; struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; @@ -133,10 +133,9 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s if (dump_shaders) { fprintf(stderr, "--------------------------------------------------------------\n"); - tgsi_dump(tokens, 0); + tgsi_dump(shader->tokens, 0); } - shader->shader.family = r600_get_family(rctx->radeon); - r = r600_shader_from_tgsi(tokens, &shader->shader); + r = r600_shader_from_tgsi(rctx, shader); if (r) { R600_ERR("translation from TGSI failed !\n"); return r; @@ -159,6 +158,8 @@ void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader r600_bo_reference(rctx->radeon, &shader->bo, NULL); r600_bc_clear(&shader->shader.bc); + + memset(&shader->shader,0,sizeof(struct r600_shader)); } /* @@ -315,7 +316,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) ctx->shader->input[i].interpolate = d->Declaration.Interpolate; ctx->shader->input[i].centroid = d->Declaration.Centroid; ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; - if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev >= CHIPREV_EVERGREEN) { + if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chip_class >= EVERGREEN) { /* turn input into interpolate on EG */ if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) { if (ctx->shader->input[i].interpolate > 0) { @@ -331,6 +332,12 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) ctx->shader->output[i].sid = d->Semantic.Index; ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; ctx->shader->output[i].interpolate = d->Declaration.Interpolate; + if (ctx->type == TGSI_PROCESSOR_VERTEX) { + /* these don't count as vertex param exports */ + if ((ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION) || + (ctx->shader->output[i].name == TGSI_SEMANTIC_PSIZE)) + ctx->shader->npos++; + } break; case TGSI_FILE_CONSTANT: case TGSI_FILE_TEMPORARY: @@ -594,21 +601,21 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) return 0; } -static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) +static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader) { + struct r600_shader *shader = &pipeshader->shader; + struct tgsi_token *tokens = pipeshader->tokens; struct tgsi_full_immediate *immediate; struct tgsi_full_property *property; struct r600_shader_ctx ctx; struct r600_bc_output output[32]; unsigned output_done, noutput; unsigned opcode; - int i, r = 0, pos0; + int i, j, r = 0, pos0; ctx.bc = &shader->bc; ctx.shader = shader; - r = r600_bc_init(ctx.bc, shader->family); - if (r) - return r; + r600_bc_init(ctx.bc, rctx->chip_class); ctx.tokens = tokens; tgsi_scan_shader(tokens, &ctx.info); tgsi_parse_init(&ctx.parse, tokens); @@ -616,6 +623,11 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh shader->processor_type = ctx.type; ctx.bc->type = shader->processor_type; + shader->clamp_color = (((ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->clamp_fragment_color) || + ((ctx.type == TGSI_PROCESSOR_VERTEX) && rctx->clamp_vertex_color)); + + shader->nr_cbufs = rctx->nr_cbufs; + /* register allocations */ /* Values [0,127] correspond to GPR[0..127]. * Values [128,159] correspond to constant buffer bank 0 @@ -642,19 +654,19 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh } if (ctx.type == TGSI_PROCESSOR_VERTEX) { ctx.file_offset[TGSI_FILE_INPUT] = 1; - if (ctx.bc->chiprev >= CHIPREV_EVERGREEN) { + if (ctx.bc->chip_class >= EVERGREEN) { r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); } else { r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); } } - if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev >= CHIPREV_EVERGREEN) { + if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) { ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); } ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + - ctx.info.file_count[TGSI_FILE_INPUT]; + ctx.info.file_max[TGSI_FILE_INPUT] + 1; ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + - ctx.info.file_count[TGSI_FILE_OUTPUT]; + ctx.info.file_max[TGSI_FILE_OUTPUT] + 1; /* Outside the GPR range. This will be translated to one of the * kcache banks later. */ @@ -662,7 +674,7 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + - ctx.info.file_count[TGSI_FILE_TEMPORARY]; + ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1; ctx.temp_reg = ctx.ar_reg + 1; ctx.nliterals = 0; @@ -702,9 +714,9 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh goto out_err; if ((r = tgsi_split_literal_constant(&ctx))) goto out_err; - if (ctx.bc->chiprev == CHIPREV_CAYMAN) + if (ctx.bc->chip_class == CAYMAN) ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; - else if (ctx.bc->chiprev >= CHIPREV_EVERGREEN) + else if (ctx.bc->chip_class >= EVERGREEN) ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; else ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; @@ -725,52 +737,103 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh goto out_err; } } - /* export output */ + noutput = shader->noutput; + + /* clamp color outputs */ + if (shader->clamp_color) { + for (i = 0; i < noutput; i++) { + if (shader->output[i].name == TGSI_SEMANTIC_COLOR || + shader->output[i].name == TGSI_SEMANTIC_BCOLOR) { + + int j; + for (j = 0; j < 4; j++) { + struct r600_bc_alu alu; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + /* MOV_SAT R, R */ + alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.dst.sel = shader->output[i].gpr; + alu.dst.chan = j; + alu.dst.write = 1; + alu.dst.clamp = 1; + alu.src[0].sel = alu.dst.sel; + alu.src[0].chan = j; + + if (j == 3) { + alu.last = 1; + } + r = r600_bc_add_alu(ctx.bc, &alu); + if (r) + return r; + } + } + } + } + + /* export output */ + j = 0; for (i = 0, pos0 = 0; i < noutput; i++) { memset(&output[i], 0, sizeof(struct r600_bc_output)); - output[i].gpr = shader->output[i].gpr; - output[i].elem_size = 3; - output[i].swizzle_x = 0; - output[i].swizzle_y = 1; - output[i].swizzle_z = 2; - output[i].swizzle_w = 3; - output[i].burst_count = 1; - output[i].barrier = 1; - output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; - output[i].array_base = i - pos0; - output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); + output[i + j].gpr = shader->output[i].gpr; + output[i + j].elem_size = 3; + output[i + j].swizzle_x = 0; + output[i + j].swizzle_y = 1; + output[i + j].swizzle_z = 2; + output[i + j].swizzle_w = 3; + output[i + j].burst_count = 1; + output[i + j].barrier = 1; + output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; + output[i + j].array_base = i - pos0; + output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); switch (ctx.type) { case TGSI_PROCESSOR_VERTEX: if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { - output[i].array_base = 60; - output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + output[i + j].array_base = 60; + output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; /* position doesn't count in array_base */ pos0++; } if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) { - output[i].array_base = 61; - output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + output[i + j].array_base = 61; + output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; /* position doesn't count in array_base */ pos0++; } break; case TGSI_PROCESSOR_FRAGMENT: if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { - output[i].array_base = shader->output[i].sid; - output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + output[i + j].array_base = shader->output[i].sid; + output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) { + for (j = 1; j < shader->nr_cbufs; j++) { + memset(&output[i + j], 0, sizeof(struct r600_bc_output)); + output[i + j].gpr = shader->output[i].gpr; + output[i + j].elem_size = 3; + output[i + j].swizzle_x = 0; + output[i + j].swizzle_y = 1; + output[i + j].swizzle_z = 2; + output[i + j].swizzle_w = 3; + output[i + j].burst_count = 1; + output[i + j].barrier = 1; + output[i + j].array_base = shader->output[i].sid + j; + output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); + output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + } + j--; + } } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { - output[i].array_base = 61; - output[i].swizzle_x = 2; - output[i].swizzle_y = 7; - output[i].swizzle_z = output[i].swizzle_w = 7; - output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + output[i + j].array_base = 61; + output[i + j].swizzle_x = 2; + output[i + j].swizzle_y = 7; + output[i + j].swizzle_z = output[i + j].swizzle_w = 7; + output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { - output[i].array_base = 61; - output[i].swizzle_x = 7; - output[i].swizzle_y = 1; - output[i].swizzle_z = output[i].swizzle_w = 7; - output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + output[i + j].array_base = 61; + output[i + j].swizzle_x = 7; + output[i + j].swizzle_y = 1; + output[i + j].swizzle_z = output[i + j].swizzle_w = 7; + output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; } else { R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); r = -EINVAL; @@ -783,6 +846,7 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh goto out_err; } } + noutput += j; /* add fake param output for vertex shader if no param is exported */ if (ctx.type == TGSI_PROCESSOR_VERTEX) { for (i = 0, pos0 = 0; i < noutput; i++) { @@ -825,7 +889,7 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh } /* set export done on last export of each type */ for (i = noutput - 1, output_done = 0; i >= 0; i--) { - if (ctx.bc->chiprev < CHIPREV_CAYMAN) { + if (ctx.bc->chip_class < CAYMAN) { if (i == (noutput - 1)) { output[i].end_of_program = 1; } @@ -842,7 +906,7 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh goto out_err; } /* add program end */ - if (ctx.bc->chiprev == CHIPREV_CAYMAN) + if (ctx.bc->chip_class == CAYMAN) cm_bc_add_cf_end(ctx.bc); free(ctx.literals); @@ -878,6 +942,17 @@ static void r600_bc_src(struct r600_bc_alu_src *bc_src, bc_src->value = shader_src->value[bc_src->chan]; } +static void r600_bc_src_set_abs(struct r600_bc_alu_src *bc_src) +{ + bc_src->abs = 1; + bc_src->neg = 0; +} + +static void r600_bc_src_toggle_neg(struct r600_bc_alu_src *bc_src) +{ + bc_src->neg = !bc_src->neg; +} + static void tgsi_dst(struct r600_shader_ctx *ctx, const struct tgsi_full_dst_register *tgsi_dst, unsigned swizzle, @@ -934,12 +1009,10 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) /* handle some special cases */ switch (ctx->inst_info->tgsi_opcode) { case TGSI_OPCODE_SUB: - alu.src[1].neg = 1; + r600_bc_src_toggle_neg(&alu.src[1]); break; case TGSI_OPCODE_ABS: - alu.src[0].abs = 1; - if (alu.src[0].neg) - alu.src[0].neg = 0; + r600_bc_src_set_abs(&alu.src[0]); break; default: break; @@ -1053,7 +1126,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; alu.src[2].chan = 0; - if (ctx->bc->chiprev == CHIPREV_R600) { + if (ctx->bc->chip_class == R600) { alu.src[1].value = *(uint32_t *)&double_pi; alu.src[2].value = *(uint32_t *)&neg_pi; } else { @@ -1160,7 +1233,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) /* dst.x = COS */ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0 ; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); @@ -1194,7 +1267,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) /* dst.y = SIN */ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0 ; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); @@ -1303,36 +1376,17 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) struct r600_bc_alu alu; int r; - /* dst.x, <- 1.0 */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ - alu.src[0].chan = 0; - tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); - alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - - /* dst.y = max(src.x, 0.0) */ + /* tmp.x = max(src.y, 0.0) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 1); alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ - alu.src[1].chan = 0; - tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); - alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + alu.src[1].chan = 1; + + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 0; + alu.dst.write = 1; - /* dst.w, <- 1.0 */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - alu.src[0].sel = V_SQ_ALU_SRC_1; - alu.src[0].chan = 0; - tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); - alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) @@ -1344,13 +1398,15 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) int sel; int i; - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { - /* dst.z = log(src.y) */ + /* tmp.z = log(tmp.x) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); - r600_bc_src(&alu.src[0], &ctx->src[0], 1); - tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; if (i == 2) { alu.dst.write = 1; alu.last = 1; @@ -1362,11 +1418,14 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) return r; } } else { - /* dst.z = log(src.y) */ + /* tmp.z = log(tmp.x) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); - r600_bc_src(&alu.src[0], &ctx->src[0], 1); - tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 2; + alu.dst.write = 1; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) @@ -1376,13 +1435,12 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) chan = alu.dst.chan; sel = alu.dst.sel; - /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ + /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); - r600_bc_src(&alu.src[0], &ctx->src[0], 3); - alu.src[1].sel = sel; - alu.src[1].chan = chan; - + alu.src[0].sel = sel; + alu.src[0].chan = chan; + r600_bc_src(&alu.src[1], &ctx->src[0], 3); r600_bc_src(&alu.src[2], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; @@ -1393,7 +1451,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) if (r) return r; - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { /* dst.z = exp(tmp.x) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -1423,6 +1481,42 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) return r; } } + + /* dst.x, <- 1.0 */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ + alu.src[0].chan = 0; + tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); + alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + /* dst.y = max(src.x, 0.0) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); + alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ + alu.src[1].chan = 0; + tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); + alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + /* dst.w, <- 1.0 */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.src[0].sel = V_SQ_ALU_SRC_1; + alu.src[0].chan = 0; + tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); + alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + return 0; } @@ -1442,7 +1536,7 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx) for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { r600_bc_src(&alu.src[i], &ctx->src[i], 0); - alu.src[i].abs = 1; + r600_bc_src_set_abs(&alu.src[i]); } alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -1834,7 +1928,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { int out_chan; /* Add perspective divide */ - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { out_chan = 2; for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -1916,7 +2010,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) } /* tmp1.z = RCP_e(|tmp1.z|) */ - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); @@ -2128,7 +2222,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; r600_bc_src(&alu.src[1], &ctx->src[0], i); - alu.src[1].neg = 1; + r600_bc_src_toggle_neg(&alu.src[1]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == lasti) { @@ -2309,7 +2403,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) if (r) return r; - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; @@ -2365,7 +2459,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) /* result.z = RoughApprox2ToX(tmp);*/ if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); @@ -2425,14 +2519,15 @@ static int tgsi_log(struct r600_shader_ctx *ctx) int r; int i; - /* result.x = floor(log2(src)); */ + /* result.x = floor(log2(|src|)); */ if (inst->Dst[0].Register.WriteMask & 1) { - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bc_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -2450,6 +2545,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bc_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; @@ -2474,15 +2570,16 @@ static int tgsi_log(struct r600_shader_ctx *ctx) return r; } - /* result.y = src.x / (2 ^ floor(log2(src.x))); */ + /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */ if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bc_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -2500,6 +2597,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bc_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 1; @@ -2526,7 +2624,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (r) return r; - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); @@ -2560,7 +2658,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) return r; } - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); @@ -2599,6 +2697,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bc_src_set_abs(&alu.src[0]); alu.src[1].sel = ctx->temp_reg; alu.src[1].chan = 1; @@ -2613,14 +2712,15 @@ static int tgsi_log(struct r600_shader_ctx *ctx) return r; } - /* result.z = log2(src);*/ + /* result.z = log2(|src|);*/ if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bc_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; if (i == 2) @@ -2638,6 +2738,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bc_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -2831,25 +2932,34 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) static int pops(struct r600_shader_ctx *ctx, int pops) { - int alu_pop = 3; - if (ctx->bc->cf_last) { - if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3) - alu_pop = 0; - else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3) - alu_pop = 1; - } - alu_pop += pops; - if (alu_pop == 1) { - ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3; - ctx->bc->force_add_cf = 1; - } else if (alu_pop == 2) { - ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3; - ctx->bc->force_add_cf = 1; - } else { + unsigned force_pop = ctx->bc->force_add_cf; + + if (!force_pop) { + int alu_pop = 3; + if (ctx->bc->cf_last) { + if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3) + alu_pop = 0; + else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3) + alu_pop = 1; + } + alu_pop += pops; + if (alu_pop == 1) { + ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3; + ctx->bc->force_add_cf = 1; + } else if (alu_pop == 2) { + ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3; + ctx->bc->force_add_cf = 1; + } else { + force_pop = 1; + } + } + + if (force_pop) { r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); ctx->bc->cf_last->pop_count = pops; ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; } + return 0; } @@ -3266,7 +3376,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, - {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate}, + {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq}, {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 8f96ce5085c..600c3e2f540 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -40,12 +40,14 @@ struct r600_shader { struct r600_bc bc; unsigned ninput; unsigned noutput; + unsigned npos; unsigned nlds; struct r600_shader_io input[32]; struct r600_shader_io output[32]; - enum radeon_family family; boolean uses_kill; boolean fs_write_all; + boolean clamp_color; + unsigned nr_cbufs; }; #endif diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index d927e4a945e..487b1df0052 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -44,7 +44,590 @@ #include "r600_resource.h" #include "r600_shader.h" #include "r600_pipe.h" -#include "r600_state_inlines.h" +#include "r600_formats.h" + +static uint32_t r600_translate_blend_function(int blend_func) +{ + switch (blend_func) { + case PIPE_BLEND_ADD: + return V_028804_COMB_DST_PLUS_SRC; + case PIPE_BLEND_SUBTRACT: + return V_028804_COMB_SRC_MINUS_DST; + case PIPE_BLEND_REVERSE_SUBTRACT: + return V_028804_COMB_DST_MINUS_SRC; + case PIPE_BLEND_MIN: + return V_028804_COMB_MIN_DST_SRC; + case PIPE_BLEND_MAX: + return V_028804_COMB_MAX_DST_SRC; + default: + R600_ERR("Unknown blend function %d\n", blend_func); + assert(0); + break; + } + return 0; +} + +static uint32_t r600_translate_blend_factor(int blend_fact) +{ + switch (blend_fact) { + case PIPE_BLENDFACTOR_ONE: + return V_028804_BLEND_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: + return V_028804_BLEND_SRC_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return V_028804_BLEND_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return V_028804_BLEND_DST_ALPHA; + case PIPE_BLENDFACTOR_DST_COLOR: + return V_028804_BLEND_DST_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return V_028804_BLEND_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: + return V_028804_BLEND_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return V_028804_BLEND_CONST_ALPHA; + case PIPE_BLENDFACTOR_ZERO: + return V_028804_BLEND_ZERO; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return V_028804_BLEND_ONE_MINUS_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return V_028804_BLEND_ONE_MINUS_SRC_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return V_028804_BLEND_ONE_MINUS_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return V_028804_BLEND_ONE_MINUS_DST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return V_028804_BLEND_ONE_MINUS_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return V_028804_BLEND_ONE_MINUS_CONST_ALPHA; + case PIPE_BLENDFACTOR_SRC1_COLOR: + return V_028804_BLEND_SRC1_COLOR; + case PIPE_BLENDFACTOR_SRC1_ALPHA: + return V_028804_BLEND_SRC1_ALPHA; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + return V_028804_BLEND_INV_SRC1_COLOR; + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + return V_028804_BLEND_INV_SRC1_ALPHA; + default: + R600_ERR("Bad blend factor %d not supported!\n", blend_fact); + assert(0); + break; + } + return 0; +} + +static uint32_t r600_translate_stencil_op(int s_op) +{ + switch (s_op) { + case PIPE_STENCIL_OP_KEEP: + return V_028800_STENCIL_KEEP; + case PIPE_STENCIL_OP_ZERO: + return V_028800_STENCIL_ZERO; + case PIPE_STENCIL_OP_REPLACE: + return V_028800_STENCIL_REPLACE; + case PIPE_STENCIL_OP_INCR: + return V_028800_STENCIL_INCR; + case PIPE_STENCIL_OP_DECR: + return V_028800_STENCIL_DECR; + case PIPE_STENCIL_OP_INCR_WRAP: + return V_028800_STENCIL_INCR_WRAP; + case PIPE_STENCIL_OP_DECR_WRAP: + return V_028800_STENCIL_DECR_WRAP; + case PIPE_STENCIL_OP_INVERT: + return V_028800_STENCIL_INVERT; + default: + R600_ERR("Unknown stencil op %d", s_op); + assert(0); + break; + } + return 0; +} + +static uint32_t r600_translate_fill(uint32_t func) +{ + switch(func) { + case PIPE_POLYGON_MODE_FILL: + return 2; + case PIPE_POLYGON_MODE_LINE: + return 1; + case PIPE_POLYGON_MODE_POINT: + return 0; + default: + assert(0); + return 0; + } +} + +/* translates straight */ +static uint32_t r600_translate_ds_func(int func) +{ + return func; +} + +static unsigned r600_tex_wrap(unsigned wrap) +{ + switch (wrap) { + default: + case PIPE_TEX_WRAP_REPEAT: + return V_03C000_SQ_TEX_WRAP; + case PIPE_TEX_WRAP_CLAMP: + return V_03C000_SQ_TEX_CLAMP_HALF_BORDER; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return V_03C000_SQ_TEX_CLAMP_LAST_TEXEL; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return V_03C000_SQ_TEX_CLAMP_BORDER; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return V_03C000_SQ_TEX_MIRROR; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + return V_03C000_SQ_TEX_MIRROR_ONCE_HALF_BORDER; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + return V_03C000_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + return V_03C000_SQ_TEX_MIRROR_ONCE_BORDER; + } +} + +static unsigned r600_tex_filter(unsigned filter) +{ + switch (filter) { + default: + case PIPE_TEX_FILTER_NEAREST: + return V_03C000_SQ_TEX_XY_FILTER_POINT; + case PIPE_TEX_FILTER_LINEAR: + return V_03C000_SQ_TEX_XY_FILTER_BILINEAR; + } +} + +static unsigned r600_tex_mipfilter(unsigned filter) +{ + switch (filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + return V_03C000_SQ_TEX_Z_FILTER_POINT; + case PIPE_TEX_MIPFILTER_LINEAR: + return V_03C000_SQ_TEX_Z_FILTER_LINEAR; + default: + case PIPE_TEX_MIPFILTER_NONE: + return V_03C000_SQ_TEX_Z_FILTER_NONE; + } +} + +static unsigned r600_tex_compare(unsigned compare) +{ + switch (compare) { + default: + case PIPE_FUNC_NEVER: + return V_03C000_SQ_TEX_DEPTH_COMPARE_NEVER; + case PIPE_FUNC_LESS: + return V_03C000_SQ_TEX_DEPTH_COMPARE_LESS; + case PIPE_FUNC_EQUAL: + return V_03C000_SQ_TEX_DEPTH_COMPARE_EQUAL; + case PIPE_FUNC_LEQUAL: + return V_03C000_SQ_TEX_DEPTH_COMPARE_LESSEQUAL; + case PIPE_FUNC_GREATER: + return V_03C000_SQ_TEX_DEPTH_COMPARE_GREATER; + case PIPE_FUNC_NOTEQUAL: + return V_03C000_SQ_TEX_DEPTH_COMPARE_NOTEQUAL; + case PIPE_FUNC_GEQUAL: + return V_03C000_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; + case PIPE_FUNC_ALWAYS: + return V_03C000_SQ_TEX_DEPTH_COMPARE_ALWAYS; + } +} + +static unsigned r600_tex_dim(unsigned dim) +{ + switch (dim) { + default: + case PIPE_TEXTURE_1D: + return V_038000_SQ_TEX_DIM_1D; + case PIPE_TEXTURE_1D_ARRAY: + return V_038000_SQ_TEX_DIM_1D_ARRAY; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + return V_038000_SQ_TEX_DIM_2D; + case PIPE_TEXTURE_2D_ARRAY: + return V_038000_SQ_TEX_DIM_2D_ARRAY; + case PIPE_TEXTURE_3D: + return V_038000_SQ_TEX_DIM_3D; + case PIPE_TEXTURE_CUBE: + return V_038000_SQ_TEX_DIM_CUBEMAP; + } +} + +static uint32_t r600_translate_dbformat(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + return V_028010_DEPTH_16; + case PIPE_FORMAT_Z24X8_UNORM: + return V_028010_DEPTH_X8_24; + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + return V_028010_DEPTH_8_24; + case PIPE_FORMAT_Z32_FLOAT: + return V_028010_DEPTH_32_FLOAT; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + return V_028010_DEPTH_X24_8_32_FLOAT; + default: + return ~0U; + } +} + +static uint32_t r600_translate_colorswap(enum pipe_format format) +{ + switch (format) { + /* 8-bit buffers. */ + case PIPE_FORMAT_A8_UNORM: + return V_0280A0_SWAP_ALT_REV; + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_L8_SRGB: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8_SNORM: + return V_0280A0_SWAP_STD; + + case PIPE_FORMAT_L4A4_UNORM: + return V_0280A0_SWAP_ALT; + + /* 16-bit buffers. */ + case PIPE_FORMAT_B5G6R5_UNORM: + return V_0280A0_SWAP_STD_REV; + + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B5G5R5X1_UNORM: + return V_0280A0_SWAP_ALT; + + case PIPE_FORMAT_B4G4R4A4_UNORM: + case PIPE_FORMAT_B4G4R4X4_UNORM: + return V_0280A0_SWAP_ALT; + + case PIPE_FORMAT_Z16_UNORM: + return V_0280A0_SWAP_STD; + + case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_L8A8_SRGB: + return V_0280A0_SWAP_ALT; + case PIPE_FORMAT_R8G8_UNORM: + return V_0280A0_SWAP_STD; + + case PIPE_FORMAT_R16_UNORM: + case PIPE_FORMAT_R16_FLOAT: + return V_0280A0_SWAP_STD; + + /* 32-bit buffers. */ + + case PIPE_FORMAT_A8B8G8R8_SRGB: + return V_0280A0_SWAP_STD_REV; + case PIPE_FORMAT_B8G8R8A8_SRGB: + return V_0280A0_SWAP_ALT; + + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + return V_0280A0_SWAP_ALT; + + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + return V_0280A0_SWAP_ALT_REV; + case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: + return V_0280A0_SWAP_STD; + + case PIPE_FORMAT_A8B8G8R8_UNORM: + case PIPE_FORMAT_X8B8G8R8_UNORM: + /* case PIPE_FORMAT_R8SG8SB8UX8U_NORM: */ + return V_0280A0_SWAP_STD_REV; + + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + return V_0280A0_SWAP_STD; + + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return V_0280A0_SWAP_STD; + + case PIPE_FORMAT_R10G10B10A2_UNORM: + case PIPE_FORMAT_R10G10B10X2_SNORM: + case PIPE_FORMAT_R10SG10SB10SA2U_NORM: + return V_0280A0_SWAP_STD; + + case PIPE_FORMAT_B10G10R10A2_UNORM: + return V_0280A0_SWAP_ALT; + + case PIPE_FORMAT_R11G11B10_FLOAT: + case PIPE_FORMAT_R16G16_UNORM: + case PIPE_FORMAT_R16G16_FLOAT: + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT: + return V_0280A0_SWAP_STD; + + /* 64-bit buffers. */ + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: + case PIPE_FORMAT_R16G16B16A16_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + + /* 128-bit buffers. */ + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32A32_UNORM: + return V_0280A0_SWAP_STD; + default: + R600_ERR("unsupported colorswap format %d\n", format); + return ~0U; + } + return ~0U; +} + +static uint32_t r600_translate_colorformat(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_L4A4_UNORM: + return V_0280A0_COLOR_4_4; + + /* 8-bit buffers. */ + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_L8_SRGB: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8_SNORM: + return V_0280A0_COLOR_8; + + /* 16-bit buffers. */ + case PIPE_FORMAT_B5G6R5_UNORM: + return V_0280A0_COLOR_5_6_5; + + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B5G5R5X1_UNORM: + return V_0280A0_COLOR_1_5_5_5; + + case PIPE_FORMAT_B4G4R4A4_UNORM: + case PIPE_FORMAT_B4G4R4X4_UNORM: + return V_0280A0_COLOR_4_4_4_4; + + case PIPE_FORMAT_Z16_UNORM: + return V_0280A0_COLOR_16; + + case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_L8A8_SRGB: + case PIPE_FORMAT_R8G8_UNORM: + return V_0280A0_COLOR_8_8; + + case PIPE_FORMAT_R16_UNORM: + return V_0280A0_COLOR_16; + + case PIPE_FORMAT_R16_FLOAT: + return V_0280A0_COLOR_16_FLOAT; + + /* 32-bit buffers. */ + case PIPE_FORMAT_A8B8G8R8_SRGB: + case PIPE_FORMAT_A8B8G8R8_UNORM: + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_SRGB: + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: + case PIPE_FORMAT_R8SG8SB8UX8U_NORM: + case PIPE_FORMAT_X8B8G8R8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_R8G8B8_UNORM: + return V_0280A0_COLOR_8_8_8_8; + + case PIPE_FORMAT_R10G10B10A2_UNORM: + case PIPE_FORMAT_R10G10B10X2_SNORM: + case PIPE_FORMAT_B10G10R10A2_UNORM: + case PIPE_FORMAT_R10SG10SB10SA2U_NORM: + return V_0280A0_COLOR_2_10_10_10; + + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + return V_0280A0_COLOR_8_24; + + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return V_0280A0_COLOR_24_8; + + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + return V_0280A0_COLOR_X24_8_32_FLOAT; + + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT: + return V_0280A0_COLOR_32_FLOAT; + + case PIPE_FORMAT_R16G16_FLOAT: + return V_0280A0_COLOR_16_16_FLOAT; + + case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16G16_UNORM: + return V_0280A0_COLOR_16_16; + + case PIPE_FORMAT_R11G11B10_FLOAT: + return V_0280A0_COLOR_10_11_11_FLOAT; + + /* 64-bit buffers. */ + case PIPE_FORMAT_R16G16B16_USCALED: + case PIPE_FORMAT_R16G16B16A16_USCALED: + case PIPE_FORMAT_R16G16B16_SSCALED: + case PIPE_FORMAT_R16G16B16A16_SSCALED: + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: + return V_0280A0_COLOR_16_16_16_16; + + case PIPE_FORMAT_R16G16B16_FLOAT: + case PIPE_FORMAT_R16G16B16A16_FLOAT: + return V_0280A0_COLOR_16_16_16_16_FLOAT; + + case PIPE_FORMAT_R32G32_FLOAT: + return V_0280A0_COLOR_32_32_FLOAT; + + case PIPE_FORMAT_R32G32_USCALED: + case PIPE_FORMAT_R32G32_SSCALED: + return V_0280A0_COLOR_32_32; + + /* 96-bit buffers. */ + case PIPE_FORMAT_R32G32B32_FLOAT: + return V_0280A0_COLOR_32_32_32_FLOAT; + + /* 128-bit buffers. */ + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return V_0280A0_COLOR_32_32_32_32_FLOAT; + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32A32_UNORM: + return V_0280A0_COLOR_32_32_32_32; + + /* YUV buffers. */ + case PIPE_FORMAT_UYVY: + case PIPE_FORMAT_YUYV: + default: + return ~0U; /* Unsupported. */ + } +} + +static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) +{ + if (R600_BIG_ENDIAN) { + switch(colorformat) { + case V_0280A0_COLOR_4_4: + return ENDIAN_NONE; + + /* 8-bit buffers. */ + case V_0280A0_COLOR_8: + return ENDIAN_NONE; + + /* 16-bit buffers. */ + case V_0280A0_COLOR_5_6_5: + case V_0280A0_COLOR_1_5_5_5: + case V_0280A0_COLOR_4_4_4_4: + case V_0280A0_COLOR_16: + case V_0280A0_COLOR_8_8: + return ENDIAN_8IN16; + + /* 32-bit buffers. */ + case V_0280A0_COLOR_8_8_8_8: + case V_0280A0_COLOR_2_10_10_10: + case V_0280A0_COLOR_8_24: + case V_0280A0_COLOR_24_8: + case V_0280A0_COLOR_32_FLOAT: + case V_0280A0_COLOR_16_16_FLOAT: + case V_0280A0_COLOR_16_16: + return ENDIAN_8IN32; + + /* 64-bit buffers. */ + case V_0280A0_COLOR_16_16_16_16: + case V_0280A0_COLOR_16_16_16_16_FLOAT: + return ENDIAN_8IN16; + + case V_0280A0_COLOR_32_32_FLOAT: + case V_0280A0_COLOR_32_32: + case V_0280A0_COLOR_X24_8_32_FLOAT: + return ENDIAN_8IN32; + + /* 128-bit buffers. */ + case V_0280A0_COLOR_32_32_32_FLOAT: + case V_0280A0_COLOR_32_32_32_32_FLOAT: + case V_0280A0_COLOR_32_32_32_32: + return ENDIAN_8IN32; + default: + return ENDIAN_NONE; /* Unsupported. */ + } + } else { + return ENDIAN_NONE; + } +} + +static bool r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) +{ + return r600_translate_texformat(screen, format, NULL, NULL, NULL) != ~0U; +} + +static bool r600_is_colorbuffer_format_supported(enum pipe_format format) +{ + return r600_translate_colorformat(format) != ~0U && + r600_translate_colorswap(format) != ~0U; +} + +static bool r600_is_zs_format_supported(enum pipe_format format) +{ + return r600_translate_dbformat(format) != ~0U; +} + +boolean r600_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned usage) +{ + unsigned retval = 0; + + if (target >= PIPE_MAX_TEXTURE_TYPES) { + R600_ERR("r600: unsupported texture type %d\n", target); + return FALSE; + } + + if (!util_format_is_supported(format, usage)) + return FALSE; + + /* Multisample */ + if (sample_count > 1) + return FALSE; + + if ((usage & PIPE_BIND_SAMPLER_VIEW) && + r600_is_sampler_format_supported(screen, format)) { + retval |= PIPE_BIND_SAMPLER_VIEW; + } + + if ((usage & (PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED)) && + r600_is_colorbuffer_format_supported(format)) { + retval |= usage & + (PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED); + } + + if ((usage & PIPE_BIND_DEPTH_STENCIL) && + r600_is_zs_format_supported(format)) { + retval |= PIPE_BIND_DEPTH_STENCIL; + } + + if ((usage & PIPE_BIND_VERTEX_BUFFER) && + r600_is_vertex_format_supported(format)) { + retval |= PIPE_BIND_VERTEX_BUFFER; + } + + if (usage & PIPE_BIND_TRANSFER_READ) + retval |= PIPE_BIND_TRANSFER_READ; + if (usage & PIPE_BIND_TRANSFER_WRITE) + retval |= PIPE_BIND_TRANSFER_WRITE; + + return retval == usage; +} void r600_polygon_offset_update(struct r600_pipe_context *rctx) { @@ -63,6 +646,7 @@ void r600_polygon_offset_update(struct r600_pipe_context *rctx) offset_units *= 2.0f; break; case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: depth = -23; offset_units *= 1.0f; offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(1); @@ -299,6 +883,8 @@ static void *r600_create_rs_state(struct pipe_context *ctx, } rstate = &rs->rstate; + rs->clamp_vertex_color = state->clamp_vertex_color; + rs->clamp_fragment_color = state->clamp_fragment_color; rs->flatshade = state->flatshade; rs->sprite_coord_enable = state->sprite_coord_enable; @@ -369,14 +955,17 @@ static void *r600_create_rs_state(struct pipe_context *ctx, static void *r600_create_sampler_state(struct pipe_context *ctx, const struct pipe_sampler_state *state) { - struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + struct r600_pipe_sampler_state *ss = CALLOC_STRUCT(r600_pipe_sampler_state); + struct r600_pipe_state *rstate; union util_color uc; unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 4 : 0; - if (rstate == NULL) { + if (ss == NULL) { return NULL; } + ss->seamless_cube_map = state->seamless_cube_map; + rstate = &ss->rstate; rstate->id = R600_PIPE_STATE_SAMPLER; util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); r600_pipe_state_add_reg_noblock(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0, @@ -488,8 +1077,8 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c rstate->val[1] = (S_038004_TEX_HEIGHT(height - 1) | S_038004_TEX_DEPTH(depth - 1) | S_038004_DATA_FORMAT(format)); - rstate->val[2] = (tmp->offset[offset_level] + r600_bo_offset(bo[0])) >> 8; - rstate->val[3] = (tmp->offset[offset_level+1] + r600_bo_offset(bo[1])) >> 8; + rstate->val[2] = tmp->offset[offset_level] >> 8; + rstate->val[3] = tmp->offset[offset_level+1] >> 8; rstate->val[4] = (word4 | S_038010_SRF_MODE_ALL(V_038010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) | S_038010_REQUEST_SIZE(1) | @@ -559,27 +1148,57 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, rctx->ps_samplers.n_views = count; } +static void r600_set_seamless_cubemap(struct r600_pipe_context *rctx, boolean enable) +{ + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + if (rstate == NULL) + return; + + rstate->id = R600_PIPE_STATE_SEAMLESS_CUBEMAP; + r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, + (enable ? 0 : S_009508_DISABLE_CUBE_WRAP(1)), + 1, NULL); + + free(rctx->states[R600_PIPE_STATE_SEAMLESS_CUBEMAP]); + rctx->states[R600_PIPE_STATE_SEAMLESS_CUBEMAP] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + static void r600_bind_ps_sampler(struct pipe_context *ctx, unsigned count, void **states) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; + struct r600_pipe_sampler_state **sstates = (struct r600_pipe_sampler_state **)states; + int seamless = -1; memcpy(rctx->ps_samplers.samplers, states, sizeof(void*) * count); rctx->ps_samplers.n_samplers = count; for (int i = 0; i < count; i++) { - r600_context_pipe_state_set_ps_sampler(&rctx->ctx, rstates[i], i); + r600_context_pipe_state_set_ps_sampler(&rctx->ctx, &sstates[i]->rstate, i); + + if (sstates[i]) + seamless = sstates[i]->seamless_cube_map; } + + if (seamless != -1) + r600_set_seamless_cubemap(rctx, seamless); } static void r600_bind_vs_sampler(struct pipe_context *ctx, unsigned count, void **states) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; + struct r600_pipe_sampler_state **sstates = (struct r600_pipe_sampler_state **)states; + int seamless = -1; for (int i = 0; i < count; i++) { - r600_context_pipe_state_set_vs_sampler(&rctx->ctx, rstates[i], i); + r600_context_pipe_state_set_vs_sampler(&rctx->ctx, &sstates[i]->rstate, i); + + if (sstates[i]) + seamless = sstates[i]->seamless_cube_map; } + + if (seamless != -1) + r600_set_seamless_cubemap(rctx, seamless); } static void r600_set_clip_state(struct pipe_context *ctx, @@ -792,7 +1411,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta /* EXPORT_NORM is an optimzation that can be enabled for better * performance in certain cases */ - if (rctx->family < CHIP_RV770) { + if (rctx->chip_class == R600) { /* EXPORT_NORM can be enabled if: * - 11-bit or smaller UNORM/SNORM/SRGB * - BLEND_CLAMP is enabled @@ -822,7 +1441,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta r600_pipe_state_add_reg(rstate, R_028040_CB_COLOR0_BASE + cb * 4, - (offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); + offset >> 8, 0xFFFFFFFF, bo[0]); r600_pipe_state_add_reg(rstate, R_0280A0_CB_COLOR0_INFO + cb * 4, color_info, 0xFFFFFFFF, bo[0]); @@ -836,10 +1455,10 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0280E0_CB_COLOR0_FRAG + cb * 4, - r600_bo_offset(bo[1]) >> 8, 0xFFFFFFFF, bo[1]); + 0, 0xFFFFFFFF, bo[1]); r600_pipe_state_add_reg(rstate, R_0280C0_CB_COLOR0_TILE + cb * 4, - r600_bo_offset(bo[2]) >> 8, 0xFFFFFFFF, bo[2]); + 0, 0xFFFFFFFF, bo[2]); r600_pipe_state_add_reg(rstate, R_028100_CB_COLOR0_MASK + cb * 4, 0x00000000, 0xFFFFFFFF, NULL); @@ -873,7 +1492,7 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta format = r600_translate_dbformat(state->zsbuf->texture->format); r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE, - (offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + offset >> 8, 0xFFFFFFFF, rbuffer->bo); r600_pipe_state_add_reg(rstate, R_028000_DB_DEPTH_SIZE, S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice), 0xFFFFFFFF, NULL); @@ -952,7 +1571,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, 0xFFFFFFFF, NULL); - if (rctx->family >= CHIP_RV770) { + if (rctx->chip_class >= R700) { r600_pipe_state_add_reg(rstate, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, 0xFFFFFFFF, NULL); @@ -1046,16 +1665,13 @@ void r600_init_state_functions(struct r600_pipe_context *rctx) void r600_adjust_gprs(struct r600_pipe_context *rctx) { - enum radeon_family family; struct r600_pipe_state rstate; unsigned num_ps_gprs = rctx->default_ps_gprs; unsigned num_vs_gprs = rctx->default_vs_gprs; unsigned tmp; int diff; - family = r600_get_family(rctx->radeon); - - if (family >= CHIP_CEDAR) + if (rctx->chip_class >= EVERGREEN) return; if (!rctx->ps_shader && !rctx->vs_shader) @@ -1107,7 +1723,7 @@ void r600_init_config(struct r600_pipe_context *rctx) struct r600_pipe_state *rstate = &rctx->config; u32 tmp; - family = r600_get_family(rctx->radeon); + family = rctx->family; ps_prio = 0; vs_prio = 1; gs_prio = 2; @@ -1288,16 +1904,24 @@ void r600_init_config(struct r600_pipe_context *rctx) r600_pipe_state_add_reg(rstate, R_009714_VC_ENHANCE, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x00000000, 0xFFFFFFFF, NULL); - if (family >= CHIP_RV770) { + if (rctx->chip_class >= R700) { r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00004000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, 0x07000002, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, + S_009508_DISABLE_CUBE_ANISO(1) | + S_009508_SYNC_GRADIENT(1) | + S_009508_SYNC_WALKER(1) | + S_009508_SYNC_ALIGNER(1), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x00420204, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00514002, 0xFFFFFFFF, NULL); } else { r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, 0x07000003, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, + S_009508_DISABLE_CUBE_ANISO(1) | + S_009508_SYNC_GRADIENT(1) | + S_009508_SYNC_WALKER(1) | + S_009508_SYNC_ALIGNER(1), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x82000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x01020204, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000001, 0xFFFFFFFF, NULL); @@ -1403,7 +2027,7 @@ void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shad r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028840_SQ_PGM_START_PS, - r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); + 0, 0xFFFFFFFF, shader->bo); r600_pipe_state_add_reg(rstate, R_028850_SQ_PGM_RESOURCES_PS, S_028868_NUM_GPRS(rshader->bc.ngpr) | @@ -1438,7 +2062,7 @@ void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shad struct r600_pipe_state *rstate = &shader->rstate; struct r600_shader *rshader = &shader->shader; unsigned spi_vs_out_id[10]; - unsigned i, tmp; + unsigned i, tmp, nparams; /* clear previous register */ rstate->nregs = 0; @@ -1460,9 +2084,17 @@ void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shad spi_vs_out_id[i], 0xFFFFFFFF, NULL); } + /* Certain attributes (position, psize, etc.) don't count as params. + * VS is required to export at least one param and r600_shader_from_tgsi() + * takes care of adding a dummy export. + */ + nparams = rshader->noutput - rshader->npos; + if (nparams < 1) + nparams = 1; + r600_pipe_state_add_reg(rstate, R_0286C4_SPI_VS_OUT_CONFIG, - S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), + S_0286C4_VS_EXPORT_COUNT(nparams - 1), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028868_SQ_PGM_RESOURCES_VS, @@ -1474,7 +2106,7 @@ void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shad 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028858_SQ_PGM_START_VS, - r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); + 0, 0xFFFFFFFF, shader->bo); r600_pipe_state_add_reg(rstate, R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, @@ -1495,7 +2127,7 @@ void r600_fetch_shader(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS, - r600_bo_offset(ve->fetch_shader) >> 8, + 0, 0xFFFFFFFF, ve->fetch_shader); } diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index fa0c5cb89d7..9f3ab89fdf7 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -28,6 +28,7 @@ #include <util/u_format.h> #include <pipebuffer/pb_buffer.h> #include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" #include "r600_formats.h" #include "r600_pipe.h" #include "r600d.h" @@ -99,6 +100,8 @@ void r600_bind_rs_state(struct pipe_context *ctx, void *state) if (state == NULL) return; + rctx->clamp_vertex_color = rs->clamp_vertex_color; + rctx->clamp_fragment_color = rs->clamp_fragment_color; rctx->flatshade = rs->flatshade; rctx->sprite_coord_enable = rs->sprite_coord_enable; rctx->rasterizer = rs; @@ -106,13 +109,13 @@ void r600_bind_rs_state(struct pipe_context *ctx, void *state) rctx->states[rs->rstate.id] = &rs->rstate; r600_context_pipe_state_set(&rctx->ctx, &rs->rstate); - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_polygon_offset_update(rctx); } else { r600_polygon_offset_update(rctx); } if (rctx->ps_shader && rctx->vs_shader) - r600_spi_update(rctx); + rctx->spi_dirty = true; } void r600_delete_rs_state(struct pipe_context *ctx, void *state) @@ -209,7 +212,7 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, /* Zero states. */ for (i = 0; i < count; i++) { if (!buffers[i].buffer) { - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); } else { r600_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); @@ -217,7 +220,7 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, } } for (; i < rctx->vbuf_mgr->nr_real_vertex_buffers; i++) { - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); } else { r600_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); @@ -257,7 +260,9 @@ void *r600_create_shader_state(struct pipe_context *ctx, struct r600_pipe_shader *shader = CALLOC_STRUCT(r600_pipe_shader); int r; - r = r600_pipe_shader_create(ctx, shader, state->tokens); + shader->tokens = tgsi_dup_tokens(state->tokens); + + r = r600_pipe_shader_create(ctx, shader); if (r) { return NULL; } @@ -274,7 +279,7 @@ void r600_bind_ps_shader(struct pipe_context *ctx, void *state) r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_shader->rstate); } if (rctx->ps_shader && rctx->vs_shader) { - r600_spi_update(rctx); + rctx->spi_dirty = true; r600_adjust_gprs(rctx); } } @@ -289,7 +294,7 @@ void r600_bind_vs_shader(struct pipe_context *ctx, void *state) r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_shader->rstate); } if (rctx->ps_shader && rctx->vs_shader) { - r600_spi_update(rctx); + rctx->spi_dirty = true; r600_adjust_gprs(rctx); } } @@ -303,6 +308,7 @@ void r600_delete_ps_shader(struct pipe_context *ctx, void *state) rctx->ps_shader = NULL; } + free(shader->tokens); r600_pipe_shader_destroy(ctx, shader); free(shader); } @@ -316,6 +322,7 @@ void r600_delete_vs_shader(struct pipe_context *ctx, void *state) rctx->vs_shader = NULL; } + free(shader->tokens); r600_pipe_shader_destroy(ctx, shader); free(shader); } @@ -351,7 +358,7 @@ static void r600_spi_update(struct r600_pipe_context *rctx) struct r600_pipe_shader *shader = rctx->ps_shader; struct r600_pipe_state *rstate = &rctx->spi; struct r600_shader *rshader = &shader->shader; - unsigned i, tmp; + unsigned i, tmp, sid; if (rctx->spi.id == 0) r600_spi_block_init(rctx, &rctx->spi); @@ -360,9 +367,14 @@ static void r600_spi_update(struct r600_pipe_context *rctx) for (i = 0; i < rshader->ninput; i++) { if (rshader->input[i].name == TGSI_SEMANTIC_POSITION || rshader->input[i].name == TGSI_SEMANTIC_FACE) - continue; + if (rctx->chip_class >= EVERGREEN) + continue; + else + sid=0; + else + sid=r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i); - tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); + tmp = S_028644_SEMANTIC(sid); if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || @@ -375,7 +387,7 @@ static void r600_spi_update(struct r600_pipe_context *rctx) tmp |= S_028644_PT_SPRITE_TEX(1); } - if (rctx->family < CHIP_CEDAR) { + if (rctx->chip_class < EVERGREEN) { if (rshader->input[i].centroid) tmp |= S_028644_SEL_CENTROID(1); @@ -386,6 +398,7 @@ static void r600_spi_update(struct r600_pipe_context *rctx) r600_pipe_state_mod_reg(rstate, tmp); } + rctx->spi_dirty = false; r600_context_pipe_state_set(&rctx->ctx, rstate); } @@ -405,7 +418,6 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, } r600_upload_const_buffer(rctx, &rbuffer, &offset); - offset += r600_bo_offset(rbuffer->r.bo); switch (shader) { case PIPE_SHADER_VERTEX: @@ -421,14 +433,14 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, rstate = &rctx->vs_const_buffer_resource[index]; if (!rstate->id) { - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_init_buffer_resource(rctx, rstate); } else { r600_pipe_init_buffer_resource(rctx, rstate); } } - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16); evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index); } else { @@ -449,13 +461,13 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, rstate = &rctx->ps_const_buffer_resource[index]; if (!rstate->id) { - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_init_buffer_resource(rctx, rstate); } else { r600_pipe_init_buffer_resource(rctx, rstate); } } - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16); evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index); } else { @@ -505,17 +517,17 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) } if (vertex_buffer == NULL || rbuffer == NULL) continue; - offset += vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo); + offset += vertex_buffer->buffer_offset; if (!rstate->id) { - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_init_buffer_resource(rctx, rstate); } else { r600_pipe_init_buffer_resource(rctx, rstate); } } - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride); evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); } else { @@ -525,24 +537,39 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) } } +static int r600_shader_rebuild(struct pipe_context * ctx, struct r600_pipe_shader * shader) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + int r; + + r600_pipe_shader_destroy(ctx, shader); + r = r600_pipe_shader_create(ctx, shader); + if (r) { + return r; + } + r600_context_pipe_state_set(&rctx->ctx, &shader->rstate); + + return 0; +} + void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_resource *rbuffer; - u32 vgt_dma_index_type, vgt_dma_swap_mode, vgt_draw_initiator, mask; struct r600_draw rdraw; - struct r600_drawl draw = {}; - unsigned prim; + struct r600_drawl draw; + unsigned prim, mask; if (!rctx->blit) { if (rctx->have_depth_fb || rctx->have_depth_texture) r600_flush_depth_textures(rctx); } - u_vbuf_mgr_draw_begin(rctx->vbuf_mgr, info, NULL, NULL); + u_vbuf_mgr_draw_begin(rctx->vbuf_mgr, info); r600_vertex_buffer_update(rctx); draw.info = *info; draw.ctx = ctx; + draw.index_buffer = NULL; if (info->indexed && rctx->index_buffer.buffer) { draw.info.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer); @@ -560,57 +587,29 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) r600_upload_index_buffer(rctx, &draw); } } else { + draw.index_size = 0; + draw.index_buffer_offset = 0; draw.info.index_bias = info->start; } - vgt_dma_swap_mode = 0; - switch (draw.index_size) { - case 2: - vgt_draw_initiator = 0; - vgt_dma_index_type = 0; - if (R600_BIG_ENDIAN) { - vgt_dma_swap_mode = ENDIAN_8IN16; - } - break; - case 4: - vgt_draw_initiator = 0; - vgt_dma_index_type = 1; - if (R600_BIG_ENDIAN) { - vgt_dma_swap_mode = ENDIAN_8IN32; - } - break; - case 0: - vgt_draw_initiator = 2; - vgt_dma_index_type = 0; - break; - default: - R600_ERR("unsupported index size %d\n", draw.index_size); - return; - } if (r600_conv_pipe_prim(draw.info.mode, &prim)) return; - if (unlikely(rctx->ps_shader == NULL)) { - R600_ERR("missing vertex shader\n"); - return; - } - if (unlikely(rctx->vs_shader == NULL)) { - R600_ERR("missing vertex shader\n"); - return; - } - /* there should be enough input */ - if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) { - R600_ERR("%d resources provided, expecting %d\n", - rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource); - return; - } + + if (rctx->vs_shader->shader.clamp_color != rctx->clamp_vertex_color) + r600_shader_rebuild(ctx, rctx->vs_shader); + + if ((rctx->ps_shader->shader.clamp_color != rctx->clamp_fragment_color) || + ((rctx->chip_class >= EVERGREEN) && rctx->ps_shader->shader.fs_write_all && + (rctx->ps_shader->shader.nr_cbufs != rctx->nr_cbufs))) + r600_shader_rebuild(ctx, rctx->ps_shader); + + if (rctx->spi_dirty) + r600_spi_update(rctx); if (rctx->alpha_ref_dirty) r600_update_alpha_ref(rctx); - mask = 0; - for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { - mask |= (0xF << (i * 4)); - } + mask = (1ULL << ((unsigned)rctx->framebuffer.nr_cbufs * 4)) - 1; if (rctx->vgt.id != R600_PIPE_STATE_VGT) { rctx->vgt.id = R600_PIPE_STATE_VGT; @@ -644,8 +643,10 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) rdraw.vgt_num_indices = draw.info.count; rdraw.vgt_num_instances = draw.info.instance_count; - rdraw.vgt_index_type = vgt_dma_index_type | (vgt_dma_swap_mode << 2); - rdraw.vgt_draw_initiator = vgt_draw_initiator; + rdraw.vgt_index_type = ((draw.index_size == 4) ? 1 : 0); + if (R600_BIG_ENDIAN) + rdraw.vgt_index_type |= (draw.index_size >> 1) << 2; + rdraw.vgt_draw_initiator = draw.index_size ? 0 : 2; rdraw.indices = NULL; if (draw.index_buffer) { rbuffer = (struct r600_resource*)draw.index_buffer; @@ -653,7 +654,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) rdraw.indices_bo_offset = draw.index_buffer_offset; } - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_context_draw(&rctx->ctx, &rdraw); } else { r600_context_draw(&rctx->ctx, &rdraw); diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h deleted file mode 100644 index 5418570756f..00000000000 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ /dev/null @@ -1,608 +0,0 @@ -/* - * Copyright 2010 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#ifndef R600_STATE_INLINES_H -#define R600_STATE_INLINES_H - -#include "util/u_format.h" -#include "r600d.h" -#include "r600_formats.h" - -static INLINE uint32_t r600_translate_blend_function(int blend_func) -{ - switch (blend_func) { - case PIPE_BLEND_ADD: - return V_028804_COMB_DST_PLUS_SRC; - case PIPE_BLEND_SUBTRACT: - return V_028804_COMB_SRC_MINUS_DST; - case PIPE_BLEND_REVERSE_SUBTRACT: - return V_028804_COMB_DST_MINUS_SRC; - case PIPE_BLEND_MIN: - return V_028804_COMB_MIN_DST_SRC; - case PIPE_BLEND_MAX: - return V_028804_COMB_MAX_DST_SRC; - default: - R600_ERR("Unknown blend function %d\n", blend_func); - assert(0); - break; - } - return 0; -} - -static INLINE uint32_t r600_translate_blend_factor(int blend_fact) -{ - switch (blend_fact) { - case PIPE_BLENDFACTOR_ONE: - return V_028804_BLEND_ONE; - case PIPE_BLENDFACTOR_SRC_COLOR: - return V_028804_BLEND_SRC_COLOR; - case PIPE_BLENDFACTOR_SRC_ALPHA: - return V_028804_BLEND_SRC_ALPHA; - case PIPE_BLENDFACTOR_DST_ALPHA: - return V_028804_BLEND_DST_ALPHA; - case PIPE_BLENDFACTOR_DST_COLOR: - return V_028804_BLEND_DST_COLOR; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - return V_028804_BLEND_SRC_ALPHA_SATURATE; - case PIPE_BLENDFACTOR_CONST_COLOR: - return V_028804_BLEND_CONST_COLOR; - case PIPE_BLENDFACTOR_CONST_ALPHA: - return V_028804_BLEND_CONST_ALPHA; - case PIPE_BLENDFACTOR_ZERO: - return V_028804_BLEND_ZERO; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - return V_028804_BLEND_ONE_MINUS_SRC_COLOR; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - return V_028804_BLEND_ONE_MINUS_SRC_ALPHA; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - return V_028804_BLEND_ONE_MINUS_DST_ALPHA; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - return V_028804_BLEND_ONE_MINUS_DST_COLOR; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - return V_028804_BLEND_ONE_MINUS_CONST_COLOR; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - return V_028804_BLEND_ONE_MINUS_CONST_ALPHA; - case PIPE_BLENDFACTOR_SRC1_COLOR: - return V_028804_BLEND_SRC1_COLOR; - case PIPE_BLENDFACTOR_SRC1_ALPHA: - return V_028804_BLEND_SRC1_ALPHA; - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - return V_028804_BLEND_INV_SRC1_COLOR; - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - return V_028804_BLEND_INV_SRC1_ALPHA; - default: - R600_ERR("Bad blend factor %d not supported!\n", blend_fact); - assert(0); - break; - } - return 0; -} - -static INLINE uint32_t r600_translate_stencil_op(int s_op) -{ - switch (s_op) { - case PIPE_STENCIL_OP_KEEP: - return V_028800_STENCIL_KEEP; - case PIPE_STENCIL_OP_ZERO: - return V_028800_STENCIL_ZERO; - case PIPE_STENCIL_OP_REPLACE: - return V_028800_STENCIL_REPLACE; - case PIPE_STENCIL_OP_INCR: - return V_028800_STENCIL_INCR; - case PIPE_STENCIL_OP_DECR: - return V_028800_STENCIL_DECR; - case PIPE_STENCIL_OP_INCR_WRAP: - return V_028800_STENCIL_INCR_WRAP; - case PIPE_STENCIL_OP_DECR_WRAP: - return V_028800_STENCIL_DECR_WRAP; - case PIPE_STENCIL_OP_INVERT: - return V_028800_STENCIL_INVERT; - default: - R600_ERR("Unknown stencil op %d", s_op); - assert(0); - break; - } - return 0; -} - -static INLINE uint32_t r600_translate_fill(uint32_t func) -{ - switch(func) { - case PIPE_POLYGON_MODE_FILL: - return 2; - case PIPE_POLYGON_MODE_LINE: - return 1; - case PIPE_POLYGON_MODE_POINT: - return 0; - default: - assert(0); - return 0; - } -} - -/* translates straight */ -static INLINE uint32_t r600_translate_ds_func(int func) -{ - return func; -} - -static inline unsigned r600_tex_wrap(unsigned wrap) -{ - switch (wrap) { - default: - case PIPE_TEX_WRAP_REPEAT: - return V_03C000_SQ_TEX_WRAP; - case PIPE_TEX_WRAP_CLAMP: - return V_03C000_SQ_TEX_CLAMP_HALF_BORDER; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - return V_03C000_SQ_TEX_CLAMP_LAST_TEXEL; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - return V_03C000_SQ_TEX_CLAMP_BORDER; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - return V_03C000_SQ_TEX_MIRROR; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - return V_03C000_SQ_TEX_MIRROR_ONCE_HALF_BORDER; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - return V_03C000_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - return V_03C000_SQ_TEX_MIRROR_ONCE_BORDER; - } -} - -static inline unsigned r600_tex_filter(unsigned filter) -{ - switch (filter) { - default: - case PIPE_TEX_FILTER_NEAREST: - return V_03C000_SQ_TEX_XY_FILTER_POINT; - case PIPE_TEX_FILTER_LINEAR: - return V_03C000_SQ_TEX_XY_FILTER_BILINEAR; - } -} - -static inline unsigned r600_tex_mipfilter(unsigned filter) -{ - switch (filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - return V_03C000_SQ_TEX_Z_FILTER_POINT; - case PIPE_TEX_MIPFILTER_LINEAR: - return V_03C000_SQ_TEX_Z_FILTER_LINEAR; - default: - case PIPE_TEX_MIPFILTER_NONE: - return V_03C000_SQ_TEX_Z_FILTER_NONE; - } -} - -static inline unsigned r600_tex_compare(unsigned compare) -{ - switch (compare) { - default: - case PIPE_FUNC_NEVER: - return V_03C000_SQ_TEX_DEPTH_COMPARE_NEVER; - case PIPE_FUNC_LESS: - return V_03C000_SQ_TEX_DEPTH_COMPARE_LESS; - case PIPE_FUNC_EQUAL: - return V_03C000_SQ_TEX_DEPTH_COMPARE_EQUAL; - case PIPE_FUNC_LEQUAL: - return V_03C000_SQ_TEX_DEPTH_COMPARE_LESSEQUAL; - case PIPE_FUNC_GREATER: - return V_03C000_SQ_TEX_DEPTH_COMPARE_GREATER; - case PIPE_FUNC_NOTEQUAL: - return V_03C000_SQ_TEX_DEPTH_COMPARE_NOTEQUAL; - case PIPE_FUNC_GEQUAL: - return V_03C000_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; - case PIPE_FUNC_ALWAYS: - return V_03C000_SQ_TEX_DEPTH_COMPARE_ALWAYS; - } -} - -static inline unsigned r600_tex_swizzle(unsigned swizzle) -{ - switch (swizzle) { - case PIPE_SWIZZLE_RED: - return V_038010_SQ_SEL_X; - case PIPE_SWIZZLE_GREEN: - return V_038010_SQ_SEL_Y; - case PIPE_SWIZZLE_BLUE: - return V_038010_SQ_SEL_Z; - case PIPE_SWIZZLE_ALPHA: - return V_038010_SQ_SEL_W; - case PIPE_SWIZZLE_ZERO: - return V_038010_SQ_SEL_0; - default: - case PIPE_SWIZZLE_ONE: - return V_038010_SQ_SEL_1; - } -} - -static inline unsigned r600_format_type(unsigned format_type) -{ - switch (format_type) { - default: - case UTIL_FORMAT_TYPE_UNSIGNED: - return V_038010_SQ_FORMAT_COMP_UNSIGNED; - case UTIL_FORMAT_TYPE_SIGNED: - return V_038010_SQ_FORMAT_COMP_SIGNED; - case UTIL_FORMAT_TYPE_FIXED: - return V_038010_SQ_FORMAT_COMP_UNSIGNED_BIASED; - } -} - -static inline unsigned r600_tex_dim(unsigned dim) -{ - switch (dim) { - default: - case PIPE_TEXTURE_1D: - return V_038000_SQ_TEX_DIM_1D; - case PIPE_TEXTURE_1D_ARRAY: - return V_038000_SQ_TEX_DIM_1D_ARRAY; - case PIPE_TEXTURE_2D: - case PIPE_TEXTURE_RECT: - return V_038000_SQ_TEX_DIM_2D; - case PIPE_TEXTURE_2D_ARRAY: - return V_038000_SQ_TEX_DIM_2D_ARRAY; - case PIPE_TEXTURE_3D: - return V_038000_SQ_TEX_DIM_3D; - case PIPE_TEXTURE_CUBE: - return V_038000_SQ_TEX_DIM_CUBEMAP; - } -} - -static inline uint32_t r600_translate_dbformat(enum pipe_format format) -{ - switch (format) { - case PIPE_FORMAT_Z16_UNORM: - return V_028010_DEPTH_16; - case PIPE_FORMAT_Z24X8_UNORM: - return V_028010_DEPTH_X8_24; - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - return V_028010_DEPTH_8_24; - default: - return ~0; - } -} - -static inline uint32_t r600_translate_colorswap(enum pipe_format format) -{ - switch (format) { - /* 8-bit buffers. */ - case PIPE_FORMAT_A8_UNORM: - return V_0280A0_SWAP_ALT_REV; - case PIPE_FORMAT_I8_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_L8_SRGB: - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R8_SNORM: - return V_0280A0_SWAP_STD; - - case PIPE_FORMAT_L4A4_UNORM: - return V_0280A0_SWAP_ALT; - - /* 16-bit buffers. */ - case PIPE_FORMAT_B5G6R5_UNORM: - return V_0280A0_SWAP_STD_REV; - - case PIPE_FORMAT_B5G5R5A1_UNORM: - case PIPE_FORMAT_B5G5R5X1_UNORM: - return V_0280A0_SWAP_ALT; - - case PIPE_FORMAT_B4G4R4A4_UNORM: - case PIPE_FORMAT_B4G4R4X4_UNORM: - return V_0280A0_SWAP_ALT; - - case PIPE_FORMAT_Z16_UNORM: - return V_0280A0_SWAP_STD; - - case PIPE_FORMAT_L8A8_UNORM: - case PIPE_FORMAT_L8A8_SRGB: - return V_0280A0_SWAP_ALT; - case PIPE_FORMAT_R8G8_UNORM: - return V_0280A0_SWAP_STD; - - case PIPE_FORMAT_R16_UNORM: - return V_0280A0_SWAP_STD; - - /* 32-bit buffers. */ - - case PIPE_FORMAT_A8B8G8R8_SRGB: - return V_0280A0_SWAP_STD_REV; - case PIPE_FORMAT_B8G8R8A8_SRGB: - return V_0280A0_SWAP_ALT; - - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - return V_0280A0_SWAP_ALT; - - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_X8R8G8B8_UNORM: - return V_0280A0_SWAP_ALT_REV; - case PIPE_FORMAT_R8G8B8A8_SNORM: - case PIPE_FORMAT_R8G8B8A8_UNORM: - case PIPE_FORMAT_R8G8B8X8_UNORM: - return V_0280A0_SWAP_STD; - - case PIPE_FORMAT_A8B8G8R8_UNORM: - case PIPE_FORMAT_X8B8G8R8_UNORM: - /* case PIPE_FORMAT_R8SG8SB8UX8U_NORM: */ - return V_0280A0_SWAP_STD_REV; - - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - return V_0280A0_SWAP_STD; - - case PIPE_FORMAT_X8Z24_UNORM: - case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - return V_0280A0_SWAP_STD; - - case PIPE_FORMAT_R10G10B10A2_UNORM: - case PIPE_FORMAT_R10G10B10X2_SNORM: - case PIPE_FORMAT_R10SG10SB10SA2U_NORM: - return V_0280A0_SWAP_STD; - - case PIPE_FORMAT_B10G10R10A2_UNORM: - return V_0280A0_SWAP_ALT; - - case PIPE_FORMAT_R11G11B10_FLOAT: - case PIPE_FORMAT_R16G16_UNORM: - case PIPE_FORMAT_R16G16_FLOAT: - case PIPE_FORMAT_R32_FLOAT: - return V_0280A0_SWAP_STD; - - /* 64-bit buffers. */ - case PIPE_FORMAT_R32G32_FLOAT: - case PIPE_FORMAT_R16G16B16A16_UNORM: - case PIPE_FORMAT_R16G16B16A16_SNORM: - case PIPE_FORMAT_R16G16B16A16_FLOAT: - - /* 128-bit buffers. */ - case PIPE_FORMAT_R32G32B32A32_FLOAT: - case PIPE_FORMAT_R32G32B32A32_SNORM: - case PIPE_FORMAT_R32G32B32A32_UNORM: - return V_0280A0_SWAP_STD; - default: - R600_ERR("unsupported colorswap format %d\n", format); - return ~0; - } - return ~0; -} - -static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) -{ - switch (format) { - case PIPE_FORMAT_L4A4_UNORM: - return V_0280A0_COLOR_4_4; - - /* 8-bit buffers. */ - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_I8_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_L8_SRGB: - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R8_SNORM: - return V_0280A0_COLOR_8; - - /* 16-bit buffers. */ - case PIPE_FORMAT_B5G6R5_UNORM: - return V_0280A0_COLOR_5_6_5; - - case PIPE_FORMAT_B5G5R5A1_UNORM: - case PIPE_FORMAT_B5G5R5X1_UNORM: - return V_0280A0_COLOR_1_5_5_5; - - case PIPE_FORMAT_B4G4R4A4_UNORM: - case PIPE_FORMAT_B4G4R4X4_UNORM: - return V_0280A0_COLOR_4_4_4_4; - - case PIPE_FORMAT_Z16_UNORM: - return V_0280A0_COLOR_16; - - case PIPE_FORMAT_L8A8_UNORM: - case PIPE_FORMAT_L8A8_SRGB: - case PIPE_FORMAT_R8G8_UNORM: - return V_0280A0_COLOR_8_8; - - case PIPE_FORMAT_R16_UNORM: - return V_0280A0_COLOR_16; - - /* 32-bit buffers. */ - case PIPE_FORMAT_A8B8G8R8_SRGB: - case PIPE_FORMAT_A8B8G8R8_UNORM: - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_B8G8R8A8_SRGB: - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - case PIPE_FORMAT_R8G8B8A8_SNORM: - case PIPE_FORMAT_R8G8B8A8_UNORM: - case PIPE_FORMAT_R8G8B8X8_UNORM: - case PIPE_FORMAT_R8SG8SB8UX8U_NORM: - case PIPE_FORMAT_X8B8G8R8_UNORM: - case PIPE_FORMAT_X8R8G8B8_UNORM: - case PIPE_FORMAT_R8G8B8_UNORM: - return V_0280A0_COLOR_8_8_8_8; - - case PIPE_FORMAT_R10G10B10A2_UNORM: - case PIPE_FORMAT_R10G10B10X2_SNORM: - case PIPE_FORMAT_B10G10R10A2_UNORM: - case PIPE_FORMAT_R10SG10SB10SA2U_NORM: - return V_0280A0_COLOR_2_10_10_10; - - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - return V_0280A0_COLOR_8_24; - - case PIPE_FORMAT_X8Z24_UNORM: - case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - return V_0280A0_COLOR_24_8; - - case PIPE_FORMAT_R32_FLOAT: - return V_0280A0_COLOR_32_FLOAT; - - case PIPE_FORMAT_R16G16_FLOAT: - return V_0280A0_COLOR_16_16_FLOAT; - - case PIPE_FORMAT_R16G16_SSCALED: - case PIPE_FORMAT_R16G16_UNORM: - return V_0280A0_COLOR_16_16; - - case PIPE_FORMAT_R11G11B10_FLOAT: - return V_0280A0_COLOR_10_11_11_FLOAT; - - /* 64-bit buffers. */ - case PIPE_FORMAT_R16G16B16_USCALED: - case PIPE_FORMAT_R16G16B16A16_USCALED: - case PIPE_FORMAT_R16G16B16_SSCALED: - case PIPE_FORMAT_R16G16B16A16_SSCALED: - case PIPE_FORMAT_R16G16B16A16_UNORM: - case PIPE_FORMAT_R16G16B16A16_SNORM: - return V_0280A0_COLOR_16_16_16_16; - - case PIPE_FORMAT_R16G16B16_FLOAT: - case PIPE_FORMAT_R16G16B16A16_FLOAT: - return V_0280A0_COLOR_16_16_16_16_FLOAT; - - case PIPE_FORMAT_R32G32_FLOAT: - return V_0280A0_COLOR_32_32_FLOAT; - - case PIPE_FORMAT_R32G32_USCALED: - case PIPE_FORMAT_R32G32_SSCALED: - return V_0280A0_COLOR_32_32; - - /* 96-bit buffers. */ - case PIPE_FORMAT_R32G32B32_FLOAT: - return V_0280A0_COLOR_32_32_32_FLOAT; - - /* 128-bit buffers. */ - case PIPE_FORMAT_R32G32B32A32_FLOAT: - return V_0280A0_COLOR_32_32_32_32_FLOAT; - case PIPE_FORMAT_R32G32B32A32_SNORM: - case PIPE_FORMAT_R32G32B32A32_UNORM: - return V_0280A0_COLOR_32_32_32_32; - - /* YUV buffers. */ - case PIPE_FORMAT_UYVY: - case PIPE_FORMAT_YUYV: - default: - return ~0; /* Unsupported. */ - } -} - -static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat) -{ - if (R600_BIG_ENDIAN) { - switch(colorformat) { - case V_0280A0_COLOR_4_4: - return(ENDIAN_NONE); - - /* 8-bit buffers. */ - case V_0280A0_COLOR_8: - return(ENDIAN_NONE); - - /* 16-bit buffers. */ - case V_0280A0_COLOR_5_6_5: - case V_0280A0_COLOR_1_5_5_5: - case V_0280A0_COLOR_4_4_4_4: - case V_0280A0_COLOR_16: - case V_0280A0_COLOR_8_8: - return(ENDIAN_8IN16); - - /* 32-bit buffers. */ - case V_0280A0_COLOR_8_8_8_8: - case V_0280A0_COLOR_2_10_10_10: - case V_0280A0_COLOR_8_24: - case V_0280A0_COLOR_24_8: - case V_0280A0_COLOR_32_FLOAT: - case V_0280A0_COLOR_16_16_FLOAT: - case V_0280A0_COLOR_16_16: - return(ENDIAN_8IN32); - - /* 64-bit buffers. */ - case V_0280A0_COLOR_16_16_16_16: - case V_0280A0_COLOR_16_16_16_16_FLOAT: - return(ENDIAN_8IN16); - - case V_0280A0_COLOR_32_32_FLOAT: - case V_0280A0_COLOR_32_32: - return(ENDIAN_8IN32); - - /* 128-bit buffers. */ - case V_0280A0_COLOR_32_32_32_FLOAT: - case V_0280A0_COLOR_32_32_32_32_FLOAT: - case V_0280A0_COLOR_32_32_32_32: - return(ENDIAN_8IN32); - default: - return ENDIAN_NONE; /* Unsupported. */ - } - } else { - return ENDIAN_NONE; - } -} - -static INLINE boolean r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) -{ - return r600_translate_texformat(screen, format, NULL, NULL, NULL) != ~0; -} - -static INLINE boolean r600_is_colorbuffer_format_supported(enum pipe_format format) -{ - return r600_translate_colorformat(format) != ~0 && - r600_translate_colorswap(format) != ~0; -} - -static INLINE boolean r600_is_zs_format_supported(enum pipe_format format) -{ - return r600_translate_dbformat(format) != ~0; -} - -static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format, - enum radeon_family family) -{ - unsigned i; - const struct util_format_description *desc = util_format_description(format); - if (!desc) - return FALSE; - - /* Find the first non-VOID channel. */ - for (i = 0; i < 4; i++) { - if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { - break; - } - } - if (i == 4) - return FALSE; - - /* No fixed, no double. */ - if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || - desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED || - (desc->channel[i].size == 64 && - desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)) - return FALSE; - - /* No scaled/norm formats with 32 bits per channel. */ - if (desc->channel[i].size == 32 && - (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED || - desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED)) - return FALSE; - - return TRUE; -} - -#endif diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 470d26e2c9f..927eb5dafc9 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -35,7 +35,6 @@ #include "pipebuffer/pb_buffer.h" #include "r600_pipe.h" #include "r600_resource.h" -#include "r600_state_inlines.h" #include "r600d.h" #include "r600_formats.h" @@ -243,10 +242,11 @@ static void r600_setup_miptree(struct pipe_screen *screen, struct radeon *radeon = (struct radeon *)screen->winsys; enum chip_class chipc = r600_get_family_class(radeon); unsigned size, layer_size, i, offset; - unsigned nblocksx, nblocksy; + unsigned nblocksx, nblocksy, extra_size = 0; for (i = 0, offset = 0; i <= ptex->last_level; i++) { unsigned blocksize = util_format_get_blocksize(ptex->format); + unsigned base_align = r600_get_base_alignment(screen, ptex->format, array_mode); r600_texture_set_array_mode(screen, rtex, i, array_mode); @@ -265,9 +265,13 @@ static void r600_setup_miptree(struct pipe_screen *screen, else size = layer_size * ptex->array_size; + /* evergreen stores depth and stencil separately */ + if ((chipc >= EVERGREEN) && util_format_is_depth_or_stencil(ptex->format)) + extra_size = align(extra_size + (nblocksx * nblocksy * 1), base_align); + /* align base image and start of miptree */ if ((i == 0) || (i == 1)) - offset = align(offset, r600_get_base_alignment(screen, ptex->format, array_mode)); + offset = align(offset, base_align); rtex->offset[i] = offset; rtex->layer_size[i] = layer_size; rtex->pitch_in_blocks[i] = nblocksx; /* CB talks in elements */ @@ -275,7 +279,7 @@ static void r600_setup_miptree(struct pipe_screen *screen, offset += size; } - rtex->size = offset; + rtex->size = offset + extra_size; } /* Figure out whether u_blitter will fallback to a transfer operation. @@ -309,7 +313,14 @@ static boolean permit_hardware_blit(struct pipe_screen *screen, PIPE_BIND_SAMPLER_VIEW)) return FALSE; - return TRUE; + switch (res->usage) { + case PIPE_USAGE_STREAM: + case PIPE_USAGE_STAGING: + return FALSE; + + default: + return TRUE; + } } static boolean r600_texture_get_handle(struct pipe_screen* screen, @@ -678,7 +689,6 @@ void* r600_texture_transfer_map(struct pipe_context *ctx, enum pipe_format format = transfer->resource->format; struct radeon *radeon = (struct radeon *)ctx->screen->winsys; unsigned offset = 0; - unsigned usage = 0; char *map; if (rtransfer->staging_texture) { @@ -696,30 +706,7 @@ void* r600_texture_transfer_map(struct pipe_context *ctx, transfer->box.x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); } - if (transfer->usage & PIPE_TRANSFER_WRITE) { - usage |= PB_USAGE_CPU_WRITE; - - if (transfer->usage & PIPE_TRANSFER_DISCARD) { - } - - if (transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT) { - } - } - - if (transfer->usage & PIPE_TRANSFER_READ) { - usage |= PB_USAGE_CPU_READ; - } - - if (transfer->usage & PIPE_TRANSFER_DONTBLOCK) { - usage |= PB_USAGE_DONTBLOCK; - } - - if (transfer->usage & PIPE_TRANSFER_UNSYNCHRONIZED) { - usage |= PB_USAGE_UNSYNCHRONIZED; - } - - map = r600_bo_map(radeon, bo, usage, ctx); - if (!map) { + if (!(map = r600_bo_map(radeon, bo, transfer->usage, ctx))) { return NULL; } @@ -767,11 +754,7 @@ static unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format, }; if (swizzle_view) { - /* Combine two sets of swizzles. */ - for (i = 0; i < 4; i++) { - swizzle[i] = swizzle_view[i] <= UTIL_FORMAT_SWIZZLE_W ? - swizzle_format[swizzle_view[i]] : swizzle_view[i]; - } + util_format_compose_swizzles(swizzle_format, swizzle_view, swizzle); } else { memcpy(swizzle, swizzle_format, 4); } @@ -847,6 +830,12 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen, result = FMT_8; word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); goto out_word4; + case PIPE_FORMAT_Z32_FLOAT: + result = FMT_32_FLOAT; + goto out_word4; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + result = FMT_X24_8_32_FLOAT; + goto out_word4; default: goto out_unknown; } diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 6373572b65f..f6eec24cc05 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -2556,6 +2556,9 @@ #define S_009508_DISABLE_CUBE_WRAP(x) (((x) & 0x1) << 0) #define G_009508_DISABLE_CUBE_WRAP(x) (((x) >> 0) & 0x1) #define C_009508_DISABLE_CUBE_WRAP 0xFFFFFFFE +#define S_009508_DISABLE_CUBE_ANISO(x) (((x) & 0x1) << 1) +#define G_009508_DISABLE_CUBE_ANISO(x) (((x) >> 1) & 0x1) +#define C_009508_DISABLE_CUBE_ANISO (~(1 << 1)) #define S_009508_SYNC_GRADIENT(x) (((x) & 0x1) << 24) #define G_009508_SYNC_GRADIENT(x) (((x) >> 24) & 0x1) #define C_009508_SYNC_GRADIENT 0xFEFFFFFF diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index 28953582f0a..9403e6cf0b8 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -19,7 +19,7 @@ C_SOURCES = \ sp_quad_fs.c \ sp_quad_blend.c \ sp_screen.c \ - sp_setup.c \ + sp_setup.c \ sp_state_blend.c \ sp_state_clip.c \ sp_state_derived.c \ diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c index ae3f00f3387..22e8a2e5817 100644 --- a/src/gallium/drivers/softpipe/sp_clear.c +++ b/src/gallium/drivers/softpipe/sp_clear.c @@ -60,7 +60,7 @@ softpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, return; #if 0 - softpipe_update_derived(softpipe); /* not needed?? */ + softpipe_update_derived(softpipe, PIPE_PRIM_TRIANGLES); /* not needed?? */ #endif if (buffers & PIPE_CLEAR_COLOR) { diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index ce22f646228..c97b0333035 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -35,8 +35,11 @@ #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_pstipple.h" #include "util/u_inlines.h" #include "tgsi/tgsi_exec.h" +#include "vl/vl_decoder.h" +#include "vl/vl_video_buffer.h" #include "sp_clear.h" #include "sp_context.h" #include "sp_flush.h" @@ -88,6 +91,14 @@ softpipe_destroy( struct pipe_context *pipe ) struct softpipe_context *softpipe = softpipe_context( pipe ); uint i; +#if DO_PSTIPPLE_IN_HELPER_MODULE + if (softpipe->pstipple.sampler) + pipe->delete_sampler_state(pipe, softpipe->pstipple.sampler); + + pipe_resource_reference(&softpipe->pstipple.texture, NULL); + pipe_sampler_view_reference(&softpipe->pstipple.sampler_view, NULL); +#endif + if (softpipe->draw) draw_destroy( softpipe->draw ); @@ -258,6 +269,9 @@ softpipe_create_context( struct pipe_screen *screen, softpipe->pipe.flush = softpipe_flush_wrapped; softpipe->pipe.render_condition = softpipe_render_condition; + + softpipe->pipe.create_video_decoder = vl_create_decoder; + softpipe->pipe.create_video_buffer = vl_video_buffer_create; /* * Alloc caches for accessing drawing surfaces and textures. @@ -341,6 +355,11 @@ softpipe_create_context( struct pipe_screen *screen, sp_init_surface_functions(softpipe); +#if DO_PSTIPPLE_IN_HELPER_MODULE + /* create the polgon stipple sampler */ + softpipe->pstipple.sampler = util_pstipple_create_sampler(&softpipe->pipe); +#endif + return &softpipe->pipe; fail: diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index a572ee8cf00..410b0a65792 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -38,8 +38,11 @@ #include "sp_quad_pipe.h" -/** Do polygon stipple in the driver here, or in the draw module? */ -#define DO_PSTIPPLE_IN_DRAW_MODULE 1 +/** Do polygon stipple in the draw module? */ +#define DO_PSTIPPLE_IN_DRAW_MODULE 0 + +/** Do polygon stipple with the util module? */ +#define DO_PSTIPPLE_IN_HELPER_MODULE 1 struct softpipe_vbuf_render; @@ -64,6 +67,7 @@ struct softpipe_context { struct pipe_depth_stencil_alpha_state *depth_stencil; struct pipe_rasterizer_state *rasterizer; struct sp_fragment_shader *fs; + struct sp_fragment_shader_variant *fs_variant; struct sp_vertex_shader *vs; struct sp_geometry_shader *gs; struct sp_velems_state *velems; @@ -143,6 +147,13 @@ struct softpipe_context { struct pipe_query *render_cond_query; uint render_cond_mode; + /** Polygon stipple items */ + struct { + struct pipe_resource *texture; + struct pipe_sampler_state *sampler; + struct pipe_sampler_view *sampler_view; + } pstipple; + /** Software quad rendering pipeline */ struct { struct quad_stage *shade; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 01b4ca985d0..69b5b96b4fd 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -64,7 +64,7 @@ softpipe_draw_stream_output(struct pipe_context *pipe, unsigned mode) sp->reduced_api_prim = u_reduced_prim(mode); if (sp->dirty) { - softpipe_update_derived(sp); + softpipe_update_derived(sp, sp->reduced_api_prim); } softpipe_map_transfers(sp); @@ -122,7 +122,7 @@ softpipe_draw_vbo(struct pipe_context *pipe, sp->reduced_api_prim = u_reduced_prim(info->mode); if (sp->dirty) { - softpipe_update_derived(sp); + softpipe_update_derived(sp, sp->reduced_api_prim); } softpipe_map_transfers(sp); diff --git a/src/gallium/drivers/softpipe/sp_fs.h b/src/gallium/drivers/softpipe/sp_fs.h index 4792ace3a33..d46d7d5a657 100644 --- a/src/gallium/drivers/softpipe/sp_fs.h +++ b/src/gallium/drivers/softpipe/sp_fs.h @@ -31,17 +31,15 @@ #ifndef SP_FS_H #define SP_FS_H -struct sp_fragment_shader * -softpipe_create_fs_exec(struct softpipe_context *softpipe, - const struct pipe_shader_state *templ); -struct sp_fragment_shader * -softpipe_create_fs_sse(struct softpipe_context *softpipe, - const struct pipe_shader_state *templ); +struct sp_fragment_shader_variant * +softpipe_create_fs_variant_exec(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ); + +struct sp_fragment_shader_variant * +softpipe_create_fs_variant_sse(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ); -struct sp_fragment_shader * -softpipe_create_fs_llvm(struct softpipe_context *softpipe, - const struct pipe_shader_state *templ); struct tgsi_interp_coef; struct tgsi_exec_vector; diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index 346e1b402ba..779b8c4995c 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -42,25 +42,25 @@ /** - * Subclass of sp_fragment_shader + * Subclass of sp_fragment_shader_variant */ struct sp_exec_fragment_shader { - struct sp_fragment_shader base; + struct sp_fragment_shader_variant base; /* No other members for now */ }; /** cast wrapper */ static INLINE struct sp_exec_fragment_shader * -sp_exec_fragment_shader(const struct sp_fragment_shader *base) +sp_exec_fragment_shader(const struct sp_fragment_shader_variant *var) { - return (struct sp_exec_fragment_shader *) base; + return (struct sp_exec_fragment_shader *) var; } static void -exec_prepare( const struct sp_fragment_shader *base, +exec_prepare( const struct sp_fragment_shader_variant *var, struct tgsi_exec_machine *machine, struct tgsi_sampler **samplers ) { @@ -68,9 +68,9 @@ exec_prepare( const struct sp_fragment_shader *base, * Bind tokens/shader to the interpreter's machine state. * Avoid redundant binding. */ - if (machine->Tokens != base->shader.tokens) { + if (machine->Tokens != var->tokens) { tgsi_exec_machine_bind_shader( machine, - base->shader.tokens, + var->tokens, PIPE_MAX_SAMPLERS, samplers ); } @@ -118,7 +118,7 @@ setup_pos_vector(const struct tgsi_interp_coef *coef, * interface: */ static unsigned -exec_run( const struct sp_fragment_shader *base, +exec_run( const struct sp_fragment_shader_variant *var, struct tgsi_exec_machine *machine, struct quad_header *quad ) { @@ -136,9 +136,9 @@ exec_run( const struct sp_fragment_shader *base, /* store outputs */ { - const ubyte *sem_name = base->info.output_semantic_name; - const ubyte *sem_index = base->info.output_semantic_index; - const uint n = base->info.num_outputs; + const ubyte *sem_name = var->info.output_semantic_name; + const ubyte *sem_index = var->info.output_semantic_index; + const uint n = var->info.num_outputs; uint i; for (i = 0; i < n; i++) { switch (sem_name[i]) { @@ -180,29 +180,23 @@ exec_run( const struct sp_fragment_shader *base, static void -exec_delete( struct sp_fragment_shader *base ) +exec_delete( struct sp_fragment_shader_variant *var ) { - FREE((void *) base->shader.tokens); - FREE(base); + FREE( (void *) var->tokens ); + FREE(var); } -struct sp_fragment_shader * -softpipe_create_fs_exec(struct softpipe_context *softpipe, - const struct pipe_shader_state *templ) +struct sp_fragment_shader_variant * +softpipe_create_fs_variant_exec(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ) { struct sp_exec_fragment_shader *shader; - /* Decide whether we'll be codegenerating this shader and if so do - * that now. - */ - shader = CALLOC_STRUCT(sp_exec_fragment_shader); if (!shader) return NULL; - /* we need to keep a local copy of the tokens */ - shader->base.shader.tokens = tgsi_dup_tokens(templ->tokens); shader->base.prepare = exec_prepare; shader->base.run = exec_run; shader->base.delete = exec_delete; diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 5b18cd035e3..c873af125bd 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -48,11 +48,11 @@ /** - * Subclass of sp_fragment_shader + * Subclass of sp_fragment_shader_variant */ struct sp_sse_fragment_shader { - struct sp_fragment_shader base; + struct sp_fragment_shader_variant base; struct x86_function sse2_program; tgsi_sse2_fs_function func; float immediates[TGSI_EXEC_NUM_IMMEDIATES][4]; @@ -61,14 +61,14 @@ struct sp_sse_fragment_shader /** cast wrapper */ static INLINE struct sp_sse_fragment_shader * -sp_sse_fragment_shader(const struct sp_fragment_shader *base) +sp_sse_fragment_shader(const struct sp_fragment_shader_variant *base) { return (struct sp_sse_fragment_shader *) base; } static void -fs_sse_prepare( const struct sp_fragment_shader *base, +fs_sse_prepare( const struct sp_fragment_shader_variant *base, struct tgsi_exec_machine *machine, struct tgsi_sampler **samplers ) { @@ -119,7 +119,7 @@ setup_pos_vector(const struct tgsi_interp_coef *coef, * TODO: process >1 quad at a time */ static unsigned -fs_sse_run( const struct sp_fragment_shader *base, +fs_sse_run( const struct sp_fragment_shader_variant *base, struct tgsi_exec_machine *machine, struct quad_header *quad ) { @@ -189,7 +189,7 @@ fs_sse_run( const struct sp_fragment_shader *base, static void -fs_sse_delete( struct sp_fragment_shader *base ) +fs_sse_delete( struct sp_fragment_shader_variant *base ) { struct sp_sse_fragment_shader *shader = sp_sse_fragment_shader(base); @@ -198,9 +198,9 @@ fs_sse_delete( struct sp_fragment_shader *base ) } -struct sp_fragment_shader * -softpipe_create_fs_sse(struct softpipe_context *softpipe, - const struct pipe_shader_state *templ) +struct sp_fragment_shader_variant * +softpipe_create_fs_variant_sse(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ) { struct sp_sse_fragment_shader *shader; @@ -226,7 +226,6 @@ softpipe_create_fs_sse(struct softpipe_context *softpipe, return NULL; } - shader->base.shader.tokens = NULL; /* don't hold reference to templ->tokens */ shader->base.prepare = fs_sse_prepare; shader->base.run = fs_sse_run; shader->base.delete = fs_sse_delete; @@ -239,9 +238,9 @@ softpipe_create_fs_sse(struct softpipe_context *softpipe, /* Maybe put this variant in the header file. */ -struct sp_fragment_shader * -softpipe_create_fs_sse(struct softpipe_context *softpipe, - const struct pipe_shader_state *templ) +struct sp_fragment_shader_variant * +softpipe_create_fs_variant_sse(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ) { return NULL; } diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index 76cfc0bf51c..c881194768a 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -240,6 +240,7 @@ blend_quad(struct quad_stage *qs, static const float one[4] = { 1, 1, 1, 1 }; struct softpipe_context *softpipe = qs->softpipe; float source[4][QUAD_SIZE] = { { 0 } }; + float blend_dest[4][QUAD_SIZE]; /* * Compute src/first term RGB @@ -480,79 +481,85 @@ blend_quad(struct quad_stage *qs, assert(0 && "invalid alpha src factor"); } + /* Save the original dest for use in masking */ + VEC4_COPY(blend_dest[0], dest[0]); + VEC4_COPY(blend_dest[1], dest[1]); + VEC4_COPY(blend_dest[2], dest[2]); + VEC4_COPY(blend_dest[3], dest[3]); + /* - * Compute dest/second term RGB + * Compute blend_dest/second term RGB */ switch (softpipe->blend->rt[blend_index].rgb_dst_factor) { case PIPE_BLENDFACTOR_ONE: - /* dest = dest * 1 NO-OP, leave dest as-is */ + /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */ break; case PIPE_BLENDFACTOR_SRC_COLOR: - VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */ - VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */ - VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */ + VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[0]); /* R */ + VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[1]); /* G */ + VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[2]); /* B */ break; case PIPE_BLENDFACTOR_SRC_ALPHA: - VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */ - VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */ - VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */ + VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[3]); /* R * A */ + VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[3]); /* G * A */ + VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[3]); /* B * A */ break; case PIPE_BLENDFACTOR_DST_ALPHA: if (has_dst_alpha) { - VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */ - VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */ - VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */ + VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[3]); /* R * A */ + VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[3]); /* G * A */ + VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[3]); /* B * A */ } else { - /* dest = dest * 1 NO-OP, leave dest as-is */ + /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */ } break; case PIPE_BLENDFACTOR_DST_COLOR: - VEC4_MUL(dest[0], dest[0], dest[0]); /* R */ - VEC4_MUL(dest[1], dest[1], dest[1]); /* G */ - VEC4_MUL(dest[2], dest[2], dest[2]); /* B */ + VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[0]); /* R */ + VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[1]); /* G */ + VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[2]); /* B */ break; case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: if (has_dst_alpha) { const float *alpha = quadColor[3]; float diff[4], temp[4]; - VEC4_SUB(diff, one, dest[3]); + VEC4_SUB(diff, one, blend_dest[3]); VEC4_MIN(temp, alpha, diff); - VEC4_MUL(dest[0], quadColor[0], temp); /* R */ - VEC4_MUL(dest[1], quadColor[1], temp); /* G */ - VEC4_MUL(dest[2], quadColor[2], temp); /* B */ + VEC4_MUL(blend_dest[0], quadColor[0], temp); /* R */ + VEC4_MUL(blend_dest[1], quadColor[1], temp); /* G */ + VEC4_MUL(blend_dest[2], quadColor[2], temp); /* B */ } else { - VEC4_COPY(dest[0], zero); /* R */ - VEC4_COPY(dest[1], zero); /* G */ - VEC4_COPY(dest[2], zero); /* B */ + VEC4_COPY(blend_dest[0], zero); /* R */ + VEC4_COPY(blend_dest[1], zero); /* G */ + VEC4_COPY(blend_dest[2], zero); /* B */ } break; case PIPE_BLENDFACTOR_CONST_COLOR: { float comp[4]; VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ - VEC4_MUL(dest[0], dest[0], comp); /* R */ + VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */ VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */ - VEC4_MUL(dest[1], dest[1], comp); /* G */ + VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */ VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ - VEC4_MUL(dest[2], dest[2], comp); /* B */ + VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */ } break; case PIPE_BLENDFACTOR_CONST_ALPHA: { float comp[4]; VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ - VEC4_MUL(dest[0], dest[0], comp); /* R */ - VEC4_MUL(dest[1], dest[1], comp); /* G */ - VEC4_MUL(dest[2], dest[2], comp); /* B */ + VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */ + VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */ + VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */ } break; case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(dest[0], zero); /* R */ - VEC4_COPY(dest[1], zero); /* G */ - VEC4_COPY(dest[2], zero); /* B */ + VEC4_COPY(blend_dest[0], zero); /* R */ + VEC4_COPY(blend_dest[1], zero); /* G */ + VEC4_COPY(blend_dest[2], zero); /* B */ break; case PIPE_BLENDFACTOR_SRC1_COLOR: case PIPE_BLENDFACTOR_SRC1_ALPHA: @@ -563,45 +570,45 @@ blend_quad(struct quad_stage *qs, { float inv_comp[4]; VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ - VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ + VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */ VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ - VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ + VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */ VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ - VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ + VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */ } break; case PIPE_BLENDFACTOR_INV_SRC_ALPHA: { float one_minus_alpha[QUAD_SIZE]; VEC4_SUB(one_minus_alpha, one, quadColor[3]); - VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */ - VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */ - VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */ + VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */ + VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */ + VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */ } break; case PIPE_BLENDFACTOR_INV_DST_ALPHA: if (has_dst_alpha) { float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[3]); /* A */ - VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ - VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ - VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ + VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */ + VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */ + VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */ + VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */ } else { - VEC4_COPY(dest[0], zero); /* R */ - VEC4_COPY(dest[1], zero); /* G */ - VEC4_COPY(dest[2], zero); /* B */ + VEC4_COPY(blend_dest[0], zero); /* R */ + VEC4_COPY(blend_dest[1], zero); /* G */ + VEC4_COPY(blend_dest[2], zero); /* B */ } break; case PIPE_BLENDFACTOR_INV_DST_COLOR: { float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[0]); /* R */ - VEC4_MUL(dest[0], dest[0], inv_comp); /* R */ - VEC4_SUB(inv_comp, one, dest[1]); /* G */ - VEC4_MUL(dest[1], dest[1], inv_comp); /* G */ - VEC4_SUB(inv_comp, one, dest[2]); /* B */ - VEC4_MUL(dest[2], dest[2], inv_comp); /* B */ + VEC4_SUB(inv_comp, one, blend_dest[0]); /* R */ + VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); /* R */ + VEC4_SUB(inv_comp, one, blend_dest[1]); /* G */ + VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); /* G */ + VEC4_SUB(inv_comp, one, blend_dest[2]); /* B */ + VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); /* B */ } break; case PIPE_BLENDFACTOR_INV_CONST_COLOR: @@ -609,22 +616,22 @@ blend_quad(struct quad_stage *qs, float inv_comp[4]; /* R */ VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); - VEC4_MUL(dest[0], dest[0], inv_comp); + VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); /* G */ VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); - VEC4_MUL(dest[1], dest[1], inv_comp); + VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); /* B */ VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); - VEC4_MUL(dest[2], dest[2], inv_comp); + VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); } break; case PIPE_BLENDFACTOR_INV_CONST_ALPHA: { float inv_comp[4]; VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); - VEC4_MUL(dest[0], dest[0], inv_comp); - VEC4_MUL(dest[1], dest[1], inv_comp); - VEC4_MUL(dest[2], dest[2], inv_comp); + VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); + VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); + VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); } break; case PIPE_BLENDFACTOR_INV_SRC1_COLOR: @@ -637,29 +644,29 @@ blend_quad(struct quad_stage *qs, } /* - * Compute dest/second term A + * Compute blend_dest/second term A */ switch (softpipe->blend->rt[blend_index].alpha_dst_factor) { case PIPE_BLENDFACTOR_ONE: - /* dest = dest * 1 NO-OP, leave dest as-is */ + /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */ break; case PIPE_BLENDFACTOR_SRC_COLOR: /* fall-through */ case PIPE_BLENDFACTOR_SRC_ALPHA: - VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */ + VEC4_MUL(blend_dest[3], blend_dest[3], quadColor[3]); /* A * A */ break; case PIPE_BLENDFACTOR_DST_COLOR: /* fall-through */ case PIPE_BLENDFACTOR_DST_ALPHA: if (has_dst_alpha) { - VEC4_MUL(dest[3], dest[3], dest[3]); /* A */ + VEC4_MUL(blend_dest[3], blend_dest[3], blend_dest[3]); /* A */ } else { - /* dest = dest * 1 NO-OP, leave dest as-is */ + /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */ } break; case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - /* dest = dest * 1 NO-OP, leave dest as-is */ + /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */ break; case PIPE_BLENDFACTOR_CONST_COLOR: /* fall-through */ @@ -667,11 +674,11 @@ blend_quad(struct quad_stage *qs, { float comp[4]; VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ - VEC4_MUL(dest[3], dest[3], comp); /* A */ + VEC4_MUL(blend_dest[3], blend_dest[3], comp); /* A */ } break; case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(dest[3], zero); /* A */ + VEC4_COPY(blend_dest[3], zero); /* A */ break; case PIPE_BLENDFACTOR_INV_SRC_COLOR: /* fall-through */ @@ -679,7 +686,7 @@ blend_quad(struct quad_stage *qs, { float one_minus_alpha[QUAD_SIZE]; VEC4_SUB(one_minus_alpha, one, quadColor[3]); - VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */ + VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */ } break; case PIPE_BLENDFACTOR_INV_DST_COLOR: @@ -687,11 +694,11 @@ blend_quad(struct quad_stage *qs, case PIPE_BLENDFACTOR_INV_DST_ALPHA: if (has_dst_alpha) { float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[3]); /* A */ - VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */ + VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */ + VEC4_MUL(blend_dest[3], inv_comp, blend_dest[3]); /* A */ } else { - VEC4_COPY(dest[3], zero); /* A */ + VEC4_COPY(blend_dest[3], zero); /* A */ } break; case PIPE_BLENDFACTOR_INV_CONST_COLOR: @@ -700,7 +707,7 @@ blend_quad(struct quad_stage *qs, { float inv_comp[4]; VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); - VEC4_MUL(dest[3], dest[3], inv_comp); + VEC4_MUL(blend_dest[3], blend_dest[3], inv_comp); } break; default: @@ -712,29 +719,29 @@ blend_quad(struct quad_stage *qs, */ switch (softpipe->blend->rt[blend_index].rgb_func) { case PIPE_BLEND_ADD: - VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */ - VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */ - VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */ + VEC4_ADD_SAT(quadColor[0], source[0], blend_dest[0]); /* R */ + VEC4_ADD_SAT(quadColor[1], source[1], blend_dest[1]); /* G */ + VEC4_ADD_SAT(quadColor[2], source[2], blend_dest[2]); /* B */ break; case PIPE_BLEND_SUBTRACT: - VEC4_SUB_SAT(quadColor[0], source[0], dest[0]); /* R */ - VEC4_SUB_SAT(quadColor[1], source[1], dest[1]); /* G */ - VEC4_SUB_SAT(quadColor[2], source[2], dest[2]); /* B */ + VEC4_SUB_SAT(quadColor[0], source[0], blend_dest[0]); /* R */ + VEC4_SUB_SAT(quadColor[1], source[1], blend_dest[1]); /* G */ + VEC4_SUB_SAT(quadColor[2], source[2], blend_dest[2]); /* B */ break; case PIPE_BLEND_REVERSE_SUBTRACT: - VEC4_SUB_SAT(quadColor[0], dest[0], source[0]); /* R */ - VEC4_SUB_SAT(quadColor[1], dest[1], source[1]); /* G */ - VEC4_SUB_SAT(quadColor[2], dest[2], source[2]); /* B */ + VEC4_SUB_SAT(quadColor[0], blend_dest[0], source[0]); /* R */ + VEC4_SUB_SAT(quadColor[1], blend_dest[1], source[1]); /* G */ + VEC4_SUB_SAT(quadColor[2], blend_dest[2], source[2]); /* B */ break; case PIPE_BLEND_MIN: - VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */ - VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */ - VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */ + VEC4_MIN(quadColor[0], source[0], blend_dest[0]); /* R */ + VEC4_MIN(quadColor[1], source[1], blend_dest[1]); /* G */ + VEC4_MIN(quadColor[2], source[2], blend_dest[2]); /* B */ break; case PIPE_BLEND_MAX: - VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */ - VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */ - VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */ + VEC4_MAX(quadColor[0], source[0], blend_dest[0]); /* R */ + VEC4_MAX(quadColor[1], source[1], blend_dest[1]); /* G */ + VEC4_MAX(quadColor[2], source[2], blend_dest[2]); /* B */ break; default: assert(0 && "invalid rgb blend func"); @@ -745,19 +752,19 @@ blend_quad(struct quad_stage *qs, */ switch (softpipe->blend->rt[blend_index].alpha_func) { case PIPE_BLEND_ADD: - VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */ + VEC4_ADD_SAT(quadColor[3], source[3], blend_dest[3]); /* A */ break; case PIPE_BLEND_SUBTRACT: - VEC4_SUB_SAT(quadColor[3], source[3], dest[3]); /* A */ + VEC4_SUB_SAT(quadColor[3], source[3], blend_dest[3]); /* A */ break; case PIPE_BLEND_REVERSE_SUBTRACT: - VEC4_SUB_SAT(quadColor[3], dest[3], source[3]); /* A */ + VEC4_SUB_SAT(quadColor[3], blend_dest[3], source[3]); /* A */ break; case PIPE_BLEND_MIN: - VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */ + VEC4_MIN(quadColor[3], source[3], blend_dest[3]); /* A */ break; case PIPE_BLEND_MAX: - VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */ + VEC4_MAX(quadColor[3], source[3], blend_dest[3]); /* A */ break; default: assert(0 && "invalid alpha blend func"); @@ -797,7 +804,7 @@ blend_fallback(struct quad_stage *qs, unsigned cbuf; boolean write_all; - write_all = softpipe->fs->color0_writes_all_cbufs; + write_all = softpipe->fs_variant->info.color0_writes_all_cbufs; for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { @@ -810,17 +817,25 @@ blend_fallback(struct quad_stage *qs, quads[0]->input.y0); boolean has_dst_alpha = util_format_has_alpha(softpipe->framebuffer.cbufs[cbuf]->format); - uint q, i, j, qbuf; - - qbuf = write_all ? 0 : cbuf; + uint q, i, j; for (q = 0; q < nr; q++) { struct quad_header *quad = quads[q]; float (*quadColor)[4]; + float temp_quad_color[QUAD_SIZE][4]; const int itx = (quad->input.x0 & (TILE_SIZE-1)); const int ity = (quad->input.y0 & (TILE_SIZE-1)); - quadColor = quad->output.color[qbuf]; + if (write_all) { + for (j = 0; j < QUAD_SIZE; j++) { + for (i = 0; i < 4; i++) { + temp_quad_color[i][j] = quad->output.color[0][i][j]; + } + } + quadColor = temp_quad_color; + } else { + quadColor = quad->output.color[cbuf]; + } /* get/swizzle dest colors */ diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index 89b2a91fc1f..a349f0d1f3c 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -189,7 +189,8 @@ convert_quad_depth( struct depth_data *data, /** - * Compute the depth_data::shader_stencil_refs[] values from the float fragment stencil values. + * Compute the depth_data::shader_stencil_refs[] values from the float + * fragment stencil values. */ static void convert_quad_stencil( struct depth_data *data, @@ -205,10 +206,9 @@ convert_quad_stencil( struct depth_data *data, case PIPE_FORMAT_Z24_UNORM_S8_USCALED: case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - { - for (j = 0; j < QUAD_SIZE; j++) { - data->shader_stencil_refs[j] = ((unsigned)(quad->output.stencil[j])); - } + case PIPE_FORMAT_S8_USCALED: + for (j = 0; j < QUAD_SIZE; j++) { + data->shader_stencil_refs[j] = ((unsigned)(quad->output.stencil[j])); } break; default: @@ -216,6 +216,7 @@ convert_quad_stencil( struct depth_data *data, } } + /** * Write data->bzzzz[] values and data->stencilVals into the Z/stencil buffer. */ @@ -726,9 +727,9 @@ depth_test_quads_fallback(struct quad_stage *qs, unsigned nr) { unsigned i, pass = 0; - const struct sp_fragment_shader *fs = qs->softpipe->fs; - boolean interp_depth = !fs->info.writes_z; - boolean shader_stencil_ref = fs->info.writes_stencil; + const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info; + boolean interp_depth = !fsInfo->writes_z; + boolean shader_stencil_ref = fsInfo->writes_stencil; struct depth_data data; data.use_shader_stencil_refs = FALSE; @@ -837,7 +838,9 @@ choose_depth_test(struct quad_stage *qs, struct quad_header *quads[], unsigned nr) { - boolean interp_depth = !qs->softpipe->fs->info.writes_z; + const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info; + + boolean interp_depth = !fsInfo->writes_z; boolean alpha = qs->softpipe->depth_stencil->alpha.enabled; diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 90f4787d599..d74d6d4914e 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -74,7 +74,7 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad) struct tgsi_exec_machine *machine = softpipe->fs_machine; /* run shader */ - return softpipe->fs->run( softpipe->fs, machine, quad ); + return softpipe->fs_variant->run( softpipe->fs_variant, machine, quad ); } @@ -140,10 +140,10 @@ shade_begin(struct quad_stage *qs) { struct softpipe_context *softpipe = qs->softpipe; - softpipe->fs->prepare( softpipe->fs, - softpipe->fs_machine, - (struct tgsi_sampler **) - softpipe->tgsi.frag_samplers_list ); + softpipe->fs_variant->prepare( softpipe->fs_variant, + softpipe->fs_machine, + (struct tgsi_sampler **) + softpipe->tgsi.frag_samplers_list ); qs->next->begin(qs->next); } diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.c b/src/gallium/drivers/softpipe/sp_quad_pipe.c index 2cfd02a22c6..0c4506ae8f4 100644 --- a/src/gallium/drivers/softpipe/sp_quad_pipe.c +++ b/src/gallium/drivers/softpipe/sp_quad_pipe.c @@ -30,9 +30,9 @@ #include "sp_state.h" #include "pipe/p_shader_tokens.h" + static void -sp_push_quad_first( struct softpipe_context *sp, - struct quad_stage *quad ) +insert_stage_at_head(struct softpipe_context *sp, struct quad_stage *quad) { quad->next = sp->quad.first; sp->quad.first = quad; @@ -46,24 +46,24 @@ sp_build_quad_pipeline(struct softpipe_context *sp) sp->depth_stencil->depth.enabled && sp->framebuffer.zsbuf && !sp->depth_stencil->alpha.enabled && - !sp->fs->info.uses_kill && - !sp->fs->info.writes_z && - !sp->fs->info.writes_stencil; + !sp->fs_variant->info.uses_kill && + !sp->fs_variant->info.writes_z && + !sp->fs_variant->info.writes_stencil; sp->quad.first = sp->quad.blend; if (early_depth_test) { - sp_push_quad_first( sp, sp->quad.shade ); - sp_push_quad_first( sp, sp->quad.depth_test ); + insert_stage_at_head( sp, sp->quad.shade ); + insert_stage_at_head( sp, sp->quad.depth_test ); } else { - sp_push_quad_first( sp, sp->quad.depth_test ); - sp_push_quad_first( sp, sp->quad.shade ); + insert_stage_at_head( sp, sp->quad.depth_test ); + insert_stage_at_head( sp, sp->quad.shade ); } -#if !DO_PSTIPPLE_IN_DRAW_MODULE +#if !DO_PSTIPPLE_IN_DRAW_MODULE && !DO_PSTIPPLE_IN_HELPER_MODULE if (sp->rasterizer->poly_stipple_enable) - sp_push_quad_first( sp, sp->quad.pstipple ); + insert_stage_at_head( sp, sp->quad.pstipple ); #endif } diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 30f53a9e674..1e58d27be88 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -1,8 +1,8 @@ /************************************************************************** - * + * * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,11 +10,11 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. @@ -22,16 +22,19 @@ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ #include "util/u_memory.h" #include "util/u_format.h" #include "util/u_format_s3tc.h" +#include "util/u_video.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" #include "draw/draw_context.h" +#include "vl/vl_decoder.h" +#include "vl/vl_video_buffer.h" #include "state_tracker/sw_winsys.h" #include "tgsi/tgsi_exec.h" @@ -169,6 +172,23 @@ softpipe_get_paramf(struct pipe_screen *screen, enum pipe_cap param) } } +static int +softpipe_get_video_param(struct pipe_screen *screen, + enum pipe_video_profile profile, + enum pipe_video_cap param) +{ + switch (param) { + case PIPE_VIDEO_CAP_SUPPORTED: + return vl_profile_supported(screen, profile); + case PIPE_VIDEO_CAP_NPOT_TEXTURES: + return 0; + case PIPE_VIDEO_CAP_MAX_WIDTH: + case PIPE_VIDEO_CAP_MAX_HEIGHT: + return vl_video_buffer_max_size(screen); + default: + return 0; + } +} /** * Query format support for creating a texture, drawing surface, etc. @@ -307,7 +327,9 @@ softpipe_create_screen(struct sw_winsys *winsys) screen->base.get_param = softpipe_get_param; screen->base.get_shader_param = softpipe_get_shader_param; screen->base.get_paramf = softpipe_get_paramf; + screen->base.get_video_param = softpipe_get_video_param; screen->base.is_format_supported = softpipe_is_format_supported; + screen->base.is_video_format_supported = vl_video_buffer_is_format_supported; screen->base.context_create = softpipe_create_context; screen->base.flush_frontbuffer = softpipe_flush_frontbuffer; diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 0ce28f4c6ee..656d001809f 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -568,17 +568,18 @@ tri_persp_coeff(struct setup_context *setup, static void setup_fragcoord_coeff(struct setup_context *setup, uint slot) { - struct sp_fragment_shader* spfs = setup->softpipe->fs; + const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info; + /*X*/ - setup->coef[slot].a0[0] = spfs->pixel_center_integer ? 0.0 : 0.5; + setup->coef[slot].a0[0] = fsInfo->pixel_center_integer ? 0.0 : 0.5; setup->coef[slot].dadx[0] = 1.0; setup->coef[slot].dady[0] = 0.0; /*Y*/ setup->coef[slot].a0[1] = - (spfs->origin_lower_left ? setup->softpipe->framebuffer.height-1 : 0) - + (spfs->pixel_center_integer ? 0.0 : 0.5); + (fsInfo->origin_lower_left ? setup->softpipe->framebuffer.height-1 : 0) + + (fsInfo->pixel_center_integer ? 0.0 : 0.5); setup->coef[slot].dadx[1] = 0.0; - setup->coef[slot].dady[1] = spfs->origin_lower_left ? -1.0 : 1.0; + setup->coef[slot].dady[1] = fsInfo->origin_lower_left ? -1.0 : 1.0; /*Z*/ setup->coef[slot].a0[2] = setup->posCoef.a0[2]; setup->coef[slot].dadx[2] = setup->posCoef.dadx[2]; @@ -599,7 +600,7 @@ static void setup_tri_coefficients(struct setup_context *setup) { struct softpipe_context *softpipe = setup->softpipe; - const struct sp_fragment_shader *spfs = softpipe->fs; + const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info; const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); uint fragSlot; float v[3]; @@ -618,7 +619,7 @@ setup_tri_coefficients(struct setup_context *setup) /* setup interpolation for all the remaining attributes: */ - for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { + for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) { const uint vertSlot = vinfo->attrib[fragSlot].src_index; uint j; @@ -632,7 +633,7 @@ setup_tri_coefficients(struct setup_context *setup) tri_apply_cylindrical_wrap(setup->vmin[vertSlot][j], setup->vmid[vertSlot][j], setup->vmax[vertSlot][j], - spfs->info.input_cylindrical_wrap[fragSlot] & (1 << j), + fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j), v); tri_linear_coeff(setup, &setup->coef[fragSlot], j, v); } @@ -642,7 +643,7 @@ setup_tri_coefficients(struct setup_context *setup) tri_apply_cylindrical_wrap(setup->vmin[vertSlot][j], setup->vmid[vertSlot][j], setup->vmax[vertSlot][j], - spfs->info.input_cylindrical_wrap[fragSlot] & (1 << j), + fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j), v); tri_persp_coeff(setup, &setup->coef[fragSlot], j, v); } @@ -654,7 +655,7 @@ setup_tri_coefficients(struct setup_context *setup) assert(0); } - if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { + if (fsInfo->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { /* convert 0 to 1.0 and 1 to -1.0 */ setup->coef[fragSlot].a0[0] = setup->facing * -2.0f + 1.0f; setup->coef[fragSlot].dadx[0] = 0.0; @@ -939,7 +940,7 @@ setup_line_coefficients(struct setup_context *setup, const float (*v1)[4]) { struct softpipe_context *softpipe = setup->softpipe; - const struct sp_fragment_shader *spfs = softpipe->fs; + const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info; const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); uint fragSlot; float area; @@ -974,7 +975,7 @@ setup_line_coefficients(struct setup_context *setup, /* setup interpolation for all the remaining attributes: */ - for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { + for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) { const uint vertSlot = vinfo->attrib[fragSlot].src_index; uint j; @@ -987,7 +988,7 @@ setup_line_coefficients(struct setup_context *setup, for (j = 0; j < NUM_CHANNELS; j++) { line_apply_cylindrical_wrap(setup->vmin[vertSlot][j], setup->vmax[vertSlot][j], - spfs->info.input_cylindrical_wrap[fragSlot] & (1 << j), + fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j), v); line_linear_coeff(setup, &setup->coef[fragSlot], j, v); } @@ -996,7 +997,7 @@ setup_line_coefficients(struct setup_context *setup, for (j = 0; j < NUM_CHANNELS; j++) { line_apply_cylindrical_wrap(setup->vmin[vertSlot][j], setup->vmax[vertSlot][j], - spfs->info.input_cylindrical_wrap[fragSlot] & (1 << j), + fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j), v); line_persp_coeff(setup, &setup->coef[fragSlot], j, v); } @@ -1008,7 +1009,7 @@ setup_line_coefficients(struct setup_context *setup, assert(0); } - if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { + if (fsInfo->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { /* convert 0 to 1.0 and 1 to -1.0 */ setup->coef[fragSlot].a0[0] = setup->facing * -2.0f + 1.0f; setup->coef[fragSlot].dadx[0] = 0.0; @@ -1188,7 +1189,7 @@ sp_setup_point(struct setup_context *setup, const float (*v0)[4]) { struct softpipe_context *softpipe = setup->softpipe; - const struct sp_fragment_shader *spfs = softpipe->fs; + const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info; const int sizeAttr = setup->softpipe->psize_slot; const float size = sizeAttr > 0 ? v0[sizeAttr][0] @@ -1232,7 +1233,7 @@ sp_setup_point(struct setup_context *setup, const_coeff(setup, &setup->posCoef, 0, 2); const_coeff(setup, &setup->posCoef, 0, 3); - for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { + for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) { const uint vertSlot = vinfo->attrib[fragSlot].src_index; uint j; @@ -1255,7 +1256,7 @@ sp_setup_point(struct setup_context *setup, assert(0); } - if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { + if (fsInfo->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { /* convert 0 to 1.0 and 1 to -1.0 */ setup->coef[fragSlot].a0[0] = setup->facing * -2.0f + 1.0f; setup->coef[fragSlot].dadx[0] = 0.0; @@ -1396,7 +1397,7 @@ sp_setup_prepare(struct setup_context *setup) struct softpipe_context *sp = setup->softpipe; if (sp->dirty) { - softpipe_update_derived(sp); + softpipe_update_derived(sp, sp->reduced_api_prim); } /* Note: nr_attrs is only used for debugging (vertex printing) */ diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index bb19f8cff20..ec4c8cf5e4d 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -60,34 +60,45 @@ struct tgsi_exec_machine; struct vertex_info; -/** - * Subclass of pipe_shader_state (though it doesn't really need to be). - * - * This is starting to look an awful lot like a quad pipeline stage... - */ -struct sp_fragment_shader { - struct pipe_shader_state shader; +struct sp_fragment_shader_variant_key +{ + boolean polygon_stipple; +}; + +struct sp_fragment_shader_variant +{ + const struct tgsi_token *tokens; + struct sp_fragment_shader_variant_key key; struct tgsi_shader_info info; + unsigned stipple_sampler_unit; + + /* See comments about this elsewhere */ +#if 0 struct draw_fragment_shader *draw_shader; +#endif - boolean origin_lower_left; /**< fragment shader uses lower left position origin? */ - boolean pixel_center_integer; /**< fragment shader uses integer pixel center? */ - boolean color0_writes_all_cbufs; /**< fragment shader writes color0 to all bound cbufs */ - void (*prepare)( const struct sp_fragment_shader *shader, - struct tgsi_exec_machine *machine, - struct tgsi_sampler **samplers); + void (*prepare)(const struct sp_fragment_shader_variant *shader, + struct tgsi_exec_machine *machine, + struct tgsi_sampler **samplers); - /* Run the shader - this interface will get cleaned up in the - * future: - */ - unsigned (*run)( const struct sp_fragment_shader *shader, - struct tgsi_exec_machine *machine, - struct quad_header *quad ); + unsigned (*run)(const struct sp_fragment_shader_variant *shader, + struct tgsi_exec_machine *machine, + struct quad_header *quad); + /* Deletes this instance of the object */ + void (*delete)(struct sp_fragment_shader_variant *shader); - void (*delete)( struct sp_fragment_shader * ); + struct sp_fragment_shader_variant *next; +}; + + +/** Subclass of pipe_shader_state */ +struct sp_fragment_shader { + struct pipe_shader_state shader; + struct sp_fragment_shader_variant *variants; + struct draw_fragment_shader *draw_shader; }; @@ -141,7 +152,7 @@ softpipe_set_framebuffer_state(struct pipe_context *, const struct pipe_framebuffer_state *); void -softpipe_update_derived( struct softpipe_context *softpipe ); +softpipe_update_derived(struct softpipe_context *softpipe, unsigned prim); void softpipe_draw_vbo(struct pipe_context *pipe, @@ -170,4 +181,16 @@ struct vertex_info * softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe); +struct sp_fragment_shader_variant * +softpipe_find_fs_variant(struct softpipe_context *softpipe, + struct sp_fragment_shader *fs, + const struct sp_fragment_shader_variant_key *key); + + +struct sp_fragment_shader_variant * +softpipe_find_fs_variant(struct softpipe_context *softpipe, + struct sp_fragment_shader *fs, + const struct sp_fragment_shader_variant_key *key); + + #endif diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index f9590eb0b24..fd688089a3e 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -25,8 +25,10 @@ * **************************************************************************/ +#include "util/u_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_pstipple.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "draw/draw_vertex.h" @@ -64,7 +66,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) if (vinfo->num_attribs == 0) { /* compute vertex layout now */ - const struct sp_fragment_shader *spfs = softpipe->fs; + const struct tgsi_shader_info *fsInfo = &softpipe->fs_variant->info; struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; const uint num = draw_num_shader_outputs(softpipe->draw); uint i; @@ -84,11 +86,11 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) * from the vertex shader. */ vinfo->num_attribs = 0; - for (i = 0; i < spfs->info.num_inputs; i++) { + for (i = 0; i < fsInfo->num_inputs; i++) { int src; enum interp_mode interp; - switch (spfs->info.input_interpolate[i]) { + switch (fsInfo->input_interpolate[i]) { case TGSI_INTERPOLATE_CONSTANT: interp = INTERP_CONSTANT; break; @@ -103,7 +105,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) interp = INTERP_LINEAR; } - switch (spfs->info.input_semantic_name[i]) { + switch (fsInfo->input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: interp = INTERP_POS; break; @@ -117,8 +119,8 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) /* this includes texcoords and varying vars */ src = draw_find_shader_output(softpipe->draw, - spfs->info.input_semantic_name[i], - spfs->info.input_semantic_index[i]); + fsInfo->input_semantic_name[i], + fsInfo->input_semantic_index[i]); draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); } @@ -241,10 +243,101 @@ update_tgsi_samplers( struct softpipe_context *softpipe ) } +static void +update_fragment_shader(struct softpipe_context *softpipe, unsigned prim) +{ + struct sp_fragment_shader_variant_key key; + + memset(&key, 0, sizeof(key)); + + if (prim == PIPE_PRIM_TRIANGLES) + key.polygon_stipple = softpipe->rasterizer->poly_stipple_enable; + + if (softpipe->fs) { + softpipe->fs_variant = softpipe_find_fs_variant(softpipe, + softpipe->fs, &key); + } + else { + softpipe->fs_variant = NULL; + } + + /* This would be the logical place to pass the fragment shader + * to the draw module. However, doing this here, during state + * validation, causes problems with the 'draw' module helpers for + * wide/AA/stippled lines. + * In principle, the draw's fragment shader should be per-variant + * but that doesn't work. So we use a single draw fragment shader + * per fragment shader, not per variant. + */ +#if 0 + if (softpipe->fs_variant) { + draw_bind_fragment_shader(softpipe->draw, + softpipe->fs_variant->draw_shader); + } + else { + draw_bind_fragment_shader(softpipe->draw, NULL); + } +#endif +} + + +/** + * This should be called when the polygon stipple pattern changes. + * We create a new texture from the stipple pattern and create a new + * sampler view. + */ +static void +update_polygon_stipple_pattern(struct softpipe_context *softpipe) +{ + struct pipe_resource *tex; + struct pipe_sampler_view *view; + + tex = util_pstipple_create_stipple_texture(&softpipe->pipe, + softpipe->poly_stipple.stipple); + pipe_resource_reference(&softpipe->pstipple.texture, tex); + + view = util_pstipple_create_sampler_view(&softpipe->pipe, tex); + pipe_sampler_view_reference(&softpipe->pstipple.sampler_view, view); +} + + +/** + * Should be called when polygon stipple is enabled/disabled or when + * the fragment shader changes. + * We add/update the fragment sampler and sampler views to sample from + * the polygon stipple texture. The texture unit that we use depends on + * the fragment shader (we need to use a unit not otherwise used by the + * shader). + */ +static void +update_polygon_stipple_enable(struct softpipe_context *softpipe, unsigned prim) +{ + if (prim == PIPE_PRIM_TRIANGLES && + softpipe->fs_variant->key.polygon_stipple) { + const unsigned unit = softpipe->fs_variant->stipple_sampler_unit; + + assert(unit >= softpipe->num_fragment_samplers); + + /* sampler state */ + softpipe->fragment_samplers[unit] = softpipe->pstipple.sampler; + + /* sampler view */ + pipe_sampler_view_reference(&softpipe->fragment_sampler_views[unit], + softpipe->pstipple.sampler_view); + + sp_tex_tile_cache_set_sampler_view(softpipe->fragment_tex_cache[unit], + softpipe->pstipple.sampler_view); + + softpipe->dirty |= SP_NEW_SAMPLER; + } +} + + /* Hopefully this will remain quite simple, otherwise need to pull in * something like the state tracker mechanism. */ -void softpipe_update_derived( struct softpipe_context *softpipe ) +void +softpipe_update_derived(struct softpipe_context *softpipe, unsigned prim) { struct softpipe_screen *sp_screen = softpipe_screen(softpipe->pipe.screen); @@ -254,7 +347,24 @@ void softpipe_update_derived( struct softpipe_context *softpipe ) softpipe->tex_timestamp = sp_screen->timestamp; softpipe->dirty |= SP_NEW_TEXTURE; } - + +#if DO_PSTIPPLE_IN_HELPER_MODULE + if (softpipe->dirty & SP_NEW_STIPPLE) + /* before updating samplers! */ + update_polygon_stipple_pattern(softpipe); +#endif + + if (softpipe->dirty & (SP_NEW_RASTERIZER | + SP_NEW_FS)) + update_fragment_shader(softpipe, prim); + +#if DO_PSTIPPLE_IN_HELPER_MODULE + if (softpipe->dirty & (SP_NEW_RASTERIZER | + SP_NEW_STIPPLE | + SP_NEW_FS)) + update_polygon_stipple_enable(softpipe, prim); +#endif + if (softpipe->dirty & (SP_NEW_SAMPLER | SP_NEW_TEXTURE | SP_NEW_FS | diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index 60331bc4976..16023c990a7 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -373,8 +373,9 @@ softpipe_reset_sampler_variants(struct softpipe_context *softpipe) } } - for (i = 0; i <= softpipe->fs->info.file_max[TGSI_FILE_SAMPLER]; i++) { + for (i = 0; i <= softpipe->fs_variant->info.file_max[TGSI_FILE_SAMPLER]; i++) { if (softpipe->fragment_samplers[i]) { + assert(softpipe->fragment_sampler_views[i]->texture); softpipe->tgsi.frag_samplers_list[i] = get_sampler_variant( i, sp_sampler(softpipe->fragment_samplers[i]), diff --git a/src/gallium/drivers/softpipe/sp_state_shader.c b/src/gallium/drivers/softpipe/sp_state_shader.c index 3dec5de3cc4..da895270aa9 100644 --- a/src/gallium/drivers/softpipe/sp_state_shader.c +++ b/src/gallium/drivers/softpipe/sp_state_shader.c @@ -33,6 +33,7 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" #include "util/u_inlines.h" +#include "util/u_pstipple.h" #include "draw/draw_context.h" #include "draw/draw_vs.h" #include "draw/draw_gs.h" @@ -42,46 +43,105 @@ #include "tgsi/tgsi_parse.h" +/** + * Create a new fragment shader variant. + */ +static struct sp_fragment_shader_variant * +create_fs_variant(struct softpipe_context *softpipe, + struct sp_fragment_shader *fs, + const struct sp_fragment_shader_variant_key *key) +{ + struct sp_fragment_shader_variant *var; + struct pipe_shader_state *stipple_fs = NULL, *curfs = &fs->shader; + unsigned unit = 0; + + if (key->polygon_stipple) { + /* get new shader that implements polygon stippling */ + stipple_fs = util_pstipple_create_fragment_shader(&softpipe->pipe, + curfs, &unit); + curfs = stipple_fs; + } + + /* codegen, create variant object */ + var = softpipe_create_fs_variant_sse(softpipe, curfs); + if (!var) { + var = softpipe_create_fs_variant_exec(softpipe, curfs); + } + + if (var) { + var->key = *key; + var->tokens = tgsi_dup_tokens(curfs->tokens); + var->stipple_sampler_unit = unit; + + tgsi_scan_shader(var->tokens, &var->info); + + /* See comments elsewhere about draw fragment shaders */ +#if 0 + /* draw's fs state */ + var->draw_shader = draw_create_fragment_shader(softpipe->draw, + &fs->shader); + if (!var->draw_shader) { + var->delete(var); + FREE((void *) var->tokens); + return NULL; + } +#endif + + /* insert variant into linked list */ + var->next = fs->variants; + fs->variants = var; + } + + if (stipple_fs) { + free((void *) stipple_fs->tokens); + free(stipple_fs); + } + + return var; +} + + +struct sp_fragment_shader_variant * +softpipe_find_fs_variant(struct softpipe_context *sp, + struct sp_fragment_shader *fs, + const struct sp_fragment_shader_variant_key *key) +{ + struct sp_fragment_shader_variant *var; + + for (var = fs->variants; var; var = var->next) { + if (memcmp(&var->key, key, sizeof(*key)) == 0) { + /* found it */ + return var; + } + } + + return create_fs_variant(sp, fs, key); +} + + static void * softpipe_create_fs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { struct softpipe_context *softpipe = softpipe_context(pipe); - struct sp_fragment_shader *state; - unsigned i; + struct sp_fragment_shader *state = CALLOC_STRUCT(sp_fragment_shader); /* debug */ if (softpipe->dump_fs) tgsi_dump(templ->tokens, 0); - /* codegen */ - state = softpipe_create_fs_sse( softpipe, templ ); - if (!state) { - state = softpipe_create_fs_exec( softpipe, templ ); - } - - if (!state) - return NULL; + /* we need to keep a local copy of the tokens */ + state->shader.tokens = tgsi_dup_tokens(templ->tokens); /* draw's fs state */ - state->draw_shader = draw_create_fragment_shader(softpipe->draw, templ); + state->draw_shader = draw_create_fragment_shader(softpipe->draw, + &state->shader); if (!state->draw_shader) { - state->delete( state ); + FREE((void *) state->shader.tokens); + FREE(state); return NULL; } - /* get/save the summary info for this shader */ - tgsi_scan_shader(templ->tokens, &state->info); - - for (i = 0; i < state->info.num_properties; ++i) { - if (state->info.properties[i].name == TGSI_PROPERTY_FS_COORD_ORIGIN) - state->origin_lower_left = state->info.properties[i].data[0]; - else if (state->info.properties[i].name == TGSI_PROPERTY_FS_COORD_PIXEL_CENTER) - state->pixel_center_integer = state->info.properties[i].data[0]; - else if (state->info.properties[i].name == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) - state->color0_writes_all_cbufs = state->info.properties[i].data[0]; - } - return state; } @@ -90,6 +150,7 @@ static void softpipe_bind_fs_state(struct pipe_context *pipe, void *fs) { struct softpipe_context *softpipe = softpipe_context(pipe); + struct sp_fragment_shader *state = (struct sp_fragment_shader *) fs; if (softpipe->fs == fs) return; @@ -98,8 +159,14 @@ softpipe_bind_fs_state(struct pipe_context *pipe, void *fs) softpipe->fs = fs; - draw_bind_fragment_shader(softpipe->draw, - (softpipe->fs ? softpipe->fs->draw_shader : NULL)); + if (fs == NULL) + softpipe->fs_variant = NULL; + + if (state) + draw_bind_fragment_shader(softpipe->draw, + state->draw_shader); + else + draw_bind_fragment_shader(softpipe->draw, NULL); softpipe->dirty |= SP_NEW_FS; } @@ -110,8 +177,9 @@ softpipe_delete_fs_state(struct pipe_context *pipe, void *fs) { struct softpipe_context *softpipe = softpipe_context(pipe); struct sp_fragment_shader *state = fs; + struct sp_fragment_shader_variant *var, *next_var; - assert(fs != softpipe_context(pipe)->fs); + assert(fs != softpipe->fs); if (softpipe->fs_machine->Tokens == state->shader.tokens) { /* unbind the shader from the tgsi executor if we're @@ -120,9 +188,23 @@ softpipe_delete_fs_state(struct pipe_context *pipe, void *fs) tgsi_exec_machine_bind_shader(softpipe->fs_machine, NULL, 0, NULL); } + /* delete variants */ + for (var = state->variants; var; var = next_var) { + next_var = var->next; + + assert(var != softpipe->fs_variant); + + /* See comments elsewhere about draw fragment shaders */ +#if 0 + draw_delete_fragment_shader(softpipe->draw, var->draw_shader); +#endif + + var->delete(var); + } + draw_delete_fragment_shader(softpipe->draw, state->draw_shader); - state->delete( state ); + FREE((void *) state->shader.tokens); } diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 90766f4119c..f7309480bb9 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -1662,7 +1662,7 @@ mip_filter_nearest(struct tgsi_sampler *tgsi_sampler, samp->mag_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } else { - samp->level = samp->view->u.tex.first_level + (int)(lambda + 0.5) ; + samp->level = samp->view->u.tex.first_level + (int)(lambda + 0.5F) ; samp->level = MIN2(samp->level, (int)texture->last_level); samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } @@ -1815,13 +1815,13 @@ img_filter_2d_ewa(struct tgsi_sampler *tgsi_sampler, * and incrementally update the value of Ax^2+Bxy*Cy^2; when this * value, q, is less than F, we're inside the ellipse */ - float tex_u=-0.5 + s[j] * texture->width0 * scaling; - float tex_v=-0.5 + t[j] * texture->height0 * scaling; + float tex_u = -0.5F + s[j] * texture->width0 * scaling; + float tex_v = -0.5F + t[j] * texture->height0 * scaling; - int u0 = floor(tex_u - box_u); - int u1 = ceil (tex_u + box_u); - int v0 = floor(tex_v - box_v); - int v1 = ceil (tex_v + box_v); + int u0 = (int) floorf(tex_u - box_u); + int u1 = (int) ceilf(tex_u + box_u); + int v0 = (int) floorf(tex_v - box_v); + int v1 = (int) ceilf(tex_v + box_v); float num[4] = {0.0F, 0.0F, 0.0F, 0.0F}; buffer_next = 0; @@ -1849,7 +1849,7 @@ img_filter_2d_ewa(struct tgsi_sampler *tgsi_sampler, buffer_next++; if (buffer_next == QUAD_SIZE) { /* 4 texel coords are in the buffer -> read it now */ - int jj; + unsigned jj; /* it is assumed that samp->min_img_filter is set to * img_filter_2d_nearest or one of the * accelerated img_filter_2d_nearest_XXX functions. @@ -1879,7 +1879,7 @@ img_filter_2d_ewa(struct tgsi_sampler *tgsi_sampler, * parameter, we need to read the whole quad and ignore the unused values */ if (buffer_next > 0) { - int jj; + unsigned jj; /* it is assumed that samp->min_img_filter is set to * img_filter_2d_nearest or one of the * accelerated img_filter_2d_nearest_XXX functions. @@ -1984,7 +1984,7 @@ mip_filter_linear_aniso(struct tgsi_sampler *tgsi_sampler, /* note: we need to have Pmin=sqrt(Pmin2) here, but we can avoid * this since 0.5*log(x) = log(sqrt(x)) */ - lambda = 0.5 * util_fast_log2(Pmin2) + samp->sampler->lod_bias; + lambda = 0.5F * util_fast_log2(Pmin2) + samp->sampler->lod_bias; compute_lod(samp->sampler, lambda, c0, lod); } else { @@ -2001,7 +2001,7 @@ mip_filter_linear_aniso(struct tgsi_sampler *tgsi_sampler, /* If the ellipse covers the whole image, we can * simply return the average of the whole image. */ - if (level0 >= texture->last_level) { + if (level0 >= (int) texture->last_level) { samp->level = texture->last_level; samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } @@ -2226,9 +2226,9 @@ sample_cube(struct tgsi_sampler *tgsi_sampler, */ { /* use the average of the four pixel's texcoords to choose the face */ - const float rx = 0.25 * (s[0] + s[1] + s[2] + s[3]); - const float ry = 0.25 * (t[0] + t[1] + t[2] + t[3]); - const float rz = 0.25 * (p[0] + p[1] + p[2] + p[3]); + const float rx = 0.25F * (s[0] + s[1] + s[2] + s[3]); + const float ry = 0.25F * (t[0] + t[1] + t[2] + t[3]); + const float rz = 0.25F * (p[0] + p[1] + p[2] + p[3]); const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz); if (arx >= ary && arx >= arz) { @@ -2255,7 +2255,7 @@ sample_cube(struct tgsi_sampler *tgsi_sampler, float sign = (rz >= 0.0F) ? 1.0F : -1.0F; uint face = (rz >= 0.0F) ? PIPE_TEX_FACE_POS_Z : PIPE_TEX_FACE_NEG_Z; for (j = 0; j < QUAD_SIZE; j++) { - const float ima = -0.5 / fabsf(p[j]); + const float ima = -0.5F / fabsf(p[j]); ssss[j] = sign * -s[j] * ima + 0.5F; tttt[j] = t[j] * ima + 0.5F; samp->faces[j] = face; diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h index 5603110eeb3..533d6252e25 100644 --- a/src/gallium/drivers/softpipe/sp_texture.h +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -79,7 +79,6 @@ struct softpipe_transfer }; - /** cast wrappers */ static INLINE struct softpipe_resource * softpipe_resource(struct pipe_resource *pt) diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index dbbc249258d..cfb1b9d8d0d 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -207,6 +207,14 @@ void svga_context_flush( struct svga_context *svga, svga->curr.nr_fbs = 0; + /* Flush the upload managers to ensure recycling of upload buffers + * without throttling. This should really be conditioned on + * pipe_buffer_map_range not supporting PIPE_TRANSFER_UNSYNCHRONIZED. + */ + + u_upload_flush(svga->upload_vb); + u_upload_flush(svga->upload_ib); + /* Ensure that texture dma uploads are processed * before submitting commands. */ diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index eca529d262e..34b9e85c1a3 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -372,9 +372,6 @@ struct svga_context /** List of buffers with queued transfers */ struct list_head dirty_buffers; - - /** Was the previous draw done with the SW path? */ - boolean prev_draw_swtnl; }; /* A flag for each state_tracker state object: diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c index d8af615ede1..aa096692888 100644 --- a/src/gallium/drivers/svga/svga_draw.c +++ b/src/gallium/drivers/svga/svga_draw.c @@ -145,7 +145,7 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl ) unsigned i; /* Unmap upload manager vertex buffers */ - u_upload_flush(svga->upload_vb); + u_upload_unmap(svga->upload_vb); for (i = 0; i < hwtnl->cmd.vdecl_count; i++) { handle = svga_buffer_handle(svga, hwtnl->cmd.vdecl_vb[i]); @@ -156,7 +156,7 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl ) } /* Unmap upload manager index buffers */ - u_upload_flush(svga->upload_ib); + u_upload_unmap(svga->upload_ib); for (i = 0; i < hwtnl->cmd.prim_count; i++) { if (hwtnl->cmd.prim_ib[i]) { @@ -242,6 +242,11 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl ) } +void svga_hwtnl_set_index_bias( struct svga_hwtnl *hwtnl, + int index_bias) +{ + hwtnl->index_bias = index_bias; +} @@ -265,15 +270,16 @@ enum pipe_error svga_hwtnl_prim( struct svga_hwtnl *hwtnl, unsigned size = vb ? vb->width0 : 0; unsigned offset = hwtnl->cmd.vdecl[i].array.offset; unsigned stride = hwtnl->cmd.vdecl[i].array.stride; - unsigned index_bias = range->indexBias; + int index_bias = (int) range->indexBias + hwtnl->index_bias; unsigned width; assert(vb); assert(size); assert(offset < size); - assert(index_bias >= 0); assert(min_index <= max_index); - assert(offset + index_bias*stride < size); + if (index_bias >= 0) { + assert(offset + index_bias*stride < size); + } if (min_index != ~0) { assert(offset + (index_bias + min_index) * stride < size); } @@ -394,6 +400,7 @@ enum pipe_error svga_hwtnl_prim( struct svga_hwtnl *hwtnl, hwtnl->cmd.max_index[hwtnl->cmd.prim_count] = max_index; hwtnl->cmd.prim[hwtnl->cmd.prim_count] = *range; + hwtnl->cmd.prim[hwtnl->cmd.prim_count].indexBias += hwtnl->index_bias; pipe_resource_reference(&hwtnl->cmd.prim_ib[hwtnl->cmd.prim_count], ib); hwtnl->cmd.prim_count++; diff --git a/src/gallium/drivers/svga/svga_draw.h b/src/gallium/drivers/svga/svga_draw.h index a2403d802be..1dac17421e1 100644 --- a/src/gallium/drivers/svga/svga_draw.h +++ b/src/gallium/drivers/svga/svga_draw.h @@ -79,5 +79,8 @@ svga_hwtnl_draw_range_elements( struct svga_hwtnl *hwtnl, enum pipe_error svga_hwtnl_flush( struct svga_hwtnl *hwtnl ); +void svga_hwtnl_set_index_bias( struct svga_hwtnl *hwtnl, + int index_bias); + #endif /* SVGA_DRAW_H_ */ diff --git a/src/gallium/drivers/svga/svga_draw_private.h b/src/gallium/drivers/svga/svga_draw_private.h index ca658ac6745..8126f7ee23c 100644 --- a/src/gallium/drivers/svga/svga_draw_private.h +++ b/src/gallium/drivers/svga/svga_draw_private.h @@ -116,6 +116,13 @@ struct draw_cmd { struct svga_hwtnl { struct svga_context *svga; struct u_upload_mgr *upload_ib; + + /* Additional negative index bias due to partial buffer uploads + * This is compensated for in the offset associated with all + * vertex buffers. + */ + + int index_bias; /* Flatshade information: */ diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c index 2093bcae101..d53edcb23c5 100644 --- a/src/gallium/drivers/svga/svga_pipe_draw.c +++ b/src/gallium/drivers/svga/svga_pipe_draw.c @@ -25,6 +25,7 @@ #include "svga_cmd.h" +#include "util/u_format.h" #include "util/u_inlines.h" #include "util/u_prim.h" #include "util/u_time.h" @@ -37,6 +38,178 @@ #include "svga_state.h" #include "svga_swtnl.h" #include "svga_debug.h" +#include "svga_resource_buffer.h" +#include "util/u_upload_mgr.h" + +/** + * Determine the ranges to upload for the user-buffers referenced + * by the next draw command. + * + * TODO: It might be beneficial to support multiple ranges. In that case, + * the struct svga_buffer::uploaded member should be made an array or a + * list, since we need to account for the possibility that different ranges + * may be uploaded to different hardware buffers chosen by the utility + * upload manager. + */ + +static void +svga_user_buffer_range(struct svga_context *svga, + unsigned start, + unsigned count, + unsigned instance_count) +{ + const struct pipe_vertex_element *ve = svga->curr.velems->velem; + int i; + + /* + * Release old uploaded range (if not done already) and + * initialize new ranges. + */ + + for (i=0; i < svga->curr.velems->count; i++) { + struct pipe_vertex_buffer *vb = + &svga->curr.vb[ve[i].vertex_buffer_index]; + + if (vb->buffer && svga_buffer_is_user_buffer(vb->buffer)) { + struct svga_buffer *buffer = svga_buffer(vb->buffer); + + pipe_resource_reference(&buffer->uploaded.buffer, NULL); + buffer->uploaded.start = ~0; + buffer->uploaded.end = 0; + } + } + + for (i=0; i < svga->curr.velems->count; i++) { + struct pipe_vertex_buffer *vb = + &svga->curr.vb[ve[i].vertex_buffer_index]; + + if (vb->buffer && svga_buffer_is_user_buffer(vb->buffer)) { + struct svga_buffer *buffer = svga_buffer(vb->buffer); + unsigned first, size; + unsigned instance_div = ve[i].instance_divisor; + unsigned elemSize = util_format_get_blocksize(ve[i].src_format); + + svga->dirty |= SVGA_NEW_VBUFFER; + + if (instance_div) { + first = ve[i].src_offset; + count = (instance_count + instance_div - 1) / instance_div; + size = vb->stride * (count - 1) + elemSize; + } else if (vb->stride) { + first = vb->stride * start + ve[i].src_offset; + size = vb->stride * (count - 1) + elemSize; + } else { + /* Only a single vertex! + * Upload with the largest vertex size the hw supports, + * if possible. + */ + first = ve[i].src_offset; + size = MIN2(16, vb->buffer->width0); + } + + buffer->uploaded.start = MIN2(buffer->uploaded.start, first); + buffer->uploaded.end = MAX2(buffer->uploaded.end, first + size); + } + } +} + +/** + * svga_upload_user_buffers - upload parts of user buffers + * + * This function streams a part of a user buffer to hw and fills + * svga_buffer::uploaded with information on the upload. + */ + +static int +svga_upload_user_buffers(struct svga_context *svga, + unsigned start, + unsigned count, + unsigned instance_count) +{ + const struct pipe_vertex_element *ve = svga->curr.velems->velem; + unsigned i; + int ret; + + svga_user_buffer_range(svga, start, count, instance_count); + + for (i=0; i < svga->curr.velems->count; i++) { + struct pipe_vertex_buffer *vb = + &svga->curr.vb[ve[i].vertex_buffer_index]; + + if (vb->buffer && svga_buffer_is_user_buffer(vb->buffer)) { + struct svga_buffer *buffer = svga_buffer(vb->buffer); + boolean flushed; + + /* + * Check if already uploaded. Otherwise go ahead and upload. + */ + + if (buffer->uploaded.buffer) + continue; + + ret = u_upload_buffer( svga->upload_vb, + 0, + buffer->uploaded.start, + buffer->uploaded.end - buffer->uploaded.start, + &buffer->b.b, + &buffer->uploaded.offset, + &buffer->uploaded.buffer, + &flushed); + + if (ret) + return ret; + + if (0) + debug_printf("%s: %d: orig buf %p upl buf %p ofs %d sofs %d" + " sz %d\n", + __FUNCTION__, + i, + buffer, + buffer->uploaded.buffer, + buffer->uploaded.offset, + buffer->uploaded.start, + buffer->uploaded.end - buffer->uploaded.start); + + vb->buffer_offset = buffer->uploaded.offset; + } + } + + return PIPE_OK; +} + +/** + * svga_release_user_upl_buffers - release uploaded parts of user buffers + * + * This function releases the hw copy of the uploaded fraction of the + * user-buffer. It's important to do this as soon as all draw calls + * affecting the uploaded fraction are issued, as this allows for + * efficient reuse of the hardware surface backing the uploaded fraction. + * + * svga_buffer::source_offset is set to 0, and svga_buffer::uploaded::buffer + * is set to 0. + */ + +static void +svga_release_user_upl_buffers(struct svga_context *svga) +{ + unsigned i; + unsigned nr; + + nr = svga->curr.num_vertex_buffers; + + for (i = 0; i < nr; ++i) { + struct pipe_vertex_buffer *vb = &svga->curr.vb[i]; + + if (vb->buffer && svga_buffer_is_user_buffer(vb->buffer)) { + struct svga_buffer *buffer = svga_buffer(vb->buffer); + + buffer->uploaded.start = ~0; + buffer->uploaded.end = 0; + if (buffer->uploaded.buffer) + pipe_resource_reference(&buffer->uploaded.buffer, NULL); + } + } +} @@ -50,6 +223,7 @@ retry_draw_range_elements( struct svga_context *svga, unsigned prim, unsigned start, unsigned count, + unsigned instance_count, boolean do_retry ) { enum pipe_error ret = 0; @@ -61,6 +235,10 @@ retry_draw_range_elements( struct svga_context *svga, svga->curr.rast->templ.flatshade, svga->curr.rast->templ.flatshade_first ); + ret = svga_upload_user_buffers( svga, min_index + index_bias, + max_index - min_index + 1, instance_count ); + if (ret != PIPE_OK) + goto retry; ret = svga_update_state( svga, SVGA_STATE_HW_DRAW ); if (ret) @@ -84,7 +262,7 @@ retry: index_buffer, index_size, index_bias, min_index, max_index, prim, start, count, - FALSE ); + instance_count, FALSE ); } return ret; @@ -96,6 +274,7 @@ retry_draw_arrays( struct svga_context *svga, unsigned prim, unsigned start, unsigned count, + unsigned instance_count, boolean do_retry ) { enum pipe_error ret; @@ -107,6 +286,11 @@ retry_draw_arrays( struct svga_context *svga, svga->curr.rast->templ.flatshade, svga->curr.rast->templ.flatshade_first ); + ret = svga_upload_user_buffers( svga, start, count, instance_count ); + + if (ret != PIPE_OK) + goto retry; + ret = svga_update_state( svga, SVGA_STATE_HW_DRAW ); if (ret) goto retry; @@ -127,6 +311,7 @@ retry: prim, start, count, + instance_count, FALSE ); } @@ -141,18 +326,11 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) unsigned reduced_prim = u_reduced_prim( info->mode ); unsigned count = info->count; enum pipe_error ret = 0; + boolean needed_swtnl; if (!u_trim_pipe_prim( info->mode, &count )) return; - if (svga->state.sw.need_swtnl != svga->prev_draw_swtnl) { - /* We're switching between SW and HW drawing. Do a flush to avoid - * mixing HW and SW rendering with the same vertex buffer. - */ - pipe->flush(pipe, NULL); - svga->prev_draw_swtnl = svga->state.sw.need_swtnl; - } - /* * Mark currently bound target surfaces as dirty * doesn't really matter if it is done before drawing. @@ -167,6 +345,8 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) svga->dirty |= SVGA_NEW_REDUCED_PRIMITIVE; } + needed_swtnl = svga->state.sw.need_swtnl; + svga_update_state_retry( svga, SVGA_STATE_NEED_SWTNL ); #ifdef DEBUG @@ -176,6 +356,20 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) #endif if (svga->state.sw.need_swtnl) { + if (!needed_swtnl) { + /* + * We're switching from HW to SW TNL. SW TNL will require mapping all + * currently bound vertex buffers, some of which may already be + * referenced in the current command buffer as result of previous HW + * TNL. So flush now, to prevent the context to flush while a referred + * vertex buffer is mapped. + */ + + svga_context_flush(svga, NULL); + } + + /* Avoid leaking the previous hwtnl bias to swtnl */ + svga_hwtnl_set_index_bias( svga->hwtnl, 0 ); ret = svga_swtnl_draw_vbo( svga, info ); } else { @@ -194,6 +388,7 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) info->mode, info->start + offset, info->count, + info->instance_count, TRUE ); } else { @@ -201,10 +396,13 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) info->mode, info->start, info->count, + info->instance_count, TRUE ); } } + svga_release_user_upl_buffers( svga ); + if (SVGA_DEBUG & DEBUG_FLUSH) { svga_hwtnl_flush_retry( svga ); svga_context_flush(svga, NULL); diff --git a/src/gallium/drivers/svga/svga_resource_buffer.h b/src/gallium/drivers/svga/svga_resource_buffer.h index 95032213fa5..ca8c8d1f5ea 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer.h +++ b/src/gallium/drivers/svga/svga_resource_buffer.h @@ -129,6 +129,12 @@ struct svga_buffer * is the relative offset within that buffer. */ unsigned offset; + + /** + * Range of user buffer that is uploaded in @buffer at @offset. + */ + unsigned start; + unsigned end; } uploaded; /** @@ -193,7 +199,11 @@ svga_buffer(struct pipe_resource *buffer) static INLINE boolean svga_buffer_is_user_buffer( struct pipe_resource *buffer ) { - return svga_buffer(buffer)->user; + if (buffer) { + return svga_buffer(buffer)->user; + } else { + return FALSE; + } } diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/src/gallium/drivers/svga/svga_resource_buffer_upload.c index 923958674b4..a657a8bc224 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c +++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c @@ -651,8 +651,6 @@ svga_redefine_user_buffer(struct pipe_context *pipe, unsigned offset, unsigned size) { - struct svga_screen *ss = svga_screen(pipe->screen); - struct svga_context *svga = svga_context(pipe); struct svga_buffer *sbuf = svga_buffer(resource); assert(sbuf->user); @@ -661,19 +659,8 @@ svga_redefine_user_buffer(struct pipe_context *pipe, assert(!sbuf->hwbuf); /* - * Release any uploaded user buffer. - * - * TODO: As an optimization, we could try to update the uploaded buffer - * instead. + * We always treat the contents of user-buffers as volatile, + * so no particular action needed here. */ - pipe_resource_reference(&sbuf->uploaded.buffer, NULL); - - pipe_mutex_lock(ss->swc_mutex); - - sbuf->key.size.width = sbuf->b.b.width0 = offset + size; - - pipe_mutex_unlock(ss->swc_mutex); - - svga->dirty |= SVGA_NEW_VBUFFER | SVGA_NEW_VELEMENT; } diff --git a/src/gallium/drivers/svga/svga_state_vdecl.c b/src/gallium/drivers/svga/svga_state_vdecl.c index 7c393a1da8d..47eab1a9739 100644 --- a/src/gallium/drivers/svga/svga_state_vdecl.c +++ b/src/gallium/drivers/svga/svga_state_vdecl.c @@ -38,57 +38,6 @@ #include "svga_hw_reg.h" -static int -upload_user_buffers( struct svga_context *svga ) -{ - enum pipe_error ret = PIPE_OK; - int i; - int nr; - - if (0) - debug_printf("%s: %d\n", __FUNCTION__, svga->curr.num_vertex_buffers); - - nr = svga->curr.num_vertex_buffers; - - for (i = 0; i < nr; i++) - { - if (svga_buffer_is_user_buffer(svga->curr.vb[i].buffer)) - { - struct svga_buffer *buffer = svga_buffer(svga->curr.vb[i].buffer); - - if (!buffer->uploaded.buffer) { - boolean flushed; - ret = u_upload_buffer( svga->upload_vb, - 0, 0, - buffer->b.b.width0, - &buffer->b.b, - &buffer->uploaded.offset, - &buffer->uploaded.buffer, - &flushed); - if (ret) - return ret; - - if (0) - debug_printf("%s: %d: orig buf %p upl buf %p ofs %d sz %d\n", - __FUNCTION__, - i, - buffer, - buffer->uploaded.buffer, - buffer->uploaded.offset, - buffer->b.b.width0); - } - - svga->curr.vb[i].buffer_offset = buffer->uploaded.offset; - } - } - - if (0) - debug_printf("%s: DONE\n", __FUNCTION__); - - return ret; -} - - /*********************************************************************** */ @@ -99,6 +48,7 @@ static int emit_hw_vs_vdecl( struct svga_context *svga, const struct pipe_vertex_element *ve = svga->curr.velems->velem; SVGA3dVertexDecl decl; unsigned i; + unsigned neg_bias = 0; assert(svga->curr.velems->count >= svga->curr.vs->base.info.file_count[TGSI_FILE_INPUT]); @@ -106,12 +56,50 @@ static int emit_hw_vs_vdecl( struct svga_context *svga, svga_hwtnl_reset_vdecl( svga->hwtnl, svga->curr.velems->count ); + /** + * We can't set the VDECL offset to something negative, so we + * must calculate a common negative additional index bias, and modify + * the VDECL offsets accordingly so they *all* end up positive. + * + * Note that the exact value of the negative index bias is not that + * important, since we compensate for it when we calculate the vertex + * buffer offset below. The important thing is that all vertex buffer + * offsets remain positive. + * + * Note that we use a negative bias variable in order to make the + * rounding maths more easy to follow, and to avoid int / unsigned + * confusion. + */ + for (i = 0; i < svga->curr.velems->count; i++) { - const struct pipe_vertex_buffer *vb = &svga->curr.vb[ve[i].vertex_buffer_index]; + const struct pipe_vertex_buffer *vb = + &svga->curr.vb[ve[i].vertex_buffer_index]; + struct svga_buffer *buffer; + unsigned int offset = vb->buffer_offset + ve[i].src_offset; + unsigned tmp_neg_bias = 0; + + if (!vb->buffer) + continue; + + buffer = svga_buffer(vb->buffer); + if (buffer->uploaded.start > offset) { + tmp_neg_bias = buffer->uploaded.start - offset; + if (vb->stride) + tmp_neg_bias = (tmp_neg_bias + vb->stride - 1) / vb->stride; + neg_bias = MAX2(neg_bias, tmp_neg_bias); + } + } + + for (i = 0; i < svga->curr.velems->count; i++) { + const struct pipe_vertex_buffer *vb = + &svga->curr.vb[ve[i].vertex_buffer_index]; unsigned usage, index; - struct svga_buffer *buffer = svga_buffer(vb->buffer); + struct svga_buffer *buffer; + if (!vb->buffer) + continue; + buffer= svga_buffer(vb->buffer); svga_generate_vdecl_semantics( i, &usage, &index ); /* SVGA_NEW_VELEMENT @@ -121,8 +109,16 @@ static int emit_hw_vs_vdecl( struct svga_context *svga, decl.identity.usage = usage; decl.identity.usageIndex = index; decl.array.stride = vb->stride; - decl.array.offset = (vb->buffer_offset + - ve[i].src_offset); + + /* Compensate for partially uploaded vbo, and + * for the negative index bias. + */ + decl.array.offset = (vb->buffer_offset + + ve[i].src_offset + + neg_bias * vb->stride + - buffer->uploaded.start); + + assert(decl.array.offset >= 0); svga_hwtnl_vdecl( svga->hwtnl, i, @@ -131,6 +127,7 @@ static int emit_hw_vs_vdecl( struct svga_context *svga, vb->buffer ); } + svga_hwtnl_set_index_bias( svga->hwtnl, -neg_bias ); return 0; } @@ -138,23 +135,11 @@ static int emit_hw_vs_vdecl( struct svga_context *svga, static int emit_hw_vdecl( struct svga_context *svga, unsigned dirty ) { - int ret = 0; - /* SVGA_NEW_NEED_SWTNL */ if (svga->state.sw.need_swtnl) return 0; /* Do not emit during swtnl */ - /* If we get to here, we know that we're going to draw. Upload - * userbuffers now and try to combine multiple userbuffers from - * multiple draw calls into a single host buffer for performance. - */ - if (svga->curr.any_user_vertex_buffers) { - ret = upload_user_buffers( svga ); - if (ret) - return ret; - } - return emit_hw_vs_vdecl( svga, dirty ); } |