diff options
Diffstat (limited to 'src/mesa/drivers')
118 files changed, 16693 insertions, 1073 deletions
diff --git a/src/mesa/drivers/dri/Makefile.template b/src/mesa/drivers/dri/Makefile.template index 8cb25439e48..a00018cafa7 100644 --- a/src/mesa/drivers/dri/Makefile.template +++ b/src/mesa/drivers/dri/Makefile.template @@ -17,6 +17,7 @@ COMMON_SOURCES = $(COMMON_GALLIUM_SOURCES) \ INCLUDES = $(SHARED_INCLUDES) $(EXPAT_INCLUDES) OBJECTS = $(C_SOURCES:.c=.o) \ + $(CXX_SOURCES:.cpp=.o) \ $(ASM_SOURCES:.S=.o) @@ -33,12 +34,16 @@ SHARED_INCLUDES = \ $(LIBDRM_CFLAGS) CFLAGS += $(API_DEFINES) +CXXFLAGS += $(API_DEFINES) ##### RULES ##### .c.o: $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ +.cpp.o: + $(CC) -c $(INCLUDES) $(CXXFLAGS) $(DRIVER_DEFINES) $< -o $@ + .S.o: $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ @@ -54,9 +59,9 @@ lib: symlinks subdirs depend $(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(EXTRA_MODULES) Makefile \ $(TOP)/src/mesa/drivers/dri/Makefile.template $(TOP)/src/mesa/drivers/dri/common/dri_test.o - $(MKLIB) -o [email protected] -noprefix -linker '$(CC)' -ldflags '$(LDFLAGS)' \ + $(MKLIB) -o [email protected] -noprefix -linker '$(CXX)' -ldflags '$(LDFLAGS)' \ $(OBJECTS) $(MESA_MODULES) $(EXTRA_MODULES) $(DRI_LIB_DEPS) - $(CC) $(CFLAGS) -o [email protected] $(TOP)/src/mesa/drivers/dri/common/dri_test.o [email protected] $(DRI_LIB_DEPS) + $(CXX) $(CFLAGS) -o [email protected] $(TOP)/src/mesa/drivers/dri/common/dri_test.o [email protected] $(DRI_LIB_DEPS) @rm -f [email protected] mv -f [email protected] $@ diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index dce84ef0deb..a581c6663f2 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -32,6 +32,7 @@ #include "drm_sarea.h" #include "utils.h" #include "xmlpool.h" +#include "../glsl/glsl_parser_extras.h" PUBLIC const char __dri2ConfigOptions[] = DRI_CONF_BEGIN @@ -707,6 +708,8 @@ static void driDestroyScreen(__DRIscreen *psp) * stream open to the X-server anymore. 
*/ + _mesa_destroy_shader_compiler(); + if (psp->DriverAPI.DestroyScreen) (*psp->DriverAPI.DestroyScreen)(psp); @@ -714,6 +717,9 @@ static void driDestroyScreen(__DRIscreen *psp) (void)drmUnmap((drmAddress)psp->pSAREA, SAREA_MAX); (void)drmUnmap((drmAddress)psp->pFB, psp->fbSize); (void)drmCloseOnce(psp->fd); + } else { + driDestroyOptionCache(&psp->optionCache); + driDestroyOptionInfo(&psp->optionInfo); } free(psp); @@ -839,7 +845,6 @@ dri2CreateNewScreen(int scrn, int fd, static const __DRIextension *emptyExtensionList[] = { NULL }; __DRIscreen *psp; drmVersionPtr version; - driOptionCache options; if (driDriverAPI.InitScreen2 == NULL) return NULL; @@ -873,8 +878,10 @@ dri2CreateNewScreen(int scrn, int fd, psp->DriverAPI = driDriverAPI; - driParseOptionInfo(&options, __dri2ConfigOptions, __dri2NConfigOptions); - driParseConfigFiles(&psp->optionCache, &options, psp->myNum, "dri2"); + driParseOptionInfo(&psp->optionInfo, __dri2ConfigOptions, + __dri2NConfigOptions); + driParseConfigFiles(&psp->optionCache, &psp->optionInfo, psp->myNum, + "dri2"); return psp; } diff --git a/src/mesa/drivers/dri/common/dri_util.h b/src/mesa/drivers/dri/common/dri_util.h index bc647ff8130..5096d22cad3 100644 --- a/src/mesa/drivers/dri/common/dri_util.h +++ b/src/mesa/drivers/dri/common/dri_util.h @@ -513,7 +513,11 @@ struct __DRIscreenRec { * * This pointer is never touched by the DRI layer. */ +#ifdef __cplusplus + void *priv; +#else void *private; +#endif /* Extensions provided by the loader. 
*/ const __DRIgetDrawableInfoExtension *getDrawableInfo; @@ -532,6 +536,7 @@ struct __DRIscreenRec { /* The lock actually in use, old sarea or DRI2 */ drmLock *lock; + driOptionCache optionInfo; driOptionCache optionCache; unsigned int api_mask; }; diff --git a/src/mesa/drivers/dri/i915/i830_context.c b/src/mesa/drivers/dri/i915/i830_context.c index d52ea9812f7..8ddce6d82a5 100644 --- a/src/mesa/drivers/dri/i915/i830_context.c +++ b/src/mesa/drivers/dri/i915/i830_context.c @@ -27,7 +27,6 @@ #include "i830_context.h" #include "main/imports.h" -#include "texmem.h" #include "tnl/tnl.h" #include "tnl/t_vertex.h" #include "tnl/t_context.h" diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index b3fe1c05d66..d8715cf026d 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -174,6 +174,8 @@ i915CreateContext(int api, ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; + ctx->Shader.EmitNoIfs = GL_TRUE; + ctx->Const.MaxDrawBuffers = 1; _tnl_init_vertices(ctx, ctx->Const.MaxArrayLockSize + 12, diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index f1505dc5e73..4a2e6209d07 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -143,6 +143,20 @@ src_vector(struct i915_fragment_program *p, } break; + case PROGRAM_OUTPUT: + switch (source->Index) { + case FRAG_RESULT_COLOR: + src = UREG(REG_TYPE_OC, 0); + break; + case FRAG_RESULT_DEPTH: + src = UREG(REG_TYPE_OD, 0); + break; + default: + i915_program_error(p, "Bad source->Index: %d", source->Index); + return 0; + } + break; + /* Various paramters and env values. All emitted to * hardware as program constants. 
*/ @@ -472,6 +486,18 @@ upload_program(struct i915_fragment_program *p) swizzle(tmp, X, X, X, X)); break; + case OPCODE_DP2: + src0 = src_vector(p, &inst->SrcReg[0], program); + src1 = src_vector(p, &inst->SrcReg[1], program); + i915_emit_arith(p, + A0_DP3, + get_result_vector(p, inst), + get_result_flags(inst), 0, + swizzle(src0, X, Y, ZERO, ZERO), + swizzle(src1, X, Y, ZERO, ZERO), + 0); + break; + case OPCODE_DP3: EMIT_2ARG_ARITH(A0_DP3); break; @@ -957,6 +983,41 @@ upload_program(struct i915_fragment_program *p) 0); break; + case OPCODE_SSG: + dst = get_result_vector(p, inst); + flags = get_result_flags(inst); + src0 = src_vector(p, &inst->SrcReg[0], program); + tmp = i915_get_utemp(p); + + /* tmp = (src < 0.0) */ + i915_emit_arith(p, + A0_SLT, + tmp, + flags, 0, + src0, + swizzle(src0, ZERO, ZERO, ZERO, ZERO), + 0); + + /* dst = (0.0 < src) */ + i915_emit_arith(p, + A0_SLT, + dst, + flags, 0, + swizzle(src0, ZERO, ZERO, ZERO, ZERO), + src0, + 0); + + /* dst = (src > 0.0) - (src < 0.0) */ + i915_emit_arith(p, + A0_ADD, + dst, + flags, 0, + dst, + negate(tmp, 1, 1, 1, 1), + 0); + + break; + case OPCODE_SUB: src0 = src_vector(p, &inst->SrcReg[0], program); src1 = src_vector(p, &inst->SrcReg[1], program); diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index e381a5c714b..bea48e13138 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -104,6 +104,11 @@ C_SOURCES = \ $(COMMON_SOURCES) \ $(DRIVER_SOURCES) +CXX_SOURCES = \ + brw_fs.cpp \ + brw_fs_channel_expressions.cpp \ + brw_fs_vector_splitting.cpp + ASM_SOURCES = DRIVER_DEFINES = -I../intel diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 6d064b822e5..d2b20165f9d 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -144,7 +144,8 @@ GLboolean brwCreateContext( int api, brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_GM45; 
brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45; brw->has_surface_tile_offset = GL_TRUE; - brw->has_compr4 = GL_TRUE; + if (intel->gen < 6) + brw->has_compr4 = GL_TRUE; brw->has_aa_line_parameters = GL_TRUE; brw->has_pln = GL_TRUE; } else { @@ -153,7 +154,11 @@ GLboolean brwCreateContext( int api, } /* WM maximum threads is number of EUs times number of threads per EU. */ - if (intel->gen == 5) { + if (intel->gen >= 6) { + brw->urb.size = 1024; + brw->vs_max_threads = 60; + brw->wm_max_threads = 80; + } else if (intel->gen == 5) { brw->urb.size = 1024; brw->vs_max_threads = 72; brw->wm_max_threads = 12 * 6; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index cc4e6638e8b..703a7de78d1 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -179,6 +179,16 @@ struct brw_fragment_program { GLbitfield tex_units_used; }; +struct brw_shader { + struct gl_shader base; + + /** Shader IR transformed for native compile, at link time. */ + struct exec_list *ir; +}; + +struct brw_shader_program { + struct gl_shader_program base; +}; /* Data about a particular attempt to compile a program. Note that * there can be many of these, each in a different GL state @@ -654,7 +664,13 @@ struct brw_context drm_intel_bo *prog_bo; drm_intel_bo *state_bo; - drm_intel_bo *const_bo; + drm_intel_bo *const_bo; /* pull constant buffer. */ + /** + * This is the push constant BO on gen6. + * + * Pre-gen6, push constants live in the CURBE. 
+ */ + drm_intel_bo *push_const_bo; } wm; @@ -686,7 +702,13 @@ struct brw_context #define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a) - +struct brw_instruction_info { + char *name; + int nsrc; + int ndst; + GLboolean is_arith; +}; +extern const struct brw_instruction_info brw_opcodes[128]; /*====================================================================== * brw_vtbl.c diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index f7a68cead7c..6b8e9e05d08 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -686,6 +686,9 @@ #define BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5 1 #define BRW_SAMPLER_MESSAGE_SAMPLE_LOD_GEN5 2 #define BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5 3 +#define BRW_SAMPLER_MESSAGE_SAMPLE_DERIVS_GEN5 4 +#define BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5 5 +#define BRW_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE_GEN5 6 /* for GEN5 only */ #define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0 diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index d2307145361..f74a236834b 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -159,6 +159,11 @@ char *saturate[2] = { [1] = ".sat" }; +char *accwr[2] = { + [0] = "", + [1] = "AccWrEnable" +}; + char *exec_size[8] = { [0] = "1", [1] = "2", @@ -206,6 +211,7 @@ char *compr_ctrl[4] = { [0] = "", [1] = "sechalf", [2] = "compr", + [3] = "compr4", }; char *dep_ctrl[4] = { @@ -235,6 +241,16 @@ char *reg_encoding[8] = { [7] = "F" }; +int reg_type_size[8] = { + [0] = 4, + [1] = 4, + [2] = 2, + [3] = 2, + [4] = 1, + [5] = 1, + [7] = 4 +}; + char *imm_encoding[8] = { [0] = "UD", [1] = "D", @@ -423,6 +439,11 @@ static int print_opcode (FILE *file, int id) static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr) { int err = 0; + + /* Clear the Compr4 instruction compression bit. 
*/ + if (_reg_file == BRW_MESSAGE_REGISTER_FILE) + _reg_nr &= ~(1 << 7); + if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) { switch (_reg_nr & 0xf0) { case BRW_ARF_NULL: @@ -476,7 +497,8 @@ static int dest (FILE *file, struct brw_instruction *inst) if (err == -1) return 0; if (inst->bits1.da1.dest_subreg_nr) - format (file, ".%d", inst->bits1.da1.dest_subreg_nr); + format (file, ".%d", inst->bits1.da1.dest_subreg_nr / + reg_type_size[inst->bits1.da1.dest_reg_type]); format (file, "<%d>", inst->bits1.da1.dest_horiz_stride); err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL); } @@ -484,7 +506,8 @@ static int dest (FILE *file, struct brw_instruction *inst) { string (file, "g[a0"); if (inst->bits1.ia1.dest_subreg_nr) - format (file, ".%d", inst->bits1.ia1.dest_subreg_nr); + format (file, ".%d", inst->bits1.ia1.dest_subreg_nr / + reg_type_size[inst->bits1.ia1.dest_reg_type]); if (inst->bits1.ia1.dest_indirect_offset) format (file, " %d", inst->bits1.ia1.dest_indirect_offset); string (file, "]"); @@ -500,7 +523,8 @@ static int dest (FILE *file, struct brw_instruction *inst) if (err == -1) return 0; if (inst->bits1.da16.dest_subreg_nr) - format (file, ".%d", inst->bits1.da16.dest_subreg_nr); + format (file, ".%d", inst->bits1.da16.dest_subreg_nr / + reg_type_size[inst->bits1.da16.dest_reg_type]); string (file, "<1>"); err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL); err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL); @@ -541,7 +565,7 @@ static int src_da1 (FILE *file, GLuint type, GLuint _reg_file, if (err == -1) return 0; if (sub_reg_num) - format (file, ".%d", sub_reg_num); + format (file, ".%d", sub_reg_num / reg_type_size[type]); /* use formal style like spec */ src_align1_region (file, _vert_stride, _width, _horiz_stride); err |= control (file, "src reg encoding", reg_encoding, type, NULL); return err; @@ -595,11 +619,12 @@ static int src_da16 
(FILE *file, if (err == -1) return 0; if (_subreg_nr) - format (file, ".%d", _subreg_nr); + /* bit4 for subreg number byte addressing. Make this same meaning as + in da1 case, so output looks consistent. */ + format (file, ".%d", 16 / reg_type_size[_reg_type]); string (file, "<"); err |= control (file, "vert stride", vert_stride, _vert_stride, NULL); string (file, ",4,1>"); - err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL); /* * Three kinds of swizzle display: * identity - nothing printed @@ -863,12 +888,25 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) inst->bits3.math.precision, &space); break; case BRW_MESSAGE_TARGET_SAMPLER: - format (file, " (%d, %d, ", - inst->bits3.sampler.binding_table_index, - inst->bits3.sampler.sampler); - err |= control (file, "sampler target format", sampler_target_format, - inst->bits3.sampler.return_format, NULL); - string (file, ")"); + if (gen >= 5) { + format (file, " (%d, %d, %d, %d)", + inst->bits3.sampler_gen5.binding_table_index, + inst->bits3.sampler_gen5.sampler, + inst->bits3.sampler_gen5.msg_type, + inst->bits3.sampler_gen5.simd_mode); + } else if (0 /* FINISHME: is_g4x */) { + format (file, " (%d, %d)", + inst->bits3.sampler_g4x.binding_table_index, + inst->bits3.sampler_g4x.sampler); + } else { + format (file, " (%d, %d, ", + inst->bits3.sampler.binding_table_index, + inst->bits3.sampler.sampler); + err |= control (file, "sampler target format", + sampler_target_format, + inst->bits3.sampler.return_format, NULL); + string (file, ")"); + } break; case BRW_MESSAGE_TARGET_DATAPORT_READ: if (gen >= 6) { @@ -929,6 +967,11 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) inst->bits3.urb.used, &space); err |= control (file, "urb complete", urb_complete, inst->bits3.urb.complete, &space); + if (gen >= 5) { + format (file, " mlen %d, rlen %d\n", + inst->bits3.urb_gen5.msg_length, + inst->bits3.urb_gen5.response_length); + } break; case 
BRW_MESSAGE_TARGET_THREAD_SPAWNER: break; @@ -957,8 +1000,19 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) err |= control(file, "access mode", access_mode, inst->header.access_mode, &space); err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space); err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space); - err |= control (file, "compression control", compr_ctrl, inst->header.compression_control, &space); + + if (inst->header.compression_control == BRW_COMPRESSION_COMPRESSED && + opcode[inst->header.opcode].ndst > 0 && + inst->bits1.da1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE && + inst->bits1.da1.dest_reg_nr & (1 << 7)) { + format (file, " compr4"); + } else { + err |= control (file, "compression control", compr_ctrl, + inst->header.compression_control, &space); + } err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space); + if (gen >= 6) + err |= control (file, "acc write control", accwr, inst->header.acc_wr_control, &space); if (inst->header.opcode == BRW_OPCODE_SEND) err |= control (file, "end of thread", end_of_thread, inst->bits3.generic.end_of_thread, &space); diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index f07aab86e90..249e874ab1a 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -476,7 +476,7 @@ static void brw_emit_vertices(struct brw_context *brw) if (brw->vb.nr_enabled == 0) { BEGIN_BATCH(3); OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1); - if (IS_GEN6(intel->intelScreen->deviceID)) { + if (intel->gen >= 6) { OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) | GEN6_VE0_VALID | (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | @@ -553,7 +553,7 @@ static void brw_emit_vertices(struct brw_context *brw) break; } - if (IS_GEN6(intel->intelScreen->deviceID)) { + if (intel->gen >= 6) { OUT_BATCH((i << 
GEN6_VE0_INDEX_SHIFT) | GEN6_VE0_VALID | (format << BRW_VE0_FORMAT_SHIFT) | diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index 4e7c1226ad4..2ff39e8e64a 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -85,6 +85,12 @@ void brw_set_saturate( struct brw_compile *p, GLuint value ) p->current->header.saturate = value; } +void brw_set_acc_write_control(struct brw_compile *p, GLuint value) +{ + if (p->brw->intel.gen >= 6) + p->current->header.acc_wr_control = value; +} + void brw_push_insn_state( struct brw_compile *p ) { assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]); diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index ffdddd0a388..c63db164609 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -633,6 +633,8 @@ static INLINE struct brw_reg brw_swizzle( struct brw_reg reg, GLuint z, GLuint w) { + assert(reg.file != BRW_IMMEDIATE_VALUE); + reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x), BRW_GET_SWZ(reg.dw1.bits.swizzle, y), BRW_GET_SWZ(reg.dw1.bits.swizzle, z), @@ -650,6 +652,7 @@ static INLINE struct brw_reg brw_swizzle1( struct brw_reg reg, static INLINE struct brw_reg brw_writemask( struct brw_reg reg, GLuint mask ) { + assert(reg.file != BRW_IMMEDIATE_VALUE); reg.dw1.bits.writemask &= mask; return reg; } @@ -657,6 +660,7 @@ static INLINE struct brw_reg brw_writemask( struct brw_reg reg, static INLINE struct brw_reg brw_set_writemask( struct brw_reg reg, GLuint mask ) { + assert(reg.file != BRW_IMMEDIATE_VALUE); reg.dw1.bits.writemask = mask; return reg; } @@ -766,6 +770,7 @@ void brw_set_compression_control( struct brw_compile *p, GLboolean control ); void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value ); void brw_set_predicate_control( struct brw_compile *p, GLuint pc ); void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional ); 
+void brw_set_acc_write_control(struct brw_compile *p, GLuint value); void brw_init_compile( struct brw_context *, struct brw_compile *p ); const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz ); @@ -840,6 +845,7 @@ void brw_ff_sync(struct brw_compile *p, GLboolean eot); void brw_fb_WRITE(struct brw_compile *p, + int dispatch_width, struct brw_reg dest, GLuint msg_reg_nr, struct brw_reg src0, @@ -925,8 +931,8 @@ struct brw_instruction *brw_DO(struct brw_compile *p, struct brw_instruction *brw_WHILE(struct brw_compile *p, struct brw_instruction *patch_insn); -struct brw_instruction *brw_BREAK(struct brw_compile *p); -struct brw_instruction *brw_CONT(struct brw_compile *p); +struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count); +struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count); /* Forward jumps: */ void brw_land_fwd_jump(struct brw_compile *p, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 0d5d17f501d..0906150613b 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -75,6 +75,8 @@ static void brw_set_dest( struct brw_instruction *insn, else { insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; + /* even ignored in da16, still need to set as '01' */ + insn->bits1.da16.dest_horiz_stride = 1; } } else { @@ -90,6 +92,8 @@ static void brw_set_dest( struct brw_instruction *insn, } else { insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; + /* even ignored in da16, still need to set as '01' */ + insn->bits1.ia16.dest_horiz_stride = 1; } } @@ -368,9 +372,23 @@ static void brw_set_dp_write_message( struct brw_context *brw, GLuint send_commit_msg) { struct intel_context *intel = &brw->intel; - brw_set_src1(insn, brw_imm_d(0)); + brw_set_src1(insn, brw_imm_ud(0)); - if (intel->gen == 5) { + if (intel->gen >= 6) { + 
insn->bits3.dp_render_cache.binding_table_index = binding_table_index; + insn->bits3.dp_render_cache.msg_control = msg_control; + insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear; + insn->bits3.dp_render_cache.msg_type = msg_type; + insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg; + insn->bits3.dp_render_cache.header_present = 0; /* XXX */ + insn->bits3.dp_render_cache.response_length = response_length; + insn->bits3.dp_render_cache.msg_length = msg_length; + insn->bits3.dp_render_cache.end_of_thread = end_of_thread; + insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE; + /* XXX really need below? */ + insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; + insn->bits2.send_gen5.end_of_thread = end_of_thread; + } else if (intel->gen == 5) { insn->bits3.dp_write_gen5.binding_table_index = binding_table_index; insn->bits3.dp_write_gen5.msg_control = msg_control; insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear; @@ -759,7 +777,7 @@ void brw_ENDIF(struct brw_compile *p, } } -struct brw_instruction *brw_BREAK(struct brw_compile *p) +struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count) { struct brw_instruction *insn; insn = next_insn(p, BRW_OPCODE_BREAK); @@ -770,10 +788,11 @@ struct brw_instruction *brw_BREAK(struct brw_compile *p) insn->header.execution_size = BRW_EXECUTE_8; /* insn->header.mask_control = BRW_MASK_DISABLE; */ insn->bits3.if_else.pad0 = 0; + insn->bits3.if_else.pop_count = pop_count; return insn; } -struct brw_instruction *brw_CONT(struct brw_compile *p) +struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) { struct brw_instruction *insn; insn = next_insn(p, BRW_OPCODE_CONTINUE); @@ -784,6 +803,7 @@ struct brw_instruction *brw_CONT(struct brw_compile *p) insn->header.execution_size = BRW_EXECUTE_8; /* insn->header.mask_control = BRW_MASK_DISABLE; */ insn->bits3.if_else.pad0 = 0; + insn->bits3.if_else.pop_count = 
pop_count; return insn; } @@ -1332,6 +1352,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p, void brw_fb_WRITE(struct brw_compile *p, + int dispatch_width, struct brw_reg dest, GLuint msg_reg_nr, struct brw_reg src0, @@ -1340,22 +1361,40 @@ void brw_fb_WRITE(struct brw_compile *p, GLuint response_length, GLboolean eot) { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - + struct intel_context *intel = &p->brw->intel; + struct brw_instruction *insn; + GLuint msg_control, msg_type; + + insn = next_insn(p, BRW_OPCODE_SEND); insn->header.predicate_control = 0; /* XXX */ - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditionalmod = msg_reg_nr; - + insn->header.compression_control = BRW_COMPRESSION_NONE; + + if (intel->gen >= 6) { + /* headerless version, just submit color payload */ + src0 = brw_message_reg(msg_reg_nr); + + msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6; + } else { + insn->header.destreg__conditionalmod = msg_reg_nr; + + msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; + } + + if (dispatch_width == 16) + msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; + else + msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; + brw_set_dest(insn, dest); brw_set_src0(insn, src0); brw_set_dp_write_message(p->brw, insn, binding_table_index, - BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */ - BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */ + msg_control, + msg_type, msg_length, 1, /* pixel scoreboard */ - response_length, + response_length, eot, 0 /* send_commit_msg */); } diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp new file mode 100644 index 00000000000..34c5d5262fb --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -0,0 +1,1924 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person 
obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Eric Anholt <[email protected]> + * + */ + +extern "C" { + +#include <sys/types.h> + +#include "main/macros.h" +#include "main/shaderobj.h" +#include "program/prog_parameter.h" +#include "program/prog_print.h" +#include "program/prog_optimize.h" +#include "program/hash_table.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_wm.h" +#include "talloc.h" +} +#include "../glsl/glsl_types.h" +#include "../glsl/ir_optimization.h" +#include "../glsl/ir_print_visitor.h" + +enum register_file { + ARF = BRW_ARCHITECTURE_REGISTER_FILE, + GRF = BRW_GENERAL_REGISTER_FILE, + MRF = BRW_MESSAGE_REGISTER_FILE, + IMM = BRW_IMMEDIATE_VALUE, + FIXED_HW_REG, /* a struct brw_reg */ + UNIFORM, /* prog_data->params[hw_reg] */ + BAD_FILE +}; + +enum fs_opcodes { + FS_OPCODE_FB_WRITE = 256, + FS_OPCODE_RCP, + FS_OPCODE_RSQ, + FS_OPCODE_SQRT, + FS_OPCODE_EXP2, + FS_OPCODE_LOG2, + FS_OPCODE_POW, + FS_OPCODE_SIN, + FS_OPCODE_COS, + FS_OPCODE_DDX, + FS_OPCODE_DDY, + FS_OPCODE_LINTERP, + FS_OPCODE_TEX, + FS_OPCODE_TXB, + FS_OPCODE_TXL, + FS_OPCODE_DISCARD, +}; + +static int using_new_fs = -1; + +struct gl_shader * +brw_new_shader(GLcontext *ctx, GLuint name, GLuint type) +{ + struct brw_shader *shader; + + shader = talloc_zero(NULL, struct brw_shader); + if (shader) { + shader->base.Type = type; + shader->base.Name = name; + _mesa_init_shader(ctx, &shader->base); + } + + return &shader->base; +} + +struct gl_shader_program * +brw_new_shader_program(GLcontext *ctx, GLuint name) +{ + struct brw_shader_program *prog; + prog = talloc_zero(NULL, struct brw_shader_program); + if (prog) { + prog->base.Name = name; + _mesa_init_shader_program(ctx, &prog->base); + } + return &prog->base; +} + +GLboolean +brw_compile_shader(GLcontext *ctx, struct gl_shader *shader) +{ + if (!_mesa_ir_compile_shader(ctx, shader)) + return GL_FALSE; + + return GL_TRUE; +} + +GLboolean +brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog) +{ + if (using_new_fs == -1) + 
using_new_fs = getenv("INTEL_NEW_FS") != NULL; + + for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) { + struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i]; + + if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) { + void *mem_ctx = talloc_new(NULL); + bool progress; + + if (shader->ir) + talloc_free(shader->ir); + shader->ir = new(shader) exec_list; + clone_ir_list(mem_ctx, shader->ir, shader->base.ir); + + do_mat_op_to_vec(shader->ir); + do_mod_to_fract(shader->ir); + do_div_to_mul_rcp(shader->ir); + do_sub_to_add_neg(shader->ir); + do_explog_to_explog2(shader->ir); + + brw_do_channel_expressions(shader->ir); + brw_do_vector_splitting(shader->ir); + + do { + progress = false; + + progress = do_common_optimization(shader->ir, true) || progress; + } while (progress); + + validate_ir_tree(shader->ir); + + reparent_ir(shader->ir, shader->ir); + talloc_free(mem_ctx); + } + } + + if (!_mesa_ir_link_shader(ctx, prog)) + return GL_FALSE; + + return GL_TRUE; +} + +static int +type_size(const struct glsl_type *type) +{ + unsigned int size, i; + + switch (type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + return type->components(); + case GLSL_TYPE_ARRAY: + /* FINISHME: uniform/varying arrays. */ + return type_size(type->fields.array) * type->length; + case GLSL_TYPE_STRUCT: + size = 0; + for (i = 0; i < type->length; i++) { + size += type_size(type->fields.structure[i].type); + } + return size; + case GLSL_TYPE_SAMPLER: + /* Samplers take up no register space, since they're baked in at + * link time. + */ + return 0; + default: + assert(!"not reached"); + return 0; + } +} + +class fs_reg { +public: + /* Callers of this talloc-based new need not call delete. It's + * easier to just talloc_free 'ctx' (or any of its ancestors). 
*/ + static void* operator new(size_t size, void *ctx) + { + void *node; + + node = talloc_size(ctx, size); + assert(node != NULL); + + return node; + } + + /** Generic unset register constructor. */ + fs_reg() + { + this->file = BAD_FILE; + this->reg = 0; + this->reg_offset = 0; + this->hw_reg = -1; + this->negate = 0; + this->abs = 0; + } + + /** Immediate value constructor. */ + fs_reg(float f) + { + this->file = IMM; + this->reg = 0; + this->hw_reg = 0; + this->type = BRW_REGISTER_TYPE_F; + this->imm.f = f; + this->negate = 0; + this->abs = 0; + } + + /** Immediate value constructor. */ + fs_reg(int32_t i) + { + this->file = IMM; + this->reg = 0; + this->hw_reg = 0; + this->type = BRW_REGISTER_TYPE_D; + this->imm.i = i; + this->negate = 0; + this->abs = 0; + } + + /** Immediate value constructor. */ + fs_reg(uint32_t u) + { + this->file = IMM; + this->reg = 0; + this->hw_reg = 0; + this->type = BRW_REGISTER_TYPE_UD; + this->imm.u = u; + this->negate = 0; + this->abs = 0; + } + + /** Fixed brw_reg Immediate value constructor. */ + fs_reg(struct brw_reg fixed_hw_reg) + { + this->file = FIXED_HW_REG; + this->fixed_hw_reg = fixed_hw_reg; + this->reg = 0; + this->hw_reg = 0; + this->type = fixed_hw_reg.type; + this->negate = 0; + this->abs = 0; + } + + fs_reg(enum register_file file, int hw_reg); + fs_reg(class fs_visitor *v, const struct glsl_type *type); + + /** Register file: ARF, GRF, MRF, IMM. */ + enum register_file file; + /** Abstract register number. 0 = fixed hw reg */ + int reg; + /** Offset within the abstract register. */ + int reg_offset; + /** HW register number. Generally unset until register allocation. */ + int hw_reg; + /** Register type. 
BRW_REGISTER_TYPE_* */ + int type; + bool negate; + bool abs; + struct brw_reg fixed_hw_reg; + + /** Value for file == BRW_IMMMEDIATE_FILE */ + union { + int32_t i; + uint32_t u; + float f; + } imm; +}; + +static const fs_reg reg_undef; +static const fs_reg reg_null(ARF, BRW_ARF_NULL); + +class fs_inst : public exec_node { +public: + /* Callers of this talloc-based new need not call delete. It's + * easier to just talloc_free 'ctx' (or any of its ancestors). */ + static void* operator new(size_t size, void *ctx) + { + void *node; + + node = talloc_zero_size(ctx, size); + assert(node != NULL); + + return node; + } + + void init() + { + this->opcode = BRW_OPCODE_NOP; + this->saturate = false; + this->conditional_mod = BRW_CONDITIONAL_NONE; + this->predicated = false; + this->sampler = 0; + this->shadow_compare = false; + } + + fs_inst() + { + init(); + } + + fs_inst(int opcode) + { + init(); + this->opcode = opcode; + } + + fs_inst(int opcode, fs_reg dst, fs_reg src0) + { + init(); + this->opcode = opcode; + this->dst = dst; + this->src[0] = src0; + } + + fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1) + { + init(); + this->opcode = opcode; + this->dst = dst; + this->src[0] = src0; + this->src[1] = src1; + } + + fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) + { + init(); + this->opcode = opcode; + this->dst = dst; + this->src[0] = src0; + this->src[1] = src1; + this->src[2] = src2; + } + + int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ + fs_reg dst; + fs_reg src[3]; + bool saturate; + bool predicated; + int conditional_mod; /**< BRW_CONDITIONAL_* */ + + int mlen; /** SEND message length */ + int sampler; + bool shadow_compare; + + /** @{ + * Annotation for the generated IR. One of the two can be set. 
+ */ + ir_instruction *ir; + const char *annotation; + /** @} */ +}; + +class fs_visitor : public ir_visitor +{ +public: + + fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader) + { + this->c = c; + this->p = &c->func; + this->brw = p->brw; + this->intel = &brw->intel; + this->ctx = &intel->ctx; + this->mem_ctx = talloc_new(NULL); + this->shader = shader; + this->fail = false; + this->next_abstract_grf = 1; + this->variable_ht = hash_table_ctor(0, + hash_table_pointer_hash, + hash_table_pointer_compare); + + this->frag_color = NULL; + this->frag_data = NULL; + this->frag_depth = NULL; + this->first_non_payload_grf = 0; + + this->current_annotation = NULL; + this->annotation_string = NULL; + this->annotation_ir = NULL; + } + ~fs_visitor() + { + talloc_free(this->mem_ctx); + hash_table_dtor(this->variable_ht); + } + + fs_reg *variable_storage(ir_variable *var); + + void visit(ir_variable *ir); + void visit(ir_assignment *ir); + void visit(ir_dereference_variable *ir); + void visit(ir_dereference_record *ir); + void visit(ir_dereference_array *ir); + void visit(ir_expression *ir); + void visit(ir_texture *ir); + void visit(ir_if *ir); + void visit(ir_constant *ir); + void visit(ir_swizzle *ir); + void visit(ir_return *ir); + void visit(ir_loop *ir); + void visit(ir_loop_jump *ir); + void visit(ir_discard *ir); + void visit(ir_call *ir); + void visit(ir_function *ir); + void visit(ir_function_signature *ir); + + fs_inst *emit(fs_inst inst); + void assign_curb_setup(); + void assign_urb_setup(); + void assign_regs(); + void generate_code(); + void generate_fb_write(fs_inst *inst); + void generate_linterp(fs_inst *inst, struct brw_reg dst, + struct brw_reg *src); + void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src); + void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src); + void generate_discard(fs_inst *inst); + + void emit_dummy_fs(); + void emit_interpolation(); + void emit_pinterp(int location); + void 
emit_fb_writes(); + + struct brw_reg interp_reg(int location, int channel); + + struct brw_context *brw; + struct intel_context *intel; + GLcontext *ctx; + struct brw_wm_compile *c; + struct brw_compile *p; + struct brw_shader *shader; + void *mem_ctx; + exec_list instructions; + int next_abstract_grf; + struct hash_table *variable_ht; + ir_variable *frag_color, *frag_data, *frag_depth; + int first_non_payload_grf; + + /** @{ debug annotation info */ + const char *current_annotation; + ir_instruction *base_ir; + const char **annotation_string; + ir_instruction **annotation_ir; + /** @} */ + + bool fail; + + /* Result of last visit() method. */ + fs_reg result; + + fs_reg pixel_x; + fs_reg pixel_y; + fs_reg pixel_w; + fs_reg delta_x; + fs_reg delta_y; + fs_reg interp_attrs[64]; + + int grf_used; + +}; + +/** Fixed HW reg constructor. */ +fs_reg::fs_reg(enum register_file file, int hw_reg) +{ + this->file = file; + this->reg = 0; + this->reg_offset = 0; + this->hw_reg = hw_reg; + this->type = BRW_REGISTER_TYPE_F; + this->negate = 0; + this->abs = 0; +} + +/** Automatic reg constructor. 
*/ +fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type) +{ + this->file = GRF; + this->reg = v->next_abstract_grf; + this->reg_offset = 0; + v->next_abstract_grf += type_size(type); + this->hw_reg = -1; + this->negate = 0; + this->abs = 0; + + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + this->type = BRW_REGISTER_TYPE_F; + break; + case GLSL_TYPE_INT: + case GLSL_TYPE_BOOL: + this->type = BRW_REGISTER_TYPE_D; + break; + case GLSL_TYPE_UINT: + this->type = BRW_REGISTER_TYPE_UD; + break; + default: + assert(!"not reached"); + this->type = BRW_REGISTER_TYPE_F; + break; + } +} + +fs_reg * +fs_visitor::variable_storage(ir_variable *var) +{ + return (fs_reg *)hash_table_find(this->variable_ht, var); +} + +void +fs_visitor::visit(ir_variable *ir) +{ + fs_reg *reg = NULL; + + if (strcmp(ir->name, "gl_FragColor") == 0) { + this->frag_color = ir; + } else if (strcmp(ir->name, "gl_FragData") == 0) { + this->frag_data = ir; + } else if (strcmp(ir->name, "gl_FragDepth") == 0) { + this->frag_depth = ir; + assert(!"FINISHME: this hangs currently."); + } + + if (ir->mode == ir_var_in) { + reg = &this->interp_attrs[ir->location]; + } + + if (ir->mode == ir_var_uniform) { + const float *vec_values; + int param_index = c->prog_data.nr_params; + + /* FINISHME: This is wildly incomplete. */ + assert(ir->type->is_scalar() || ir->type->is_vector() || + ir->type->is_sampler()); + + const struct gl_program *fp = &this->brw->fragment_program->Base; + /* Our support for uniforms is piggy-backed on the struct + * gl_fragment_program, because that's where the values actually + * get stored, rather than in some global gl_shader_program uniform + * store. 
+ */ + vec_values = fp->Parameters->ParameterValues[ir->location]; + for (unsigned int i = 0; i < ir->type->vector_elements; i++) { + c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i]; + } + + reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index); + } + + if (!reg) + reg = new(this->mem_ctx) fs_reg(this, ir->type); + + hash_table_insert(this->variable_ht, reg, ir); +} + +void +fs_visitor::visit(ir_dereference_variable *ir) +{ + fs_reg *reg = variable_storage(ir->var); + this->result = *reg; +} + +void +fs_visitor::visit(ir_dereference_record *ir) +{ + assert(!"FINISHME"); +} + +void +fs_visitor::visit(ir_dereference_array *ir) +{ + ir_constant *index; + int element_size; + + ir->array->accept(this); + index = ir->array_index->as_constant(); + + if (ir->type->is_matrix()) { + element_size = ir->type->vector_elements; + } else { + element_size = type_size(ir->type); + } + + if (index) { + assert(this->result.file == UNIFORM || + (this->result.file == GRF && + this->result.reg != 0)); + this->result.reg_offset += index->value.i[0] * element_size; + } else { + assert(!"FINISHME: non-constant matrix column"); + } +} + +void +fs_visitor::visit(ir_expression *ir) +{ + unsigned int operand; + fs_reg op[2], temp; + fs_reg result; + fs_inst *inst; + + for (operand = 0; operand < ir->get_num_operands(); operand++) { + ir->operands[operand]->accept(this); + if (this->result.file == BAD_FILE) { + ir_print_visitor v; + printf("Failed to get tree for expression operand:\n"); + ir->operands[operand]->accept(&v); + this->fail = true; + } + op[operand] = this->result; + + /* Matrix expression operands should have been broken down to vector + * operations already. + */ + assert(!ir->operands[operand]->type->is_matrix()); + /* And then those vector operands should have been broken down to scalar. + */ + assert(!ir->operands[operand]->type->is_vector()); + } + + /* Storage for our result. 
If our result goes into an assignment, it will + * just get copy-propagated out, so no worries. + */ + this->result = fs_reg(this, ir->type); + + switch (ir->operation) { + case ir_unop_logic_not: + emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1))); + break; + case ir_unop_neg: + op[0].negate = ~op[0].negate; + this->result = op[0]; + break; + case ir_unop_abs: + op[0].abs = true; + this->result = op[0]; + break; + case ir_unop_sign: + temp = fs_reg(this, ir->type); + + emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f))); + + inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f))); + inst->conditional_mod = BRW_CONDITIONAL_G; + inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f))); + inst->predicated = true; + + inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f))); + inst->conditional_mod = BRW_CONDITIONAL_L; + inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f))); + inst->predicated = true; + + break; + case ir_unop_rcp: + emit(fs_inst(FS_OPCODE_RCP, this->result, op[0])); + break; + + case ir_unop_exp2: + emit(fs_inst(FS_OPCODE_EXP2, this->result, op[0])); + break; + case ir_unop_log2: + emit(fs_inst(FS_OPCODE_LOG2, this->result, op[0])); + break; + case ir_unop_exp: + case ir_unop_log: + assert(!"not reached: should be handled by ir_explog_to_explog2"); + break; + case ir_unop_sin: + emit(fs_inst(FS_OPCODE_SIN, this->result, op[0])); + break; + case ir_unop_cos: + emit(fs_inst(FS_OPCODE_COS, this->result, op[0])); + break; + + case ir_unop_dFdx: + emit(fs_inst(FS_OPCODE_DDX, this->result, op[0])); + break; + case ir_unop_dFdy: + emit(fs_inst(FS_OPCODE_DDY, this->result, op[0])); + break; + + case ir_binop_add: + emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1])); + break; + case ir_binop_sub: + assert(!"not reached: should be handled by ir_sub_to_add_neg"); + break; + + case ir_binop_mul: + emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1])); + break; + case ir_binop_div: + 
assert(!"not reached: should be handled by ir_div_to_mul_rcp"); + break; + case ir_binop_mod: + assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); + break; + + case ir_binop_less: + inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); + inst->conditional_mod = BRW_CONDITIONAL_L; + emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); + break; + case ir_binop_greater: + inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); + inst->conditional_mod = BRW_CONDITIONAL_G; + emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); + break; + case ir_binop_lequal: + inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); + inst->conditional_mod = BRW_CONDITIONAL_LE; + emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); + break; + case ir_binop_gequal: + inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); + inst->conditional_mod = BRW_CONDITIONAL_GE; + emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); + break; + case ir_binop_equal: + inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); + inst->conditional_mod = BRW_CONDITIONAL_Z; + emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); + break; + case ir_binop_nequal: + inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); + break; + + case ir_binop_logic_xor: + emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1])); + break; + + case ir_binop_logic_or: + emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1])); + break; + + case ir_binop_logic_and: + emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1])); + break; + + case ir_binop_dot: + case ir_binop_cross: + case ir_unop_any: + assert(!"not reached: should be handled by brw_channel_expressions"); + break; + + case ir_unop_sqrt: + emit(fs_inst(FS_OPCODE_SQRT, 
this->result, op[0])); + break; + + case ir_unop_rsq: + emit(fs_inst(FS_OPCODE_RSQ, this->result, op[0])); + break; + + case ir_unop_i2f: + case ir_unop_b2f: + case ir_unop_b2i: + emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); + break; + case ir_unop_f2i: + emit(fs_inst(BRW_OPCODE_RNDZ, this->result, op[0])); + break; + case ir_unop_f2b: + case ir_unop_i2b: + inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f))); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + case ir_unop_trunc: + emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); + break; + case ir_unop_ceil: + op[0].negate = ~op[0].negate; + inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); + this->result.negate = true; + break; + case ir_unop_floor: + inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); + break; + case ir_unop_fract: + inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0])); + break; + + case ir_binop_min: + inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); + inst->conditional_mod = BRW_CONDITIONAL_L; + + inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); + inst->predicated = true; + break; + case ir_binop_max: + inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); + inst->conditional_mod = BRW_CONDITIONAL_G; + + inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); + inst->predicated = true; + break; + + case ir_binop_pow: + inst = emit(fs_inst(FS_OPCODE_POW, this->result, op[0], op[1])); + break; + + case ir_unop_bit_not: + case ir_unop_u2f: + case ir_binop_lshift: + case ir_binop_rshift: + case ir_binop_bit_and: + case ir_binop_bit_xor: + case ir_binop_bit_or: + assert(!"GLSL 1.30 features unsupported"); + break; + } +} + +void +fs_visitor::visit(ir_assignment *ir) +{ + struct fs_reg l, r; + int i; + int write_mask; + fs_inst *inst; + + /* FINISHME: arrays on the lhs */ + ir->lhs->accept(this); + l = this->result; + + ir->rhs->accept(this); + r = this->result; + + /* FINISHME: This should 
really set to the correct maximal writemask for each + * FINISHME: component written (in the loops below). This case can only + * FINISHME: occur for matrices, arrays, and structures. + */ + if (ir->write_mask == 0) { + assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); + write_mask = WRITEMASK_XYZW; + } else { + assert(ir->lhs->type->is_vector() || ir->lhs->type->is_scalar()); + write_mask = ir->write_mask; + } + + assert(l.file != BAD_FILE); + assert(r.file != BAD_FILE); + + if (ir->condition) { + /* Get the condition bool into the predicate. */ + ir->condition->accept(this); + inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, fs_reg(0))); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + } + + for (i = 0; i < type_size(ir->lhs->type); i++) { + if (i >= 4 || (write_mask & (1 << i))) { + inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); + if (ir->condition) + inst->predicated = true; + } + l.reg_offset++; + r.reg_offset++; + } +} + +void +fs_visitor::visit(ir_texture *ir) +{ + int base_mrf = 2; + fs_inst *inst = NULL; + unsigned int mlen = 0; + + ir->coordinate->accept(this); + fs_reg coordinate = this->result; + + if (ir->projector) { + fs_reg inv_proj = fs_reg(this, glsl_type::float_type); + + ir->projector->accept(this); + emit(fs_inst(FS_OPCODE_RCP, inv_proj, this->result)); + + fs_reg proj_coordinate = fs_reg(this, ir->coordinate->type); + for (unsigned int i = 0; i < ir->coordinate->type->vector_elements; i++) { + emit(fs_inst(BRW_OPCODE_MUL, proj_coordinate, coordinate, inv_proj)); + coordinate.reg_offset++; + proj_coordinate.reg_offset++; + } + proj_coordinate.reg_offset = 0; + + coordinate = proj_coordinate; + } + + for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) { + emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate)); + coordinate.reg_offset++; + } + + /* Pre-Ironlake, the 8-wide sampler always took u,v,r. 
*/ + if (intel->gen < 5) + mlen = 3; + + if (ir->shadow_comparitor) { + /* For shadow comparisons, we have to supply u,v,r. */ + mlen = 3; + + ir->shadow_comparitor->accept(this); + emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); + mlen++; + } + + /* Do we ever want to handle writemasking on texture samples? Is it + * performance relevant? + */ + fs_reg dst = fs_reg(this, glsl_type::vec4_type); + + switch (ir->op) { + case ir_tex: + inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf))); + break; + case ir_txb: + ir->lod_info.bias->accept(this); + emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); + mlen++; + + inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf))); + break; + case ir_txl: + ir->lod_info.lod->accept(this); + emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); + mlen++; + + inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf))); + break; + case ir_txd: + case ir_txf: + assert(!"GLSL 1.30 features unsupported"); + break; + } + + this->result = dst; + + if (ir->shadow_comparitor) + inst->shadow_compare = true; + inst->mlen = mlen; +} + +void +fs_visitor::visit(ir_swizzle *ir) +{ + ir->val->accept(this); + fs_reg val = this->result; + + fs_reg result = fs_reg(this, ir->type); + this->result = result; + + for (unsigned int i = 0; i < ir->type->vector_elements; i++) { + fs_reg channel = val; + int swiz = 0; + + switch (i) { + case 0: + swiz = ir->mask.x; + break; + case 1: + swiz = ir->mask.y; + break; + case 2: + swiz = ir->mask.z; + break; + case 3: + swiz = ir->mask.w; + break; + } + + channel.reg_offset += swiz; + emit(fs_inst(BRW_OPCODE_MOV, result, channel)); + result.reg_offset++; + } +} + +void +fs_visitor::visit(ir_discard *ir) +{ + assert(ir->condition == NULL); /* FINISHME */ + + emit(fs_inst(FS_OPCODE_DISCARD)); +} + +void +fs_visitor::visit(ir_constant *ir) +{ + fs_reg reg(this, ir->type); + this->result = reg; + + for (unsigned int i = 0; i 
< ir->type->vector_elements; i++) { + switch (ir->type->base_type) { + case GLSL_TYPE_FLOAT: + emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i]))); + break; + case GLSL_TYPE_UINT: + emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i]))); + break; + case GLSL_TYPE_INT: + emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i]))); + break; + case GLSL_TYPE_BOOL: + emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i]))); + break; + default: + assert(!"Non-float/uint/int/bool constant"); + } + reg.reg_offset++; + } +} + +void +fs_visitor::visit(ir_if *ir) +{ + fs_inst *inst; + + /* Don't point the annotation at the if statement, because then it plus + * the then and else blocks get printed. + */ + this->base_ir = ir->condition; + + /* Generate the condition into the condition code. */ + ir->condition->accept(this); + inst = emit(fs_inst(BRW_OPCODE_MOV, fs_reg(brw_null_reg()), this->result)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + inst = emit(fs_inst(BRW_OPCODE_IF)); + inst->predicated = true; + + foreach_iter(exec_list_iterator, iter, ir->then_instructions) { + ir_instruction *ir = (ir_instruction *)iter.get(); + this->base_ir = ir; + + ir->accept(this); + } + + if (!ir->else_instructions.is_empty()) { + emit(fs_inst(BRW_OPCODE_ELSE)); + + foreach_iter(exec_list_iterator, iter, ir->else_instructions) { + ir_instruction *ir = (ir_instruction *)iter.get(); + this->base_ir = ir; + + ir->accept(this); + } + } + + emit(fs_inst(BRW_OPCODE_ENDIF)); +} + +void +fs_visitor::visit(ir_loop *ir) +{ + assert(!ir->from); + assert(!ir->to); + assert(!ir->increment); + assert(!ir->counter); + + emit(fs_inst(BRW_OPCODE_DO)); + + /* Start a safety counter. If the user messed up their loop + * counting, we don't want to hang the GPU. 
+ */ + fs_reg max_iter = fs_reg(this, glsl_type::int_type); + emit(fs_inst(BRW_OPCODE_MOV, max_iter, fs_reg(10000))); + + foreach_iter(exec_list_iterator, iter, ir->body_instructions) { + ir_instruction *ir = (ir_instruction *)iter.get(); + fs_inst *inst; + + this->base_ir = ir; + ir->accept(this); + + /* Check the maximum loop iters counter. */ + inst = emit(fs_inst(BRW_OPCODE_ADD, max_iter, max_iter, fs_reg(-1))); + inst->conditional_mod = BRW_CONDITIONAL_Z; + + inst = emit(fs_inst(BRW_OPCODE_BREAK)); + inst->predicated = true; + } + + emit(fs_inst(BRW_OPCODE_WHILE)); +} + +void +fs_visitor::visit(ir_loop_jump *ir) +{ + switch (ir->mode) { + case ir_loop_jump::jump_break: + emit(fs_inst(BRW_OPCODE_BREAK)); + break; + case ir_loop_jump::jump_continue: + emit(fs_inst(BRW_OPCODE_CONTINUE)); + break; + } +} + +void +fs_visitor::visit(ir_call *ir) +{ + assert(!"FINISHME"); +} + +void +fs_visitor::visit(ir_return *ir) +{ + assert(!"FINISHME"); +} + +void +fs_visitor::visit(ir_function *ir) +{ + /* Ignore function bodies other than main() -- we shouldn't see calls to + * them since they should all be inlined before we get to ir_to_mesa. + */ + if (strcmp(ir->name, "main") == 0) { + const ir_function_signature *sig; + exec_list empty; + + sig = ir->matching_signature(&empty); + + assert(sig); + + foreach_iter(exec_list_iterator, iter, sig->body) { + ir_instruction *ir = (ir_instruction *)iter.get(); + this->base_ir = ir; + + ir->accept(this); + } + } +} + +void +fs_visitor::visit(ir_function_signature *ir) +{ + assert(!"not reached"); + (void)ir; +} + +fs_inst * +fs_visitor::emit(fs_inst inst) +{ + fs_inst *list_inst = new(mem_ctx) fs_inst; + *list_inst = inst; + + list_inst->annotation = this->current_annotation; + list_inst->ir = this->base_ir; + + this->instructions.push_tail(list_inst); + + return list_inst; +} + +/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */ +void +fs_visitor::emit_dummy_fs() +{ + /* Everyone's favorite color. 
*/ + emit(fs_inst(BRW_OPCODE_MOV, + fs_reg(MRF, 2), + fs_reg(1.0f))); + emit(fs_inst(BRW_OPCODE_MOV, + fs_reg(MRF, 3), + fs_reg(0.0f))); + emit(fs_inst(BRW_OPCODE_MOV, + fs_reg(MRF, 4), + fs_reg(1.0f))); + emit(fs_inst(BRW_OPCODE_MOV, + fs_reg(MRF, 5), + fs_reg(0.0f))); + + fs_inst *write; + write = emit(fs_inst(FS_OPCODE_FB_WRITE, + fs_reg(0), + fs_reg(0))); +} + +/* The register location here is relative to the start of the URB + * data. It will get adjusted to be a real location before + * generate_code() time. + */ +struct brw_reg +fs_visitor::interp_reg(int location, int channel) +{ + int regnr = location * 2 + channel / 2; + int stride = (channel & 1) * 4; + + return brw_vec1_grf(regnr, stride); +} + +/** Emits the interpolation for the varying inputs. */ +void +fs_visitor::emit_interpolation() +{ + struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); + /* For now, the source regs for the setup URB data will be unset, + * since we don't know until codegen how many push constants we'll + * use, and therefore what the setup URB offset is. + */ + fs_reg src_reg = reg_undef; + + this->current_annotation = "compute pixel centers"; + this->pixel_x = fs_reg(this, glsl_type::uint_type); + this->pixel_y = fs_reg(this, glsl_type::uint_type); + emit(fs_inst(BRW_OPCODE_ADD, + this->pixel_x, + fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), + fs_reg(brw_imm_v(0x10101010)))); + emit(fs_inst(BRW_OPCODE_ADD, + this->pixel_y, + fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), + fs_reg(brw_imm_v(0x11001100)))); + + this->current_annotation = "compute pixel deltas from v0"; + this->delta_x = fs_reg(this, glsl_type::float_type); + this->delta_y = fs_reg(this, glsl_type::float_type); + emit(fs_inst(BRW_OPCODE_ADD, + this->delta_x, + this->pixel_x, + fs_reg(negate(brw_vec1_grf(1, 0))))); + emit(fs_inst(BRW_OPCODE_ADD, + this->delta_y, + this->pixel_y, + fs_reg(brw_vec1_grf(1, 1)))); + + this->current_annotation = "compute pos.w and 1/pos.w"; + /* Compute wpos. 
Unlike many other varying inputs, we usually need it + * to produce 1/w, and the varying variable wouldn't show up. + */ + fs_reg wpos = fs_reg(this, glsl_type::vec4_type); + this->interp_attrs[FRAG_ATTRIB_WPOS] = wpos; + emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x)); /* FINISHME: ARB_fcc */ + wpos.reg_offset++; + emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y)); /* FINISHME: ARB_fcc */ + wpos.reg_offset++; + emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, + interp_reg(FRAG_ATTRIB_WPOS, 2))); + wpos.reg_offset++; + emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, + interp_reg(FRAG_ATTRIB_WPOS, 3))); + /* Compute the pixel W value from wpos.w. */ + this->pixel_w = fs_reg(this, glsl_type::float_type); + emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos)); + + /* FINISHME: gl_FrontFacing */ + + foreach_iter(exec_list_iterator, iter, *this->shader->ir) { + ir_instruction *ir = (ir_instruction *)iter.get(); + ir_variable *var = ir->as_variable(); + + if (!var) + continue; + + if (var->mode != ir_var_in) + continue; + + /* If it's already set up (WPOS), skip. 
*/ + if (var->location == 0) + continue; + + this->current_annotation = talloc_asprintf(this->mem_ctx, + "interpolate %s " + "(FRAG_ATTRIB[%d])", + var->name, + var->location); + emit_pinterp(var->location); + } + this->current_annotation = NULL; +} + +void +fs_visitor::emit_pinterp(int location) +{ + fs_reg interp_attr = fs_reg(this, glsl_type::vec4_type); + this->interp_attrs[location] = interp_attr; + + for (unsigned int i = 0; i < 4; i++) { + struct brw_reg interp = interp_reg(location, i); + emit(fs_inst(FS_OPCODE_LINTERP, + interp_attr, + this->delta_x, + this->delta_y, + fs_reg(interp))); + interp_attr.reg_offset++; + } + interp_attr.reg_offset -= 4; + + for (unsigned int i = 0; i < 4; i++) { + emit(fs_inst(BRW_OPCODE_MUL, + interp_attr, + interp_attr, + this->pixel_w)); + interp_attr.reg_offset++; + } +} + +void +fs_visitor::emit_fb_writes() +{ + this->current_annotation = "FB write"; + + assert(this->frag_color || !"FINISHME: MRT"); + fs_reg color = *(variable_storage(this->frag_color)); + + for (int i = 0; i < 4; i++) { + emit(fs_inst(BRW_OPCODE_MOV, + fs_reg(MRF, 2 + i), + color)); + color.reg_offset++; + } + + emit(fs_inst(FS_OPCODE_FB_WRITE, + fs_reg(0), + fs_reg(0))); + + this->current_annotation = NULL; +} + +void +fs_visitor::generate_fb_write(fs_inst *inst) +{ + GLboolean eot = 1; /* FINISHME: MRT */ + /* FINISHME: AADS */ + + /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied + * move, here's g1. 
+ */ + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, + brw_message_reg(1), + brw_vec8_grf(1, 0)); + brw_pop_insn_state(p); + + int nr = 2 + 4; + + brw_fb_WRITE(p, + 8, /* dispatch_width */ + retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), + 0, /* base MRF */ + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), + 0, /* FINISHME: MRT target */ + nr, + 0, + eot); +} + +void +fs_visitor::generate_linterp(fs_inst *inst, + struct brw_reg dst, struct brw_reg *src) +{ + struct brw_reg delta_x = src[0]; + struct brw_reg delta_y = src[1]; + struct brw_reg interp = src[2]; + + if (brw->has_pln && + delta_y.nr == delta_x.nr + 1 && + (intel->gen >= 6 || (delta_x.nr & 1) == 0)) { + brw_PLN(p, dst, interp, delta_x); + } else { + brw_LINE(p, brw_null_reg(), interp, delta_x); + brw_MAC(p, dst, suboffset(interp, 1), delta_y); + } +} + +void +fs_visitor::generate_math(fs_inst *inst, + struct brw_reg dst, struct brw_reg *src) +{ + int op; + + switch (inst->opcode) { + case FS_OPCODE_RCP: + op = BRW_MATH_FUNCTION_INV; + break; + case FS_OPCODE_RSQ: + op = BRW_MATH_FUNCTION_RSQ; + break; + case FS_OPCODE_SQRT: + op = BRW_MATH_FUNCTION_SQRT; + break; + case FS_OPCODE_EXP2: + op = BRW_MATH_FUNCTION_EXP; + break; + case FS_OPCODE_LOG2: + op = BRW_MATH_FUNCTION_LOG; + break; + case FS_OPCODE_POW: + op = BRW_MATH_FUNCTION_POW; + break; + case FS_OPCODE_SIN: + op = BRW_MATH_FUNCTION_SIN; + break; + case FS_OPCODE_COS: + op = BRW_MATH_FUNCTION_COS; + break; + default: + assert(!"not reached: unknown math function"); + op = 0; + break; + } + + if (inst->opcode == FS_OPCODE_POW) { + brw_MOV(p, brw_message_reg(3), src[1]); + } + + brw_math(p, dst, + op, + inst->saturate ? 
BRW_MATH_SATURATE_SATURATE : + BRW_MATH_SATURATE_NONE, + 2, src[0], + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); +} + +void +fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) +{ + int msg_type = -1; + int rlen = 4; + + if (intel->gen == 5) { + switch (inst->opcode) { + case FS_OPCODE_TEX: + if (inst->shadow_compare) { + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5; + } else { + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5; + } + break; + case FS_OPCODE_TXB: + if (inst->shadow_compare) { + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5; + } else { + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5; + } + break; + } + } else { + switch (inst->opcode) { + case FS_OPCODE_TEX: + /* Note that G45 and older determines shadow compare and dispatch width + * from message length for most messages. + */ + if (inst->shadow_compare) { + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; + } else { + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE; + } + case FS_OPCODE_TXB: + if (inst->shadow_compare) { + assert(!"FINISHME: shadow compare with bias."); + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; + } else { + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; + rlen = 8; + } + break; + } + } + assert(msg_type != -1); + + /* g0 header. 
*/ + src.nr--; + + brw_SAMPLE(p, + retype(dst, BRW_REGISTER_TYPE_UW), + src.nr, + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), + SURF_INDEX_TEXTURE(inst->sampler), + inst->sampler, + WRITEMASK_XYZW, + msg_type, + rlen, + inst->mlen + 1, + 0, + 1, + BRW_SAMPLER_SIMD_MODE_SIMD8); +} + +void +fs_visitor::generate_discard(fs_inst *inst) +{ + struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */ + brw_AND(p, g0, c->emit_mask_reg, g0); + brw_pop_insn_state(p); +} + +static void +trivial_assign_reg(int header_size, fs_reg *reg) +{ + if (reg->file == GRF && reg->reg != 0) { + reg->hw_reg = header_size + reg->reg - 1 + reg->reg_offset; + reg->reg = 0; + } +} + +void +fs_visitor::assign_curb_setup() +{ + c->prog_data.first_curbe_grf = c->key.nr_payload_regs; + c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8; + + /* Map the offsets in the UNIFORM file to fixed HW regs. */ + foreach_iter(exec_list_iterator, iter, this->instructions) { + fs_inst *inst = (fs_inst *)iter.get(); + + for (unsigned int i = 0; i < 3; i++) { + if (inst->src[i].file == UNIFORM) { + int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; + struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf + + constant_nr / 8, + constant_nr % 8); + + inst->src[i].file = FIXED_HW_REG; + inst->src[i].fixed_hw_reg = brw_reg; + } + } + } +} + +void +fs_visitor::assign_urb_setup() +{ + int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length; + int interp_reg_nr[FRAG_ATTRIB_MAX]; + + c->prog_data.urb_read_length = 0; + + /* Figure out where each of the incoming setup attributes lands. 
*/ + for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) { + interp_reg_nr[i] = -1; + + if (i != FRAG_ATTRIB_WPOS && + !(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i))) + continue; + + /* Each attribute is 4 setup channels, each of which is half a reg. */ + interp_reg_nr[i] = urb_start + c->prog_data.urb_read_length; + c->prog_data.urb_read_length += 2; + } + + /* Map the register numbers for FS_OPCODE_LINTERP so that it uses + * the correct setup input. + */ + foreach_iter(exec_list_iterator, iter, this->instructions) { + fs_inst *inst = (fs_inst *)iter.get(); + + if (inst->opcode != FS_OPCODE_LINTERP) + continue; + + assert(inst->src[2].file == FIXED_HW_REG); + + int location = inst->src[2].fixed_hw_reg.nr / 2; + assert(interp_reg_nr[location] != -1); + inst->src[2].fixed_hw_reg.nr = (interp_reg_nr[location] + + (inst->src[2].fixed_hw_reg.nr & 1)); + } + + this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length; +} + +void +fs_visitor::assign_regs() +{ + int header_size = this->first_non_payload_grf; + int last_grf = 0; + + /* FINISHME: trivial assignment of register numbers */ + foreach_iter(exec_list_iterator, iter, this->instructions) { + fs_inst *inst = (fs_inst *)iter.get(); + + trivial_assign_reg(header_size, &inst->dst); + trivial_assign_reg(header_size, &inst->src[0]); + trivial_assign_reg(header_size, &inst->src[1]); + + last_grf = MAX2(last_grf, inst->dst.hw_reg); + last_grf = MAX2(last_grf, inst->src[0].hw_reg); + last_grf = MAX2(last_grf, inst->src[1].hw_reg); + } + + this->grf_used = last_grf + 1; +} + +static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) +{ + struct brw_reg brw_reg; + + switch (reg->file) { + case GRF: + case ARF: + case MRF: + brw_reg = brw_vec8_reg(reg->file, + reg->hw_reg, 0); + brw_reg = retype(brw_reg, reg->type); + break; + case IMM: + switch (reg->type) { + case BRW_REGISTER_TYPE_F: + brw_reg = brw_imm_f(reg->imm.f); + break; + case BRW_REGISTER_TYPE_D: + brw_reg = brw_imm_d(reg->imm.i); + break; 
+ case BRW_REGISTER_TYPE_UD: + brw_reg = brw_imm_ud(reg->imm.u); + break; + default: + assert(!"not reached"); + break; + } + break; + case FIXED_HW_REG: + brw_reg = reg->fixed_hw_reg; + break; + case BAD_FILE: + /* Probably unused. */ + brw_reg = brw_null_reg(); + break; + case UNIFORM: + assert(!"not reached"); + brw_reg = brw_null_reg(); + break; + } + if (reg->abs) + brw_reg = brw_abs(brw_reg); + if (reg->negate) + brw_reg = negate(brw_reg); + + return brw_reg; +} + +void +fs_visitor::generate_code() +{ + unsigned int annotation_len = 0; + int last_native_inst = 0; + struct brw_instruction *if_stack[16], *loop_stack[16]; + int if_stack_depth = 0, loop_stack_depth = 0; + int if_depth_in_loop[16]; + + if_depth_in_loop[loop_stack_depth] = 0; + + memset(&if_stack, 0, sizeof(if_stack)); + foreach_iter(exec_list_iterator, iter, this->instructions) { + fs_inst *inst = (fs_inst *)iter.get(); + struct brw_reg src[3], dst; + + for (unsigned int i = 0; i < 3; i++) { + src[i] = brw_reg_from_fs_reg(&inst->src[i]); + } + dst = brw_reg_from_fs_reg(&inst->dst); + + brw_set_conditionalmod(p, inst->conditional_mod); + brw_set_predicate_control(p, inst->predicated); + + switch (inst->opcode) { + case BRW_OPCODE_MOV: + brw_MOV(p, dst, src[0]); + break; + case BRW_OPCODE_ADD: + brw_ADD(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_MUL: + brw_MUL(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_FRC: + brw_FRC(p, dst, src[0]); + break; + case BRW_OPCODE_RNDD: + brw_RNDD(p, dst, src[0]); + break; + case BRW_OPCODE_RNDZ: + brw_RNDZ(p, dst, src[0]); + break; + + case BRW_OPCODE_AND: + brw_AND(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_OR: + brw_OR(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_XOR: + brw_XOR(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_CMP: + brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); + break; + case BRW_OPCODE_SEL: + brw_SEL(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_IF: + assert(if_stack_depth < 16); + 
if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8);
+         /* Count the IFs opened inside the current loop.  BREAK and
+          * CONTINUE below hand this count to brw_BREAK()/brw_CONT();
+          * without the bookkeeping it would always read 0.
+          * NOTE(review): presumably the mask-stack pop count -- confirm
+          * against brw_eu_emit.c.
+          */
+         if_depth_in_loop[loop_stack_depth]++;
+         if_stack_depth++;
+         break;
+      case BRW_OPCODE_ELSE:
+         assert(if_stack_depth > 0);
+         if_stack[if_stack_depth - 1] =
+            brw_ELSE(p, if_stack[if_stack_depth - 1]);
+         break;
+      case BRW_OPCODE_ENDIF:
+         assert(if_stack_depth > 0);
+         if_stack_depth--;
+         brw_ENDIF(p , if_stack[if_stack_depth]);
+         if_depth_in_loop[loop_stack_depth]--;
+         break;
+
+      case BRW_OPCODE_DO:
+         /* Both loop_stack[] and if_depth_in_loop[] are fixed-size; the
+          * slot written after the increment must still be in range.
+          */
+         assert(loop_stack_depth + 1 < (int)ARRAY_SIZE(if_depth_in_loop));
+         loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
+         if_depth_in_loop[loop_stack_depth] = 0;
+         break;
+
+      case BRW_OPCODE_BREAK:
+         brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
+         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+         break;
+      case BRW_OPCODE_CONTINUE:
+         brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
+         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+         break;
+
+      case BRW_OPCODE_WHILE: {
+         struct brw_instruction *inst0, *inst1;
+         GLuint br = 1;
+
+         if (intel->gen == 5)
+            br = 2;
+
+         assert(loop_stack_depth > 0);
+         loop_stack_depth--;
+         inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
+         /* patch all the BREAK/CONT instructions from last BGNLOOP */
+         while (inst0 > loop_stack[loop_stack_depth]) {
+            inst0--;
+            if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+                inst0->bits3.if_else.jump_count == 0) {
+               inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+            }
+            else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+                     inst0->bits3.if_else.jump_count == 0) {
+               inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+            }
+         }
+      }
+         break;
+
+      case FS_OPCODE_RCP:
+      case FS_OPCODE_RSQ:
+      case FS_OPCODE_SQRT:
+      case FS_OPCODE_EXP2:
+      case FS_OPCODE_LOG2:
+      case FS_OPCODE_POW:
+      case FS_OPCODE_SIN:
+      case FS_OPCODE_COS:
+         generate_math(inst, dst, src);
+         break;
+      case FS_OPCODE_LINTERP:
+         generate_linterp(inst, dst, src);
+         break;
+      case FS_OPCODE_TEX:
+      case FS_OPCODE_TXB:
+      case FS_OPCODE_TXL:
+         generate_tex(inst, dst, src[0]);
+         break;
+      case FS_OPCODE_DISCARD:
+         generate_discard(inst);
+         break;
+      case FS_OPCODE_FB_WRITE:
+         generate_fb_write(inst);
+         break;
+      default:
+         if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
+            _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
+                          brw_opcodes[inst->opcode].name);
+         } else {
+            _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
+         }
+         this->fail = true;
+      }
+
+      if (annotation_len < p->nr_insn) {
+         /* The loop below annotates every native instruction emitted for
+          * this FS instruction, i.e. indices up to p->nr_insn - 1.  A
+          * single doubling does not guarantee that much room, so keep
+          * growing until the arrays cover p->nr_insn entries.
+          */
+         while (annotation_len < p->nr_insn) {
+            annotation_len *= 2;
+            if (annotation_len < 16)
+               annotation_len = 16;
+         }
+
+         this->annotation_string = talloc_realloc(this->mem_ctx,
+                                                  annotation_string,
+                                                  const char *,
+                                                  annotation_len);
+         this->annotation_ir = talloc_realloc(this->mem_ctx,
+                                              annotation_ir,
+                                              ir_instruction *,
+                                              annotation_len);
+      }
+
+      for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
+         this->annotation_string[i] = inst->annotation;
+         this->annotation_ir[i] = inst->ir;
+      }
+      last_native_inst = p->nr_insn;
+   }
+}
+
+GLboolean
+brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
+{
+   struct brw_compile *p = &c->func;
+   struct intel_context *intel = &brw->intel;
+   GLcontext *ctx = &intel->ctx;
+   struct brw_shader *shader = NULL;
+   struct gl_shader_program *prog = ctx->Shader.CurrentProgram;
+
+   if (!prog)
+      return GL_FALSE;
+
+   if (!using_new_fs)
+      return GL_FALSE;
+
+   for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) {
+      if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) {
+         shader = (struct brw_shader *)prog->_LinkedShaders[i];
+         break;
+      }
+   }
+   if (!shader)
+      return GL_FALSE;
+
+   /* We always use 8-wide mode, at least for now. For one, flow
+    * control only works in 8-wide. Also, when we're fragment shader
+    * bound, we're almost always under register pressure as well, so
+    * 8-wide would save us from the performance cliff of spilling
+    * regs.
+    */
+   c->dispatch_width = 8;
+
+   if (INTEL_DEBUG & DEBUG_WM) {
+      printf("GLSL IR for native fragment shader %d:\n", prog->Name);
+      _mesa_print_ir(shader->ir, NULL);
+      printf("\n");
+   }
+
+   /* Now the main event: Visit the shader IR and generate our FS IR for it.
+ */ + fs_visitor v(c, shader); + + if (0) { + v.emit_dummy_fs(); + } else { + v.emit_interpolation(); + + /* Generate FS IR for main(). (the visitor only descends into + * functions called "main"). + */ + foreach_iter(exec_list_iterator, iter, *shader->ir) { + ir_instruction *ir = (ir_instruction *)iter.get(); + v.base_ir = ir; + ir->accept(&v); + } + + if (v.fail) + return GL_FALSE; + + v.emit_fb_writes(); + v.assign_curb_setup(); + v.assign_urb_setup(); + v.assign_regs(); + } + + v.generate_code(); + + if (INTEL_DEBUG & DEBUG_WM) { + const char *last_annotation_string = NULL; + ir_instruction *last_annotation_ir = NULL; + + printf("Native code for fragment shader %d:\n", prog->Name); + for (unsigned int i = 0; i < p->nr_insn; i++) { + if (last_annotation_ir != v.annotation_ir[i]) { + last_annotation_ir = v.annotation_ir[i]; + if (last_annotation_ir) { + printf(" "); + last_annotation_ir->print(); + printf("\n"); + } + } + if (last_annotation_string != v.annotation_string[i]) { + last_annotation_string = v.annotation_string[i]; + if (last_annotation_string) + printf(" %s\n", last_annotation_string); + } + brw_disasm(stdout, &p->store[i], intel->gen); + } + printf("\n"); + } + + c->prog_data.total_grf = v.grf_used; + c->prog_data.total_scratch = 0; + + return GL_TRUE; +} diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp new file mode 100644 index 00000000000..d8d58a9467b --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -0,0 +1,365 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the 
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file brw_fs_channel_expressions.cpp
+ *
+ * Breaks vector operations down into operations on each component.
+ *
+ * The 965 fragment shader receives 8 or 16 pixels at a time, so each
+ * channel of a vector is laid out as 1 or 2 8-float registers. Each
+ * ALU operation operates on one of those channel registers. As a
+ * result, there is no value to the 965 fragment shader in tracking
+ * "vector" expressions in the sense of GLSL fragment shaders, when
+ * doing a channel at a time may help in constant folding, algebraic
+ * simplification, and reducing the liveness of channel registers.
+ *
+ * The exception to the desire to break everything down to floats is
+ * texturing. The texture sampler returns a writemasked
+ * 4/8-register sequence containing the texture values. We don't want
+ * to dispatch to the sampler separately for each channel we need, so
+ * we do retain the vector types in that case.
+ */ + +extern "C" { +#include "main/core.h" +#include "brw_wm.h" +} +#include "../glsl/ir.h" +#include "../glsl/ir_expression_flattening.h" +#include "../glsl/glsl_types.h" + +class ir_channel_expressions_visitor : public ir_hierarchical_visitor { +public: + ir_channel_expressions_visitor() + { + this->progress = false; + this->mem_ctx = NULL; + } + + ir_visitor_status visit_leave(ir_assignment *); + + ir_rvalue *get_element(ir_variable *var, unsigned int element); + void assign(ir_assignment *ir, int elem, ir_rvalue *val); + + bool progress; + void *mem_ctx; +}; + +static bool +channel_expressions_predicate(ir_instruction *ir) +{ + ir_expression *expr = ir->as_expression(); + unsigned int i; + + if (!expr) + return false; + + for (i = 0; i < expr->get_num_operands(); i++) { + if (expr->operands[i]->type->is_vector()) + return true; + } + + return false; +} + +extern "C" { +GLboolean +brw_do_channel_expressions(exec_list *instructions) +{ + ir_channel_expressions_visitor v; + + /* Pull out any matrix expression to a separate assignment to a + * temp. This will make our handling of the breakdown to + * operations on the matrix's vector components much easier. 
+ */ + do_expression_flattening(instructions, channel_expressions_predicate); + + visit_list_elements(&v, instructions); + + return v.progress; +} +} + +/** + * Returns a dereference of \c var itself when it is scalar, otherwise a + * one-component swizzle selecting channel \c elem of it. + */ +ir_rvalue * +ir_channel_expressions_visitor::get_element(ir_variable *var, unsigned int elem) +{ + ir_dereference *deref; + + if (var->type->is_scalar()) + return new(mem_ctx) ir_dereference_variable(var); + + assert(elem < var->type->components()); + deref = new(mem_ctx) ir_dereference_variable(var); + return new(mem_ctx) ir_swizzle(deref, elem, 0, 0, 0, 1); +} + +/** + * Inserts, before \c ir, an assignment of \c val to channel \c elem of a + * clone of ir->lhs. + */ +void +ir_channel_expressions_visitor::assign(ir_assignment *ir, int elem, ir_rvalue *val) +{ + ir_dereference *lhs = ir->lhs->clone(mem_ctx, NULL); + ir_assignment *assign; + ir_swizzle *val_swiz; + + /* This assign-of-expression should have been generated by the + * expression flattening visitor (since we never short circuit to + * not flatten, even for plain assignments of variables), so the + * writemask is always full. + */ + assert(ir->write_mask == (1 << ir->lhs->type->components()) - 1); + + /* Smear the float across all the channels for the masked write. */ + val_swiz = new(mem_ctx) ir_swizzle(val, 0, 0, 0, 0, + ir->lhs->type->components()); + assign = new(mem_ctx) ir_assignment(lhs, val_swiz, NULL, (1 << elem)); + ir->insert_before(assign); +} + +ir_visitor_status +ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) +{ + ir_expression *expr = ir->rhs->as_expression(); + bool found_vector = false; + unsigned int i, vector_elements = 1; + ir_variable *op_var[2]; + + if (!expr) + return visit_continue; + + if (!this->mem_ctx) + this->mem_ctx = talloc_parent(ir); + + for (i = 0; i < expr->get_num_operands(); i++) { + if (expr->operands[i]->type->is_vector()) { + found_vector = true; + vector_elements = expr->operands[i]->type->vector_elements; + break; + } + } + if (!found_vector) + return visit_continue; + + /* Store the expression operands in temps so we can use them + * multiple times. 
+ */ + for (i = 0; i < expr->get_num_operands(); i++) { + ir_assignment *assign; + ir_dereference *deref; + + assert(!expr->operands[i]->type->is_matrix()); + + op_var[i] = new(mem_ctx) ir_variable(expr->operands[i]->type, + "channel_expressions", + ir_var_temporary); + ir->insert_before(op_var[i]); + + deref = new(mem_ctx) ir_dereference_variable(op_var[i]); + assign = new(mem_ctx) ir_assignment(deref, + expr->operands[i], + NULL); + ir->insert_before(assign); + } + + const glsl_type *element_type = glsl_type::get_instance(ir->lhs->type->base_type, + 1, 1); + + /* OK, time to break down this vector operation. */ + switch (expr->operation) { + case ir_unop_bit_not: + case ir_unop_logic_not: + case ir_unop_neg: + case ir_unop_abs: + case ir_unop_sign: + case ir_unop_rcp: + case ir_unop_rsq: + case ir_unop_sqrt: + case ir_unop_exp: + case ir_unop_log: + case ir_unop_exp2: + case ir_unop_log2: + case ir_unop_f2i: + case ir_unop_i2f: + case ir_unop_f2b: + case ir_unop_b2f: + case ir_unop_i2b: + case ir_unop_b2i: + case ir_unop_u2f: + case ir_unop_trunc: + case ir_unop_ceil: + case ir_unop_floor: + case ir_unop_fract: + case ir_unop_sin: + case ir_unop_cos: + case ir_unop_dFdx: + case ir_unop_dFdy: + for (i = 0; i < vector_elements; i++) { + ir_rvalue *op0 = get_element(op_var[0], i); + + assign(ir, i, new(mem_ctx) ir_expression(expr->operation, + element_type, + op0, + NULL)); + } + break; + + case ir_binop_add: + case ir_binop_sub: + case ir_binop_mul: + case ir_binop_div: + case ir_binop_mod: + case ir_binop_min: + case ir_binop_max: + case ir_binop_pow: + case ir_binop_lshift: + case ir_binop_rshift: + case ir_binop_bit_and: + case ir_binop_bit_xor: + case ir_binop_bit_or: + for (i = 0; i < vector_elements; i++) { + ir_rvalue *op0 = get_element(op_var[0], i); + ir_rvalue *op1 = get_element(op_var[1], i); + + assign(ir, i, new(mem_ctx) ir_expression(expr->operation, + element_type, + op0, + op1)); + } + break; + + case ir_unop_any: { + ir_expression *temp; + temp = 
new(mem_ctx) ir_expression(ir_binop_logic_or, + element_type, + get_element(op_var[0], 0), + get_element(op_var[0], 1)); + + for (i = 2; i < vector_elements; i++) { + temp = new(mem_ctx) ir_expression(ir_binop_logic_or, + element_type, + get_element(op_var[0], i), + temp); + } + assign(ir, 0, temp); + break; + } + + case ir_binop_dot: { + ir_expression *last = NULL; + for (i = 0; i < vector_elements; i++) { + ir_rvalue *op0 = get_element(op_var[0], i); + ir_rvalue *op1 = get_element(op_var[1], i); + ir_expression *temp; + + temp = new(mem_ctx) ir_expression(ir_binop_mul, + element_type, + op0, + op1); + if (last) { + last = new(mem_ctx) ir_expression(ir_binop_add, + element_type, + temp, + last); + } else { + last = temp; + } + } + assign(ir, 0, last); + break; + } + + case ir_binop_cross: { + for (i = 0; i < vector_elements; i++) { + int swiz0 = (i + 1) % 3; + int swiz1 = (i + 2) % 3; + ir_expression *temp1, *temp2; + + temp1 = new(mem_ctx) ir_expression(ir_binop_mul, + element_type, + get_element(op_var[0], swiz0), + get_element(op_var[1], swiz1)); + + temp2 = new(mem_ctx) ir_expression(ir_binop_mul, + element_type, + get_element(op_var[1], swiz0), + get_element(op_var[0], swiz1)); + + temp2 = new(mem_ctx) ir_expression(ir_unop_neg, + element_type, + temp2, + NULL); + + assign(ir, i, new(mem_ctx) ir_expression(ir_binop_add, + element_type, + temp1, temp2)); + } + break; + } + + case ir_binop_less: + case ir_binop_greater: + case ir_binop_lequal: + case ir_binop_gequal: + case ir_binop_logic_and: + case ir_binop_logic_xor: + case ir_binop_logic_or: + ir->print(); + printf("\n"); + assert(!"not reached: expression operates on scalars only"); + break; + case ir_binop_equal: + case ir_binop_nequal: { + ir_expression *last = NULL; + for (i = 0; i < vector_elements; i++) { + ir_rvalue *op0 = get_element(op_var[0], i); + ir_rvalue *op1 = get_element(op_var[1], i); + ir_expression *temp; + ir_expression_operation join; + + if (expr->operation == ir_binop_equal) + join = 
ir_binop_logic_and; + else + join = ir_binop_logic_or; + + temp = new(mem_ctx) ir_expression(expr->operation, + element_type, + op0, + op1); + if (last) { + last = new(mem_ctx) ir_expression(join, + element_type, + temp, + last); + } else { + last = temp; + } + } + assign(ir, 0, last); + break; + } + } + + ir->remove(); + this->progress = true; + + return visit_continue; +} diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp new file mode 100644 index 00000000000..00d5c202485 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp @@ -0,0 +1,391 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +/** + * \file brw_fs_vector_splitting.cpp + * + * If a vector is only ever referenced by its components, then + * split those components out to individual variables so they can be + * handled normally by other optimization passes. + * + * This skips vectors in uniforms and varyings, which need to be + * accessible as vectors for their access by the GL. Also, vector + * results of non-variable-derefs in assignments aren't handled + * because to do so we would have to store the vector result to a + * temporary in order to unload each channel, and to do so would just + * loop us back to where we started. For the 965, this is exactly the + * behavior we want for the results of texture lookups, but probably not for + * (FIXME: sentence left unfinished) + */ + +extern "C" { +#include "main/core.h" +#include "intel_context.h" +} +#include "../glsl/ir.h" +#include "../glsl/ir_visitor.h" +#include "../glsl/ir_print_visitor.h" +#include "../glsl/ir_rvalue_visitor.h" +#include "../glsl/glsl_types.h" + +static bool debug = false; + +class variable_entry : public exec_node +{ +public: + variable_entry(ir_variable *var) + { + this->var = var; + this->whole_vector_access = 0; + this->declaration = false; + this->mem_ctx = NULL; + } + + ir_variable *var; /* The key: the variable's pointer. */ + + /** + * Number of times the variable is dereferenced as a whole vector, + * including assignments. Scalar swizzles of the variable, single-channel + * writes to it and unconditional variable-to-variable copies are not + * counted by the reference visitor. + */ + unsigned whole_vector_access; + + bool declaration; /* If the variable had a decl in the instruction stream */ + + ir_variable *components[4]; + + /** talloc_parent(this->var) -- the shader's talloc context. 
*/ + void *mem_ctx; +}; + +class ir_vector_reference_visitor : public ir_hierarchical_visitor { +public: + ir_vector_reference_visitor(void) + { + this->mem_ctx = talloc_new(NULL); + this->variable_list.make_empty(); + } + + ~ir_vector_reference_visitor(void) + { + talloc_free(mem_ctx); + } + + virtual ir_visitor_status visit(ir_variable *); + virtual ir_visitor_status visit(ir_dereference_variable *); + virtual ir_visitor_status visit_enter(ir_swizzle *); + virtual ir_visitor_status visit_enter(ir_assignment *); + virtual ir_visitor_status visit_enter(ir_function_signature *); + + variable_entry *get_variable_entry(ir_variable *var); + + /* List of variable_entry */ + exec_list variable_list; + + void *mem_ctx; +}; + +variable_entry * +ir_vector_reference_visitor::get_variable_entry(ir_variable *var) +{ + assert(var); + + if (!var->type->is_vector()) + return NULL; + + switch (var->mode) { + case ir_var_uniform: + case ir_var_in: + case ir_var_out: + case ir_var_inout: + /* Can't split varyings or uniforms. Function in/outs won't get split + * either, so don't care about the ambiguity. 
+ */ + return NULL; + case ir_var_auto: + case ir_var_temporary: + break; + } + + foreach_iter(exec_list_iterator, iter, this->variable_list) { + variable_entry *entry = (variable_entry *)iter.get(); + if (entry->var == var) + return entry; + } + + variable_entry *entry = new(mem_ctx) variable_entry(var); + this->variable_list.push_tail(entry); + return entry; +} + + +ir_visitor_status +ir_vector_reference_visitor::visit(ir_variable *ir) +{ + variable_entry *entry = this->get_variable_entry(ir); + + if (entry) + entry->declaration = true; + + return visit_continue; +} + +ir_visitor_status +ir_vector_reference_visitor::visit(ir_dereference_variable *ir) +{ + ir_variable *const var = ir->var; + variable_entry *entry = this->get_variable_entry(var); + + if (entry) + entry->whole_vector_access++; + + return visit_continue; +} + +ir_visitor_status +ir_vector_reference_visitor::visit_enter(ir_swizzle *ir) +{ + /* Don't descend into a vector ir_dereference_variable below. */ + if (ir->val->as_dereference_variable() && ir->type->is_scalar()) + return visit_continue_with_parent; + + return visit_continue; +} + +ir_visitor_status +ir_vector_reference_visitor::visit_enter(ir_assignment *ir) +{ + if (ir->lhs->as_dereference_variable() && + ir->rhs->as_dereference_variable() && + !ir->condition) { + /* We'll split copies of a vector to copies of channels, so don't + * descend to the ir_dereference_variables. + */ + return visit_continue_with_parent; + } + if (ir->lhs->as_dereference_variable() && + is_power_of_two(ir->write_mask) && + !ir->condition) { + /* If we're writing just a channel, then channel-splitting the LHS is OK. + */ + ir->rhs->accept(this); + return visit_continue_with_parent; + } + return visit_continue; +} + +ir_visitor_status +ir_vector_reference_visitor::visit_enter(ir_function_signature *ir) +{ + /* We don't want to descend into the function parameters and + * split them, so just accept the body here. 
+ */ + visit_list_elements(this, &ir->body); + return visit_continue_with_parent; +} + +class ir_vector_splitting_visitor : public ir_rvalue_visitor { +public: + ir_vector_splitting_visitor(exec_list *vars) + { + this->variable_list = vars; + } + + virtual ir_visitor_status visit_leave(ir_assignment *); + + void handle_rvalue(ir_rvalue **rvalue); + struct variable_entry *get_splitting_entry(ir_variable *var); + + exec_list *variable_list; + void *mem_ctx; +}; + +struct variable_entry * +ir_vector_splitting_visitor::get_splitting_entry(ir_variable *var) +{ + assert(var); + + if (!var->type->is_vector()) + return NULL; + + foreach_iter(exec_list_iterator, iter, *this->variable_list) { + variable_entry *entry = (variable_entry *)iter.get(); + if (entry->var == var) { + return entry; + } + } + + return NULL; +} + +void +ir_vector_splitting_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + if (!*rvalue) + return; + + ir_swizzle *swiz = (*rvalue)->as_swizzle(); + if (!swiz || !swiz->type->is_scalar()) + return; + + ir_dereference_variable *deref_var = swiz->val->as_dereference_variable(); + if (!deref_var) + return; + + variable_entry *entry = get_splitting_entry(deref_var->var); + if (!entry) + return; + + ir_variable *var = entry->components[swiz->mask.x]; + *rvalue = new(entry->mem_ctx) ir_dereference_variable(var); +} + +ir_visitor_status +ir_vector_splitting_visitor::visit_leave(ir_assignment *ir) +{ + ir_dereference_variable *lhs_deref = ir->lhs->as_dereference_variable(); + ir_dereference_variable *rhs_deref = ir->rhs->as_dereference_variable(); + variable_entry *lhs = lhs_deref ? get_splitting_entry(lhs_deref->var) : NULL; + variable_entry *rhs = rhs_deref ? get_splitting_entry(rhs_deref->var) : NULL; + + if (lhs_deref && rhs_deref && (lhs || rhs) && !ir->condition) { + /* Straight assignment of vector variables. */ + for (unsigned int i = 0; i < ir->rhs->type->vector_elements; i++) { + ir_dereference *new_lhs; + ir_rvalue *new_rhs; + void *mem_ctx = lhs ? 
lhs->mem_ctx : rhs->mem_ctx;
+         unsigned int writemask;
+
+         if (lhs) {
+            new_lhs = new(mem_ctx) ir_dereference_variable(lhs->components[i]);
+            writemask = (ir->write_mask >> i) & 1;
+         } else {
+            new_lhs = ir->lhs->clone(mem_ctx, NULL);
+            writemask = ir->write_mask & (1 << i);
+         }
+
+         if (rhs) {
+            new_rhs = new(mem_ctx) ir_dereference_variable(rhs->components[i]);
+            /* If we're writing into a writemask, smear it out to that channel. */
+            if (!lhs)
+               new_rhs = new(mem_ctx) ir_swizzle(new_rhs, i, i, i, i, i + 1);
+         } else {
+            new_rhs = new(mem_ctx) ir_swizzle(ir->rhs->clone(mem_ctx, NULL),
+                                              i, i, i, i, 1);
+         }
+
+         ir->insert_before(new(mem_ctx) ir_assignment(new_lhs,
+                                                      new_rhs,
+                                                      NULL, writemask));
+      }
+      ir->remove();
+   } else if (lhs) {
+      /* Single-channel write to a split vector: retarget the assignment at
+       * the matching component variable.
+       *
+       * Allocate from the split entry's context, as the branch above and
+       * handle_rvalue() do.  The visitor's own mem_ctx member is never
+       * initialised by the constructor, so it must not be used here.
+       */
+      void *mem_ctx = lhs->mem_ctx;
+      int elem = -1;
+
+      switch (ir->write_mask) {
+      case (1 << 0):
+         elem = 0;
+         break;
+      case (1 << 1):
+         elem = 1;
+         break;
+      case (1 << 2):
+         elem = 2;
+         break;
+      case (1 << 3):
+         elem = 3;
+         break;
+      default:
+         ir->print();
+         assert(!"not reached: non-channelwise dereference of LHS.");
+      }
+
+      ir->lhs = new(mem_ctx) ir_dereference_variable(lhs->components[elem]);
+      ir->write_mask = (1 << 0);
+
+      handle_rvalue(&ir->rhs);
+      ir->rhs = new(mem_ctx) ir_swizzle(ir->rhs,
+                                        elem, elem, elem, elem, 1);
+   } else {
+      handle_rvalue(&ir->rhs);
+   }
+
+   handle_rvalue(&ir->condition);
+
+   return visit_continue;
+}
+
+extern "C" {
+bool
+brw_do_vector_splitting(exec_list *instructions)
+{
+   ir_vector_reference_visitor refs;
+
+   visit_list_elements(&refs, instructions);
+
+   /* Trim out variables we can't split.
*/ + foreach_iter(exec_list_iterator, iter, refs.variable_list) { + variable_entry *entry = (variable_entry *)iter.get(); + + if (debug) { + printf("vector %s@%p: decl %d, whole_access %d\n", + entry->var->name, (void *) entry->var, entry->declaration, + entry->whole_vector_access); + } + + if (!entry->declaration || entry->whole_vector_access) { + entry->remove(); + } + } + + if (refs.variable_list.is_empty()) + return false; + + void *mem_ctx = talloc_new(NULL); + + /* Replace the decls of the vectors to be split with their split + * components. + */ + foreach_iter(exec_list_iterator, iter, refs.variable_list) { + variable_entry *entry = (variable_entry *)iter.get(); + const struct glsl_type *type; + type = glsl_type::get_instance(entry->var->type->base_type, 1, 1); + + entry->mem_ctx = talloc_parent(entry->var); + + for (unsigned int i = 0; i < entry->var->type->vector_elements; i++) { + const char *name = talloc_asprintf(mem_ctx, "%s_%c", + entry->var->name, + "xyzw"[i]); + + entry->components[i] = new(entry->mem_ctx) ir_variable(type, name, + ir_var_temporary); + entry->var->insert_before(entry->components[i]); + } + + entry->var->remove(); + } + + ir_vector_splitting_visitor split(&refs.variable_list); + visit_list_elements(&split, instructions); + + talloc_free(mem_ctx); + + return true; +} +} diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 572175f463e..6eeaba77720 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -281,7 +281,7 @@ static void emit_depthbuffer(struct brw_context *brw) } assert(region->tiling != I915_TILING_X); - if (IS_GEN6(intel->intelScreen->deviceID)) + if (intel->gen >= 6) assert(region->tiling != I915_TILING_NONE); BEGIN_BATCH(len); @@ -295,7 +295,7 @@ static void emit_depthbuffer(struct brw_context *brw) I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) | - ((region->pitch 
- 1) << 6) | + ((region->width - 1) << 6) | ((region->height - 1) << 19)); OUT_BATCH(0); diff --git a/src/mesa/drivers/dri/i965/brw_optimize.c b/src/mesa/drivers/dri/i965/brw_optimize.c index 8aa6fb6cc6f..cbed2bd5cb1 100644 --- a/src/mesa/drivers/dri/i965/brw_optimize.c +++ b/src/mesa/drivers/dri/i965/brw_optimize.c @@ -32,12 +32,7 @@ #include "brw_defines.h" #include "brw_eu.h" -static const struct { - char *name; - int nsrc; - int ndst; - GLboolean is_arith; -} inst_opcode[128] = { +const struct brw_instruction_info brw_opcodes[128] = { [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1, .is_arith = 1 }, [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1, .is_arith = 1 }, [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1, .is_arith = 1 }, @@ -94,7 +89,7 @@ static const struct { static INLINE GLboolean brw_is_arithmetic_inst(const struct brw_instruction *inst) { - return inst_opcode[inst->header.opcode].is_arith; + return brw_opcodes[inst->header.opcode].is_arith; } static const GLuint inst_stride[7] = { @@ -122,7 +117,7 @@ brw_is_grf_written(const struct brw_instruction *inst, int reg_index, int size, int gen) { - if (inst_opcode[inst->header.opcode].ndst == 0) + if (brw_opcodes[inst->header.opcode].ndst == 0) return GL_FALSE; if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT) @@ -161,20 +156,19 @@ brw_is_grf_written(const struct brw_instruction *inst, return left < right; } -/* Specific path for message register since we need to handle the compr4 case */ -static INLINE GLboolean -brw_is_mrf_written(const struct brw_instruction *inst, int reg_index, int size) +static GLboolean +brw_is_mrf_written_alu(const struct brw_instruction *inst, + int reg_index, int size) { - if (inst_opcode[inst->header.opcode].ndst == 0) + if (brw_opcodes[inst->header.opcode].ndst == 0) return GL_FALSE; - if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT) - if (inst->bits1.ia1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE) - return GL_TRUE; - if 
(inst->bits1.da1.dest_reg_file != BRW_MESSAGE_REGISTER_FILE) return GL_FALSE; + if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT) + return GL_TRUE; + const int reg_start = reg_index * REG_SIZE; const int reg_end = reg_start + size; @@ -188,8 +182,6 @@ brw_is_mrf_written(const struct brw_instruction *inst, int reg_index, int size) if (is_compr4 && inst->header.execution_size != BRW_EXECUTE_16) return GL_TRUE; - GLboolean is_written = GL_FALSE; - /* Here we write mrf_{i} and mrf_{i+4}. So we read two times 8 elements */ if (is_compr4) { const int length = 8 * type_size * inst->bits1.da1.dest_horiz_stride; @@ -210,7 +202,8 @@ brw_is_mrf_written(const struct brw_instruction *inst, int reg_index, int size) const int left1 = MAX2(write_start1, reg_start); const int right1 = MIN2(write_end1, reg_end); - is_written = left0 < right0 || left1 < right1; + if (left0 < right0 || left1 < right1) + return GL_TRUE; } else { int length; @@ -223,25 +216,41 @@ brw_is_mrf_written(const struct brw_instruction *inst, int reg_index, int size) + inst->bits1.da1.dest_subreg_nr; const int write_end = write_start + length; const int left = MAX2(write_start, reg_start); - const int right = MIN2(write_end, reg_end);; + const int right = MIN2(write_end, reg_end); - is_written = left < right; + if (left < right) + return GL_TRUE; } - /* SEND may perform an implicit mov to a mrf register */ - if (is_written == GL_FALSE && - inst->header.opcode == BRW_OPCODE_SEND && - inst->bits1.da1.src0_reg_file != 0) { + return GL_FALSE; +} - const int mrf_start = inst->header.destreg__conditionalmod; - const int write_start = mrf_start * REG_SIZE; - const int write_end = write_start + REG_SIZE; - const int left = MAX2(write_start, reg_start); - const int right = MIN2(write_end, reg_end);; - is_written = left < right; - } +/* SEND may perform an implicit mov to a mrf register */ +static GLboolean brw_is_mrf_written_send(const struct brw_instruction *inst, + int reg_index, int size) +{ + + const int 
reg_start = reg_index * REG_SIZE; + const int reg_end = reg_start + size; + const int mrf_start = inst->header.destreg__conditionalmod; + const int write_start = mrf_start * REG_SIZE; + const int write_end = write_start + REG_SIZE; + const int left = MAX2(write_start, reg_start); + const int right = MIN2(write_end, reg_end); + + if (inst->header.opcode != BRW_OPCODE_SEND || + inst->bits1.da1.src0_reg_file == 0) + return GL_FALSE; - return is_written; + return left < right; +} + +/* Specific path for message register since we need to handle the compr4 case */ +static INLINE GLboolean +brw_is_mrf_written(const struct brw_instruction *inst, int reg_index, int size) +{ + return (brw_is_mrf_written_alu(inst, reg_index, size) || + brw_is_mrf_written_send(inst, reg_index, size)); } static INLINE GLboolean @@ -284,7 +293,7 @@ static INLINE GLboolean brw_is_grf_read(const struct brw_instruction *inst, int reg_index, int size) { int i, j; - if (inst_opcode[inst->header.opcode].nsrc == 0) + if (brw_opcodes[inst->header.opcode].nsrc == 0) return GL_FALSE; /* Look at first source. 
We must take into account register regions to @@ -292,7 +301,7 @@ brw_is_grf_read(const struct brw_instruction *inst, int reg_index, int size) * since we do not take into account the fact that some complete registers * may be skipped */ - if (inst_opcode[inst->header.opcode].nsrc >= 1) { + if (brw_opcodes[inst->header.opcode].nsrc >= 1) { if (inst->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT) if (inst->bits1.ia1.src0_reg_file == BRW_GENERAL_REGISTER_FILE) @@ -327,7 +336,7 @@ brw_is_grf_read(const struct brw_instruction *inst, int reg_index, int size) } /* Second src register */ - if (inst_opcode[inst->header.opcode].nsrc >= 2) { + if (brw_opcodes[inst->header.opcode].nsrc >= 2) { if (inst->bits3.da1.src1_address_mode != BRW_ADDRESS_DIRECT) if (inst->bits1.ia1.src1_reg_file == BRW_GENERAL_REGISTER_FILE) diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 4b08d2599bc..bc152204a42 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -36,6 +36,7 @@ #include "program/program.h" #include "program/programopt.h" #include "tnl/tnl.h" +#include "talloc.h" #include "brw_context.h" #include "brw_wm.h" @@ -114,10 +115,7 @@ shader_error(GLcontext *ctx, struct gl_program *prog, const char *msg) shader = _mesa_lookup_shader_program(ctx, prog->Id); if (shader) { - if (shader->InfoLog) { - free(shader->InfoLog); - } - shader->InfoLog = _mesa_strdup(msg); + shader->InfoLog = talloc_strdup_append(shader->InfoLog, msg); shader->LinkStatus = GL_FALSE; } } @@ -170,6 +168,9 @@ static GLboolean brwProgramStringNotify( GLcontext *ctx, * See piglit glsl-{vs,fs}-functions-[23] tests. 
*/ for (i = 0; i < prog->NumInstructions; i++) { + struct prog_instruction *inst = prog->Instructions + i; + int r; + if (prog->Instructions[i].Opcode == OPCODE_CAL) { shader_error(ctx, prog, "i965 driver doesn't yet support uninlined function " @@ -177,16 +178,28 @@ static GLboolean brwProgramStringNotify( GLcontext *ctx, "the end of the function to work around it.\n"); return GL_FALSE; } - if (prog->Instructions[i].DstReg.RelAddr && - prog->Instructions[i].DstReg.File == PROGRAM_INPUT) { + + if (prog->Instructions[i].Opcode == OPCODE_RET) { shader_error(ctx, prog, - "Variable indexing of shader inputs unsupported\n"); + "i965 driver doesn't yet support \"return\" " + "from main().\n"); return GL_FALSE; } - if (prog->Instructions[i].DstReg.RelAddr && + + for (r = 0; r < _mesa_num_inst_src_regs(inst->Opcode); r++) { + if (prog->Instructions[i].SrcReg[r].RelAddr && + prog->Instructions[i].SrcReg[r].File == PROGRAM_INPUT) { + shader_error(ctx, prog, + "Variable indexing of shader inputs unsupported\n"); + return GL_FALSE; + } + } + + if (target == GL_FRAGMENT_PROGRAM_ARB && + prog->Instructions[i].DstReg.RelAddr && prog->Instructions[i].DstReg.File == PROGRAM_OUTPUT) { shader_error(ctx, prog, - "Variable indexing of shader outputs unsupported\n"); + "Variable indexing of FS outputs unsupported\n"); return GL_FALSE; } if (target == GL_FRAGMENT_PROGRAM_ARB) { @@ -218,5 +231,10 @@ void brwInitFragProgFuncs( struct dd_function_table *functions ) functions->DeleteProgram = brwDeleteProgram; functions->IsProgramNative = brwIsProgramNative; functions->ProgramStringNotify = brwProgramStringNotify; + + functions->NewShader = brw_new_shader; + functions->NewShaderProgram = brw_new_shader_program; + functions->CompileShader = brw_compile_shader; + functions->LinkShader = brw_link_shader; } diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index af08446f2d8..c5d296b1295 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ 
b/src/mesa/drivers/dri/i965/brw_state.h @@ -107,6 +107,7 @@ extern const struct brw_tracked_state gen6_sf_vp; extern const struct brw_tracked_state gen6_urb; extern const struct brw_tracked_state gen6_viewport_state; extern const struct brw_tracked_state gen6_vs_state; +extern const struct brw_tracked_state gen6_wm_constants; extern const struct brw_tracked_state gen6_wm_state; /*********************************************************************** diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index f92a19c2aa0..a0c130557e3 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -35,7 +35,6 @@ #include "brw_state.h" #include "intel_batchbuffer.h" #include "intel_buffers.h" -#include "intel_chipset.h" /* This is used to initialize brw->state.atoms[]. We could use this * list directly except for a single atom, brw_constant_buffer, which @@ -129,7 +128,7 @@ const struct brw_tracked_state *gen6_atoms[] = &gen6_cc_state_pointers, &brw_vs_constants, /* Before vs_surfaces and constant_buffer */ - &brw_wm_constants, /* Before wm_surfaces and constant_buffer */ + &gen6_wm_constants, /* Before wm_surfaces and constant_buffer */ &brw_vs_surfaces, /* must do before unit */ &brw_wm_constant_surface, /* must do before wm surfaces/bind bo */ @@ -351,7 +350,7 @@ void brw_validate_state( struct brw_context *brw ) brw_add_validated_bo(brw, intel->batch->buf); - if (IS_GEN6(intel->intelScreen->deviceID)) { + if (intel->gen >= 6) { atoms = gen6_atoms; num_atoms = ARRAY_SIZE(gen6_atoms); } else { @@ -425,7 +424,7 @@ void brw_upload_state(struct brw_context *brw) const struct brw_tracked_state **atoms; int num_atoms; - if (IS_GEN6(intel->intelScreen->deviceID)) { + if (intel->gen >= 6) { atoms = gen6_atoms; num_atoms = ARRAY_SIZE(gen6_atoms); } else { diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 
2fde42a7060..2a118e01c53 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -750,7 +750,7 @@ struct gen6_depth_stencil_state } ds1; struct { - GLuint pad0:25; + GLuint pad0:26; GLuint depth_write_enable:1; GLuint depth_test_func:3; GLuint pad1:1; @@ -1305,13 +1305,14 @@ struct brw_instruction GLuint access_mode:1; GLuint mask_control:1; GLuint dependency_control:2; - GLuint compression_control:2; + GLuint compression_control:2; /* gen6: quater control */ GLuint thread_control:2; GLuint predicate_control:4; GLuint predicate_inverse:1; GLuint execution_size:3; GLuint destreg__conditionalmod:4; /* destreg - send, conditionalmod - others */ - GLuint pad0:2; + GLuint acc_wr_control:1; + GLuint cmpt_control:1; GLuint debug_control:1; GLuint saturate:1; } header; @@ -1359,7 +1360,7 @@ struct brw_instruction GLuint dest_writemask:4; GLuint dest_subreg_nr:1; GLuint dest_reg_nr:8; - GLuint pad1:2; + GLuint dest_horiz_stride:2; GLuint dest_address_mode:1; } da16; @@ -1373,7 +1374,7 @@ struct brw_instruction GLuint dest_writemask:4; GLint dest_indirect_offset:6; GLuint dest_subreg_nr:3; - GLuint pad1:2; + GLuint dest_horiz_stride:2; GLuint dest_address_mode:1; } ia16; } bits1; diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 9a832af9a97..9f90e1e5e5c 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -75,10 +75,10 @@ static void do_vs_prog( struct brw_context *brw, c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_TEX0 + i); } - if (0) - _mesa_print_program(&c.vp->program.Base); - - + if (0) { + _mesa_fprint_program_opt(stdout, &c.vp->program.Base, PROG_PRINT_DEBUG, + GL_TRUE); + } /* Emit GEN4 code. 
*/ diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index b6b558e9a69..1d88c6b5a46 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -47,6 +47,7 @@ brw_vs_arg_can_be_immediate(enum prog_opcode opcode, int arg) [OPCODE_MOV] = 1, [OPCODE_ADD] = 2, [OPCODE_CMP] = 3, + [OPCODE_DP2] = 2, [OPCODE_DP3] = 2, [OPCODE_DP4] = 2, [OPCODE_DPH] = 2, @@ -97,6 +98,39 @@ static void release_tmps( struct brw_vs_compile *c ) c->last_tmp = c->first_tmp; } +static int +get_first_reladdr_output(struct gl_vertex_program *vp) +{ + int i; + int first_reladdr_output = VERT_RESULT_MAX; + + for (i = 0; i < vp->Base.NumInstructions; i++) { + struct prog_instruction *inst = vp->Base.Instructions + i; + + if (inst->DstReg.File == PROGRAM_OUTPUT && + inst->DstReg.RelAddr && + inst->DstReg.Index < first_reladdr_output) + first_reladdr_output = inst->DstReg.Index; + } + + return first_reladdr_output; +} + +/* Clears the record of which vp_const_buffer elements have been + * loaded into our constant buffer registers, for the starts of new + * blocks after control flow. + */ +static void +clear_current_const(struct brw_vs_compile *c) +{ + unsigned int i; + + if (c->vp->use_const_buffer) { + for (i = 0; i < 3; i++) { + c->current_const[i].index = -1; + } + } +} /** * Preallocate GRF register before code emit. @@ -108,6 +142,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) struct intel_context *intel = &c->func.brw->intel; GLuint i, reg = 0, mrf; int attributes_in_vue; + int first_reladdr_output; /* Determine whether to use a real constant buffer or use a block * of GRF registers for constants. 
The later is faster but only @@ -225,6 +260,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) else mrf = 4; + first_reladdr_output = get_first_reladdr_output(&c->vp->program); for (i = 0; i < VERT_RESULT_MAX; i++) { if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) { c->nr_outputs++; @@ -253,15 +289,16 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) * For attributes beyond the compute-to-MRF, we compute to * GRFs and they will be written in the second URB_WRITE. */ - if (mrf < 15) { + if (first_reladdr_output > i && mrf < 15) { c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf); mrf++; } else { - if (!c->first_overflow_output) + if (mrf >= 15 && !c->first_overflow_output) c->first_overflow_output = i; c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); reg++; + mrf++; } } } @@ -292,10 +329,10 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) if (c->vp->use_const_buffer) { for (i = 0; i < 3; i++) { - c->current_const[i].index = -1; c->current_const[i].reg = brw_vec8_grf(reg, 0); reg++; } + clear_current_const(c); } for (i = 0; i < 128; i++) { @@ -502,6 +539,23 @@ static void emit_cmp( struct brw_compile *p, brw_set_predicate_control(p, BRW_PREDICATE_NONE); } +static void emit_sign(struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0) +{ + struct brw_compile *p = &c->func; + + brw_MOV(p, dst, brw_imm_f(0)); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, brw_imm_f(0)); + brw_MOV(p, dst, brw_imm_f(-1.0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, arg0, brw_imm_f(0)); + brw_MOV(p, dst, brw_imm_f(1.0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); +} + static void emit_max( struct brw_compile *p, struct brw_reg dst, struct brw_reg arg0, @@ -1010,13 +1064,11 @@ move_to_reladdr_dst(struct brw_vs_compile *c, int reg_size = 32; struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0]; struct brw_reg vp_address = retype(vec1(addr_reg), 
BRW_REGISTER_TYPE_D); - struct brw_reg temp_base = c->regs[inst->DstReg.File][0]; - GLuint byte_offset = temp_base.nr * 32 + temp_base.subnr; + struct brw_reg base = c->regs[inst->DstReg.File][inst->DstReg.Index]; + GLuint byte_offset = base.nr * 32 + base.subnr; struct brw_reg indirect = brw_vec4_indirect(0,0); struct brw_reg acc = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UW); - byte_offset += inst->DstReg.Index * reg_size; - brw_push_insn_state(p); brw_set_access_mode(p, BRW_ALIGN_1); @@ -1162,10 +1214,12 @@ static struct brw_reg get_arg( struct brw_vs_compile *c, /* Convert 3-bit swizzle to 2-bit. */ - reg.dw1.bits.swizzle = BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0), - GET_SWZ(src->Swizzle, 1), - GET_SWZ(src->Swizzle, 2), - GET_SWZ(src->Swizzle, 3)); + if (reg.file != BRW_IMMEDIATE_VALUE) { + reg.dw1.bits.swizzle = BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0), + GET_SWZ(src->Swizzle, 1), + GET_SWZ(src->Swizzle, 2), + GET_SWZ(src->Swizzle, 3)); + } /* Note this is ok for non-swizzle instructions: */ @@ -1211,6 +1265,7 @@ static struct brw_reg get_dst( struct brw_vs_compile *c, reg = brw_null_reg(); } + assert(reg.type != BRW_IMMEDIATE_VALUE); reg.dw1.bits.writemask = dst.WriteMask; return reg; @@ -1299,6 +1354,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) struct brw_reg ndc; int eot; GLuint len_vertex_header = 2; + int next_mrf, i; if (c->key.copy_edgeflag) { brw_MOV(p, @@ -1376,6 +1432,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) * of zeros followed by two sets of NDC coordinates: */ brw_set_access_mode(p, BRW_ALIGN_1); + brw_set_acc_write_control(p, 0); /* The VUE layout is documented in Volume 2a. */ if (intel->gen >= 6) { @@ -1416,6 +1473,23 @@ static void emit_vertex_write( struct brw_vs_compile *c) len_vertex_header = 2; } + /* Move variable-addressed, non-overflow outputs to their MRFs. 
*/ + next_mrf = 2 + len_vertex_header; + for (i = 0; i < VERT_RESULT_MAX; i++) { + if (c->first_overflow_output > 0 && i >= c->first_overflow_output) + break; + if (!(c->prog_data.outputs_written & BITFIELD64_BIT(i))) + continue; + + if (i >= VERT_RESULT_TEX0 && + c->regs[PROGRAM_OUTPUT][i].file == BRW_GENERAL_REGISTER_FILE) { + brw_MOV(p, brw_message_reg(next_mrf), c->regs[PROGRAM_OUTPUT][i]); + next_mrf++; + } else if (c->regs[PROGRAM_OUTPUT][i].file == BRW_MESSAGE_REGISTER_FILE) { + next_mrf = c->regs[PROGRAM_OUTPUT][i].nr + 1; + } + } + eot = (c->first_overflow_output == 0); brw_urb_WRITE(p, @@ -1541,18 +1615,23 @@ void brw_vs_emit(struct brw_vs_compile *c ) const GLuint nr_insns = c->vp->program.Base.NumInstructions; GLuint insn, if_depth = 0, loop_depth = 0; struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH] = { 0 }; + int if_depth_in_loop[MAX_LOOP_DEPTH]; const struct brw_indirect stack_index = brw_indirect(0, 0); GLuint index; GLuint file; if (INTEL_DEBUG & DEBUG_VS) { printf("vs-mesa:\n"); - _mesa_print_program(&c->vp->program.Base); + _mesa_fprint_program_opt(stdout, &c->vp->program.Base, PROG_PRINT_DEBUG, + GL_TRUE); printf("\n"); } brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_access_mode(p, BRW_ALIGN_16); + if_depth_in_loop[loop_depth] = 0; + + brw_set_acc_write_control(p, 1); for (insn = 0; insn < nr_insns; insn++) { GLuint i; @@ -1591,7 +1670,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) const struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn]; struct brw_reg args[3], dst; GLuint i; - + #if 0 printf("%d: ", insn); _mesa_print_instruction(inst); @@ -1636,6 +1715,9 @@ void brw_vs_emit(struct brw_vs_compile *c ) case OPCODE_COS: emit_math1(c, BRW_MATH_FUNCTION_COS, dst, args[0], BRW_MATH_PRECISION_FULL); break; + case OPCODE_DP2: + brw_DP2(p, dst, args[0], args[1]); + break; case OPCODE_DP3: brw_DP3(p, dst, args[0], args[1]); break; @@ -1732,6 +1814,9 @@ void brw_vs_emit(struct 
brw_vs_compile *c ) case OPCODE_SLE: unalias2(c, dst, args[0], args[1], emit_sle); break; + case OPCODE_SSG: + unalias1(c, dst, args[0], emit_sign); + break; case OPCODE_SUB: brw_ADD(p, dst, args[0], negate(args[1])); break; @@ -1753,31 +1838,38 @@ void brw_vs_emit(struct brw_vs_compile *c ) if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8); /* Note that brw_IF smashes the predicate_control field. */ if_inst[if_depth]->header.predicate_control = get_predicate(inst); + if_depth_in_loop[loop_depth]++; if_depth++; break; case OPCODE_ELSE: + clear_current_const(c); assert(if_depth > 0); if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); break; case OPCODE_ENDIF: + clear_current_const(c); assert(if_depth > 0); brw_ENDIF(p, if_inst[--if_depth]); + if_depth_in_loop[loop_depth]--; break; case OPCODE_BGNLOOP: + clear_current_const(c); loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); + if_depth_in_loop[loop_depth] = 0; break; case OPCODE_BRK: brw_set_predicate_control(p, get_predicate(inst)); - brw_BREAK(p); + brw_BREAK(p, if_depth_in_loop[loop_depth]); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case OPCODE_CONT: brw_set_predicate_control(p, get_predicate(inst)); - brw_CONT(p); + brw_CONT(p, if_depth_in_loop[loop_depth]); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case OPCODE_ENDLOOP: { + clear_current_const(c); struct brw_instruction *inst0, *inst1; GLuint br = 1; @@ -1793,12 +1885,10 @@ void brw_vs_emit(struct brw_vs_compile *c ) if (inst0->header.opcode == BRW_OPCODE_BREAK && inst0->bits3.if_else.jump_count == 0) { inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); - inst0->bits3.if_else.pop_count = 0; } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && inst0->bits3.if_else.jump_count == 0) { inst0->bits3.if_else.jump_count = br * (inst1 - inst0); - inst0->bits3.if_else.pop_count = 0; } } } @@ -1883,11 +1973,9 @@ void brw_vs_emit(struct brw_vs_compile *c ) } } - if (inst->DstReg.RelAddr && inst->DstReg.File == 
PROGRAM_TEMPORARY) { - /* We don't do RelAddr of PROGRAM_OUTPUT yet, because of the - * compute-to-mrf and the fact that we are allocating - * registers for only the used PROGRAM_OUTPUTs. - */ + if (inst->DstReg.RelAddr) { + assert(inst->DstReg.File == PROGRAM_TEMPORARY|| + inst->DstReg.File == PROGRAM_OUTPUT); move_to_reladdr_dst(c, inst, dst); } @@ -1903,7 +1991,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) printf("vs-native:\n"); for (i = 0; i < p->nr_insn; i++) - brw_disasm(stderr, &p->store[i], intel->gen); + brw_disasm(stdout, &p->store[i], intel->gen); printf("\n"); } } diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index 14227a51332..8f1601d10f1 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -101,6 +101,7 @@ static void brw_destroy_context( struct intel_context *intel ) dri_bo_release(&brw->wm.prog_bo); dri_bo_release(&brw->wm.state_bo); dri_bo_release(&brw->wm.const_bo); + dri_bo_release(&brw->wm.push_const_bo); dri_bo_release(&brw->cc.prog_bo); dri_bo_release(&brw->cc.state_bo); dri_bo_release(&brw->cc.vp_bo); diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index e182fc32029..d70be7bda28 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -32,7 +32,7 @@ #include "brw_context.h" #include "brw_wm.h" #include "brw_state.h" - +#include "main/formats.h" /** Return number of src args for given instruction */ GLuint brw_wm_nr_args( GLuint opcode ) @@ -68,6 +68,7 @@ GLuint brw_wm_is_scalar_result( GLuint opcode ) case OPCODE_RCP: case OPCODE_RSQ: case OPCODE_SIN: + case OPCODE_DP2: case OPCODE_DP3: case OPCODE_DP4: case OPCODE_DPH: @@ -177,17 +178,19 @@ static void do_wm_prog( struct brw_context *brw, /* temporary sanity check assertion */ ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program)); - /* - * Shader which use GLSL features such as flow control are handled - * differently from "simple" 
shaders. - */ - if (fp->isGLSL) { - c->dispatch_width = 8; - brw_wm_glsl_emit(brw, c); - } - else { - c->dispatch_width = 16; - brw_wm_non_glsl_emit(brw, c); + if (!brw_wm_fs_emit(brw, c)) { + /* + * Shader which use GLSL features such as flow control are handled + * differently from "simple" shaders. + */ + if (fp->isGLSL) { + c->dispatch_width = 8; + brw_wm_glsl_emit(brw, c); + } + else { + c->dispatch_width = 16; + brw_wm_non_glsl_emit(brw, c); + } } if (INTEL_DEBUG & DEBUG_WM) diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index f40977fab8d..2639d4f26b3 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -61,7 +61,7 @@ struct brw_wm_prog_key { GLuint source_depth_reg:3; GLuint aa_dest_stencil_reg:3; GLuint dest_depth_reg:3; - GLuint nr_depth_regs:3; + GLuint nr_payload_regs:4; GLuint computes_depth:1; /* could be derived from program string */ GLuint source_depth_to_render_target:1; GLuint flat_shade:1; @@ -306,6 +306,7 @@ void brw_wm_lookup_iz( GLuint line_aa, GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c); +GLboolean brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c); /* brw_wm_emit.c */ void emit_alu1(struct brw_compile *p, @@ -343,6 +344,11 @@ void emit_delta_xy(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, const struct brw_reg *arg0); +void emit_dp2(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); void emit_dp3(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, @@ -425,6 +431,10 @@ void emit_sop(struct brw_compile *p, GLuint cond, const struct brw_reg *arg0, const struct brw_reg *arg1); +void emit_sign(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0); void emit_tex(struct brw_wm_compile *c, struct brw_reg *dst, GLuint 
dst_flags, @@ -450,4 +460,13 @@ void emit_xpd(struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1); +GLboolean brw_compile_shader(GLcontext *ctx, + struct gl_shader *shader); +GLboolean brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog); +struct gl_shader *brw_new_shader(GLcontext *ctx, GLuint name, GLuint type); +struct gl_shader_program *brw_new_shader_program(GLcontext *ctx, GLuint name); + +GLboolean brw_do_channel_expressions(struct exec_list *instructions); +GLboolean brw_do_vector_splitting(struct exec_list *instructions); + #endif diff --git a/src/mesa/drivers/dri/i965/brw_wm_debug.c b/src/mesa/drivers/dri/i965/brw_wm_debug.c index a78cc8b54e5..6a91251a80e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_debug.c +++ b/src/mesa/drivers/dri/i965/brw_wm_debug.c @@ -44,16 +44,16 @@ void brw_wm_print_value( struct brw_wm_compile *c, printf("undef"); else if( value - c->vreg >= 0 && value - c->vreg < BRW_WM_MAX_VREG) - printf("r%d", value - c->vreg); + printf("r%ld", (long) (value - c->vreg)); else if (value - c->creg >= 0 && value - c->creg < BRW_WM_MAX_PARAM) - printf("c%d", value - c->creg); + printf("c%ld", (long) (value - c->creg)); else if (value - c->payload.input_interp >= 0 && value - c->payload.input_interp < FRAG_ATTRIB_MAX) - printf("i%d", value - c->payload.input_interp); + printf("i%ld", (long) (value - c->payload.input_interp)); else if (value - c->payload.depth >= 0 && value - c->payload.depth < FRAG_ATTRIB_MAX) - printf("d%d", value - c->payload.depth); + printf("d%ld", (long) (value - c->payload.depth)); else printf("?"); } diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index d9fa2e63354..f3ad01b3fec 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -668,6 +668,28 @@ void emit_cmp(struct brw_compile *p, } } +void emit_sign(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg 
*arg0) +{ + GLuint i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + brw_MOV(p, dst[i], brw_imm_f(0.0)); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0)); + brw_MOV(p, dst[i], brw_imm_f(-1.0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, arg0[i], brw_imm_f(0)); + brw_MOV(p, dst[i], brw_imm_f(1.0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + } +} + void emit_max(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, @@ -709,6 +731,27 @@ void emit_min(struct brw_compile *p, } +void emit_dp2(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) +{ + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; /* Do not emit dead code */ + + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + + brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); + + brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); + brw_MAC(p, dst[dst_chan], arg0[1], arg1[1]); + brw_set_saturate(p, 0); +} + + void emit_dp3(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, @@ -809,21 +852,28 @@ void emit_math1(struct brw_wm_compile *c, const struct brw_reg *arg0) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; GLuint saturate = ((mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE); + struct brw_reg src; + + if (intel->gen >= 6 && arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0) { + /* Gen6 math requires that source and dst horizontal stride be 1. + * + */ + src = *dst; + brw_MOV(p, src, arg0[0]); + } else { + src = arg0[0]; + } if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ assert(is_power_of_two(mask & WRITEMASK_XYZW)); - /* If compressed, this will write message reg 2,3 from arg0.x's 16 - * channels. 
- */ - brw_MOV(p, brw_message_reg(2), arg0[0]); - /* Send two messages to perform all 16 operations: */ brw_push_insn_state(p); @@ -833,7 +883,7 @@ void emit_math1(struct brw_wm_compile *c, function, saturate, 2, - brw_null_reg(), + src, BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); @@ -844,7 +894,7 @@ void emit_math1(struct brw_wm_compile *c, function, saturate, 3, - brw_null_reg(), + sechalf(src), BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); } @@ -873,13 +923,6 @@ void emit_math2(struct brw_wm_compile *c, brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_MOV(p, brw_message_reg(2), arg0[0]); - if (c->dispatch_width == 16) { - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_MOV(p, brw_message_reg(4), sechalf(arg0[0])); - } - - brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, brw_message_reg(3), arg1[0]); if (c->dispatch_width == 16) { brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); @@ -892,7 +935,7 @@ void emit_math2(struct brw_wm_compile *c, function, saturate, 2, - brw_null_reg(), + arg0[0], BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); @@ -905,7 +948,7 @@ void emit_math2(struct brw_wm_compile *c, function, saturate, 4, - brw_null_reg(), + sechalf(arg0[0]), BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); } @@ -1199,6 +1242,7 @@ static void fire_fb_write( struct brw_wm_compile *c, GLuint eot ) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; struct brw_reg dst; if (c->dispatch_width == 16) @@ -1209,6 +1253,7 @@ static void fire_fb_write( struct brw_wm_compile *c, /* Pass through control information: */ /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ + if (intel->gen < 6) /* gen6, use headerless for fb write */ { brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? 
*/ @@ -1222,6 +1267,7 @@ static void fire_fb_write( struct brw_wm_compile *c, /* Send framebuffer write message: */ /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */ brw_fb_WRITE(p, + c->dispatch_width, dst, base_reg, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), @@ -1263,8 +1309,10 @@ void emit_fb_write(struct brw_wm_compile *c, { struct brw_compile *p = &c->func; struct brw_context *brw = p->brw; + struct intel_context *intel = &brw->intel; GLuint nr = 2; GLuint channel; + int base_reg; /* For gen6 fb write with no header, starting from color payload directly!. */ /* Reserve a space for AA - may not be needed: */ @@ -1276,9 +1324,40 @@ void emit_fb_write(struct brw_wm_compile *c, */ brw_push_insn_state(p); + if (intel->gen >= 6) + base_reg = nr; + else + base_reg = 0; + for (channel = 0; channel < 4; channel++) { - if (c->dispatch_width == 16 && brw->has_compr4) { - /* By setting the high bit of the MRF register number, we indicate + if (intel->gen >= 6) { + /* gen6 SIMD16 single source DP write looks like: + * m + 0: r0 + * m + 1: r1 + * m + 2: g0 + * m + 3: g1 + * m + 4: b0 + * m + 5: b1 + * m + 6: a0 + * m + 7: a1 + */ + if (c->dispatch_width == 16) { + brw_MOV(p, brw_message_reg(nr + channel * 2), arg0[channel]); + } else { + brw_MOV(p, brw_message_reg(nr + channel), arg0[channel]); + } + } else if (c->dispatch_width == 16 && brw->has_compr4) { + /* pre-gen6 SIMD16 single source DP write looks like: + * m + 0: r0 + * m + 1: g0 + * m + 2: b0 + * m + 3: a0 + * m + 4: r1 + * m + 5: g1 + * m + 6: b1 + * m + 7: a1 + * + * By setting the high bit of the MRF register number, we indicate * that we want COMPR4 mode - instead of doing the usual destination * + 1 for the second half we get destination + 4. 
*/ @@ -1303,7 +1382,11 @@ void emit_fb_write(struct brw_wm_compile *c, } /* skip over the regs populated above: */ - nr += 8; + if (c->dispatch_width == 16) + nr += 8; + else + nr += 4; + brw_pop_insn_state(p); if (c->key.source_depth_to_render_target) @@ -1336,11 +1419,16 @@ void emit_fb_write(struct brw_wm_compile *c, nr += 2; } + if (intel->gen >= 6) { + /* Subtract off the message header, since we send headerless. */ + nr -= 2; + } + if (!c->key.runtime_check_aads_emit) { if (c->key.aa_dest_stencil_reg) emit_aa(c, arg1, 2); - fire_fb_write(c, 0, nr, target, eot); + fire_fb_write(c, base_reg, nr, target, eot); } else { struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); @@ -1562,6 +1650,10 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]); break; + case OPCODE_DP2: + emit_dp2(p, dst, dst_flags, args[0], args[1]); + break; + case OPCODE_DP3: emit_dp3(p, dst, dst_flags, args[0], args[1]); break; @@ -1673,6 +1765,10 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_sne(p, dst, dst_flags, args[0], args[1]); break; + case OPCODE_SSG: + emit_sign(p, dst, dst_flags, args[0]); + break; + case OPCODE_LIT: emit_lit(c, dst, dst_flags, args[0]); break; @@ -1724,7 +1820,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) printf("wm-native:\n"); for (i = 0; i < p->nr_insn; i++) - brw_disasm(stderr, &p->store[i], p->brw->intel.gen); + brw_disasm(stdout, &p->store[i], p->brw->intel.gen); printf("\n"); } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 0bef874b887..3870bf10fcb 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -88,6 +88,7 @@ static struct prog_src_register src_reg(GLuint file, GLuint idx) reg.RelAddr = 0; reg.Negate = NEGATE_NONE; reg.Abs = 0; + reg.HasIndex2 = 0; return reg; } @@ -1036,13 +1037,12 @@ static void print_insns( const struct prog_instruction *insn, for (i = 0; i < nr; i++, 
insn++) { printf("%3d: ", i); if (insn->Opcode < MAX_OPCODE) - _mesa_print_instruction(insn); + _mesa_fprint_instruction_opt(stdout, insn, 0, PROG_PRINT_DEBUG, NULL); else if (insn->Opcode < MAX_WM_OPCODE) { GLuint idx = insn->Opcode - MAX_OPCODE; - _mesa_print_alu_instruction(insn, - wm_opcode_strings[idx], - 3); + _mesa_fprint_alu_instruction(stdout, insn, wm_opcode_strings[idx], + 3, PROG_PRINT_DEBUG, NULL); } else printf("965 Opcode %d\n", insn->Opcode); @@ -1061,7 +1061,8 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) if (INTEL_DEBUG & DEBUG_WM) { printf("pre-fp:\n"); - _mesa_print_program(&fp->program.Base); + _mesa_fprint_program_opt(stdout, &fp->program.Base, PROG_PRINT_DEBUG, + GL_TRUE); printf("\n"); } diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 575f89b17fa..c1083c59422 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -303,13 +303,13 @@ static void prealloc_reg(struct brw_wm_compile *c) c->first_free_grf = 0; for (i = 0; i < 4; i++) { - if (i < c->key.nr_depth_regs) + if (i < (c->key.nr_payload_regs + 1) / 2) reg = brw_vec8_grf(i * 2, 0); else reg = brw_vec8_grf(0, 0); set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg); } - reg_index += 2 * c->key.nr_depth_regs; + reg_index += c->key.nr_payload_regs; /* constants */ { @@ -380,7 +380,7 @@ static void prealloc_reg(struct brw_wm_compile *c) } } - c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; + c->prog_data.first_curbe_grf = c->key.nr_payload_regs; c->prog_data.urb_read_length = urb_read_length; c->prog_data.curb_read_length = c->nr_creg; c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); @@ -1803,12 +1803,15 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) #define MAX_IF_DEPTH 32 #define MAX_LOOP_DEPTH 32 struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH]; + int if_depth_in_loop[MAX_LOOP_DEPTH]; GLuint i, 
if_depth = 0, loop_depth = 0; struct brw_compile *p = &c->func; struct brw_indirect stack_index = brw_indirect(0, 0); c->out_of_regs = GL_FALSE; + if_depth_in_loop[loop_depth] = 0; + prealloc_reg(c); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); @@ -1903,6 +1906,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case OPCODE_SWZ: emit_alu1(p, brw_MOV, dst, dst_flags, args[0]); break; + case OPCODE_DP2: + emit_dp2(p, dst, dst_flags, args[0], args[1]); + break; case OPCODE_DP3: emit_dp3(p, dst, dst_flags, args[0], args[1]); break; @@ -1971,6 +1977,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_sop(p, dst, dst_flags, BRW_CONDITIONAL_NEQ, args[0], args[1]); break; + case OPCODE_SSG: + emit_sign(p, dst, dst_flags, args[0]); + break; case OPCODE_MUL: emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]); break; @@ -2014,6 +2023,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case OPCODE_IF: assert(if_depth < MAX_IF_DEPTH); if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8); + if_depth_in_loop[loop_depth]++; break; case OPCODE_ELSE: assert(if_depth > 0); @@ -2022,6 +2032,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case OPCODE_ENDIF: assert(if_depth > 0); brw_ENDIF(p, if_inst[--if_depth]); + if_depth_in_loop[loop_depth]--; break; case OPCODE_BGNSUB: brw_save_label(p, inst->Comment, p->nr_insn); @@ -2056,13 +2067,14 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case OPCODE_BGNLOOP: /* XXX may need to invalidate the current_constant regs */ loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); + if_depth_in_loop[loop_depth] = 0; break; case OPCODE_BRK: - brw_BREAK(p); + brw_BREAK(p, if_depth_in_loop[loop_depth]); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case OPCODE_CONT: - brw_CONT(p); + brw_CONT(p, 
if_depth_in_loop[loop_depth]); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case OPCODE_ENDLOOP: @@ -2082,12 +2094,10 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) if (inst0->header.opcode == BRW_OPCODE_BREAK && inst0->bits3.if_else.jump_count == 0) { inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); - inst0->bits3.if_else.pop_count = 0; } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && inst0->bits3.if_else.jump_count == 0) { inst0->bits3.if_else.jump_count = br * (inst1 - inst0); - inst0->bits3.if_else.pop_count = 0; } } } @@ -2111,7 +2121,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) if (INTEL_DEBUG & DEBUG_WM) { printf("wm-native:\n"); for (i = 0; i < p->nr_insn; i++) - brw_disasm(stderr, &p->store[i], intel->gen); + brw_disasm(stdout, &p->store[i], intel->gen); printf("\n"); } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_iz.c b/src/mesa/drivers/dri/i965/brw_wm_iz.c index 5e399ac62a8..8505ef19510 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_iz.c +++ b/src/mesa/drivers/dri/i965/brw_wm_iz.c @@ -152,6 +152,6 @@ void brw_wm_lookup_iz( GLuint line_aa, reg+=2; } - key->nr_depth_regs = (reg+1)/2; + key->nr_payload_regs = reg; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c index 05de85a957e..8fc960b4456 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c @@ -379,7 +379,7 @@ static void pass0_init_payload( struct brw_wm_compile *c ) GLuint i; for (i = 0; i < 4; i++) { - GLuint j = i >= c->key.nr_depth_regs ? 0 : i; + GLuint j = i >= (c->key.nr_payload_regs + 1) / 2 ? 
0 : i; pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, &c->payload.depth[j] ); } diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c index b4493940292..962515a99e9 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c @@ -158,6 +158,7 @@ void brw_wm_pass1( struct brw_wm_compile *c ) case OPCODE_FLR: case OPCODE_FRC: case OPCODE_MOV: + case OPCODE_SSG: case OPCODE_SWZ: case OPCODE_TRUNC: read0 = writemask; @@ -254,6 +255,11 @@ void brw_wm_pass1( struct brw_wm_compile *c ) read2 = WRITEMASK_W; /* pixel w */ break; + case OPCODE_DP2: + read0 = WRITEMASK_XY; + read1 = WRITEMASK_XY; + break; + case OPCODE_DP3: read0 = WRITEMASK_XYZ; read1 = WRITEMASK_XYZ; diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass2.c b/src/mesa/drivers/dri/i965/brw_wm_pass2.c index 31303febf09..54acb3038b5 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass2.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass2.c @@ -76,7 +76,7 @@ static void init_registers( struct brw_wm_compile *c ) for (j = 0; j < c->grf_limit; j++) c->pass2_grf[j].nextuse = BRW_WM_MAX_INSN; - for (j = 0; j < c->key.nr_depth_regs; j++) + for (j = 0; j < (c->key.nr_payload_regs + 1) / 2; j++) prealloc_reg(c, &c->payload.depth[j], i++); for (j = 0; j < c->nr_creg; j++) @@ -101,7 +101,7 @@ static void init_registers( struct brw_wm_compile *c ) assert(nr_interp_regs >= 1); - c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; + c->prog_data.first_curbe_grf = ALIGN(c->key.nr_payload_regs, 2); c->prog_data.urb_read_length = nr_interp_regs * 2; c->prog_data.curb_read_length = c->nr_creg * 2; diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index c1cf4db1cae..6699d0a73e6 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -104,8 +104,22 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) key->uses_kill = 
fp->UsesKill || ctx->Color.AlphaEnabled; key->is_glsl = bfp->isGLSL; - /* temporary sanity check assertion */ - ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp)); + /* If using the fragment shader backend, the program is always + * 8-wide. + */ + if (ctx->Shader.CurrentProgram) { + int i; + + for (i = 0; i < ctx->Shader.CurrentProgram->_NumLinkedShaders; i++) { + struct brw_shader *shader = + (struct brw_shader *)ctx->Shader.CurrentProgram->_LinkedShaders[i];; + + if (shader->base.Type == GL_FRAGMENT_SHADER && + shader->ir != NULL) { + key->is_glsl = GL_TRUE; + } + } + } /* _NEW_DEPTH */ key->stats_wm = intel->stats_wm; diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c index f7acad69129..26f1070a164 100644 --- a/src/mesa/drivers/dri/i965/gen6_cc.c +++ b/src/mesa/drivers/dri/i965/gen6_cc.c @@ -267,9 +267,9 @@ static void upload_cc_state_pointers(struct brw_context *brw) BEGIN_BATCH(4); OUT_BATCH(CMD_3D_CC_STATE_POINTERS << 16 | (4 - 2)); - OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); OUT_RELOC(brw->cc.blend_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); OUT_RELOC(brw->cc.depth_stencil_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); ADVANCE_BATCH(); intel_batchbuffer_emit_mi_flush(intel->batch); diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 863c85449d9..2cd640de175 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -34,18 +34,59 @@ #include "intel_batchbuffer.h" static void +prepare_wm_constants(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + GLcontext *ctx = &intel->ctx; + const struct brw_fragment_program *fp = + brw_fragment_program_const(brw->fragment_program); + + drm_intel_bo_unreference(brw->wm.push_const_bo); + brw->wm.push_const_bo = NULL; + + /* Updates the ParamaterValues[i] pointers for all parameters of the 
+ * basic type of PROGRAM_STATE_VAR. + */ + /* XXX: Should this happen somewhere before to get our state flag set? */ + _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); + + if (brw->wm.prog_data->nr_params != 0) { + float *constants; + unsigned int i; + + brw->wm.push_const_bo = drm_intel_bo_alloc(intel->bufmgr, + "WM constant_bo", + brw->wm.prog_data->nr_params * + sizeof(float), + 4096); + drm_intel_gem_bo_map_gtt(brw->wm.push_const_bo); + constants = brw->wm.push_const_bo->virtual; + for (i = 0; i < brw->wm.prog_data->nr_params; i++) { + constants[i] = *brw->wm.prog_data->param[i]; + } + drm_intel_gem_bo_unmap_gtt(brw->wm.push_const_bo); + } +} + +const struct brw_tracked_state gen6_wm_constants = { + .dirty = { + .mesa = _NEW_PROGRAM_CONSTANTS, + .brw = 0, + .cache = 0, + }, + .prepare = prepare_wm_constants, +}; + +static void upload_wm_state(struct brw_context *brw) { struct intel_context *intel = &brw->intel; GLcontext *ctx = &intel->ctx; const struct brw_fragment_program *fp = brw_fragment_program_const(brw->fragment_program); - unsigned int nr_params = fp->program.Base.Parameters->NumParameters; - drm_intel_bo *constant_bo; - int i; uint32_t dw2, dw4, dw5, dw6; - if (fp->use_const_buffer || nr_params == 0) { + if (fp->use_const_buffer || brw->wm.prog_data->nr_params == 0) { /* Disable the push constant buffers. */ BEGIN_BATCH(5); OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 | (5 - 2)); @@ -55,35 +96,17 @@ upload_wm_state(struct brw_context *brw) OUT_BATCH(0); ADVANCE_BATCH(); } else { - /* Updates the ParamaterValues[i] pointers for all parameters of the - * basic type of PROGRAM_STATE_VAR. 
- */ - _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); - - constant_bo = drm_intel_bo_alloc(intel->bufmgr, "WM constant_bo", - nr_params * 4 * sizeof(float), - 4096); - drm_intel_gem_bo_map_gtt(constant_bo); - for (i = 0; i < nr_params; i++) { - memcpy((char *)constant_bo->virtual + i * 4 * sizeof(float), - fp->program.Base.Parameters->ParameterValues[i], - 4 * sizeof(float)); - } - drm_intel_gem_bo_unmap_gtt(constant_bo); - BEGIN_BATCH(5); OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 | GEN6_CONSTANT_BUFFER_0_ENABLE | (5 - 2)); - OUT_RELOC(constant_bo, + OUT_RELOC(brw->wm.push_const_bo, I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */ - ALIGN(nr_params, 2) / 2 - 1); + ALIGN(brw->wm.prog_data->nr_params, 8) / 8 - 1); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); - - drm_intel_bo_unreference(constant_bo); } intel_batchbuffer_emit_mi_flush(intel->batch); diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index 8ab41f8d279..117d4daf3ba 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -202,6 +202,9 @@ intel_bufferobj_subdata(GLcontext * ctx, struct intel_context *intel = intel_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + if (size == 0) + return; + assert(intel_obj); if (intel_obj->region) @@ -426,6 +429,9 @@ intel_bufferobj_flush_mapped_range(GLcontext *ctx, GLenum target, if (intel_obj->range_map_buffer == NULL) return; + if (length == 0) + return; + temp_bo = drm_intel_bo_alloc(intel->bufmgr, "range map flush", length, 64); drm_intel_bo_subdata(temp_bo, 0, length, intel_obj->range_map_buffer); diff --git a/src/mesa/drivers/dri/intel/intel_chipset.h b/src/mesa/drivers/dri/intel/intel_chipset.h index 72a74322ee5..b5f180bbc88 100644 --- a/src/mesa/drivers/dri/intel/intel_chipset.h +++ b/src/mesa/drivers/dri/intel/intel_chipset.h @@ -73,6 +73,7 @@ #define PCI_CHIP_SANDYBRIDGE 
0x0102 #define PCI_CHIP_SANDYBRIDGE_M 0x0106 +#define PCI_CHIP_SANDYBRIDGE_M_D0 0x0126 #define IS_MOBILE(devid) (devid == PCI_CHIP_I855_GM || \ devid == PCI_CHIP_I915_GM || \ @@ -119,7 +120,8 @@ #define IS_IRONLAKE(devid) IS_GEN5(devid) #define IS_GEN6(devid) (devid == PCI_CHIP_SANDYBRIDGE || \ - devid == PCI_CHIP_SANDYBRIDGE_M) + devid == PCI_CHIP_SANDYBRIDGE_M || \ + devid == PCI_CHIP_SANDYBRIDGE_M_D0) #define IS_965(devid) (IS_GEN4(devid) || \ IS_G4X(devid) || \ diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index e19f44035fd..a9ba93d24bb 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -377,7 +377,8 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) intel_region_reference(&region, depth_region); } else - region = intel_region_alloc_for_handle(intel, buffers[i].cpp, + region = intel_region_alloc_for_handle(intel->intelScreen, + buffers[i].cpp, drawable->w, drawable->h, buffers[i].pitch / buffers[i].cpp, @@ -720,6 +721,8 @@ intelInitContext(struct intel_context *intel, ctx->Const.MaxPointSizeAA = 3.0; ctx->Const.PointSizeGranularity = 1.0; + ctx->Const.MaxSamples = 1.0; + /* reinitialize the context point state. * It depend on constants in __GLcontextRec::Const */ diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index c7ac2de01e6..28d53284fdf 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -32,14 +32,26 @@ #include "main/mtypes.h" #include "main/mm.h" -#include "texmem.h" #include "dri_metaops.h" + +#ifdef __cplusplus +extern "C" { + /* Evil hack for using libdrm in a c++ compiler. 
*/ + #define virtual virt +#endif + #include "drm.h" #include "intel_bufmgr.h" #include "intel_screen.h" #include "intel_tex_obj.h" #include "i915_drm.h" + +#ifdef __cplusplus + #undef virtual +} +#endif + #include "tnl/t_vertex.h" #define TAG(x) intel##x diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c index edba1fc2f2b..bf22a423fcb 100644 --- a/src/mesa/drivers/dri/intel/intel_extensions.c +++ b/src/mesa/drivers/dri/intel/intel_extensions.c @@ -50,8 +50,9 @@ #define need_GL_EXT_cull_vertex #define need_GL_EXT_draw_buffers2 #define need_GL_EXT_fog_coord -#define need_GL_EXT_framebuffer_object #define need_GL_EXT_framebuffer_blit +#define need_GL_EXT_framebuffer_multisample +#define need_GL_EXT_framebuffer_object #define need_GL_EXT_gpu_program_parameters #define need_GL_EXT_point_parameters #define need_GL_EXT_provoking_vertex @@ -111,6 +112,7 @@ static const struct dri_extension card_extensions[] = { { "GL_EXT_cull_vertex", GL_EXT_cull_vertex_functions }, { "GL_EXT_framebuffer_blit", GL_EXT_framebuffer_blit_functions }, { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions }, + { "GL_EXT_framebuffer_multisample", GL_EXT_framebuffer_multisample_functions }, { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, { "GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions }, { "GL_EXT_packed_depth_stencil", NULL }, diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 4a83886fc16..2693b5fa72e 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -137,27 +137,21 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, rb->Format = MESA_FORMAT_A8; rb->DataType = GL_UNSIGNED_BYTE; break; + case GL_DEPTH_COMPONENT16: + rb->Format = MESA_FORMAT_Z16; + rb->DataType = GL_UNSIGNED_SHORT; + break; case GL_STENCIL_INDEX: case GL_STENCIL_INDEX1_EXT: case GL_STENCIL_INDEX4_EXT: case 
GL_STENCIL_INDEX8_EXT: case GL_STENCIL_INDEX16_EXT: - /* alloc a depth+stencil buffer */ - rb->Format = MESA_FORMAT_S8_Z24; - rb->DataType = GL_UNSIGNED_INT_24_8_EXT; - break; - case GL_DEPTH_COMPONENT16: - rb->Format = MESA_FORMAT_Z16; - rb->DataType = GL_UNSIGNED_SHORT; - break; case GL_DEPTH_COMPONENT: case GL_DEPTH_COMPONENT24: case GL_DEPTH_COMPONENT32: - rb->Format = MESA_FORMAT_S8_Z24; - rb->DataType = GL_UNSIGNED_INT_24_8_EXT; - break; case GL_DEPTH_STENCIL_EXT: case GL_DEPTH24_STENCIL8_EXT: + /* alloc a depth+stencil buffer */ rb->Format = MESA_FORMAT_S8_Z24; rb->DataType = GL_UNSIGNED_INT_24_8_EXT; break; @@ -182,7 +176,7 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, /* alloc hardware renderbuffer */ DBG("Allocating %d x %d Intel RBO\n", width, height); - irb->region = intel_region_alloc(intel, I915_TILING_NONE, cpp, + irb->region = intel_region_alloc(intel->intelScreen, I915_TILING_NONE, cpp, width, height, GL_TRUE); if (!irb->region) return GL_FALSE; /* out of memory? 
*/ diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index 39ac0205fa1..d316d34d690 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -30,6 +30,7 @@ #include "intel_regions.h" #include "intel_tex_layout.h" #include "main/enums.h" +#include "main/formats.h" #define FILE_DEBUG_FLAG DEBUG_MIPTREE @@ -136,7 +137,7 @@ intel_miptree_create(struct intel_context *intel, return NULL; } - mt->region = intel_region_alloc(intel, + mt->region = intel_region_alloc(intel->intelScreen, tiling, mt->cpp, mt->total_width, diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index 680d18ba299..e87e29462c3 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -142,10 +142,10 @@ intel_region_unmap(struct intel_context *intel, struct intel_region *region) } static struct intel_region * -intel_region_alloc_internal(struct intel_context *intel, +intel_region_alloc_internal(struct intel_screen *screen, GLuint cpp, GLuint width, GLuint height, GLuint pitch, - drm_intel_bo *buffer) + uint32_t tiling, drm_intel_bo *buffer) { struct intel_region *region; @@ -164,44 +164,52 @@ intel_region_alloc_internal(struct intel_context *intel, region->pitch = pitch; region->refcount = 1; region->buffer = buffer; - - /* Default to no tiling */ - region->tiling = I915_TILING_NONE; + region->tiling = tiling; + region->screen = screen; _DBG("%s <-- %p\n", __FUNCTION__, region); return region; } struct intel_region * -intel_region_alloc(struct intel_context *intel, +intel_region_alloc(struct intel_screen *screen, uint32_t tiling, GLuint cpp, GLuint width, GLuint height, GLboolean expect_accelerated_upload) { drm_intel_bo *buffer; - struct intel_region *region; unsigned long flags = 0; unsigned long aligned_pitch; if (expect_accelerated_upload) flags |= BO_ALLOC_FOR_RENDER; - buffer = 
drm_intel_bo_alloc_tiled(intel->bufmgr, "region", + buffer = drm_intel_bo_alloc_tiled(screen->bufmgr, "region", width, height, cpp, &tiling, &aligned_pitch, flags); - region = intel_region_alloc_internal(intel, cpp, width, height, - aligned_pitch / cpp, buffer); - if (region == NULL) - return region; + return intel_region_alloc_internal(screen, cpp, width, height, + aligned_pitch / cpp, tiling, buffer); +} - region->tiling = tiling; +GLboolean +intel_region_flink(struct intel_region *region, uint32_t *name) +{ + if (region->name == 0) { + if (drm_intel_bo_flink(region->buffer, &region->name)) + return GL_FALSE; + + _mesa_HashInsert(region->screen->named_regions, + region->name, region); + } - return region; + *name = region->name; + + return GL_TRUE; } struct intel_region * -intel_region_alloc_for_handle(struct intel_context *intel, +intel_region_alloc_for_handle(struct intel_screen *screen, GLuint cpp, GLuint width, GLuint height, GLuint pitch, GLuint handle, const char *name) @@ -209,9 +217,9 @@ intel_region_alloc_for_handle(struct intel_context *intel, struct intel_region *region, *dummy; drm_intel_bo *buffer; int ret; - uint32_t bit_6_swizzle; + uint32_t bit_6_swizzle, tiling; - region = _mesa_HashLookup(intel->intelScreen->named_regions, handle); + region = _mesa_HashLookup(screen->named_regions, handle); if (region != NULL) { dummy = NULL; if (region->width != width || region->height != height || @@ -225,25 +233,26 @@ intel_region_alloc_for_handle(struct intel_context *intel, return dummy; } - buffer = intel_bo_gem_create_from_name(intel->bufmgr, name, handle); - - region = intel_region_alloc_internal(intel, cpp, - width, height, pitch, buffer); - if (region == NULL) - return region; - - ret = drm_intel_bo_get_tiling(region->buffer, &region->tiling, - &bit_6_swizzle); + buffer = intel_bo_gem_create_from_name(screen->bufmgr, name, handle); + if (buffer == NULL) + return NULL; + ret = drm_intel_bo_get_tiling(buffer, &tiling, &bit_6_swizzle); if (ret != 0) { 
fprintf(stderr, "Couldn't get tiling of buffer %d (%s): %s\n", handle, name, strerror(-ret)); - intel_region_release(&region); + drm_intel_bo_unreference(buffer); + return NULL; + } + + region = intel_region_alloc_internal(screen, cpp, + width, height, pitch, tiling, buffer); + if (region == NULL) { + drm_intel_bo_unreference(buffer); return NULL; } region->name = handle; - region->screen = intel->intelScreen; - _mesa_HashInsert(intel->intelScreen->named_regions, handle, region); + _mesa_HashInsert(screen->named_regions, handle, region); return region; } diff --git a/src/mesa/drivers/dri/intel/intel_regions.h b/src/mesa/drivers/dri/intel/intel_regions.h index 6bbed32f2a2..8464a5e937d 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.h +++ b/src/mesa/drivers/dri/intel/intel_regions.h @@ -76,18 +76,21 @@ struct intel_region /* Allocate a refcounted region. Pointers to regions should only be * copied by calling intel_reference_region(). */ -struct intel_region *intel_region_alloc(struct intel_context *intel, +struct intel_region *intel_region_alloc(struct intel_screen *screen, uint32_t tiling, GLuint cpp, GLuint width, GLuint height, GLboolean expect_accelerated_upload); struct intel_region * -intel_region_alloc_for_handle(struct intel_context *intel, +intel_region_alloc_for_handle(struct intel_screen *screen, GLuint cpp, GLuint width, GLuint height, GLuint pitch, unsigned int handle, const char *name); +GLboolean +intel_region_flink(struct intel_region *region, uint32_t *name); + void intel_region_reference(struct intel_region **dst, struct intel_region *src); diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 15a465c6402..0a542a7303d 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -70,7 +70,7 @@ PUBLIC const char __driConfigOptions[] = DRI_CONF_DESC(en, "Enable early Z in classic mode (unstable, 945-only).") DRI_CONF_OPT_END - 
DRI_CONF_OPT_BEGIN(fragment_shader, bool, false) + DRI_CONF_OPT_BEGIN(fragment_shader, bool, true) DRI_CONF_DESC(en, "Enable limited ARB_fragment_shader support on 915/945.") DRI_CONF_OPT_END @@ -159,7 +159,8 @@ intel_create_image_from_name(__DRIcontext *context, image->data = loaderPrivate; cpp = _mesa_get_format_bytes(image->format); - image->region = intel_region_alloc_for_handle(intel, cpp, width, height, + image->region = intel_region_alloc_for_handle(intel->intelScreen, + cpp, width, height, pitch, name, "image"); if (image->region == NULL) { FREE(image); @@ -206,11 +207,79 @@ intel_destroy_image(__DRIimage *image) FREE(image); } +static __DRIimage * +intel_create_image(__DRIscreen *screen, + int width, int height, int format, + unsigned int use, + void *loaderPrivate) +{ + __DRIimage *image; + struct intel_screen *intelScreen = screen->private; + int cpp; + + image = CALLOC(sizeof *image); + if (image == NULL) + return NULL; + + switch (format) { + case __DRI_IMAGE_FORMAT_RGB565: + image->format = MESA_FORMAT_RGB565; + image->internal_format = GL_RGB; + image->data_type = GL_UNSIGNED_BYTE; + break; + case __DRI_IMAGE_FORMAT_XRGB8888: + image->format = MESA_FORMAT_XRGB8888; + image->internal_format = GL_RGB; + image->data_type = GL_UNSIGNED_BYTE; + break; + case __DRI_IMAGE_FORMAT_ARGB8888: + image->format = MESA_FORMAT_ARGB8888; + image->internal_format = GL_RGBA; + image->data_type = GL_UNSIGNED_BYTE; + break; + default: + free(image); + return NULL; + } + + image->data = loaderPrivate; + cpp = _mesa_get_format_bytes(image->format); + + image->region = + intel_region_alloc(intelScreen, I915_TILING_NONE, + cpp, width, height, GL_TRUE); + if (image->region == NULL) { + FREE(image); + return NULL; + } + + return image; +} + +static GLboolean +intel_query_image(__DRIimage *image, int attrib, int *value) +{ + switch (attrib) { + case __DRI_IMAGE_ATTRIB_STRIDE: + *value = image->region->pitch * image->region->cpp; + return GL_TRUE; + case 
__DRI_IMAGE_ATTRIB_HANDLE: + *value = image->region->buffer->handle; + return GL_TRUE; + case __DRI_IMAGE_ATTRIB_NAME: + return intel_region_flink(image->region, (uint32_t *) value); + default: + return GL_FALSE; + } +} + static struct __DRIimageExtensionRec intelImageExtension = { { __DRI_IMAGE, __DRI_IMAGE_VERSION }, intel_create_image_from_name, intel_create_image_from_renderbuffer, intel_destroy_image, + intel_create_image, + intel_query_image }; static const __DRIextension *intelScreenExtensions[] = { diff --git a/src/mesa/drivers/dri/intel/intel_tex.h b/src/mesa/drivers/dri/intel/intel_tex.h index 4bb012dc65e..cd77dd5b8e4 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.h +++ b/src/mesa/drivers/dri/intel/intel_tex.h @@ -31,8 +31,6 @@ #include "main/mtypes.h" #include "main/formats.h" #include "intel_context.h" -#include "texmem.h" - void intelInitTextureFuncs(struct dd_function_table *functions); diff --git a/src/mesa/drivers/dri/nouveau/nouveau_driver.c b/src/mesa/drivers/dri/nouveau/nouveau_driver.c index 4ec864c181c..6452fe218e5 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_driver.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_driver.c @@ -138,5 +138,7 @@ nouveau_driver_functions_init(struct dd_function_table *functions) functions->DrawPixels = _mesa_meta_DrawPixels; functions->CopyPixels = _mesa_meta_CopyPixels; functions->Bitmap = _mesa_meta_Bitmap; +#if FEATURE_EXT_framebuffer_blit functions->BlitFramebuffer = _mesa_meta_BlitFramebuffer; +#endif } diff --git a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c index bd1273beea7..32d8f2d0f9b 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c @@ -262,10 +262,12 @@ nouveau_finish_render_texture(GLcontext *ctx, void nouveau_fbo_functions_init(struct dd_function_table *functions) { +#if FEATURE_EXT_framebuffer_object functions->NewFramebuffer = nouveau_framebuffer_new; functions->NewRenderbuffer = 
nouveau_renderbuffer_new; functions->BindFramebuffer = nouveau_bind_framebuffer; functions->FramebufferRenderbuffer = nouveau_framebuffer_renderbuffer; functions->RenderTexture = nouveau_render_texture; functions->FinishRenderTexture = nouveau_finish_render_texture; +#endif } diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile index 3167d49bcae..d0eb1707845 100644 --- a/src/mesa/drivers/dri/r300/compiler/Makefile +++ b/src/mesa/drivers/dri/r300/compiler/Makefile @@ -23,6 +23,7 @@ C_SOURCES = \ radeon_dataflow_deadcode.c \ radeon_dataflow_swizzles.c \ radeon_optimize.c \ + radeon_remove_constants.c \ radeon_rename_regs.c \ r3xx_fragprog.c \ r300_fragprog.c \ diff --git a/src/mesa/drivers/dri/r300/compiler/SConscript b/src/mesa/drivers/dri/r300/compiler/SConscript index c6f47a6f8a4..847857b1425 100755 --- a/src/mesa/drivers/dri/r300/compiler/SConscript +++ b/src/mesa/drivers/dri/r300/compiler/SConscript @@ -22,6 +22,7 @@ r300compiler = env.ConvenienceLibrary( 'radeon_pair_schedule.c', 'radeon_pair_regalloc.c', 'radeon_optimize.c', + 'radeon_remove_constants.c', 'radeon_rename_regs.c', 'radeon_emulate_branches.c', 'radeon_emulate_loops.c', diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index d2fa816894c..8613ec51091 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -30,6 +30,7 @@ #include "radeon_program_alu.h" #include "radeon_program_tex.h" #include "radeon_rename_regs.h" +#include "radeon_remove_constants.h" #include "r300_fragprog.h" #include "r300_fragprog_swizzle.h" #include "r500_fragprog.h" @@ -180,6 +181,13 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) debug_program_log(c, "after dataflow passes"); + if (c->Base.remove_unused_constants) { + rc_remove_unused_constants(&c->Base, + &c->code->constants_remap_table); + + 
debug_program_log(c, "after constants cleanup"); + } + if(!c->Base.is_r500) { /* This pass makes it easier for the scheduler to group TEX * instructions and reduces the chances of creating too @@ -224,4 +232,14 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) r300FragmentProgramDump(c->code); } } + + /* Check the number of constants. */ + if (!c->Base.Error) { + unsigned max = c->Base.is_r500 ? R500_PFS_NUM_CONST_REGS : R300_PFS_NUM_CONST_REGS; + + if (c->Base.Program.Constants.Count > max) { + rc_error(&c->Base, "Too many constants. Max: %i, Got: %i\n", + max, c->Base.Program.Constants.Count); + } + } } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index 666c9c2a7a9..b05b3aabf30 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -31,6 +31,7 @@ #include "radeon_swizzle.h" #include "radeon_emulate_branches.h" #include "radeon_emulate_loops.h" +#include "radeon_remove_constants.h" struct loop { int BgnLoop; @@ -465,7 +466,7 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi { struct rc_instruction *rci; - struct loop * loops; + struct loop * loops = NULL; int current_loop_depth = 0; int loops_reserved = 0; @@ -484,6 +485,21 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi if (!valid_dst(compiler->code, &vpi->DstReg)) continue; + if (rc_get_opcode_info(vpi->Opcode)->HasDstReg) { + /* Relative addressing of destination operands is not supported yet. */ + if (vpi->DstReg.RelAddr) { + rc_error(&compiler->Base, "Vertex program does not support relative " + "addressing of destination operands (yet).\n"); + return; + } + + /* Neither is Saturate. 
*/ + if (vpi->SaturateMode != RC_SATURATE_NONE) { + rc_error(&compiler->Base, "Vertex program does not support the Saturate " + "modifier (yet).\n"); + } + } + if (compiler->code->length >= R500_VS_MAX_ALU_DWORDS || (compiler->code->length >= R300_VS_MAX_ALU_DWORDS && !compiler->Base.is_r500)) { rc_error(&compiler->Base, "Vertex program has too many instructions\n"); @@ -543,10 +559,16 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi } case RC_OPCODE_ENDLOOP: { - struct loop * l = &loops[current_loop_depth - 1]; - unsigned int act_addr = l->BgnLoop - 1; - unsigned int last_addr = (compiler->code->length / 4) - 1; - unsigned int ret_addr = l->BgnLoop; + struct loop * l; + unsigned int act_addr; + unsigned int last_addr; + unsigned int ret_addr; + + assert(loops); + l = &loops[current_loop_depth - 1]; + act_addr = l->BgnLoop - 1; + last_addr = (compiler->code->length / 4) - 1; + ret_addr = l->BgnLoop; if (loops_reserved >= R300_VS_MAX_FC_OPS) { rc_error(&compiler->Base, @@ -624,10 +646,9 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c struct temporary_allocation * ta; unsigned int i, j; - compiler->code->num_temporaries = 0; memset(hwtemps, 0, sizeof(hwtemps)); - /* Pass 1: Count original temporaries and allocate structures */ + /* Pass 1: Count original temporaries. */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); @@ -645,12 +666,30 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c } } } + compiler->code->num_temporaries = num_orig_temps; + + /* Pass 2: If there is relative addressing of temporaries, we cannot change register indices. Give up. 
*/ + for (inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->HasDstReg) + if (inst->U.I.DstReg.RelAddr) + return; + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[i].RelAddr) { + return; + } + } + } + + compiler->code->num_temporaries = 0; ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool, sizeof(struct temporary_allocation) * num_orig_temps); memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps); - /* Pass 2: Determine original temporary lifetimes */ + /* Pass 3: Determine original temporary lifetimes */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); /* Instructions inside of loops need to use the ENDLOOP @@ -685,7 +724,7 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c } } - /* Pass 3: Register allocation */ + /* Pass 4: Register allocation */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); @@ -853,6 +892,76 @@ static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) return 1; } +static void transform_negative_addressing(struct r300_vertex_program_compiler *c, + struct rc_instruction *arl, + struct rc_instruction *end, + int min_offset) +{ + struct rc_instruction *inst, *add; + unsigned const_swizzle; + + /* Transform ARL */ + add = rc_insert_new_instruction(&c->Base, arl->Prev); + add->U.I.Opcode = RC_OPCODE_ADD; + add->U.I.DstReg.File = RC_FILE_TEMPORARY; + add->U.I.DstReg.Index = rc_find_free_temporary(&c->Base); + add->U.I.DstReg.WriteMask = 
RC_MASK_X; + add->U.I.SrcReg[0] = arl->U.I.SrcReg[0]; + add->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + add->U.I.SrcReg[1].Index = rc_constants_add_immediate_scalar(&c->Base.Program.Constants, + min_offset, &const_swizzle); + add->U.I.SrcReg[1].Swizzle = const_swizzle; + + arl->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + arl->U.I.SrcReg[0].Index = add->U.I.DstReg.Index; + arl->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XXXX; + + /* Rewrite offsets up to and excluding inst. */ + for (inst = arl->Next; inst != end; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + for (unsigned i = 0; i < opcode->NumSrcRegs; i++) + if (inst->U.I.SrcReg[i].RelAddr) + inst->U.I.SrcReg[i].Index -= min_offset; + } +} + +static void rc_emulate_negative_addressing(struct r300_vertex_program_compiler *c) +{ + struct rc_instruction *inst, *lastARL = NULL; + int min_offset = 0; + + for (inst = c->Base.Program.Instructions.Next; inst != &c->Base.Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (inst->U.I.Opcode == RC_OPCODE_ARL) { + if (lastARL != NULL && min_offset < 0) + transform_negative_addressing(c, lastARL, inst, min_offset); + + lastARL = inst; + min_offset = 0; + continue; + } + + for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].RelAddr && + inst->U.I.SrcReg[i].Index < 0) { + /* ARL must precede any indirect addressing. 
*/ + if (lastARL == NULL) { + rc_error(&c->Base, "Vertex shader: Found relative addressing without ARL."); + return; + } + + if (inst->U.I.SrcReg[i].Index < min_offset) + min_offset = inst->U.I.SrcReg[i].Index; + } + } + } + + if (lastARL != NULL && min_offset < 0) + transform_negative_addressing(c, lastARL, inst, min_offset); +} + static void debug_program_log(struct r300_vertex_program_compiler* c, const char * where) { if (c->Base.Debug) { @@ -868,44 +977,56 @@ static struct rc_swizzle_caps r300_vertprog_swizzle_caps = { }; -void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) +void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c) { struct emulate_loop_state loop_state; - compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps; + c->Base.SwizzleCaps = &r300_vertprog_swizzle_caps; - addArtificialOutputs(compiler); + addArtificialOutputs(c); - debug_program_log(compiler, "before compilation"); + debug_program_log(c, "before compilation"); - if (compiler->Base.is_r500) - rc_transform_loops(&compiler->Base, &loop_state, R500_VS_MAX_ALU); + if (c->Base.is_r500) + rc_transform_loops(&c->Base, &loop_state, R500_VS_MAX_ALU); else - rc_transform_loops(&compiler->Base, &loop_state, R300_VS_MAX_ALU); + rc_transform_loops(&c->Base, &loop_state, R300_VS_MAX_ALU); + if (c->Base.Error) + return; - debug_program_log(compiler, "after emulate loops"); + debug_program_log(c, "after emulate loops"); - if (!compiler->Base.is_r500) { - rc_emulate_branches(&compiler->Base); - debug_program_log(compiler, "after emulate branches"); + if (!c->Base.is_r500) { + rc_emulate_branches(&c->Base); + if (c->Base.Error) + return; + debug_program_log(c, "after emulate branches"); } - if (compiler->Base.is_r500) { + rc_emulate_negative_addressing(c); + + debug_program_log(c, "after negative addressing emulation"); + + if (c->Base.is_r500) { struct radeon_program_transformation transformations[] = { { &r300_transform_vertex_alu, 0 }, { 
&r300_transform_trig_scale_vertex, 0 } }; - radeonLocalTransform(&compiler->Base, 2, transformations); + radeonLocalTransform(&c->Base, 2, transformations); + if (c->Base.Error) + return; - debug_program_log(compiler, "after native rewrite"); + debug_program_log(c, "after native rewrite"); } else { struct radeon_program_transformation transformations[] = { { &r300_transform_vertex_alu, 0 }, { &radeonTransformTrigSimple, 0 } }; - radeonLocalTransform(&compiler->Base, 2, transformations); + radeonLocalTransform(&c->Base, 2, transformations); + if (c->Base.Error) + return; - debug_program_log(compiler, "after native rewrite"); + debug_program_log(c, "after native rewrite"); /* Note: This pass has to be done seperately from ALU rewrite, * because it needs to check every instruction. @@ -913,9 +1034,11 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) struct radeon_program_transformation transformations2[] = { { &transform_nonnative_modifiers, 0 }, }; - radeonLocalTransform(&compiler->Base, 1, transformations2); + radeonLocalTransform(&c->Base, 1, transformations2); + if (c->Base.Error) + return; - debug_program_log(compiler, "after emulate modifiers"); + debug_program_log(c, "after emulate modifiers"); } { @@ -926,30 +1049,58 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) struct radeon_program_transformation transformations[] = { { &transform_source_conflicts, 0 }, }; - radeonLocalTransform(&compiler->Base, 1, transformations); + radeonLocalTransform(&c->Base, 1, transformations); + if (c->Base.Error) + return; } - debug_program_log(compiler, "after source conflict resolve"); + debug_program_log(c, "after source conflict resolve"); - rc_dataflow_deadcode(&compiler->Base, &dataflow_outputs_mark_used, compiler); + rc_dataflow_deadcode(&c->Base, &dataflow_outputs_mark_used, c); + if (c->Base.Error) + return; - debug_program_log(compiler, "after deadcode"); + debug_program_log(c, "after deadcode"); - 
rc_dataflow_swizzles(&compiler->Base); + rc_dataflow_swizzles(&c->Base); + if (c->Base.Error) + return; - allocate_temporary_registers(compiler); + debug_program_log(c, "after dataflow"); - debug_program_log(compiler, "after dataflow"); + allocate_temporary_registers(c); + if (c->Base.Error) + return; - translate_vertex_program(compiler); + debug_program_log(c, "after register allocation"); - rc_constants_copy(&compiler->code->constants, &compiler->Base.Program.Constants); + if (c->Base.remove_unused_constants) { + rc_remove_unused_constants(&c->Base, + &c->code->constants_remap_table); + if (c->Base.Error) + return; - compiler->code->InputsRead = compiler->Base.Program.InputsRead; - compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten; + debug_program_log(c, "after constants cleanup"); + } - if (compiler->Base.Debug) { + translate_vertex_program(c); + if (c->Base.Error) + return; + + rc_constants_copy(&c->code->constants, &c->Base.Program.Constants); + + c->code->InputsRead = c->Base.Program.InputsRead; + c->code->OutputsWritten = c->Base.Program.OutputsWritten; + + if (c->Base.Debug) { fprintf(stderr, "Final vertex program code:\n"); - r300_vertex_program_dump(compiler); + r300_vertex_program_dump(c); + } + + /* Check the number of constants. */ + if (!c->Base.Error && + c->Base.Program.Constants.Count > 256) { + rc_error(&c->Base, "Too many constants. 
Max: 256, Got: %i\n", + c->Base.Program.Constants.Count); } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index 896246d2035..f76676fae8e 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -235,6 +235,7 @@ struct rX00_fragment_program_code { unsigned writes_depth:1; struct rc_constant_list constants; + unsigned *constants_remap_table; }; @@ -266,6 +267,7 @@ struct r300_vertex_program_code { int outputs[VSF_MAX_OUTPUTS]; struct rc_constant_list constants; + unsigned *constants_remap_table; uint32_t InputsRead; uint32_t OutputsWritten; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index 7c42eb3ae57..5155b912e17 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -39,9 +39,12 @@ struct radeon_compiler { char * ErrorMsg; /* Hardware specification. */ - unsigned is_r500; + unsigned is_r500:1; unsigned max_temp_regs; + /* Whether to remove unused constants and empty holes in constant space. 
*/ + unsigned remove_unused_constants:1; + /** * Variables used internally, not be touched by callers * of the compiler diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c index faf531b412e..acdb371de93 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c @@ -157,8 +157,12 @@ static void update_instruction(struct deadcode_state * s, struct rc_instruction unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index); if (pused) { usedmask = *pused & inst->U.I.DstReg.WriteMask; - *pused &= ~usedmask; + if (!inst->U.I.DstReg.RelAddr) + *pused &= ~usedmask; } + + if (inst->U.I.DstReg.RelAddr) + mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X); } insts->WriteMask |= usedmask; @@ -213,6 +217,7 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f { struct deadcode_state s; unsigned int nr_instructions; + unsigned has_temp_reladdr_src = 0; memset(&s, 0, sizeof(s)); s.C = c; @@ -300,6 +305,30 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name); } } + + if (!has_temp_reladdr_src) { + for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[i].RelAddr) { + /* If there is a register read from a temporary file with relative addressing, + * mark all preceding written registers as used. 
*/ + for (struct rc_instruction *ptr = inst->Prev; + ptr != &c->Program.Instructions; + ptr = ptr->Prev) { + if (opcode->HasDstReg && + ptr->U.I.DstReg.File == RC_FILE_TEMPORARY && + ptr->U.I.DstReg.WriteMask) { + mark_used(&s, + ptr->U.I.DstReg.File, + ptr->U.I.DstReg.Index, + ptr->U.I.DstReg.WriteMask); + } + } + + has_temp_reladdr_src = 1; + } + } + } } update_instruction(&s, inst); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c index 2ea830be7f9..da495a3afaa 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c @@ -95,6 +95,12 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .IsComponentwise = 1 }, { + .Opcode = RC_OPCODE_DP2, + .Name = "DP2", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { .Opcode = RC_OPCODE_DP3, .Name = "DP3", .NumSrcRegs = 2, @@ -295,6 +301,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .IsComponentwise = 1 }, { + .Opcode = RC_OPCODE_SSG, + .Name = "SSG", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { .Opcode = RC_OPCODE_SUB, .Name = "SUB", .NumSrcRegs = 2, @@ -435,6 +448,10 @@ void rc_compute_sources_for_writemask( case RC_OPCODE_ARL: srcmasks[0] |= RC_MASK_X; break; + case RC_OPCODE_DP2: + srcmasks[0] |= RC_MASK_XY; + srcmasks[1] |= RC_MASK_XY; + break; case RC_OPCODE_DP3: srcmasks[0] |= RC_MASK_XYZ; srcmasks[1] |= RC_MASK_XYZ; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h index 6e18d6eb3f1..d3f639c8701 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h @@ -64,6 +64,9 @@ typedef enum { * dst.c = d src0.c / dy */ RC_OPCODE_DDY, + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y */ + RC_OPCODE_DP2, + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */ RC_OPCODE_DP3, @@ -154,6 +157,9 @@ 
typedef enum { /** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */ RC_OPCODE_SNE, + /** vec4 instruction: dst.c = (src0.c < 0) ? -1 : ((src0.c > 0) ? 1 : 0) */ + RC_OPCODE_SSG, + /** vec4 instruction: dst.c = src0.c - src1.c */ RC_OPCODE_SUB, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c index 407a0a55ee2..8327e9aced6 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c @@ -230,6 +230,34 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c, } +static void check_opcode_support(struct r300_fragment_program_compiler *c, + struct rc_sub_instruction *inst) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + + if (opcode->HasDstReg) { + if (inst->DstReg.RelAddr) { + rc_error(&c->Base, "Fragment program does not support relative addressing " + "of destination operands.\n"); + return; + } + + if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) { + rc_error(&c->Base, "Fragment program does not support signed Saturate.\n"); + return; + } + } + + for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->SrcReg[i].RelAddr) { + rc_error(&c->Base, "Fragment program does not support relative addressing " + " of source operands.\n"); + return; + } + } +} + + /** * Translate all ALU instructions into corresponding pair instructions, * performing no other changes. 
@@ -249,6 +277,8 @@ void rc_pair_translate(struct r300_fragment_program_compiler *c) struct rc_sub_instruction copy = inst->U.I; + check_opcode_support(c, &copy); + final_rewrite(&copy); inst->Type = RC_INSTRUCTION_PAIR; set_pair_instruction(c, &inst->U.P, &copy); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index 857aae55145..704a7bb2d23 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -216,18 +216,18 @@ static void transform_CEIL(struct radeon_compiler* c, rc_remove_instruction(inst); } -static void transform_DP3(struct radeon_compiler* c, +static void transform_DP2(struct radeon_compiler* c, struct rc_instruction* inst) { struct rc_src_register src0 = inst->U.I.SrcReg[0]; struct rc_src_register src1 = inst->U.I.SrcReg[1]; - src0.Negate &= ~RC_MASK_W; - src0.Swizzle &= ~(7 << (3 * 3)); - src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); - src1.Negate &= ~RC_MASK_W; - src1.Swizzle &= ~(7 << (3 * 3)); - src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); - emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1); + src0.Negate &= ~(RC_MASK_Z | RC_MASK_W); + src0.Swizzle &= ~(63 << (3 * 2)); + src0.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3)); + src1.Negate &= ~(RC_MASK_Z | RC_MASK_W); + src1.Swizzle &= ~(63 << (3 * 2)); + src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3)); + emit2(c, inst->Prev, RC_OPCODE_DP3, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1); rc_remove_instruction(inst); } @@ -464,6 +464,43 @@ static void transform_SNE(struct radeon_compiler* c, rc_remove_instruction(inst); } +static void transform_SSG(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* result = sign(x) + * + * CMP tmp0, -x, 1, 0 + * CMP tmp1, x, 1, 0 + * ADD result, tmp0, -tmp1; + */ + unsigned tmp0, tmp1; + + /* 0 < x */ + tmp0 = 
rc_find_free_temporary(c); + emit3(c, inst->Prev, RC_OPCODE_CMP, 0, + dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask), + negate(inst->U.I.SrcReg[0]), + builtin_one, + builtin_zero); + + /* x < 0 */ + tmp1 = rc_find_free_temporary(c); + emit3(c, inst->Prev, RC_OPCODE_CMP, 0, + dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask), + inst->U.I.SrcReg[0], + builtin_one, + builtin_zero); + + /* Either both are zero, or one of them is one and the other is zero. */ + /* result = tmp0 - tmp1 */ + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tmp0), + negate(srcreg(RC_FILE_TEMPORARY, tmp1))); + + rc_remove_instruction(inst); +} + static void transform_SUB(struct radeon_compiler* c, struct rc_instruction* inst) { @@ -516,6 +553,7 @@ int radeonTransformALU( switch(inst->U.I.Opcode) { case RC_OPCODE_ABS: transform_ABS(c, inst); return 1; case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; + case RC_OPCODE_DP2: transform_DP2(c, inst); return 1; case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; case RC_OPCODE_DST: transform_DST(c, inst); return 1; case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; @@ -530,6 +568,7 @@ int radeonTransformALU( case RC_OPCODE_SLE: transform_SLE(c, inst); return 1; case RC_OPCODE_SLT: transform_SLT(c, inst); return 1; case RC_OPCODE_SNE: transform_SNE(c, inst); return 1; + case RC_OPCODE_SSG: transform_SSG(c, inst); return 1; case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; @@ -577,6 +616,29 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c, rc_remove_instruction(inst); } +static void transform_r300_vertex_DP2(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_instruction *next_inst = inst->Next; + transform_DP2(c, inst); + next_inst->Prev->U.I.Opcode = RC_OPCODE_DP4; +} + +static void transform_r300_vertex_DP3(struct radeon_compiler* c, + struct 
rc_instruction* inst) +{ + struct rc_src_register src0 = inst->U.I.SrcReg[0]; + struct rc_src_register src1 = inst->U.I.SrcReg[1]; + src0.Negate &= ~RC_MASK_W; + src0.Swizzle &= ~(7 << (3 * 3)); + src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); + src1.Negate &= ~RC_MASK_W; + src1.Swizzle &= ~(7 << (3 * 3)); + src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); + emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1); + rc_remove_instruction(inst); +} + static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c, struct rc_instruction* inst) { @@ -672,6 +734,41 @@ static void transform_r300_vertex_SLE(struct radeon_compiler* c, inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; } +static void transform_r300_vertex_SSG(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* result = sign(x) + * + * SLT tmp0, 0, x; + * SLT tmp1, x, 0; + * ADD result, tmp0, -tmp1; + */ + unsigned tmp0, tmp1; + + /* 0 < x */ + tmp0 = rc_find_free_temporary(c); + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask), + builtin_zero, + inst->U.I.SrcReg[0]); + + /* x < 0 */ + tmp1 = rc_find_free_temporary(c); + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask), + inst->U.I.SrcReg[0], + builtin_zero); + + /* Either both are zero, or one of them is one and the other is zero. */ + /* result = tmp0 - tmp1 */ + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tmp0), + negate(srcreg(RC_FILE_TEMPORARY, tmp1))); + + rc_remove_instruction(inst); +} + /** * For use with radeonLocalTransform, this transforms non-native ALU * instructions of the r300 up to r500 vertex engine. 
@@ -685,7 +782,8 @@ int r300_transform_vertex_alu( case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1; case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1; - case RC_OPCODE_DP3: transform_DP3(c, inst); return 1; + case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1; + case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1; case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1; @@ -705,6 +803,7 @@ int r300_transform_vertex_alu( return 1; } return 0; + case RC_OPCODE_SSG: transform_r300_vertex_SSG(c, inst); return 1; case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c index 9c4b65f4c00..ddce590ee66 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c @@ -117,8 +117,8 @@ int radeonTransformTEX( struct rc_instruction * inst_rcp = NULL; struct rc_instruction * inst_mad; struct rc_instruction * inst_cmp; - unsigned tmp_texsample = rc_find_free_temporary(c); - unsigned tmp_sum = rc_find_free_temporary(c); + unsigned tmp_texsample; + unsigned tmp_sum; unsigned tmp_recip_w = 0; int pass, fail, tex; @@ -126,6 +126,7 @@ int radeonTransformTEX( struct rc_dst_register output_reg = inst->U.I.DstReg; /* Redirect TEX to a new temp. */ + tmp_texsample = rc_find_free_temporary(c); inst->U.I.DstReg.File = RC_FILE_TEMPORARY; inst->U.I.DstReg.Index = tmp_texsample; inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; @@ -144,6 +145,7 @@ int radeonTransformTEX( } /* Perspective-divide r by W (if it's TXP) and add the texture sample (see below). 
*/ + tmp_sum = rc_find_free_temporary(c); inst_mad = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst); inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mad->U.I.DstReg.Index = tmp_sum; @@ -199,6 +201,8 @@ int radeonTransformTEX( inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE; inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111; inst_cmp->U.I.SrcReg[fail] = shadow_ambient(compiler, inst->U.I.TexSrcUnit); + + assert(tmp_texsample != tmp_sum && tmp_sum != tmp_recip_w); } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c new file mode 100644 index 00000000000..be89e9fa5b4 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c @@ -0,0 +1,128 @@ +/* + * Copyright (C) 2010 Marek Olšák <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_remove_constants.h" + +void rc_remove_unused_constants(struct radeon_compiler *c, + unsigned **out_remap_table) +{ + unsigned char *const_used; + unsigned *remap_table; + unsigned *inv_remap_table; + unsigned has_rel_addr = 0; + unsigned is_identity = 1; + unsigned are_externals_remapped = 0; + struct rc_constant *constants = c->Program.Constants.Constants; + + if (!c->Program.Constants.Count) { + *out_remap_table = NULL; + return; + } + + const_used = malloc(c->Program.Constants.Count); + memset(const_used, 0, c->Program.Constants.Count); + + /* Pass 1: Mark used constants. */ + for (struct rc_instruction *inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); + + for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_CONSTANT) { + if (inst->U.I.SrcReg[i].RelAddr) { + has_rel_addr = 1; + } else { + const_used[inst->U.I.SrcReg[i].Index] = 1; + } + } + } + } + + /* Pass 2: If there is relative addressing, mark all externals as used. */ + if (has_rel_addr) { + for (unsigned i = 0; i < c->Program.Constants.Count; i++) + if (constants[i].Type == RC_CONSTANT_EXTERNAL) + const_used[i] = 1; + } + + /* Pass 3: Make the remapping table and remap constants. + * This pass removes unused constants simply by overwriting them by other constants. 
*/ + remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned)); + inv_remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned)); + unsigned new_count = 0; + + for (unsigned i = 0; i < c->Program.Constants.Count; i++) { + if (const_used[i]) { + remap_table[new_count] = i; + inv_remap_table[i] = new_count; + + if (i != new_count) { + if (constants[i].Type == RC_CONSTANT_EXTERNAL) + are_externals_remapped = 1; + + constants[new_count] = constants[i]; + is_identity = 0; + } + new_count++; + } + } + + /* is_identity ==> new_count <= old_count + * !is_identity ==> new_count < old_count */ + assert( is_identity || new_count < c->Program.Constants.Count); + assert(!(has_rel_addr && are_externals_remapped)); + + /* Pass 4: Redirect reads of all constants to their new locations. */ + if (!is_identity) { + for (struct rc_instruction *inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); + + for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_CONSTANT) { + inst->U.I.SrcReg[i].Index = inv_remap_table[inst->U.I.SrcReg[i].Index]; + } + } + } + + } + + /* Set the new constant count. Note that new_count may be less than + * Count even though the remapping function is identity. In that case, + * the constants have been removed at the end of the array. 
*/ + c->Program.Constants.Count = new_count; + + if (are_externals_remapped) { + *out_remap_table = remap_table; + } else { + *out_remap_table = NULL; + free(remap_table); + } + + free(const_used); + free(inv_remap_table); +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.h new file mode 100644 index 00000000000..0d3a26ca1ca --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2010 Marek Olšák <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef RADEON_REMOVE_CONSTANTS_H +#define RADEON_REMOVE_CONSTANTS_H + +#include "radeon_compiler.h" + +void rc_remove_unused_constants(struct radeon_compiler *c, + unsigned **out_remap_table); + +#endif diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c index 95f4306f604..7b6521c7480 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c @@ -38,7 +38,6 @@ #include "r300_fragprog_common.h" -#include "program/prog_parameter.h" #include "program/prog_print.h" #include "compiler/radeon_compiler.h" diff --git a/src/mesa/drivers/dri/r600/Makefile b/src/mesa/drivers/dri/r600/Makefile index 17915621ee4..03c17540e02 100644 --- a/src/mesa/drivers/dri/r600/Makefile +++ b/src/mesa/drivers/dri/r600/Makefile @@ -59,6 +59,15 @@ DRIVER_SOURCES = \ r600_texstate.c \ r600_blit.c \ r700_debug.c \ + evergreen_context.c \ + evergreen_state.c \ + evergreen_tex.c \ + evergreen_ioctl.c \ + evergreen_render.c \ + evergreen_chip.c \ + evergreen_vertprog.c \ + evergreen_fragprog.c \ + evergreen_oglprog.c \ $(RADEON_COMMON_SOURCES) \ $(EGL_SOURCES) \ $(CS_SOURCES) diff --git a/src/mesa/drivers/dri/r600/evergreen_chip.c b/src/mesa/drivers/dri/r600/evergreen_chip.c new file mode 100644 index 00000000000..f925f215bcc --- /dev/null +++ b/src/mesa/drivers/dri/r600/evergreen_chip.c @@ -0,0 +1,1289 @@ +/* + * Copyright (C) 2008-2010 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +/* + * Authors: + * Richard Li <[email protected]>, <[email protected]> + */ + +#include "main/imports.h" +#include "main/glheader.h" +#include "main/simple_list.h" + +#include "r600_context.h" +#include "r600_cmdbuf.h" + +#include "evergreen_chip.h" +#include "evergreen_off.h" +#include "evergreen_diff.h" +#include "evergreen_fragprog.h" +#include "evergreen_vertprog.h" + +#include "radeon_mipmap_tree.h" + +void evergreenCreateChip(context_t *context) +{ + EVERGREEN_CHIP_CONTEXT * evergreen = + (EVERGREEN_CHIP_CONTEXT*) CALLOC(sizeof(EVERGREEN_CHIP_CONTEXT)); + + context->pChip = (void*)evergreen; +} + +#define EVERGREEN_ALLOC_STATE( ATOM, CHK, SZ, EMIT ) \ +do { \ + context->evergreen_atoms.ATOM.cmd_size = (SZ); \ + context->evergreen_atoms.ATOM.cmd = NULL; \ + context->evergreen_atoms.ATOM.name = #ATOM; \ + context->evergreen_atoms.ATOM.idx = 0; \ + context->evergreen_atoms.ATOM.check = check_##CHK; \ + context->evergreen_atoms.ATOM.dirty = GL_FALSE; \ + context->evergreen_atoms.ATOM.emit = (EMIT); \ + context->radeon.hw.max_state_size += (SZ); \ + insert_at_tail(&context->radeon.hw.atomlist, &context->evergreen_atoms.ATOM); \ +} while (0) + +/* +static void evergreen_init_query_stateobj(radeonContextPtr radeon, int SZ) +{ + radeon->query.queryobj.cmd_size = (SZ); + radeon->query.queryobj.cmd = NULL; + radeon->query.queryobj.name = "queryobj"; + radeon->query.queryobj.idx = 0; + radeon->query.queryobj.check = check_queryobj; + radeon->query.queryobj.dirty = GL_FALSE; + radeon->query.queryobj.emit = r700SendQueryBegin; + radeon->hw.max_state_size += (SZ); + insert_at_tail(&radeon->hw.atomlist, &radeon->query.queryobj); +} +*/ + +static int check_always(GLcontext *ctx, struct radeon_state_atom *atom) +{ + return atom->cmd_size; +} + +static void evergreenSendTexState(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + + struct 
evergreen_vertex_program *vp = context->selected_vp; + + struct radeon_bo *bo = NULL; + unsigned int i; + unsigned int nBorderSet = 0; + BATCH_LOCALS(&context->radeon); + + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled) { + radeonTexObj *t = evergreen->textures[i]; + + if (t) { + /* Tex resource */ + if (!t->image_override) { + bo = t->mt->bo; + } else { + bo = t->bo; + } + if (bo) + { + radeon_bo_unmap(bo); + + r700SyncSurf(context, bo, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, + 0, TC_ACTION_ENA_bit); + + BEGIN_BATCH_NO_AUTOSTATE(10 + 4); + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 8)); + + if( (1<<i) & vp->r700AsmCode.unVetTexBits ) + { /* vs texture */ + R600_OUT_BATCH((i + VERT_ATTRIB_MAX + EG_SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE); + } + else + { + R600_OUT_BATCH(i * EG_FETCH_RESOURCE_STRIDE); + } + + R600_OUT_BATCH(evergreen->textures[i]->SQ_TEX_RESOURCE0); + R600_OUT_BATCH(evergreen->textures[i]->SQ_TEX_RESOURCE1); + R600_OUT_BATCH(evergreen->textures[i]->SQ_TEX_RESOURCE2); + R600_OUT_BATCH(evergreen->textures[i]->SQ_TEX_RESOURCE3); + R600_OUT_BATCH(evergreen->textures[i]->SQ_TEX_RESOURCE4); + R600_OUT_BATCH(evergreen->textures[i]->SQ_TEX_RESOURCE5); + R600_OUT_BATCH(evergreen->textures[i]->SQ_TEX_RESOURCE6); + R600_OUT_BATCH(evergreen->textures[i]->SQ_TEX_RESOURCE7); + + R600_OUT_BATCH_RELOC(evergreen->textures[i]->SQ_TEX_RESOURCE2, + bo, + evergreen->textures[i]->SQ_TEX_RESOURCE2, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + R600_OUT_BATCH_RELOC(evergreen->textures[i]->SQ_TEX_RESOURCE3, + bo, + evergreen->textures[i]->SQ_TEX_RESOURCE3, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + END_BATCH(); + COMMIT_BATCH(); + } + /* Tex sampler */ + BEGIN_BATCH_NO_AUTOSTATE(5); + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3)); + + if( (1<<i) & vp->r700AsmCode.unVetTexBits ) + { /* vs texture */ + 
R600_OUT_BATCH((i+SQ_TEX_SAMPLER_VS_OFFSET) * 3); + } + else + { + R600_OUT_BATCH(i * 3); + } + R600_OUT_BATCH(evergreen->textures[i]->SQ_TEX_SAMPLER0); + R600_OUT_BATCH(evergreen->textures[i]->SQ_TEX_SAMPLER1); + R600_OUT_BATCH(evergreen->textures[i]->SQ_TEX_SAMPLER2); + + END_BATCH(); + COMMIT_BATCH(); + + /* Tex border color */ + if(0 == nBorderSet) + { + BEGIN_BATCH_NO_AUTOSTATE(2 + 4); + R600_OUT_BATCH_REGSEQ(EG_TD_PS_BORDER_COLOR_RED, 4); + R600_OUT_BATCH(evergreen->textures[i]->TD_PS_SAMPLER0_BORDER_RED); + R600_OUT_BATCH(evergreen->textures[i]->TD_PS_SAMPLER0_BORDER_GREEN); + R600_OUT_BATCH(evergreen->textures[i]->TD_PS_SAMPLER0_BORDER_BLUE); + R600_OUT_BATCH(evergreen->textures[i]->TD_PS_SAMPLER0_BORDER_ALPHA); + END_BATCH(); + COMMIT_BATCH(); + + nBorderSet = 1; + } + } + } + } +} + +static int check_evergreen_tx(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + unsigned int i, count = 0; + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + + for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled) { + radeonTexObj *t = evergreen->textures[i]; + if (t) + count++; + } + } + radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count); + return count * 37 + 6; +} + +static void evergreenSendSQConfig(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + BEGIN_BATCH_NO_AUTOSTATE(19); + //6 + EVERGREEN_OUT_BATCH_REGVAL(EG_SPI_CONFIG_CNTL, evergreen->evergreen_config.SPI_CONFIG_CNTL.u32All); + EVERGREEN_OUT_BATCH_REGVAL(EG_SPI_CONFIG_CNTL_1, evergreen->evergreen_config.SPI_CONFIG_CNTL_1.u32All); + //6 + EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_CONFIG, 4); + R600_OUT_BATCH(evergreen->evergreen_config.SQ_CONFIG.u32All); + 
R600_OUT_BATCH(evergreen->evergreen_config.SQ_GPR_RESOURCE_MGMT_1.u32All); + R600_OUT_BATCH(evergreen->evergreen_config.SQ_GPR_RESOURCE_MGMT_2.u32All); + R600_OUT_BATCH(evergreen->evergreen_config.SQ_GPR_RESOURCE_MGMT_3.u32All); + //7 + EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_THREAD_RESOURCE_MGMT, 5); + R600_OUT_BATCH(evergreen->evergreen_config.SQ_THREAD_RESOURCE_MGMT.u32All); + R600_OUT_BATCH(evergreen->evergreen_config.SQ_THREAD_RESOURCE_MGMT_2.u32All); + R600_OUT_BATCH(evergreen->evergreen_config.SQ_STACK_RESOURCE_MGMT_1.u32All); + R600_OUT_BATCH(evergreen->evergreen_config.SQ_STACK_RESOURCE_MGMT_2.u32All); + R600_OUT_BATCH(evergreen->evergreen_config.SQ_STACK_RESOURCE_MGMT_3.u32All); + + END_BATCH(); + + COMMIT_BATCH(); +} + +extern int evergreen_getTypeSize(GLenum type); +static void evergreenSetupVTXConstants(GLcontext * ctx, + void * pAos, + StreamDesc * pStreamDesc) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + struct radeon_aos * paos = (struct radeon_aos *)pAos; + unsigned int nVBsize; + BATCH_LOCALS(&context->radeon); + + unsigned int uSQ_VTX_CONSTANT_WORD0_0; + unsigned int uSQ_VTX_CONSTANT_WORD1_0; + unsigned int uSQ_VTX_CONSTANT_WORD2_0 = 0; + unsigned int uSQ_VTX_CONSTANT_WORD3_0 = 0; + unsigned int uSQ_VTX_CONSTANT_WORD7_0 = 0; + + if (!paos->bo) + return; + + r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit); + + if(0 == pStreamDesc->stride) + { + nVBsize = paos->count * pStreamDesc->size * getTypeSize(pStreamDesc->type); + } + else + { + nVBsize = (paos->count - 1) * pStreamDesc->stride + + pStreamDesc->size * getTypeSize(pStreamDesc->type); + } + + //uSQ_VTX_CONSTANT_WORD0_0 + uSQ_VTX_CONSTANT_WORD0_0 = paos->offset; + + //uSQ_VTX_CONSTANT_WORD1_0 + uSQ_VTX_CONSTANT_WORD1_0 = nVBsize; + + //uSQ_VTX_CONSTANT_WORD2_0 + SETfield(uSQ_VTX_CONSTANT_WORD2_0, + pStreamDesc->stride, + SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift, + SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask); + SETfield(uSQ_VTX_CONSTANT_WORD2_0, 
GetSurfaceFormat(pStreamDesc->type, pStreamDesc->size, NULL), + SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift, + SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask); // TODO : trace back api for initial data type, not only GL_FLOAT + SETfield(uSQ_VTX_CONSTANT_WORD2_0, 0, BASE_ADDRESS_HI_shift, BASE_ADDRESS_HI_mask); // TODO + if(GL_TRUE == pStreamDesc->normalize) + { + SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_NORM, + SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask); + } + else + { + SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_SCALED, + SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask); + } + if(1 == pStreamDesc->_signed) + { + SETbit(uSQ_VTX_CONSTANT_WORD2_0, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit); + } + + //uSQ_VTX_CONSTANT_WORD3_0 + SETfield(uSQ_VTX_CONSTANT_WORD3_0, SQ_SEL_X, + EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_shift, + EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_mask); + SETfield(uSQ_VTX_CONSTANT_WORD3_0, SQ_SEL_Y, + EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_shift, + EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_mask); + SETfield(uSQ_VTX_CONSTANT_WORD3_0, SQ_SEL_Z, + EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_shift, + EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_mask); + SETfield(uSQ_VTX_CONSTANT_WORD3_0, SQ_SEL_W, + EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_shift, + EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_mask); + + //uSQ_VTX_CONSTANT_WORD7_0 + SETfield(uSQ_VTX_CONSTANT_WORD7_0, SQ_TEX_VTX_VALID_BUFFER, + SQ_TEX_RESOURCE_WORD6_0__TYPE_shift, SQ_TEX_RESOURCE_WORD6_0__TYPE_mask); + + BEGIN_BATCH_NO_AUTOSTATE(10 + 2); + + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 8)); + R600_OUT_BATCH((pStreamDesc->element + EG_SQ_FETCH_RESOURCE_VS_OFFSET) * EG_FETCH_RESOURCE_STRIDE); + R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD0_0); + R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD1_0); + R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD2_0); + R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD3_0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + 
R600_OUT_BATCH(0); + R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD7_0); + R600_OUT_BATCH_RELOC(uSQ_VTX_CONSTANT_WORD0_0, + paos->bo, + uSQ_VTX_CONSTANT_WORD0_0, + RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); + + COMMIT_BATCH(); +} + +static int check_evergreen_vtx(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + int count = context->radeon.tcl.aos_count * 12; + + if (count) + count += 6; + + radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count); + return count; +} + +static void evergreenSendVTX(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + struct evergreen_vertex_program *vp = (struct evergreen_vertex_program *)(context->selected_vp); + unsigned int i, j = 0; + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + if (context->radeon.tcl.aos_count == 0) + return; + + BEGIN_BATCH_NO_AUTOSTATE(6); + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1)); + R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX); + R600_OUT_BATCH(0); + + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1)); + R600_OUT_BATCH(mmSQ_VTX_START_INST_LOC - ASIC_CTL_CONST_BASE_INDEX); + R600_OUT_BATCH(0); + END_BATCH(); + COMMIT_BATCH(); + + for(i=0; i<VERT_ATTRIB_MAX; i++) { + if(vp->mesa_program->Base.InputsRead & (1 << i)) + { + evergreenSetupVTXConstants(ctx, + (void*)(&context->radeon.tcl.aos[j]), + &(context->stream_desc[j])); + j++; + } + } +} +static void evergreenSendPA(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + int id = 0; + + BEGIN_BATCH_NO_AUTOSTATE(3); + EVERGREEN_OUT_BATCH_REGVAL(EG_PA_SU_HARDWARE_SCREEN_OFFSET, 0); + END_BATCH(); + + BEGIN_BATCH_NO_AUTOSTATE(22); + 
EVERGREEN_OUT_BATCH_REGSEQ(EG_PA_SC_SCREEN_SCISSOR_TL, 2); + R600_OUT_BATCH(evergreen->PA_SC_SCREEN_SCISSOR_TL.u32All); + R600_OUT_BATCH(evergreen->PA_SC_SCREEN_SCISSOR_BR.u32All); + + EVERGREEN_OUT_BATCH_REGSEQ(EG_PA_SC_WINDOW_OFFSET, 12); + R600_OUT_BATCH(evergreen->PA_SC_WINDOW_OFFSET.u32All); + R600_OUT_BATCH(evergreen->PA_SC_WINDOW_SCISSOR_TL.u32All); + R600_OUT_BATCH(evergreen->PA_SC_WINDOW_SCISSOR_BR.u32All); + R600_OUT_BATCH(evergreen->PA_SC_CLIPRECT_RULE.u32All); + R600_OUT_BATCH(evergreen->PA_SC_CLIPRECT_0_TL.u32All); + R600_OUT_BATCH(evergreen->PA_SC_CLIPRECT_0_BR.u32All); + R600_OUT_BATCH(evergreen->PA_SC_CLIPRECT_1_TL.u32All); + R600_OUT_BATCH(evergreen->PA_SC_CLIPRECT_1_BR.u32All); + R600_OUT_BATCH(evergreen->PA_SC_CLIPRECT_2_TL.u32All); + R600_OUT_BATCH(evergreen->PA_SC_CLIPRECT_2_BR.u32All); + R600_OUT_BATCH(evergreen->PA_SC_CLIPRECT_3_TL.u32All); + R600_OUT_BATCH(evergreen->PA_SC_CLIPRECT_3_BR.u32All); + + EVERGREEN_OUT_BATCH_REGSEQ(EG_PA_SC_GENERIC_SCISSOR_TL, 2); + R600_OUT_BATCH(evergreen->PA_SC_GENERIC_SCISSOR_TL.u32All); + R600_OUT_BATCH(evergreen->PA_SC_GENERIC_SCISSOR_BR.u32All); + END_BATCH(); + + BEGIN_BATCH_NO_AUTOSTATE(3); + EVERGREEN_OUT_BATCH_REGVAL(EG_PA_SC_EDGERULE, evergreen->PA_SC_EDGERULE.u32All); + END_BATCH(); + + + BEGIN_BATCH_NO_AUTOSTATE(18); + EVERGREEN_OUT_BATCH_REGSEQ(EG_PA_SC_VPORT_SCISSOR_0_TL, 4); + R600_OUT_BATCH(evergreen->viewport[id].PA_SC_VPORT_SCISSOR_0_TL.u32All); + R600_OUT_BATCH(evergreen->viewport[id].PA_SC_VPORT_SCISSOR_0_BR.u32All); + R600_OUT_BATCH(evergreen->viewport[id].PA_SC_VPORT_SCISSOR_0_TL.u32All); + R600_OUT_BATCH(evergreen->viewport[id].PA_SC_VPORT_SCISSOR_0_BR.u32All); + + EVERGREEN_OUT_BATCH_REGSEQ(EG_PA_SC_VPORT_ZMIN_0, 2); + R600_OUT_BATCH(evergreen->viewport[id].PA_SC_VPORT_ZMIN_0.u32All); + R600_OUT_BATCH(evergreen->viewport[id].PA_SC_VPORT_ZMAX_0.u32All); + + EVERGREEN_OUT_BATCH_REGSEQ(EG_PA_CL_VPORT_XSCALE, 6); + R600_OUT_BATCH(evergreen->viewport[id].PA_CL_VPORT_XSCALE.u32All); + 
R600_OUT_BATCH(evergreen->viewport[id].PA_CL_VPORT_XOFFSET.u32All); + R600_OUT_BATCH(evergreen->viewport[id].PA_CL_VPORT_YSCALE.u32All); + R600_OUT_BATCH(evergreen->viewport[id].PA_CL_VPORT_YOFFSET.u32All); + R600_OUT_BATCH(evergreen->viewport[id].PA_CL_VPORT_ZSCALE.u32All); + R600_OUT_BATCH(evergreen->viewport[id].PA_CL_VPORT_ZOFFSET.u32All); + END_BATCH(); + + + for (id = 0; id < EVERGREEN_MAX_UCP; id++) { + if (evergreen->ucp[id].enabled) { + BEGIN_BATCH_NO_AUTOSTATE(6); + EVERGREEN_OUT_BATCH_REGSEQ(EG_PA_CL_UCP_0_X + (4 * id), 4); + R600_OUT_BATCH(evergreen->ucp[id].PA_CL_UCP_0_X.u32All); + R600_OUT_BATCH(evergreen->ucp[id].PA_CL_UCP_0_Y.u32All); + R600_OUT_BATCH(evergreen->ucp[id].PA_CL_UCP_0_Z.u32All); + R600_OUT_BATCH(evergreen->ucp[id].PA_CL_UCP_0_W.u32All); + END_BATCH(); + } + } + + BEGIN_BATCH_NO_AUTOSTATE(42); + EVERGREEN_OUT_BATCH_REGSEQ(EG_PA_CL_CLIP_CNTL, 5); + R600_OUT_BATCH(evergreen->PA_CL_CLIP_CNTL.u32All); + R600_OUT_BATCH(evergreen->PA_SU_SC_MODE_CNTL.u32All); + R600_OUT_BATCH(evergreen->PA_CL_VTE_CNTL.u32All); + R600_OUT_BATCH(evergreen->PA_CL_VS_OUT_CNTL.u32All); + R600_OUT_BATCH(evergreen->PA_CL_NANINF_CNTL.u32All); + + EVERGREEN_OUT_BATCH_REGSEQ(EG_PA_SU_POINT_SIZE, 3); + R600_OUT_BATCH(evergreen->PA_SU_POINT_SIZE.u32All); + R600_OUT_BATCH(evergreen->PA_SU_POINT_MINMAX.u32All); + R600_OUT_BATCH(evergreen->PA_SU_LINE_CNTL.u32All); + + EVERGREEN_OUT_BATCH_REGSEQ(EG_PA_SC_MODE_CNTL_0, 2); + R600_OUT_BATCH(evergreen->PA_SC_MODE_CNTL_0.u32All); + R600_OUT_BATCH(evergreen->PA_SC_MODE_CNTL_1.u32All); + + EVERGREEN_OUT_BATCH_REGSEQ(EG_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6); + R600_OUT_BATCH(evergreen->PA_SU_POLY_OFFSET_DB_FMT_CNTL.u32All); + R600_OUT_BATCH(evergreen->PA_SU_POLY_OFFSET_CLAMP.u32All); + R600_OUT_BATCH(evergreen->PA_SU_POLY_OFFSET_FRONT_SCALE.u32All); + R600_OUT_BATCH(evergreen->PA_SU_POLY_OFFSET_FRONT_OFFSET.u32All); + R600_OUT_BATCH(evergreen->PA_SU_POLY_OFFSET_BACK_SCALE.u32All); + 
R600_OUT_BATCH(evergreen->PA_SU_POLY_OFFSET_BACK_OFFSET.u32All); + + EVERGREEN_OUT_BATCH_REGSEQ(EG_PA_SC_LINE_CNTL, 16); + R600_OUT_BATCH(evergreen->PA_SC_LINE_CNTL.u32All); + R600_OUT_BATCH(evergreen->PA_SC_AA_CONFIG.u32All); + R600_OUT_BATCH(evergreen->PA_SU_VTX_CNTL.u32All); + R600_OUT_BATCH(evergreen->PA_CL_GB_VERT_CLIP_ADJ.u32All); + R600_OUT_BATCH(evergreen->PA_CL_GB_VERT_DISC_ADJ.u32All); + R600_OUT_BATCH(evergreen->PA_CL_GB_HORZ_CLIP_ADJ.u32All); + R600_OUT_BATCH(evergreen->PA_CL_GB_HORZ_DISC_ADJ.u32All); + R600_OUT_BATCH(evergreen->PA_SC_AA_SAMPLE_LOCS_0.u32All); + R600_OUT_BATCH(evergreen->PA_SC_AA_SAMPLE_LOCS_1.u32All); + R600_OUT_BATCH(evergreen->PA_SC_AA_SAMPLE_LOCS_2.u32All); + R600_OUT_BATCH(evergreen->PA_SC_AA_SAMPLE_LOCS_3.u32All); + R600_OUT_BATCH(evergreen->PA_SC_AA_SAMPLE_LOCS_4.u32All); + R600_OUT_BATCH(evergreen->PA_SC_AA_SAMPLE_LOCS_5.u32All); + R600_OUT_BATCH(evergreen->PA_SC_AA_SAMPLE_LOCS_6.u32All); + R600_OUT_BATCH(evergreen->PA_SC_AA_SAMPLE_LOCS_7.u32All); + R600_OUT_BATCH(evergreen->PA_SC_AA_MASK.u32All); + + END_BATCH(); + + COMMIT_BATCH(); +} +static void evergreenSendTP(GLcontext *ctx, struct radeon_state_atom *atom) +{ + /* + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + COMMIT_BATCH(); + */ +} + +static void evergreenSendPSresource(GLcontext *ctx) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + struct radeon_bo * pbo; + + struct radeon_bo * pbo_const; + + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + pbo = (struct radeon_bo *)evergreenGetActiveFpShaderBo(GL_CONTEXT(context)); + + if (!pbo) + return; + + r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); + + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + 
EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_PGM_START_PS, 1); + R600_OUT_BATCH(evergreen->ps.SQ_PGM_START_PS.u32All); + R600_OUT_BATCH_RELOC(evergreen->ps.SQ_PGM_START_PS.u32All, + pbo, + evergreen->ps.SQ_PGM_START_PS.u32All, + RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); + + BEGIN_BATCH_NO_AUTOSTATE(3); + EVERGREEN_OUT_BATCH_REGVAL(EG_SQ_LOOP_CONST_0, 0x01000FFF); + END_BATCH(); + + pbo_const = (struct radeon_bo *)(context->fp_Constbo); + + if(NULL != pbo_const) + { + r700SyncSurf(context, pbo_const, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); + + BEGIN_BATCH_NO_AUTOSTATE(3); + + if(evergreen->ps.num_consts < 4) + { + EVERGREEN_OUT_BATCH_REGVAL(EG_SQ_ALU_CONST_BUFFER_SIZE_PS_0, 1); + } + else + { + EVERGREEN_OUT_BATCH_REGVAL(EG_SQ_ALU_CONST_BUFFER_SIZE_PS_0, (evergreen->ps.num_consts * 4)/16 ); + } + + END_BATCH(); + + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_ALU_CONST_CACHE_PS_0, 1); + R600_OUT_BATCH(context->fp_bo_offset >> 8); + R600_OUT_BATCH_RELOC(0, + pbo_const, + 0, + RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); + } + + COMMIT_BATCH(); +} + +static void evergreenSendVSresource(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + struct radeon_bo * pbo; + + struct radeon_bo * pbo_const; + + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + pbo = (struct radeon_bo *)evergreenGetActiveVpShaderBo(GL_CONTEXT(context)); + + if (!pbo) + return; + + r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); + + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_PGM_START_VS, 1); + R600_OUT_BATCH(evergreen->vs.SQ_PGM_START_VS.u32All); + R600_OUT_BATCH_RELOC(evergreen->vs.SQ_PGM_START_VS.u32All, + pbo, + evergreen->vs.SQ_PGM_START_VS.u32All, + RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); + + BEGIN_BATCH_NO_AUTOSTATE(3); + 
EVERGREEN_OUT_BATCH_REGVAL((EG_SQ_LOOP_CONST_0 + 32*1), 0x0100000F); //consts == 1 + //EVERGREEN_OUT_BATCH_REGVAL((EG_SQ_LOOP_CONST_0 + (SQ_LOOP_CONST_vs<2)), 0x0100000F); + END_BATCH(); + + pbo_const = (struct radeon_bo *)(context->vp_Constbo); + + if(NULL != pbo_const) + { + r700SyncSurf(context, pbo_const, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); + + BEGIN_BATCH_NO_AUTOSTATE(3); + + if(evergreen->vs.num_consts < 4) + { + EVERGREEN_OUT_BATCH_REGVAL(EG_SQ_ALU_CONST_BUFFER_SIZE_VS_0, 1); + } + else + { + EVERGREEN_OUT_BATCH_REGVAL(EG_SQ_ALU_CONST_BUFFER_SIZE_VS_0, (evergreen->vs.num_consts * 4)/16 ); + } + + END_BATCH(); + + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_ALU_CONST_CACHE_VS_0, 1); + R600_OUT_BATCH(context->vp_bo_offset >> 8); + R600_OUT_BATCH_RELOC(0, + pbo_const, + 0, + RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); + } + + COMMIT_BATCH(); +} + +static void evergreenSendSQ(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + evergreenSendPSresource(ctx); //16 entries now + + BEGIN_BATCH_NO_AUTOSTATE(77); + + //34 + EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_VTX_SEMANTIC_0, 32); + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_0.u32All); //// // = 0x28380, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_1.u32All); //// // = 0x28384, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_2.u32All); //// // = 0x28388, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_3.u32All); //// // = 0x2838C, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_4.u32All); //// // = 0x28390, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_5.u32All); //// // = 0x28394, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_6.u32All); //// // = 0x28398, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_7.u32All); //// // = 0x2839C, // SAME + 
R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_8.u32All); //// // = 0x283A0, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_9.u32All); //// // = 0x283A4, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_10.u32All); //// // = 0x283A8, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_11.u32All); //// // = 0x283AC, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_12.u32All); //// // = 0x283B0, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_13.u32All); //// // = 0x283B4, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_14.u32All); //// // = 0x283B8, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_15.u32All); //// // = 0x283BC, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_16.u32All); //// // = 0x283C0, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_17.u32All); //// // = 0x283C4, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_18.u32All); //// // = 0x283C8, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_19.u32All); //// // = 0x283CC, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_20.u32All); //// // = 0x283D0, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_21.u32All); //// // = 0x283D4, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_22.u32All); //// // = 0x283D8, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_23.u32All); //// // = 0x283DC, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_24.u32All); //// // = 0x283E0, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_25.u32All); //// // = 0x283E4, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_26.u32All); //// // = 0x283E8, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_27.u32All); //// // = 0x283EC, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_28.u32All); //// // = 0x283F0, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_29.u32All); //// // = 0x283F4, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_30.u32All); //// // = 0x283F8, // SAME + R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_31.u32All); //// // = 0x283FC, // SAME + + + //3 + 
EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_DYN_GPR_RESOURCE_LIMIT_1, 1); + R600_OUT_BATCH(evergreen->SQ_DYN_GPR_RESOURCE_LIMIT_1.u32All);//// // = 0x28838, // + + //5 + EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_PGM_RESOURCES_PS, 3); + R600_OUT_BATCH(evergreen->SQ_PGM_RESOURCES_PS.u32All); //// // = 0x28844, // DIFF 0x28850 + R600_OUT_BATCH(evergreen->SQ_PGM_RESOURCES_2_PS.u32All); //// // = 0x28848, // + R600_OUT_BATCH(evergreen->SQ_PGM_EXPORTS_PS.u32All); //// // = 0x2884C, // SAME 0x28854 + + //4 + EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_PGM_RESOURCES_VS, 2); + R600_OUT_BATCH(evergreen->SQ_PGM_RESOURCES_VS.u32All);//// // = 0x28860, // DIFF 0x28868 + R600_OUT_BATCH(evergreen->SQ_PGM_RESOURCES_2_VS.u32All); //// // = 0x28864, // + + //5 + EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_PGM_RESOURCES_GS, 2); + /* + R600_OUT_BATCH(evergreen->SQ_PGM_START_GS.u32All); //// // = 0x28874, // SAME 0x2886C + */ + R600_OUT_BATCH(evergreen->SQ_PGM_RESOURCES_GS.u32All); //// // = 0x28878, // DIFF 0x2887C + R600_OUT_BATCH(evergreen->SQ_PGM_RESOURCES_2_GS.u32All); //// // = 0x2887C, // + + //5 + EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_PGM_RESOURCES_ES, 2); + /* + R600_OUT_BATCH(evergreen->SQ_PGM_START_ES.u32All); //// // = 0x2888C, // SAME 0x28880 + */ + R600_OUT_BATCH(evergreen->SQ_PGM_RESOURCES_ES.u32All); //// // = 0x28890, // DIFF + R600_OUT_BATCH(evergreen->SQ_PGM_RESOURCES_2_ES.u32All); //// // = 0x28894, // + + //4 + EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_PGM_RESOURCES_FS, 1); + /* + R600_OUT_BATCH(evergreen->SQ_PGM_START_FS.u32All); //// // = 0x288A4, // SAME 0x28894 + */ + R600_OUT_BATCH(evergreen->SQ_PGM_RESOURCES_FS.u32All); //// // = 0x288A8, // DIFF 0x288A4 + + //3 + EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_PGM_RESOURCES_2_HS, 1); + R600_OUT_BATCH(evergreen->SQ_PGM_RESOURCES_2_HS.u32All);//// // = 0x288C0, // + + //3 + EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_PGM_RESOURCES_2_LS, 1); + R600_OUT_BATCH(evergreen->SQ_PGM_RESOURCES_2_LS.u32All); //// // = 0x288D8, // + + //3 + EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_LDS_ALLOC_PS, 1); + 
R600_OUT_BATCH(evergreen->SQ_LDS_ALLOC_PS.u32All); //// // = 0x288EC, // + + //8 + EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_ESGS_RING_ITEMSIZE, 6); + R600_OUT_BATCH(evergreen->SQ_ESGS_RING_ITEMSIZE.u32All); //// // = 0x28900, // SAME 0x288A8 + R600_OUT_BATCH(evergreen->SQ_GSVS_RING_ITEMSIZE.u32All); //// // = 0x28904, // SAME 0x288AC + R600_OUT_BATCH(evergreen->SQ_ESTMP_RING_ITEMSIZE.u32All); //// // = 0x28908, // SAME 0x288B0 + R600_OUT_BATCH(evergreen->SQ_GSTMP_RING_ITEMSIZE.u32All); //// // = 0x2890C, // SAME 0x288B4 + R600_OUT_BATCH(evergreen->SQ_VSTMP_RING_ITEMSIZE.u32All); //// // = 0x28910, // SAME 0x288B8 + R600_OUT_BATCH(evergreen->SQ_PSTMP_RING_ITEMSIZE.u32All); //// // = 0x28914, // SAME 0x288BC + + //3 + EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_GS_VERT_ITEMSIZE, 1); + R600_OUT_BATCH(evergreen->SQ_GS_VERT_ITEMSIZE.u32All); //// // = 0x2891C, // SAME 0x288C8 + + END_BATCH(); + + COMMIT_BATCH(); + +} +static void evergreenSendSPI(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + BEGIN_BATCH_NO_AUTOSTATE(59); + + EVERGREEN_OUT_BATCH_REGSEQ(EG_SPI_VS_OUT_ID_0, 10); + R600_OUT_BATCH(evergreen->SPI_VS_OUT_ID_0.u32All); + R600_OUT_BATCH(evergreen->SPI_VS_OUT_ID_1.u32All); + R600_OUT_BATCH(evergreen->SPI_VS_OUT_ID_2.u32All); + R600_OUT_BATCH(evergreen->SPI_VS_OUT_ID_3.u32All); + R600_OUT_BATCH(evergreen->SPI_VS_OUT_ID_4.u32All); + R600_OUT_BATCH(evergreen->SPI_VS_OUT_ID_5.u32All); + R600_OUT_BATCH(evergreen->SPI_VS_OUT_ID_6.u32All); + R600_OUT_BATCH(evergreen->SPI_VS_OUT_ID_7.u32All); + R600_OUT_BATCH(evergreen->SPI_VS_OUT_ID_8.u32All); + R600_OUT_BATCH(evergreen->SPI_VS_OUT_ID_9.u32All); + + EVERGREEN_OUT_BATCH_REGSEQ(EG_SPI_PS_INPUT_CNTL_0, 45); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[0].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[1].u32All); + 
R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[2].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[3].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[4].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[5].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[6].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[7].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[8].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[9].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[10].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[11].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[12].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[13].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[14].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[15].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[16].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[17].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[18].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[19].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[20].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[21].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[22].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[23].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[24].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[25].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[26].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[27].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[28].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[29].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[30].u32All); + R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[31].u32All); + R600_OUT_BATCH(evergreen->SPI_VS_OUT_CONFIG.u32All); + R600_OUT_BATCH(evergreen->SPI_THREAD_GROUPING.u32All); + R600_OUT_BATCH(evergreen->SPI_PS_IN_CONTROL_0.u32All); + R600_OUT_BATCH(evergreen->SPI_PS_IN_CONTROL_1.u32All); + 
R600_OUT_BATCH(evergreen->SPI_INTERP_CONTROL_0.u32All); + R600_OUT_BATCH(evergreen->SPI_INPUT_Z.u32All); + R600_OUT_BATCH(evergreen->SPI_FOG_CNTL.u32All); + R600_OUT_BATCH(evergreen->SPI_BARYC_CNTL.u32All); + R600_OUT_BATCH(evergreen->SPI_PS_IN_CONTROL_2.u32All); + R600_OUT_BATCH(evergreen->SPI_COMPUTE_INPUT_CNTL.u32All); + R600_OUT_BATCH(evergreen->SPI_COMPUTE_NUM_THREAD_X.u32All); + R600_OUT_BATCH(evergreen->SPI_COMPUTE_NUM_THREAD_Y.u32All); + R600_OUT_BATCH(evergreen->SPI_COMPUTE_NUM_THREAD_Z.u32All); + + END_BATCH(); + + COMMIT_BATCH(); +} +static void evergreenSendSX(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + BEGIN_BATCH_NO_AUTOSTATE(9); + + EVERGREEN_OUT_BATCH_REGVAL(EG_SX_MISC, evergreen->SX_MISC.u32All); + EVERGREEN_OUT_BATCH_REGVAL(EG_SX_ALPHA_TEST_CONTROL, evergreen->SX_ALPHA_TEST_CONTROL.u32All); + EVERGREEN_OUT_BATCH_REGVAL(EG_SX_ALPHA_REF, evergreen->SX_ALPHA_REF.u32All); + + END_BATCH(); + + COMMIT_BATCH(); +} + +static void evergreenSetDepthTarget(context_t *context) +{ + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + struct radeon_renderbuffer *rrb; + unsigned int nPitchInPixel; + + rrb = radeon_get_depthbuffer(&context->radeon); + if (!rrb) + { + return; + } + + EVERGREEN_STATECHANGE(context, db); + + evergreen->DB_DEPTH_SIZE.u32All = 0; + + SETfield(evergreen->DB_DEPTH_SIZE.u32All, (nPitchInPixel/8)-1, + EG_DB_DEPTH_SIZE__PITCH_TILE_MAX_shift, + EG_DB_DEPTH_SIZE__PITCH_TILE_MAX_mask); + SETfield(evergreen->DB_DEPTH_SIZE.u32All, (context->radeon.radeonScreen->driScreen->fbHeight/8)-1, + EG_DB_DEPTH_SIZE__HEIGHT_TILE_MAX_shift, + EG_DB_DEPTH_SIZE__HEIGHT_TILE_MAX_mask); + evergreen->DB_DEPTH_SLICE.u32All = ( (nPitchInPixel * context->radeon.radeonScreen->driScreen->fbHeight)/64 )-1; + + if(4 == rrb->cpp) + { + 
SETfield(evergreen->DB_Z_INFO.u32All, DEPTH_8_24, + EG_DB_Z_INFO__FORMAT_shift, + EG_DB_Z_INFO__FORMAT_mask); + } + else + { + SETfield(evergreen->DB_Z_INFO.u32All, DEPTH_16, + EG_DB_Z_INFO__FORMAT_shift, + EG_DB_Z_INFO__FORMAT_mask); + } + SETfield(evergreen->DB_Z_INFO.u32All, ARRAY_1D_TILED_THIN1, + EG_DB_Z_INFO__ARRAY_MODE_shift, + EG_DB_Z_INFO__ARRAY_MODE_mask); +} + +static void evergreenSendDB(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + struct radeon_renderbuffer *rrb; + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + evergreenSetDepthTarget(context); + + //8 + BEGIN_BATCH_NO_AUTOSTATE(7); + EVERGREEN_OUT_BATCH_REGSEQ(EG_DB_RENDER_CONTROL, 5); + R600_OUT_BATCH(evergreen->DB_RENDER_CONTROL.u32All); + R600_OUT_BATCH(evergreen->DB_COUNT_CONTROL.u32All); + R600_OUT_BATCH(evergreen->DB_DEPTH_VIEW.u32All); + R600_OUT_BATCH(evergreen->DB_RENDER_OVERRIDE.u32All); + R600_OUT_BATCH(evergreen->DB_RENDER_OVERRIDE2.u32All); + /* + R600_OUT_BATCH(evergreen->DB_HTILE_DATA_BASE.u32All); + */ + END_BATCH(); + + //4 + BEGIN_BATCH_NO_AUTOSTATE(4); + EVERGREEN_OUT_BATCH_REGSEQ(EG_DB_STENCIL_CLEAR, 2); + R600_OUT_BATCH(evergreen->DB_STENCIL_CLEAR.u32All); + R600_OUT_BATCH(evergreen->DB_DEPTH_CLEAR.u32All); + END_BATCH(); + + //4 + BEGIN_BATCH_NO_AUTOSTATE(4); + EVERGREEN_OUT_BATCH_REGSEQ(EG_DB_DEPTH_SIZE, 2); + R600_OUT_BATCH(evergreen->DB_DEPTH_SIZE.u32All); + R600_OUT_BATCH(evergreen->DB_DEPTH_SLICE.u32All); + END_BATCH(); + + //3 + BEGIN_BATCH_NO_AUTOSTATE(3); + EVERGREEN_OUT_BATCH_REGVAL(EG_DB_DEPTH_CONTROL, evergreen->DB_DEPTH_CONTROL.u32All); + END_BATCH(); + + //3 + BEGIN_BATCH_NO_AUTOSTATE(3); + EVERGREEN_OUT_BATCH_REGVAL(EG_DB_SHADER_CONTROL, evergreen->DB_SHADER_CONTROL.u32All); + END_BATCH(); + + //5 + BEGIN_BATCH_NO_AUTOSTATE(5); + EVERGREEN_OUT_BATCH_REGSEQ(EG_DB_SRESULTS_COMPARE_STATE0, 3); + 
R600_OUT_BATCH(evergreen->DB_SRESULTS_COMPARE_STATE0.u32All); + R600_OUT_BATCH(evergreen->DB_SRESULTS_COMPARE_STATE1.u32All); + R600_OUT_BATCH(evergreen->DB_PRELOAD_CONTROL.u32All); + END_BATCH(); + + //3 + BEGIN_BATCH_NO_AUTOSTATE(3); + EVERGREEN_OUT_BATCH_REGVAL(EG_DB_ALPHA_TO_MASK, evergreen->DB_ALPHA_TO_MASK.u32All); + END_BATCH(); + + rrb = radeon_get_depthbuffer(&context->radeon); + if( (rrb != NULL) && (rrb->bo != NULL) ) + { + //5 + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + EVERGREEN_OUT_BATCH_REGVAL(EG_DB_Z_INFO, evergreen->DB_Z_INFO.u32All); + R600_OUT_BATCH_RELOC(evergreen->DB_Z_INFO.u32All, + rrb->bo, + evergreen->DB_Z_INFO.u32All, + 0, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); + + //5 + if((evergreen->DB_DEPTH_CONTROL.u32All & Z_ENABLE_bit) > 0) + { + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + EVERGREEN_OUT_BATCH_REGVAL(EG_DB_Z_READ_BASE, evergreen->DB_Z_READ_BASE.u32All); + R600_OUT_BATCH_RELOC(evergreen->DB_Z_READ_BASE.u32All, + rrb->bo, + evergreen->DB_Z_READ_BASE.u32All, + 0, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); + } + //5 + if((evergreen->DB_DEPTH_CONTROL.u32All & Z_WRITE_ENABLE_bit) > 0) + { + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + EVERGREEN_OUT_BATCH_REGVAL(EG_DB_Z_WRITE_BASE, evergreen->DB_Z_READ_BASE.u32All); + R600_OUT_BATCH_RELOC(evergreen->DB_Z_WRITE_BASE.u32All, + rrb->bo, + evergreen->DB_Z_WRITE_BASE.u32All, + 0, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); + } + } +/* + if (ctx->DrawBuffer) + { + rrb = radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL); + + if((rrb != NULL) && (rrb->bo != NULL)) + { + //5 + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + EVERGREEN_OUT_BATCH_REGVAL(EG_DB_STENCIL_INFO, evergreen->DB_Z_INFO.u32All); + R600_OUT_BATCH_RELOC(evergreen->DB_STENCIL_INFO.u32All, + rrb->bo, + evergreen->DB_STENCIL_INFO.u32All, + 0, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); + + //10 + if((evergreen->DB_DEPTH_CONTROL.u32All & STENCIL_ENABLE_bit) > 0) + { + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + EVERGREEN_OUT_BATCH_REGVAL(EG_DB_STENCIL_READ_BASE, 
evergreen->DB_STENCIL_READ_BASE.u32All); + R600_OUT_BATCH_RELOC(evergreen->DB_STENCIL_READ_BASE.u32All, + rrb->bo, + evergreen->DB_STENCIL_READ_BASE.u32All, + 0, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); + + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + EVERGREEN_OUT_BATCH_REGVAL(EG_DB_STENCIL_WRITE_BASE, evergreen->DB_STENCIL_WRITE_BASE.u32All); + R600_OUT_BATCH_RELOC(evergreen->DB_STENCIL_WRITE_BASE.u32All, + rrb->bo, + evergreen->DB_STENCIL_WRITE_BASE.u32All, + 0, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); + } + } + } +*/ + COMMIT_BATCH(); +} + +static void evergreenSetRenderTarget(context_t *context, int id) +{ + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + + struct radeon_renderbuffer *rrb; + unsigned int nPitchInPixel; + + rrb = radeon_get_colorbuffer(&context->radeon); + if (!rrb || !rrb->bo) { + return; + } + + EVERGREEN_STATECHANGE(context, cb); + + /* addr */ + evergreen->render_target[id].CB_COLOR0_BASE.u32All = context->radeon.state.color.draw_offset / 256; + + /* pitch */ + nPitchInPixel = rrb->pitch/rrb->cpp; + + SETfield(evergreen->render_target[id].CB_COLOR0_PITCH.u32All, (nPitchInPixel/8)-1, + EG_CB_COLOR0_PITCH__TILE_MAX_shift, + EG_CB_COLOR0_PITCH__TILE_MAX_mask); + + /* skice */ + SETfield(evergreen->render_target[id].CB_COLOR0_SLICE.u32All, + //( (nPitchInPixel * context->radeon.radeonScreen->driScreen->fbHeight)/64 )-1, + ( (nPitchInPixel * 240)/64 )-1, + EG_CB_COLOR0_SLICE__TILE_MAX_shift, + EG_CB_COLOR0_SLICE__TILE_MAX_mask); + + /* CB_COLOR0_ATTRIB */ /* TODO : for z clear, this should be set to 0 */ + SETbit(evergreen->render_target[id].CB_COLOR0_ATTRIB.u32All, + EG_CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_bit); + + /* CB_COLOR0_INFO */ + SETfield(evergreen->render_target[id].CB_COLOR0_INFO.u32All, + ENDIAN_NONE, + EG_CB_COLOR0_INFO__ENDIAN_shift, + EG_CB_COLOR0_INFO__ENDIAN_mask); + SETfield(evergreen->render_target[id].CB_COLOR0_INFO.u32All, + ARRAY_LINEAR_GENERAL, + EG_CB_COLOR0_INFO__ARRAY_MODE_shift, + 
EG_CB_COLOR0_INFO__ARRAY_MODE_mask); + if(4 == rrb->cpp) + { + SETfield(evergreen->render_target[id].CB_COLOR0_INFO.u32All, + COLOR_8_8_8_8, + EG_CB_COLOR0_INFO__FORMAT_shift, + EG_CB_COLOR0_INFO__FORMAT_mask); + SETfield(evergreen->render_target[id].CB_COLOR0_INFO.u32All, + SWAP_ALT, //SWAP_STD + EG_CB_COLOR0_INFO__COMP_SWAP_shift, + EG_CB_COLOR0_INFO__COMP_SWAP_mask); + } + else + { + SETfield(evergreen->render_target[id].CB_COLOR0_INFO.u32All, + COLOR_5_6_5, + EG_CB_COLOR0_INFO__FORMAT_shift, + EG_CB_COLOR0_INFO__FORMAT_mask); + SETfield(evergreen->render_target[id].CB_COLOR0_INFO.u32All, + SWAP_ALT_REV, + EG_CB_COLOR0_INFO__COMP_SWAP_shift, + EG_CB_COLOR0_INFO__COMP_SWAP_mask); + } + SETfield(evergreen->render_target[id].CB_COLOR0_INFO.u32All, + 1, + EG_CB_COLOR0_INFO__SOURCE_FORMAT_shift, + EG_CB_COLOR0_INFO__SOURCE_FORMAT_mask); + SETbit(evergreen->render_target[id].CB_COLOR0_INFO.u32All, + EG_CB_COLOR0_INFO__BLEND_CLAMP_bit); + SETfield(evergreen->render_target[id].CB_COLOR0_INFO.u32All, + NUMBER_UNORM, + EG_CB_COLOR0_INFO__NUMBER_TYPE_shift, + EG_CB_COLOR0_INFO__NUMBER_TYPE_mask); + + evergreen->render_target[id].CB_COLOR0_VIEW.u32All = 0; + evergreen->render_target[id].CB_COLOR0_CMASK.u32All = 0; + evergreen->render_target[id].CB_COLOR0_FMASK.u32All = 0; + evergreen->render_target[id].CB_COLOR0_FMASK_SLICE.u32All = 0; + + evergreen->render_target[id].enabled = GL_TRUE; +} + +static void evergreenSendCB(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + struct radeon_renderbuffer *rrb; + BATCH_LOCALS(&context->radeon); + int id = 0; + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + rrb = radeon_get_colorbuffer(&context->radeon); + if (!rrb || !rrb->bo) { + return; + } + + evergreenSetRenderTarget(context, 0); + + if (!evergreen->render_target[id].enabled) + return; + + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + 
EVERGREEN_OUT_BATCH_REGSEQ(EG_CB_COLOR0_BASE + (4 * id), 1); + R600_OUT_BATCH(evergreen->render_target[id].CB_COLOR0_BASE.u32All); + R600_OUT_BATCH_RELOC(evergreen->render_target[id].CB_COLOR0_BASE.u32All, + rrb->bo, + evergreen->render_target[id].CB_COLOR0_BASE.u32All, + 0, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); + + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + EVERGREEN_OUT_BATCH_REGVAL(EG_CB_COLOR0_INFO, evergreen->render_target[id].CB_COLOR0_INFO.u32All); + R600_OUT_BATCH_RELOC(evergreen->render_target[id].CB_COLOR0_INFO.u32All, + rrb->bo, + evergreen->render_target[id].CB_COLOR0_INFO.u32All, + 0, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); + + BEGIN_BATCH_NO_AUTOSTATE(5); + EVERGREEN_OUT_BATCH_REGSEQ(EG_CB_COLOR0_PITCH, 3); + R600_OUT_BATCH(evergreen->render_target[id].CB_COLOR0_PITCH.u32All); + R600_OUT_BATCH(evergreen->render_target[id].CB_COLOR0_SLICE.u32All); + R600_OUT_BATCH(evergreen->render_target[id].CB_COLOR0_VIEW.u32All); + END_BATCH(); + + BEGIN_BATCH_NO_AUTOSTATE(4); + EVERGREEN_OUT_BATCH_REGSEQ(EG_CB_COLOR0_ATTRIB, 2); + R600_OUT_BATCH(evergreen->render_target[id].CB_COLOR0_ATTRIB.u32All); + R600_OUT_BATCH(evergreen->render_target[id].CB_COLOR0_DIM.u32All); + /* + R600_OUT_BATCH(evergreen->render_target[id].CB_COLOR0_CMASK.u32All); + R600_OUT_BATCH(evergreen->render_target[id].CB_COLOR0_CMASK_SLICE.u32All); + R600_OUT_BATCH(evergreen->render_target[id].CB_COLOR0_FMASK.u32All); + R600_OUT_BATCH(evergreen->render_target[id].CB_COLOR0_FMASK_SLICE.u32All); + */ + END_BATCH(); + + BEGIN_BATCH_NO_AUTOSTATE(4); + EVERGREEN_OUT_BATCH_REGSEQ(EG_CB_TARGET_MASK, 2); + R600_OUT_BATCH(evergreen->CB_TARGET_MASK.u32All); + R600_OUT_BATCH(evergreen->CB_SHADER_MASK.u32All); + END_BATCH(); + + BEGIN_BATCH_NO_AUTOSTATE(5); + EVERGREEN_OUT_BATCH_REGSEQ(EG_CB_BLEND_RED, 3); + R600_OUT_BATCH(evergreen->CB_BLEND_RED.u32All); + R600_OUT_BATCH(evergreen->CB_BLEND_GREEN.u32All); + R600_OUT_BATCH(evergreen->CB_BLEND_BLUE.u32All); + END_BATCH(); + + BEGIN_BATCH_NO_AUTOSTATE(9); + 
EVERGREEN_OUT_BATCH_REGVAL(EG_CB_BLEND_ALPHA, evergreen->CB_BLEND_ALPHA.u32All); + EVERGREEN_OUT_BATCH_REGVAL(EG_CB_BLEND0_CONTROL, evergreen->CB_BLEND0_CONTROL.u32All); + EVERGREEN_OUT_BATCH_REGVAL(EG_CB_COLOR_CONTROL, evergreen->CB_COLOR_CONTROL.u32All); + END_BATCH(); + + COMMIT_BATCH(); +} +static void evergreenSendCP(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + //first to send + //r700Start3D + BEGIN_BATCH_NO_AUTOSTATE(3); + R600_OUT_BATCH(CP_PACKET3(R600_IT_CONTEXT_CONTROL, 1)); //IT_CONTEXT_CONTROL 0x28 + R600_OUT_BATCH(0x80000000); + R600_OUT_BATCH(0x80000000); + END_BATCH(); + + COMMIT_BATCH(); +} +static void evergreenSendVGT(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + +/* moved to draw: + VGT_DRAW_INITIATOR + VGT_INDEX_TYPE + VGT_PRIMITIVE_TYPE +*/ + BEGIN_BATCH_NO_AUTOSTATE(5); + EVERGREEN_OUT_BATCH_REGSEQ(EG_VGT_MAX_VTX_INDX, 3); + R600_OUT_BATCH(evergreen->VGT_MAX_VTX_INDX.u32All); + R600_OUT_BATCH(evergreen->VGT_MIN_VTX_INDX.u32All); + R600_OUT_BATCH(evergreen->VGT_INDX_OFFSET.u32All); + END_BATCH(); + + BEGIN_BATCH_NO_AUTOSTATE(6); + EVERGREEN_OUT_BATCH_REGVAL(EG_VGT_OUTPUT_PATH_CNTL, evergreen->VGT_OUTPUT_PATH_CNTL.u32All); + + EVERGREEN_OUT_BATCH_REGVAL(EG_VGT_GS_MODE, evergreen->VGT_GS_MODE.u32All); + END_BATCH(); + + BEGIN_BATCH_NO_AUTOSTATE(3); + EVERGREEN_OUT_BATCH_REGSEQ(EG_VGT_PRIMITIVEID_EN, 1); + R600_OUT_BATCH(evergreen->VGT_PRIMITIVEID_EN.u32All); + END_BATCH(); + + BEGIN_BATCH_NO_AUTOSTATE(4); + EVERGREEN_OUT_BATCH_REGSEQ(EG_VGT_INSTANCE_STEP_RATE_0, 2); + R600_OUT_BATCH(evergreen->VGT_INSTANCE_STEP_RATE_0.u32All); + 
R600_OUT_BATCH(evergreen->VGT_INSTANCE_STEP_RATE_1.u32All); + END_BATCH(); + + BEGIN_BATCH_NO_AUTOSTATE(4); + EVERGREEN_OUT_BATCH_REGSEQ(EG_VGT_REUSE_OFF, 2); + R600_OUT_BATCH(evergreen->VGT_REUSE_OFF.u32All); + R600_OUT_BATCH(evergreen->VGT_VTX_CNT_EN.u32All); + END_BATCH(); + + BEGIN_BATCH_NO_AUTOSTATE(3); + EVERGREEN_OUT_BATCH_REGVAL(EG_VGT_SHADER_STAGES_EN, evergreen->VGT_SHADER_STAGES_EN.u32All); + END_BATCH(); + + BEGIN_BATCH_NO_AUTOSTATE(4); + EVERGREEN_OUT_BATCH_REGSEQ(EG_VGT_STRMOUT_CONFIG, 2); + R600_OUT_BATCH(evergreen->VGT_STRMOUT_CONFIG.u32All); + R600_OUT_BATCH(evergreen->VGT_STRMOUT_BUFFER_CONFIG.u32All); + END_BATCH(); + + COMMIT_BATCH(); +} + +static void evergreenSendTIMESTAMP(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); +} + +void evergreenInitAtoms(context_t *context) +{ + radeon_print(RADEON_STATE, RADEON_NORMAL, "%s %p\n", __func__, context); + context->radeon.hw.max_state_size = 10 + 5 + 14 + 3; /* start 3d, idle, cb/db flush, 3 for time stamp */ + + /* Setup the atom linked list */ + make_empty_list(&context->radeon.hw.atomlist); + context->radeon.hw.atomlist.name = "atom-list"; + + EVERGREEN_ALLOC_STATE(init, always, 19, evergreenSendSQConfig); + + //make sure send first + EVERGREEN_ALLOC_STATE(cp, always, 3, evergreenSendCP); + + EVERGREEN_ALLOC_STATE(vtx, evergreen_vtx, (6 + (VERT_ATTRIB_MAX * 12)), evergreenSendVTX); + EVERGREEN_ALLOC_STATE(pa, always, 124, evergreenSendPA); + EVERGREEN_ALLOC_STATE(tp, always, 0, evergreenSendTP); + EVERGREEN_ALLOC_STATE(sq, always, 86, evergreenSendSQ); /* 85 */ + EVERGREEN_ALLOC_STATE(vs, always, 16, evergreenSendVSresource); + EVERGREEN_ALLOC_STATE(spi, always, 59, evergreenSendSPI); + EVERGREEN_ALLOC_STATE(sx, always, 9, evergreenSendSX); + EVERGREEN_ALLOC_STATE(tx, evergreen_tx, (R700_TEXTURE_NUMBERUNITS * (21+5) + 6), evergreenSendTexState); /* 21 
for resource, 5 for sampler */ + EVERGREEN_ALLOC_STATE(db, always, 60, evergreenSendDB); + EVERGREEN_ALLOC_STATE(cb, always, 35, evergreenSendCB); + EVERGREEN_ALLOC_STATE(vgt, always, 29, evergreenSendVGT); + EVERGREEN_ALLOC_STATE(timestamp, always, 3, evergreenSendTIMESTAMP); + + //evergreen_init_query_stateobj(&context->radeon, 6 * 2); + + context->radeon.hw.is_dirty = GL_TRUE; + context->radeon.hw.all_dirty = GL_TRUE; +} diff --git a/src/mesa/drivers/dri/r600/evergreen_chip.h b/src/mesa/drivers/dri/r600/evergreen_chip.h new file mode 100644 index 00000000000..2ea5cd213c7 --- /dev/null +++ b/src/mesa/drivers/dri/r600/evergreen_chip.h @@ -0,0 +1,516 @@ +/* + * Copyright (C) 2008-2010 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +/* + * Authors: + * Richard Li <[email protected]>, <[email protected]> + */ + +#ifndef _EVERGREEN_CHIP_H_ +#define _EVERGREEN_CHIP_H_ + +#include "r700_chip.h" + +#define EVERGREEN_MAX_DX9_CONSTS 256 +#define EVERGREEN_MAX_SHADER_EXPORTS 32 +#define EVERGREEN_MAX_VIEWPORTS 16 + +typedef struct _EVERGREEN_VIEWPORT_STATE +{ + union UINT_FLOAT PA_SC_VPORT_SCISSOR_0_TL; ////0,1 // = 0x28250, // DIFF + union UINT_FLOAT PA_SC_VPORT_SCISSOR_0_BR; ////0,1 // = 0x28254, // DIFF + union UINT_FLOAT PA_SC_VPORT_ZMIN_0; ////0 // = 0x282D0, // SAME + union UINT_FLOAT PA_SC_VPORT_ZMAX_0; ////0 // = 0x282D4, // SAME + union UINT_FLOAT PA_CL_VPORT_XSCALE; //// // = 0x2843C, // SAME + union UINT_FLOAT PA_CL_VPORT_XOFFSET; //// // = 0x28440, // SAME + union UINT_FLOAT PA_CL_VPORT_YSCALE; //// // = 0x28444, // SAME + union UINT_FLOAT PA_CL_VPORT_YOFFSET; //// // = 0x28448, // SAME + union UINT_FLOAT PA_CL_VPORT_ZSCALE; //// // = 0x2844C, // SAME + union UINT_FLOAT PA_CL_VPORT_ZOFFSET; //// // = 0x28450, // SAME + GLboolean enabled; + GLboolean dirty; +} EVERGREEN_VIEWPORT_STATE; + +#define EVERGREEN_MAX_UCP 6 + +typedef struct _EVERGREEN_UCP_STATE +{ + union UINT_FLOAT PA_CL_UCP_0_X; // = 0x285BC, // SAME 0x28E20 + union UINT_FLOAT PA_CL_UCP_0_Y; // = 0x285C0, // SAME 0x28E24 + union UINT_FLOAT PA_CL_UCP_0_Z; // = 0x285C4, // SAME 0x28E28 + union UINT_FLOAT PA_CL_UCP_0_W; // = 0x285C8, // SAME 0x28E2C + GLboolean enabled; + GLboolean dirty; +} EVERGREEN_UCP_STATE; + +#define EVERGREEN_MAX_RENDER_TARGETS 12 + +typedef struct _EVERGREEN_RENDER_TARGET_STATE +{ + union UINT_FLOAT CB_COLOR0_BASE; ////0 // = 0x28C60, // SAME 0x28040 + union UINT_FLOAT CB_COLOR0_PITCH; ////0 // = 0x28C64, // + union UINT_FLOAT CB_COLOR0_SLICE; ////0 // = 0x28C68, // + union UINT_FLOAT CB_COLOR0_VIEW; ////0 // = 0x28C6C, // SAME 0x28080 + union UINT_FLOAT CB_COLOR0_INFO; ////0,1,2,3,4,5,6,78,9,10,11 // = 0x28C70, // DIFF 0x280A0 + union UINT_FLOAT CB_COLOR0_ATTRIB; ////0 // = 0x28C74, // + union 
UINT_FLOAT CB_COLOR0_DIM; // = 0x28C78, // + union UINT_FLOAT CB_COLOR0_CMASK; ////0 // = 0x28C7C, // + union UINT_FLOAT CB_COLOR0_CMASK_SLICE; ////0 // = 0x28C80, // + union UINT_FLOAT CB_COLOR0_FMASK; ////0 // = 0x28C84, // + union UINT_FLOAT CB_COLOR0_FMASK_SLICE; ////0 // = 0x28C88, // + union UINT_FLOAT CB_COLOR0_CLEAR_WORD0; // = 0x28C8C, // + union UINT_FLOAT CB_COLOR0_CLEAR_WORD1; // = 0x28C90, // + union UINT_FLOAT CB_COLOR0_CLEAR_WORD2; // = 0x28C94, // + union UINT_FLOAT CB_COLOR0_CLEAR_WORD3; // = 0x28C98, // + GLboolean enabled; + GLboolean dirty; +} EVERGREEN_RENDER_TARGET_STATE; + +typedef struct _EVERGREEN_CONFIG +{ + union UINT_FLOAT SPI_CONFIG_CNTL; // = 0x9100, // DIFF + union UINT_FLOAT SPI_CONFIG_CNTL_1; // = 0x913C, // DIFF + union UINT_FLOAT CP_PERFMON_CNTL; // = 0x87FC, // SAME + union UINT_FLOAT SQ_MS_FIFO_SIZES; // = 0x8CF0, // SAME + + union UINT_FLOAT SQ_CONFIG; // = 0x8C00, // DIFF + union UINT_FLOAT SQ_GPR_RESOURCE_MGMT_1; // = 0x8C04, // SAME + union UINT_FLOAT SQ_GPR_RESOURCE_MGMT_2; // = 0x8C08, // SAME + union UINT_FLOAT SQ_GPR_RESOURCE_MGMT_3; // = 0x8C0C, // + + union UINT_FLOAT SQ_THREAD_RESOURCE_MGMT; // = 0x8C18, // SAME 0x8C0C + union UINT_FLOAT SQ_THREAD_RESOURCE_MGMT_2; // = 0x8C1C, // + union UINT_FLOAT SQ_STACK_RESOURCE_MGMT_1; // = 0x8C20, // SAME 0x8C10 + union UINT_FLOAT SQ_STACK_RESOURCE_MGMT_2; // = 0x8C24, // SAME 0x8C14 + union UINT_FLOAT SQ_STACK_RESOURCE_MGMT_3; // = 0x8C28, // + + union UINT_FLOAT SQ_DYN_GPR_CNTL_PS_FLUSH_REQ; // = 0x8D8C, // DIFF + union UINT_FLOAT SQ_LDS_RESOURCE_MGMT; // = 0x8E2C, // + union UINT_FLOAT VGT_CACHE_INVALIDATION; // = 0x88C4, // DIFF + union UINT_FLOAT VGT_GS_VERTEX_REUSE; // = 0x88D4, // SAME + union UINT_FLOAT PA_SC_FORCE_EOV_MAX_CNTS; // = 0x8B24, // SAME + union UINT_FLOAT PA_SC_LINE_STIPPLE_STATE; // = 0x8B10, // SAME + union UINT_FLOAT PA_CL_ENHANCE; // = 0x8A14, // SAME +} EVERGREEN_CONFIG; + +typedef struct _EVERGREEN_PS_RES +{ + union UINT_FLOAT SQ_PGM_START_PS; //// // 
= 0x28840, // SAME + GLboolean dirty; + + union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_0; // = 0x28940, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_1; // = 0x28944, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_2; // = 0x28948, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_3; // = 0x2894C, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_4; // = 0x28950, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_5; // = 0x28954, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_6; // = 0x28958, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_7; // = 0x2895C, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_8; // = 0x28960, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_9; // = 0x28964, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_10; // = 0x28968, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_11; // = 0x2896C, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_12; // = 0x28970, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_13; // = 0x28974, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_14; // = 0x28978, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_15; // = 0x2897C, // SAME + + int num_consts; + union UINT_FLOAT consts[EVERGREEN_MAX_DX9_CONSTS][4]; +} EVERGREEN_PS_RES; + +typedef struct _EVERGREEN_VS_RES +{ + union UINT_FLOAT SQ_PGM_START_VS; //// // = 0x2885C, // SAME 0x28858 + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_VS_0; //// // = 0x28180, //? 
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_0; //// // = 0x28980, // SAME + + union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_1; // = 0x28984, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_2; // = 0x28988, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_3; // = 0x2898C, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_4; // = 0x28990, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_5; // = 0x28994, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_6; // = 0x28998, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_7; // = 0x2899C, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_8; // = 0x289A0, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_9; // = 0x289A4, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_10; // = 0x289A8, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_11; // = 0x289AC, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_12; // = 0x289B0, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_13; // = 0x289B4, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_14; // = 0x289B8, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_15; // = 0x289BC, // SAME + + GLboolean dirty; + int num_consts; + union UINT_FLOAT consts[EVERGREEN_MAX_DX9_CONSTS][4]; +} EVERGREEN_VS_RES; + +typedef struct _EVERGREEN_CHIP_CONTEXT +{ +/* Registers from PA block: */ + union UINT_FLOAT PA_SC_SCREEN_SCISSOR_TL; //// // = 0x28030, // DIFF + union UINT_FLOAT PA_SC_SCREEN_SCISSOR_BR; //// // = 0x28034, // DIFF + union UINT_FLOAT PA_SC_WINDOW_OFFSET; //// // = 0x28200, // DIFF + union UINT_FLOAT PA_SC_WINDOW_SCISSOR_TL; //// // = 0x28204, // DIFF + union UINT_FLOAT PA_SC_WINDOW_SCISSOR_BR; //// // = 0x28208, // DIFF + union UINT_FLOAT PA_SC_CLIPRECT_RULE; //// // = 0x2820C, // SAME + union UINT_FLOAT PA_SC_CLIPRECT_0_TL; //// // = 0x28210, // DIFF + union UINT_FLOAT PA_SC_CLIPRECT_0_BR; //// // = 0x28214, // DIFF + union UINT_FLOAT PA_SC_CLIPRECT_1_TL; //// // = 0x28218, // DIFF + union UINT_FLOAT PA_SC_CLIPRECT_1_BR; //// // = 0x2821C, // DIFF + union UINT_FLOAT PA_SC_CLIPRECT_2_TL; //// // = 
0x28220, // DIFF + union UINT_FLOAT PA_SC_CLIPRECT_2_BR; //// // = 0x28224, // DIFF + union UINT_FLOAT PA_SC_CLIPRECT_3_TL; //// // = 0x28228, // DIFF + union UINT_FLOAT PA_SC_CLIPRECT_3_BR; //// // = 0x2822C, // DIFF + union UINT_FLOAT PA_SC_EDGERULE; // = 0x28230, // SAME + union UINT_FLOAT PA_SU_HARDWARE_SCREEN_OFFSET; // = 0x28234, // + union UINT_FLOAT PA_SC_GENERIC_SCISSOR_TL; //// // = 0x28240, // DIFF + union UINT_FLOAT PA_SC_GENERIC_SCISSOR_BR; //// // = 0x28244, // DIFF + + EVERGREEN_VIEWPORT_STATE viewport[EVERGREEN_MAX_VIEWPORTS]; + EVERGREEN_UCP_STATE ucp[EVERGREEN_MAX_UCP]; + + union UINT_FLOAT PA_CL_POINT_X_RAD; // = 0x287D4, // SAME 0x28E10 + union UINT_FLOAT PA_CL_POINT_Y_RAD; // = 0x287D8, // SAME 0x28E14 + union UINT_FLOAT PA_CL_POINT_SIZE; // = 0x287DC, // SAME 0x28E18 + union UINT_FLOAT PA_CL_POINT_CULL_RAD; // = 0x287E0, // SAME 0x28E1C + union UINT_FLOAT PA_CL_CLIP_CNTL; //// // = 0x28810, // SAME + union UINT_FLOAT PA_SU_SC_MODE_CNTL; //// // = 0x28814, // SAME + union UINT_FLOAT PA_CL_VTE_CNTL; //// // = 0x28818, // SAME + union UINT_FLOAT PA_CL_VS_OUT_CNTL; //// // = 0x2881C, // SAME + union UINT_FLOAT PA_CL_NANINF_CNTL; //// // = 0x28820, // SAME + union UINT_FLOAT PA_SU_LINE_STIPPLE_CNTL; // = 0x28824, // + union UINT_FLOAT PA_SU_LINE_STIPPLE_SCALE; // = 0x28828, // + union UINT_FLOAT PA_SU_PRIM_FILTER_CNTL; // = 0x2882C, // + union UINT_FLOAT PA_SU_POINT_SIZE; //// // = 0x28A00, // SAME + union UINT_FLOAT PA_SU_POINT_MINMAX; //// // = 0x28A04, // SAME + union UINT_FLOAT PA_SU_LINE_CNTL; //// // = 0x28A08, // SAME + union UINT_FLOAT PA_SC_LINE_STIPPLE; // = 0x28A0C, // SAME + union UINT_FLOAT PA_SC_MODE_CNTL_0; //// // = 0x28A48, // + union UINT_FLOAT PA_SC_MODE_CNTL_1; //// // = 0x28A4C, // + union UINT_FLOAT PA_SU_POLY_OFFSET_DB_FMT_CNTL; //// // = 0x28B78, // SAME 0x28DF8 + union UINT_FLOAT PA_SU_POLY_OFFSET_CLAMP; //// // = 0x28B7C, // SAME 0x28DFC + union UINT_FLOAT PA_SU_POLY_OFFSET_FRONT_SCALE;//// // = 0x28B80, // SAME 0x28E00 + 
union UINT_FLOAT PA_SU_POLY_OFFSET_FRONT_OFFSET; //// // = 0x28B84, // SAME 0x28E04 + union UINT_FLOAT PA_SU_POLY_OFFSET_BACK_SCALE; //// // = 0x28B88, // SAME 0x28E08 + union UINT_FLOAT PA_SU_POLY_OFFSET_BACK_OFFSET; //// // = 0x28B8C, // SAME 0x28E0C + union UINT_FLOAT PA_SC_LINE_CNTL; //// // = 0x28C00, // DIFF + union UINT_FLOAT PA_SC_AA_CONFIG; //// // = 0x28C04, // SAME + union UINT_FLOAT PA_SU_VTX_CNTL; //// // = 0x28C08, // SAME + union UINT_FLOAT PA_CL_GB_VERT_CLIP_ADJ; //// // = 0x28C0C, // SAME + union UINT_FLOAT PA_CL_GB_VERT_DISC_ADJ; //// // = 0x28C10, // SAME + union UINT_FLOAT PA_CL_GB_HORZ_CLIP_ADJ; //// // = 0x28C14, // SAME + union UINT_FLOAT PA_CL_GB_HORZ_DISC_ADJ; //// // = 0x28C18, // SAME + union UINT_FLOAT PA_SC_AA_SAMPLE_LOCS_0; //// // = 0x28C1C, // + union UINT_FLOAT PA_SC_AA_SAMPLE_LOCS_1; //// // = 0x28C20, // + union UINT_FLOAT PA_SC_AA_SAMPLE_LOCS_2; //// // = 0x28C24, // + union UINT_FLOAT PA_SC_AA_SAMPLE_LOCS_3; //// // = 0x28C28, // + union UINT_FLOAT PA_SC_AA_SAMPLE_LOCS_4; //// // = 0x28C2C, // + union UINT_FLOAT PA_SC_AA_SAMPLE_LOCS_5; //// // = 0x28C30, // + union UINT_FLOAT PA_SC_AA_SAMPLE_LOCS_6; //// // = 0x28C34, // + union UINT_FLOAT PA_SC_AA_SAMPLE_LOCS_7; //// // = 0x28C38, // + union UINT_FLOAT PA_SC_AA_MASK; //// // = 0x28C3C, // SAME 0x28C48 + +/* Registers from VGT block: */ + union UINT_FLOAT VGT_INDEX_TYPE; // = 0x895C, // SAME + union UINT_FLOAT VGT_PRIMITIVE_TYPE; // = 0x8958, // SAME + union UINT_FLOAT VGT_MAX_VTX_INDX; //// // = 0x28400, // SAME + union UINT_FLOAT VGT_MIN_VTX_INDX; //// // = 0x28404, // SAME + union UINT_FLOAT VGT_INDX_OFFSET; //// // = 0x28408, // SAME + union UINT_FLOAT VGT_MULTI_PRIM_IB_RESET_INDX; // = 0x2840C, // SAME + + union UINT_FLOAT VGT_DRAW_INITIATOR; // = 0x287F0, // SAME + union UINT_FLOAT VGT_IMMED_DATA; // = 0x287F4, // SAME + + union UINT_FLOAT VGT_OUTPUT_PATH_CNTL; //// // = 0x28A10, // DIFF + union UINT_FLOAT VGT_HOS_CNTL; // = 0x28A14, // SAME + union UINT_FLOAT 
VGT_HOS_MAX_TESS_LEVEL; // = 0x28A18, // SAME + union UINT_FLOAT VGT_HOS_MIN_TESS_LEVEL; // = 0x28A1C, // SAME + union UINT_FLOAT VGT_HOS_REUSE_DEPTH; // = 0x28A20, // SAME + union UINT_FLOAT VGT_GROUP_PRIM_TYPE; // = 0x28A24, // SAME + union UINT_FLOAT VGT_GROUP_FIRST_DECR; // = 0x28A28, // SAME + union UINT_FLOAT VGT_GROUP_DECR; // = 0x28A2C, // SAME + union UINT_FLOAT VGT_GROUP_VECT_0_CNTL; // = 0x28A30, // SAME + union UINT_FLOAT VGT_GROUP_VECT_1_CNTL; // = 0x28A34, // SAME + union UINT_FLOAT VGT_GROUP_VECT_0_FMT_CNTL; // = 0x28A38, // SAME + union UINT_FLOAT VGT_GROUP_VECT_1_FMT_CNTL; // = 0x28A3C, // SAME + union UINT_FLOAT VGT_GS_MODE; //// // = 0x28A40, // DIFF + + union UINT_FLOAT VGT_PRIMITIVEID_EN; //// // = 0x28A84, // SAME + union UINT_FLOAT VGT_DMA_NUM_INSTANCES; //// // = 0x28A88, // SAME + union UINT_FLOAT VGT_EVENT_INITIATOR; // = 0x28A90, // SAME + union UINT_FLOAT VGT_MULTI_PRIM_IB_RESET_EN; // = 0x28A94, // SAME + union UINT_FLOAT VGT_INSTANCE_STEP_RATE_0; //// // = 0x28AA0, // SAME + union UINT_FLOAT VGT_INSTANCE_STEP_RATE_1; //// // = 0x28AA4, // SAME + union UINT_FLOAT VGT_REUSE_OFF; //// // = 0x28AB4, // SAME + union UINT_FLOAT VGT_VTX_CNT_EN; //// // = 0x28AB8, // SAME + + union UINT_FLOAT VGT_SHADER_STAGES_EN; //// // = 0x28B54, // + + union UINT_FLOAT VGT_STRMOUT_CONFIG; //// // = 0x28B94, // + union UINT_FLOAT VGT_STRMOUT_BUFFER_CONFIG; //// // = 0x28B98, // + union UINT_FLOAT VGT_VERTEX_REUSE_BLOCK_CNTL;//// // = 0x28C58, // SAME + union UINT_FLOAT VGT_OUT_DEALLOC_CNTL; //// // = 0x28C5C, // SAME + +/* Registers from SQ block: */ + union UINT_FLOAT SQ_VTX_SEMANTIC_0; //// // = 0x28380, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_1; //// // = 0x28384, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_2; //// // = 0x28388, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_3; //// // = 0x2838C, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_4; //// // = 0x28390, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_5; //// // = 0x28394, // SAME + union UINT_FLOAT 
SQ_VTX_SEMANTIC_6; //// // = 0x28398, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_7; //// // = 0x2839C, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_8; //// // = 0x283A0, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_9; //// // = 0x283A4, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_10; //// // = 0x283A8, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_11; //// // = 0x283AC, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_12; //// // = 0x283B0, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_13; //// // = 0x283B4, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_14; //// // = 0x283B8, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_15; //// // = 0x283BC, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_16; //// // = 0x283C0, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_17; //// // = 0x283C4, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_18; //// // = 0x283C8, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_19; //// // = 0x283CC, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_20; //// // = 0x283D0, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_21; //// // = 0x283D4, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_22; //// // = 0x283D8, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_23; //// // = 0x283DC, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_24; //// // = 0x283E0, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_25; //// // = 0x283E4, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_26; //// // = 0x283E8, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_27; //// // = 0x283EC, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_28; //// // = 0x283F0, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_29; //// // = 0x283F4, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_30; //// // = 0x283F8, // SAME + union UINT_FLOAT SQ_VTX_SEMANTIC_31; //// // = 0x283FC, // SAME + union UINT_FLOAT SQ_DYN_GPR_RESOURCE_LIMIT_1;//// // = 0x28838, // + + union UINT_FLOAT SQ_PGM_RESOURCES_PS; //// // = 0x28844, // DIFF 0x28850 + union UINT_FLOAT SQ_PGM_RESOURCES_2_PS; //// // = 0x28848, // + union UINT_FLOAT SQ_PGM_EXPORTS_PS; //// // = 0x2884C, // SAME 0x28854 + + union UINT_FLOAT 
SQ_PGM_RESOURCES_VS;//// // = 0x28860, // DIFF 0x28868 + union UINT_FLOAT SQ_PGM_RESOURCES_2_VS; //// // = 0x28864, // + union UINT_FLOAT SQ_PGM_START_GS; //// // = 0x28874, // SAME 0x2886C + union UINT_FLOAT SQ_PGM_RESOURCES_GS; //// // = 0x28878, // DIFF 0x2887C + union UINT_FLOAT SQ_PGM_RESOURCES_2_GS; //// // = 0x2887C, // + union UINT_FLOAT SQ_PGM_START_ES; //// // = 0x2888C, // SAME 0x28880 + union UINT_FLOAT SQ_PGM_RESOURCES_ES; //// // = 0x28890, // DIFF + union UINT_FLOAT SQ_PGM_RESOURCES_2_ES; //// // = 0x28894, // + union UINT_FLOAT SQ_PGM_START_FS; //// // = 0x288A4, // SAME 0x28894 + union UINT_FLOAT SQ_PGM_RESOURCES_FS; //// // = 0x288A8, // DIFF 0x288A4 + union UINT_FLOAT SQ_PGM_START_HS; // = 0x288B8, // + union UINT_FLOAT SQ_PGM_RESOURCES_HS; // = 0x288BC, // + union UINT_FLOAT SQ_PGM_RESOURCES_2_HS;//// // = 0x288C0, // + union UINT_FLOAT SQ_PGM_START_LS; // = 0x288D0, // + union UINT_FLOAT SQ_PGM_RESOURCES_LS; // = 0x288D4, // + union UINT_FLOAT SQ_PGM_RESOURCES_2_LS; //// // = 0x288D8, // + union UINT_FLOAT SQ_LDS_ALLOC_PS; //// // = 0x288EC, // + union UINT_FLOAT SQ_ESGS_RING_ITEMSIZE; //// // = 0x28900, // SAME 0x288A8 + union UINT_FLOAT SQ_GSVS_RING_ITEMSIZE; //// // = 0x28904, // SAME 0x288AC + union UINT_FLOAT SQ_ESTMP_RING_ITEMSIZE; //// // = 0x28908, // SAME 0x288B0 + union UINT_FLOAT SQ_GSTMP_RING_ITEMSIZE; //// // = 0x2890C, // SAME 0x288B4 + union UINT_FLOAT SQ_VSTMP_RING_ITEMSIZE; //// // = 0x28910, // SAME 0x288B8 + union UINT_FLOAT SQ_PSTMP_RING_ITEMSIZE; //// // = 0x28914, // SAME 0x288BC + union UINT_FLOAT SQ_GS_VERT_ITEMSIZE; //// // = 0x2891C, // SAME 0x288C8 + union UINT_FLOAT SQ_GS_VERT_ITEMSIZE_1; // = 0x28920, // + union UINT_FLOAT SQ_GS_VERT_ITEMSIZE_2; // = 0x28924, // + union UINT_FLOAT SQ_GS_VERT_ITEMSIZE_3; // = 0x28928, // + + union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_0; // = 0x289C0, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_1; // = 0x289C4, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_2; // = 0x289C8, // SAME 
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_3; // = 0x289CC, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_4; // = 0x289D0, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_5; // = 0x289D4, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_6; // = 0x289D8, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_7; // = 0x289DC, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_8; // = 0x289E0, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_9; // = 0x289E4, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_10; // = 0x289E8, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_11; // = 0x289EC, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_12; // = 0x289F0, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_13; // = 0x289F4, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_14; // = 0x289F8, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_15; // = 0x289FC, // SAME + union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_0; // = 0x28F00, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_1; // = 0x28F04, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_2; // = 0x28F08, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_3; // = 0x28F0C, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_4; // = 0x28F10, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_5; // = 0x28F14, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_6; // = 0x28F18, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_7; // = 0x28F1C, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_8; // = 0x28F20, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_9; // = 0x28F24, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_10; // = 0x28F28, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_11; // = 0x28F2C, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_12; // = 0x28F30, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_13; // = 0x28F34, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_14; // = 0x28F38, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_15; // = 0x28F3C, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_0; // = 0x28F40, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_1; // = 0x28F44, // + union UINT_FLOAT 
SQ_ALU_CONST_CACHE_LS_2; // = 0x28F48, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_3; // = 0x28F4C, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_4; // = 0x28F50, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_5; // = 0x28F54, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_6; // = 0x28F58, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_7; // = 0x28F5C, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_8; // = 0x28F60, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_9; // = 0x28F64, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_10; // = 0x28F68, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_11; // = 0x28F6C, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_12; // = 0x28F70, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_13; // = 0x28F74, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_14; // = 0x28F78, // + union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_15; // = 0x28F7C, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_0; // = 0x28F80, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_1; // = 0x28F84, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_2; // = 0x28F88, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_3; // = 0x28F8C, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_4; // = 0x28F90, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_5; // = 0x28F94, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_6; // = 0x28F98, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_7; // = 0x28F9C, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_8; // = 0x28FA0, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_9; // = 0x28FA4, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_10; // = 0x28FA8, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_11; // = 0x28FAC, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_12; // = 0x28FB0, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_13; // = 0x28FB4, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_14; // = 0x28FB8, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_15; // = 0x28FBC, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_0; // = 0x28FC0, // + union 
UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_1; // = 0x28FC4, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_2; // = 0x28FC8, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_3; // = 0x28FCC, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_4; // = 0x28FD0, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_5; // = 0x28FD4, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_6; // = 0x28FD8, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_7; // = 0x28FDC, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_8; // = 0x28FE0, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_9; // = 0x28FE4, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_10; // = 0x28FE8, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_11; // = 0x28FEC, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_12; // = 0x28FF0, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_13; // = 0x28FF4, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_14; // = 0x28FF8, // + union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_15; // = 0x28FFC, // + + EVERGREEN_PS_RES ps; + EVERGREEN_VS_RES vs; + +/* Registers from SPI block: */ + union UINT_FLOAT SPI_VS_OUT_ID_0; //// // = 0x2861C, // SAME 0x28614 + union UINT_FLOAT SPI_VS_OUT_ID_1; //// // = 0x28620, // SAME 0x28618 + union UINT_FLOAT SPI_VS_OUT_ID_2; //// // = 0x28624, // SAME 0x2861C + union UINT_FLOAT SPI_VS_OUT_ID_3; //// // = 0x28628, // SAME 0x28620 + union UINT_FLOAT SPI_VS_OUT_ID_4; //// // = 0x2862C, // SAME 0x28624 + union UINT_FLOAT SPI_VS_OUT_ID_5; //// // = 0x28630, // SAME 0x28628 + union UINT_FLOAT SPI_VS_OUT_ID_6; //// // = 0x28634, // SAME 0x2862C + union UINT_FLOAT SPI_VS_OUT_ID_7; //// // = 0x28638, // SAME 0x28630 + union UINT_FLOAT SPI_VS_OUT_ID_8; //// // = 0x2863C, // SAME 0x28634 + union UINT_FLOAT SPI_VS_OUT_ID_9; //// // = 0x28640, // SAME 0x28638 + union UINT_FLOAT SPI_PS_INPUT_CNTL[32]; //// // = 0x28644, // SAME + + union UINT_FLOAT SPI_VS_OUT_CONFIG; //// // = 0x286C4, // SAME + union UINT_FLOAT SPI_THREAD_GROUPING; //// // = 0x286C8, // DIFF 
+ union UINT_FLOAT SPI_PS_IN_CONTROL_0; //// // = 0x286CC, // SAME + union UINT_FLOAT SPI_PS_IN_CONTROL_1; //// // = 0x286D0, // SAME + union UINT_FLOAT SPI_INTERP_CONTROL_0; //// // = 0x286D4, // SAME + union UINT_FLOAT SPI_INPUT_Z; //// // = 0x286D8, // SAME + union UINT_FLOAT SPI_FOG_CNTL; //// // = 0x286DC, // SAME + union UINT_FLOAT SPI_BARYC_CNTL; //// // = 0x286E0, // + union UINT_FLOAT SPI_PS_IN_CONTROL_2; //// // = 0x286E4, // + union UINT_FLOAT SPI_COMPUTE_INPUT_CNTL; // = 0x286E8, // + union UINT_FLOAT SPI_COMPUTE_NUM_THREAD_X; // = 0x286EC, // + union UINT_FLOAT SPI_COMPUTE_NUM_THREAD_Y; // = 0x286F0, // + union UINT_FLOAT SPI_COMPUTE_NUM_THREAD_Z; // = 0x286F4, // + +/* Registers from SX block: */ + union UINT_FLOAT SX_MISC; // = 0x28350, // SAME + union UINT_FLOAT SX_SURFACE_SYNC; // = 0x28354, // DIFF + union UINT_FLOAT SX_ALPHA_TEST_CONTROL; //// // = 0x28410, // SAME + union UINT_FLOAT SX_ALPHA_REF; // = 0x28438, // SAME + +/* Registers from DB block: */ + union UINT_FLOAT DB_RENDER_CONTROL; //// // = 0x28000, // DIFF 0x28D0C + union UINT_FLOAT DB_COUNT_CONTROL; //// // = 0x28004, // + union UINT_FLOAT DB_DEPTH_VIEW; //// // = 0x28008, // DIFF 0x28004 + union UINT_FLOAT DB_RENDER_OVERRIDE; //// // = 0x2800C, // DIFF 0x28D10 + union UINT_FLOAT DB_RENDER_OVERRIDE2; //// // = 0x28010, // + union UINT_FLOAT DB_HTILE_DATA_BASE; //// // = 0x28014, // SAME + union UINT_FLOAT DB_STENCIL_CLEAR; //// // = 0x28028, // SAME + union UINT_FLOAT DB_DEPTH_CLEAR; //// // = 0x2802C, // SAME + union UINT_FLOAT DB_Z_INFO; //// // = 0x28040, // + union UINT_FLOAT DB_STENCIL_INFO; //// // = 0x28044, // + union UINT_FLOAT DB_Z_READ_BASE; //// // = 0x28048, // + union UINT_FLOAT DB_STENCIL_READ_BASE;//// // = 0x2804C, // + union UINT_FLOAT DB_Z_WRITE_BASE; //// // = 0x28050, // + union UINT_FLOAT DB_STENCIL_WRITE_BASE; //// // = 0x28054, // + union UINT_FLOAT DB_DEPTH_SIZE; //// // = 0x28058, // DIFF 0x28000 + union UINT_FLOAT DB_DEPTH_SLICE; //// // = 0x2805C, // + union 
UINT_FLOAT DB_STENCILREFMASK; // = 0x28430, // SAME + union UINT_FLOAT DB_STENCILREFMASK_BF; // = 0x28434, // SAME + union UINT_FLOAT DB_DEPTH_CONTROL; //// // = 0x28800, // SAME + union UINT_FLOAT DB_SHADER_CONTROL;//// // = 0x2880C, // DIFF + union UINT_FLOAT DB_HTILE_SURFACE; //// // = 0x28ABC, // SAME 0x28D24 + union UINT_FLOAT DB_SRESULTS_COMPARE_STATE0; //// // = 0x28AC0, // SAME 0x28D28 + union UINT_FLOAT DB_SRESULTS_COMPARE_STATE1; //// // = 0x28AC4, // SAME 0x28D2C + union UINT_FLOAT DB_PRELOAD_CONTROL; //// // = 0x28AC8, // SAME 0x28D30 + union UINT_FLOAT DB_ALPHA_TO_MASK; //// // = 0x28B70, // SAME 0x28D44 + +/* Registers from CB block: */ + union UINT_FLOAT CB_TARGET_MASK; //// // = 0x28238, // SAME + union UINT_FLOAT CB_SHADER_MASK; //// // = 0x2823C, // SAME + union UINT_FLOAT CB_BLEND_RED; //// // = 0x28414, // SAME + union UINT_FLOAT CB_BLEND_GREEN; //// // = 0x28418, // SAME + union UINT_FLOAT CB_BLEND_BLUE; //// // = 0x2841C, // SAME + union UINT_FLOAT CB_BLEND_ALPHA; //// // = 0x28420, // SAME + union UINT_FLOAT CB_BLEND0_CONTROL; //// // = 0x28780, // DIFF + union UINT_FLOAT CB_BLEND1_CONTROL; // = 0x28784, // DIFF + union UINT_FLOAT CB_BLEND2_CONTROL; // = 0x28788, // DIFF + union UINT_FLOAT CB_BLEND3_CONTROL; // = 0x2878C, // DIFF + union UINT_FLOAT CB_BLEND4_CONTROL; // = 0x28790, // DIFF + union UINT_FLOAT CB_BLEND5_CONTROL; // = 0x28794, // DIFF + union UINT_FLOAT CB_BLEND6_CONTROL; // = 0x28798, // DIFF + union UINT_FLOAT CB_BLEND7_CONTROL; // = 0x2879C, // DIFF + union UINT_FLOAT CB_COLOR_CONTROL; //// // = 0x28808, // DIFF + union UINT_FLOAT CB_CLRCMP_CONTROL; //// // = 0x28C40, // SAME 0x28C30 + union UINT_FLOAT CB_CLRCMP_SRC; //// // = 0x28C44, // SAME 0x28C34 + union UINT_FLOAT CB_CLRCMP_DST; //// // = 0x28C48, // SAME 0x28C38 + union UINT_FLOAT CB_CLRCMP_MSK; //// // = 0x28C4C, // SAME 0x28C3C + + EVERGREEN_RENDER_TARGET_STATE render_target[EVERGREEN_MAX_RENDER_TARGETS]; + + radeonTexObj* textures[R700_TEXTURE_NUMBERUNITS]; + + 
EVERGREEN_CONFIG evergreen_config; + + GLboolean bEnablePerspective; + +} EVERGREEN_CHIP_CONTEXT; + +#endif /* _EVERGREEN_CHIP_H_ */
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r600/evergreen_context.c b/src/mesa/drivers/dri/r600/evergreen_context.c new file mode 100644 index 00000000000..65b5898efa6 --- /dev/null +++ b/src/mesa/drivers/dri/r600/evergreen_context.c @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2008-2010 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +/* + * Authors: + * Richard Li <[email protected]>, <[email protected]> + */ + +#include "main/glheader.h" +#include "main/api_arrayelt.h" +#include "main/context.h" +#include "main/simple_list.h" +#include "main/imports.h" +#include "main/bufferobj.h" +#include "main/texobj.h" + +#include "radeon_common_context.h" +#include "evergreen_context.h" +#include "evergreen_state.h" +#include "r600_blit.h" + +static void evergreen_get_lock(radeonContextPtr rmesa) +{ + drm_radeon_sarea_t *sarea = rmesa->sarea; + + if (sarea->ctx_owner != rmesa->dri.hwContext) { + sarea->ctx_owner = rmesa->dri.hwContext; + if (!rmesa->radeonScreen->kernel_mm) + radeon_bo_legacy_texture_age(rmesa->radeonScreen->bom); + } +} + +static void evergreen_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa) +{ + /* please flush pipe do all pending work */ + /* to be enabled */ +} + +static void evergreen_vtbl_pre_emit_atoms(radeonContextPtr radeon) +{ + //TODO apr.01 + //r700Start3D((context_t *)radeon); +} + +static void evergreen_fallback(GLcontext *ctx, GLuint bit, GLboolean mode) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + if (mode) + context->radeon.Fallback |= bit; + else + context->radeon.Fallback &= ~bit; +} + +static void evergreen_emit_query_finish(radeonContextPtr radeon) +{ + //TODO apr.01 + //context_t *context = (context_t*) radeon; + //BATCH_LOCALS(&context->radeon); + + struct radeon_query_object *query = radeon->query.current; + + //BEGIN_BATCH_NO_AUTOSTATE(4 + 2); + //R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 2)); + //R600_OUT_BATCH(ZPASS_DONE); + //R600_OUT_BATCH(query->curr_offset + 8); /* hw writes qwords */ + //R600_OUT_BATCH(0x00000000); + //R600_OUT_BATCH_RELOC(VGT_EVENT_INITIATOR, query->bo, 0, 0, RADEON_GEM_DOMAIN_GTT, 0); + //END_BATCH(); + //assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE); + query->emitted_begin = GL_FALSE; +} + +void evergreen_init_vtbl(radeonContextPtr radeon) +{ + radeon->vtbl.get_lock = evergreen_get_lock; + 
radeon->vtbl.update_viewport_offset = evergreenUpdateViewportOffset; + radeon->vtbl.emit_cs_header = evergreen_vtbl_emit_cs_header; + radeon->vtbl.swtcl_flush = NULL; + radeon->vtbl.pre_emit_atoms = evergreen_vtbl_pre_emit_atoms; + radeon->vtbl.fallback = evergreen_fallback; + radeon->vtbl.emit_query_finish = evergreen_emit_query_finish; + radeon->vtbl.check_blit = r600_check_blit; + radeon->vtbl.blit = r600_blit; + radeon->vtbl.is_format_renderable = radeonIsFormatRenderable; +} + + + diff --git a/src/mesa/drivers/dri/r600/evergreen_context.h b/src/mesa/drivers/dri/r600/evergreen_context.h new file mode 100644 index 00000000000..4e50999c98f --- /dev/null +++ b/src/mesa/drivers/dri/r600/evergreen_context.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2008-2010 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +/* + * Authors: + * Richard Li <[email protected]>, <[email protected]> + */ + +#ifndef _EVERGREEN_CONTEXT_H_ +#define _EVERGREEN_CONTEXT_H_ + +extern void evergreen_init_vtbl(radeonContextPtr radeon); + +#endif //_EVERGREEN_CONTEXT_H_ + + + + + + diff --git a/src/mesa/drivers/dri/r600/evergreen_diff.h b/src/mesa/drivers/dri/r600/evergreen_diff.h new file mode 100644 index 00000000000..c3a5fd0a38a --- /dev/null +++ b/src/mesa/drivers/dri/r600/evergreen_diff.h @@ -0,0 +1,335 @@ +/* + * Copyright (C) 2008-2010 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +/* + * Authors: + * Richard Li <[email protected]>, <[email protected]> + */ + +#ifndef _EVERGREEN_DIFF_H_ +#define _EVERGREEN_DIFF_H_ + +enum { + /* CB_BLEND_CONTROL */ + EG_CB_BLENDX_CONTROL_ENABLE_bit = 1 << 30, + /* PA_SC_SCREEN_SCISSOR_TL */ + EG_PA_SC_SCREEN_SCISSOR_TL__TL_X_mask = 0xffff << 0, + EG_PA_SC_SCREEN_SCISSOR_TL__TL_Y_mask = 0xffff << 16, + /* PA_SC_SCREEN_SCISSOR_BR */ + EG_PA_SC_SCREEN_SCISSOR_BR__BR_X_mask = 0xffff << 0, + EG_PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask = 0xffff << 16, + /* PA_SC_WINDOW_SCISSOR_TL */ + EG_PA_SC_WINDOW_SCISSOR_TL__TL_X_mask = 0x7fff << 0, + EG_PA_SC_WINDOW_SCISSOR_TL__TL_Y_mask = 0x7fff << 16, + /* PA_SC_WINDOW_SCISSOR_BR */ + EG_PA_SC_WINDOW_SCISSOR_BR__BR_X_mask = 0x7fff << 0, + EG_PA_SC_WINDOW_SCISSOR_BR__BR_Y_mask = 0x7fff << 16, + /* PA_SC_CLIPRECT_0_TL */ + EG_PA_SC_CLIPRECT_0_TL__TL_X_mask = 0x7fff << 0, + EG_PA_SC_CLIPRECT_0_TL__TL_Y_mask = 0x7fff << 16, + /* PA_SC_CLIPRECT_0_BR */ + EG_PA_SC_CLIPRECT_0_BR__BR_X_mask = 0x7fff << 0, + EG_PA_SC_CLIPRECT_0_BR__BR_Y_mask = 0x7fff << 16, + /* PA_SC_GENERIC_SCISSOR_TL */ + EG_PA_SC_GENERIC_SCISSOR_TL__TL_X_mask = 0x7fff << 0, + EG_PA_SC_GENERIC_SCISSOR_TL__TL_Y_mask = 0x7fff << 16, + /* PA_SC_GENERIC_SCISSOR_BR */ + EG_PA_SC_GENERIC_SCISSOR_BR__BR_X_mask = 0x7fff << 0, + EG_PA_SC_GENERIC_SCISSOR_BR__BR_Y_mask = 0x7fff << 16, + /* PA_SC_VPORT_SCISSOR_0_TL */ + EG_PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask = 0x7fff << 0, + EG_PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask = 0x7fff << 16, + /* PA_SC_VPORT_SCISSOR_0_BR */ + EG_PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask = 0x7fff << 0, + EG_PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask = 0x7fff << 16, + /* PA_SC_WINDOW_OFFSET */ + EG_PA_SC_WINDOW_OFFSET__WINDOW_X_OFFSET_shift = 0, + EG_PA_SC_WINDOW_OFFSET__WINDOW_X_OFFSET_mask = 0xffff << 0, + EG_PA_SC_WINDOW_OFFSET__WINDOW_Y_OFFSET_shift = 16, + EG_PA_SC_WINDOW_OFFSET__WINDOW_Y_OFFSET_mask = 0xffff << 16, + /* SPI_BARYC_CNTL */ + EG_SPI_BARYC_CNTL__PERSP_CENTROID_ENA_shift = 4, + 
EG_SPI_BARYC_CNTL__PERSP_CENTROID_ENA_mask = 0x3 << 4, + EG_SPI_BARYC_CNTL__LINEAR_CENTROID_ENA_shift = 20, + EG_SPI_BARYC_CNTL__LINEAR_CENTROID_ENA_mask = 0x3 << 20, + /* DB_SHADER_CONTROL */ + EG_DB_SHADER_CONTROL__DUAL_EXPORT_ENABLE_bit = 1 << 9, + + /* DB_Z_INFO */ + EG_DB_Z_INFO__FORMAT_shift = 0, //2; + EG_DB_Z_INFO__FORMAT_mask = 0x3, + //2; + EG_DB_Z_INFO__ARRAY_MODE_shift = 4, //4; + EG_DB_Z_INFO__ARRAY_MODE_mask = 0xf << 4, + EG_DB_Z_INFO__TILE_SPLIT_shift = 8, //3; + EG_DB_Z_INFO__TILE_SPLIT_mask = 0x7 << 8, + //1; + EG_DB_Z_INFO__NUM_BANKS_shift = 12, //2; + EG_DB_Z_INFO__NUM_BANKS_mask = 0x3 << 12, + //2; + EG_DB_Z_INFO__BANK_WIDTH_shift = 16, //2; + EG_DB_Z_INFO__BANK_WIDTH_mask = 0x3 << 16, + //2; + EG_DB_Z_INFO__BANK_HEIGHT_shift = 20, //2; + EG_DB_Z_INFO__BANK_HEIGHT_mask = 0x3 << 20, + + EG_Z_INVALID = 0x00000000, + EG_Z_16 = 0x00000001, + EG_Z_24 = 0x00000002, + EG_Z_32_FLOAT = 0x00000003, + EG_ADDR_SURF_TILE_SPLIT_256B = 0x00000002, + EG_ADDR_SURF_8_BANK = 0x00000002, + EG_ADDR_SURF_BANK_WIDTH_1 = 0x00000000, + EG_ADDR_SURF_BANK_HEIGHT_1 = 0x00000000, + /* DB_STENCIL_INFO */ + EG_DB_STENCIL_INFO__FORMAT_bit = 1, //1; + //7; + EG_DB_STENCIL_INFO__TILE_SPLIT_shift = 8, //3; + EG_DB_STENCIL_INFO__TILE_SPLIT_mask = 0x7 << 8, + + /* DB_DEPTH_SIZE */ + EG_DB_DEPTH_SIZE__PITCH_TILE_MAX_shift = 0, // 11; + EG_DB_DEPTH_SIZE__PITCH_TILE_MAX_mask = 0x7ff, + EG_DB_DEPTH_SIZE__HEIGHT_TILE_MAX_shift = 11, // 11; + EG_DB_DEPTH_SIZE__HEIGHT_TILE_MAX_mask = 0x7ff << 11, + + /* DB_COUNT_CONTROL */ + EG_DB_COUNT_CONTROL__ZPASS_INCREMENT_DISABLE_shift = 0, //1 + EG_DB_COUNT_CONTROL__ZPASS_INCREMENT_DISABLE_bit = 1, + EG_DB_COUNT_CONTROL__PERFECT_ZPASS_COUNTS_shift = 1, //1 + EG_DB_COUNT_CONTROL__PERFECT_ZPASS_COUNTS_bit = 1 << 1, + + /* CB_COLOR_CONTROL */ + //3; + EG_CB_COLOR_CONTROL__DEGAMMA_ENABLE_bit = 1 << 3,//1; + EG_CB_COLOR_CONTROL__MODE_shift = 4, //3; + EG_CB_COLOR_CONTROL__MODE_mask = 0x7 << 4, + //9; + EG_CB_COLOR_CONTROL__ROP3_shift = 16, //8; + 
EG_CB_COLOR_CONTROL__ROP3_mask = 0xff << 16, + EG_CB_NORMAL = 0x00000001, + + /* CB_COLOR0_INFO */ + EG_CB_COLOR0_INFO__ENDIAN_shift = 0, //2; + EG_CB_COLOR0_INFO__ENDIAN_mask = 0x3, + EG_CB_COLOR0_INFO__FORMAT_shift = 2, //6; + EG_CB_COLOR0_INFO__FORMAT_mask = 0x3f << 2, + EG_CB_COLOR0_INFO__ARRAY_MODE_shift = 8, //4; + EG_CB_COLOR0_INFO__ARRAY_MODE_mask = 0xf << 8, + EG_CB_COLOR0_INFO__NUMBER_TYPE_shift = 12, //3; + EG_CB_COLOR0_INFO__NUMBER_TYPE_mask = 0x7 << 12, + EG_CB_COLOR0_INFO__COMP_SWAP_shift = 15, //2; + EG_CB_COLOR0_INFO__COMP_SWAP_mask = 0x3 << 15, + EG_CB_COLOR0_INFO__FAST_CLEAR_bit = 1 << 17,//1; + EG_CB_COLOR0_INFO__COMPRESSION_bit = 1 << 18,//1; + EG_CB_COLOR0_INFO__BLEND_CLAMP_bit = 1 << 19,//1; + EG_CB_COLOR0_INFO__BLEND_BYPASS_bit = 1 << 20,//1; + EG_CB_COLOR0_INFO__SIMPLE_FLOAT_bit = 1 << 21,//1; + EG_CB_COLOR0_INFO__ROUND_MODE_bit = 1 << 22,//1; + EG_CB_COLOR0_INFO__TILE_COMPACT_bit = 1 << 23,//1; + EG_CB_COLOR0_INFO__SOURCE_FORMAT_shift = 24, //2; + EG_CB_COLOR0_INFO__SOURCE_FORMAT_mask = 0x3 << 24, + EG_CB_COLOR0_INFO__RAT_bit = 1 << 26,//1; + EG_CB_COLOR0_INFO__RESOURCE_TYPE_shift = 27, //3; + EG_CB_COLOR0_INFO__RESOURCE_TYPE_mask = 0x7 << 27, + + /* CB_COLOR0_ATTRIB */ + EG_CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_shift = 4, + EG_CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_bit = 1 << 4, + + /* SPI_CONFIG_CNTL_1 */ + EG_SPI_CONFIG_CNTL_1__VTX_DONE_DELAY_shift = 0, + EG_SPI_CONFIG_CNTL_1__VTX_DONE_DELAY_mask = 0xf, + /* SQ_MS_FIFO_SIZES */ + EG_SQ_MS_FIFO_SIZES__CACHE_FIFO_SIZE_shift = 0, + EG_SQ_MS_FIFO_SIZES__CACHE_FIFO_SIZE_mask = 0xff, + EG_SQ_MS_FIFO_SIZES__FETCH_FIFO_HIWATER_shift = 8, + EG_SQ_MS_FIFO_SIZES__FETCH_FIFO_HIWATER_mask = 0x1f << 8, + EG_SQ_MS_FIFO_SIZES__DONE_FIFO_HIWATER_shift = 16, + EG_SQ_MS_FIFO_SIZES__DONE_FIFO_HIWATER_mask = 0xff << 16, + EG_SQ_MS_FIFO_SIZES__ALU_UPDATE_FIFO_HIWATER_shift = 24, + EG_SQ_MS_FIFO_SIZES__ALU_UPDATE_FIFO_HIWATER_mask = 0x1f << 24, + /* SQ_CONFIG */ + EG_SQ_CONFIG__VC_ENABLE_bit = 1, + 
EG_SQ_CONFIG__EXPORT_SRC_C_bit = 1 << 1, + EG_SQ_CONFIG__PS_PRIO_shift = 24, + EG_SQ_CONFIG__PS_PRIO_mask = 0x3 << 24, + EG_SQ_CONFIG__VS_PRIO_shift = 26, + EG_SQ_CONFIG__VS_PRIO_mask = 0x3 << 26, + EG_SQ_CONFIG__GS_PRIO_shift = 28, + EG_SQ_CONFIG__GS_PRIO_mask = 0x3 << 28, + EG_SQ_CONFIG__ES_PRIO_shift = 30, + EG_SQ_CONFIG__ES_PRIO_mask = 0x3 << 30, + /* PA_SC_FORCE_EOV_MAX_CNTS */ + EG_PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_CLK_CNT_shift = 0, + EG_PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_CLK_CNT_mask = 0x3fff, + EG_PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_REZ_CNT_shift = 16, + EG_PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_REZ_CNT_mask = 0x3fff << 16, + /* VGT_CACHE_INVALIDATION */ + EG_VGT_CACHE_INVALIDATION__CACHE_INVALIDATION_shift = 0, + EG_VGT_CACHE_INVALIDATION__CACHE_INVALIDATION_mask = 0x3, + /* CB_COLOR0_PITCH */ + EG_CB_COLOR0_PITCH__TILE_MAX_shift = 0, + EG_CB_COLOR0_PITCH__TILE_MAX_mask = 0x7ff, + /* CB_COLOR0_SLICE */ + EG_CB_COLOR0_SLICE__TILE_MAX_shift = 0, + EG_CB_COLOR0_SLICE__TILE_MAX_mask = 0x3fffff, + /* SQ_VTX_CONSTANT_WORD3_0 */ + EG_SQ_VTX_CONSTANT_WORD3_0__UNCACHED_shift = 2, + EG_SQ_VTX_CONSTANT_WORD3_0__UNCACHED_bit = 1 << 2, + + EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_shift = 3, + EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_mask = 0x7 << 3, + + EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_shift = 6, + EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_mask = 0x7 << 6, + + EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_shift = 9, + EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_mask = 0x7 << 9, + + EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_shift = 12, + EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_mask = 0x7 << 12, + /* SQ_VTX_CONSTANT_WORD4_0 */ + EG_SQ_VTX_CONSTANT_WORD4_0__NUM_ELEMENTS_shift = 0, + EG_SQ_VTX_CONSTANT_WORD4_0__NUM_ELEMENTS_mask = 0xFFFFFFFF, + /* SQ_VTX_CONSTANT_WORD7_0 */ + EG_SQ_VTX_CONSTANT_WORD7_0__TYPE_shift = 30, + EG_SQ_VTX_CONSTANT_WORD7_0__TYPE_mask = 0x3 << 30, + /* SQ_TEX_SAMPLER_WORD0_0 */ + EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift = 0, // 3; + 
EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask = 0x7, + EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_Y_shift = 3, // 3; + EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_Y_mask = 0x7 << 3, + EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_Z_shift = 6, // 3; + EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_Z_mask = 0x7 << 6, + EG_SQ_TEX_SAMPLER_WORD0_0__XY_MAG_FILTER_shift = 9, // 2; + EG_SQ_TEX_SAMPLER_WORD0_0__XY_MAG_FILTER_mask = 0x3 << 9, + EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_shift = 11, // 2; + EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_mask = 0x3 << 11, + EG_SQ_TEX_SAMPLER_WORD0_0__Z_FILTER_shift = 13, // 2; + EG_SQ_TEX_SAMPLER_WORD0_0__Z_FILTER_mask = 0x3 << 13, + EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_shift = 15, // 2; + EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_mask = 0x3 << 15, + EG_SQ_TEX_SAMPLER_WORD0_0__MAX_ANISO_RATIO_shift = 17, // 3; + EG_SQ_TEX_SAMPLER_WORD0_0__MAX_ANISO_RATIO_mask = 0x7 << 17, + EG_SQ_TEX_SAMPLER_WORD0_0__BORDER_COLOR_TYPE_shift = 20,//2; + EG_SQ_TEX_SAMPLER_WORD0_0__BORDER_COLOR_TYPE_mask = 0x3 << 20, + EG_SQ_TEX_SAMPLER_WORD0_0__DCF_shift = 22, // 3; + EG_SQ_TEX_SAMPLER_WORD0_0__DCF_mask = 0x7 << 22, + EG_SQ_TEX_SAMPLER_WORD0_0__CHROMA_KEY_shift = 25, // 2; + EG_SQ_TEX_SAMPLER_WORD0_0__CHROMA_KEY_mask = 0x3 << 25, + EG_SQ_TEX_SAMPLER_WORD0_0__ANISO_THRESHOLD_shift = 27, // 3; + EG_SQ_TEX_SAMPLER_WORD0_0__ANISO_THRESHOLD_mask = 0x7 << 27, + EG_SQ_TEX_SAMPLER_WORD0_0__Reserved_shift = 30, // 2 + EG_SQ_TEX_SAMPLER_WORD0_0__Reserved_mask = 0x3 << 30, + /* SQ_TEX_SAMPLER_WORD1_0 */ + EG_SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_shift = 0, // 12; + EG_SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_mask = 0xfff, + EG_SQ_TEX_SAMPLER_WORD1_0__MAX_LOD_shift = 12,// 12; + EG_SQ_TEX_SAMPLER_WORD1_0__MAX_LOD_mask = 0xfff << 12, + /* SQ_TEX_SAMPLER_WORD2_0 */ + EG_SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_shift = 0, //14; + EG_SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_mask = 0x3fff, + EG_SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_SEC_shift = 14,//6; + EG_SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_SEC_mask = 0x3f << 14, + 
EG_SQ_TEX_SAMPLER_WORD2_0__MC_COORD_TRUNCATE_shift = 20,//1; + EG_SQ_TEX_SAMPLER_WORD2_0__MC_COORD_TRUNCATE_bit = 1 << 20, + EG_SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_shift = 21,//1; + EG_SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit = 1 << 21, + EG_SQ_TEX_SAMPLER_WORD2_0__ANISO_BIAS_shift = 22,//6; + EG_SQ_TEX_SAMPLER_WORD2_0__ANISO_BIAS_mask = 0x3f << 22, + EG_SQ_TEX_SAMPLER_WORD2_0__TRUNCATE_COORD_shift = 28,//1; + EG_SQ_TEX_SAMPLER_WORD2_0__TRUNCATE_COORD_bit = 1 << 28, + EG_SQ_TEX_SAMPLER_WORD2_0__DISABLE_CUBE_WRAP_shift = 29,//1; + EG_SQ_TEX_SAMPLER_WORD2_0__DISABLE_CUBE_WRAP_bit = 1 << 29, + EG_SQ_TEX_SAMPLER_WORD2_0__Reserved_shift = 30,//1; + EG_SQ_TEX_SAMPLER_WORD2_0__Reserved_bit = 1 << 30, + EG_SQ_TEX_SAMPLER_WORD2_0__TYPE_shift = 31,//1; + EG_SQ_TEX_SAMPLER_WORD2_0__TYPE_bit = 1 << 31, + /* SQ_TEX_RESOURCE_WORD0_0 */ + EG_SQ_TEX_RESOURCE_WORD0_0__DIM_shift = 0, // 3; + EG_SQ_TEX_RESOURCE_WORD0_0__DIM_mask = 0x7, + EG_SQ_TEX_RESOURCE_WORD0_0__ISET_shift = 3, // 1; + EG_SQ_TEX_RESOURCE_WORD0_0__ISET_bit = 1 << 3, + EG_SQ_TEX_RESOURCE_WORD0_0__Reserve_shift = 4, // 1; + EG_SQ_TEX_RESOURCE_WORD0_0__Reserve_bit = 1 << 4, + EG_SQ_TEX_RESOURCE_WORD0_0__NDTO_shift = 5, // 1; + EG_SQ_TEX_RESOURCE_WORD0_0__NDTO_bit = 1 << 5, + EG_SQ_TEX_RESOURCE_WORD0_0__PITCH_shift = 6, // 12; + EG_SQ_TEX_RESOURCE_WORD0_0__PITCH_mask = 0xfff << 6, + EG_SQ_TEX_RESOURCE_WORD0_0__TEX_WIDTH_shift = 18,// 14; + EG_SQ_TEX_RESOURCE_WORD0_0__TEX_WIDTH_mask = 0x3fff << 18, + /* SQ_TEX_RESOURCE_WORD1_0 */ + EG_SQ_TEX_RESOURCE_WORD1_0__TEX_HEIGHT_shift = 0, // 14; + EG_SQ_TEX_RESOURCE_WORD1_0__TEX_HEIGHT_mask = 0x3fff, + EG_SQ_TEX_RESOURCE_WORD1_0__TEX_DEPTH_shift = 14,// 13; + EG_SQ_TEX_RESOURCE_WORD1_0__TEX_DEPTH_mask = 0x1fff << 14, + EG_SQ_TEX_RESOURCE_WORD1_0__Reserved_shift = 27,// 1; + EG_SQ_TEX_RESOURCE_WORD1_0__Reserved_bit = 1 << 27, + EG_SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift = 28,// 4; + EG_SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_mask = 0xf << 28, + /* SQ_TEX_RESOURCE_WORD6_0 */ + 
EG_SQ_TEX_RESOURCE_WORD6_0__MAX_ANISO_RATIO_shift = 0, //: 3; + EG_SQ_TEX_RESOURCE_WORD6_0__MAX_ANISO_RATIO_mask = 0x7, + EG_SQ_TEX_RESOURCE_WORD6_0__INTERLACED_shift = 6, //1; + EG_SQ_TEX_RESOURCE_WORD6_0__INTERLACED_bit = 1 << 6, + EG_SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_shift = 8, //12; + EG_SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_mask = 0xfff << 8, + EG_SQ_TEX_RESOURCE_WORD6_0__TILE_SPLIT_shift = 29,// 3; + EG_SQ_TEX_RESOURCE_WORD6_0__TILE_SPLIT_mask = 0x7 << 29, + /* SQ_TEX_RESOURCE_WORD7_0 */ + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift = 0, // 6; + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask = 0x3f, + EG_SQ_TEX_RESOURCE_WORD7_0__MACRO_TILE_ASPECT_shift = 6, // 2; + EG_SQ_TEX_RESOURCE_WORD7_0__MACRO_TILE_ASPECT_mask = 0x3 << 6, + EG_SQ_TEX_RESOURCE_WORD7_0__BANK_WIDTH_shift = 8, // 2; + EG_SQ_TEX_RESOURCE_WORD7_0__BANK_WIDTH_mask = 0x3 << 8, + EG_SQ_TEX_RESOURCE_WORD7_0__BANK_HEIGHT_shift = 10,// 2; + EG_SQ_TEX_RESOURCE_WORD7_0__BANK_HEIGHT_mask = 0x3 << 10, + EG_SQ_TEX_RESOURCE_WORD7_0__DEPTH_SAMPLE_ORDER_shift = 15,// 1; + EG_SQ_TEX_RESOURCE_WORD7_0__DEPTH_SAMPLE_ORDER_bit = 1 << 15, + EG_SQ_TEX_RESOURCE_WORD7_0__NUM_BANKS_shift = 16,// 2; + EG_SQ_TEX_RESOURCE_WORD7_0__NUM_BANKS_mask = 0x3 << 16, + EG_SQ_TEX_RESOURCE_WORD7_0__TYPE_shift = 30,// 2; + EG_SQ_TEX_RESOURCE_WORD7_0__TYPE_mask = 0x3 << 30, +}; + +/* */ + +#define EG_SQ_FETCH_RESOURCE_COUNT 0x00000400 +#define EG_SQ_TEX_SAMPLER_COUNT 0x0000006c +#define EG_SQ_LOOP_CONST_COUNT 0x000000c0 + +#define EG_SET_RESOURCE_OFFSET 0x30000 +#define EG_SET_RESOURCE_END 0x30400 //r600 := offset + 0x4000 + +#define EG_SET_LOOP_CONST_OFFSET 0x3A200 +#define EG_SET_LOOP_CONST_END 0x3A26C //r600 := offset + 0x180 + + +#define EG_SQ_FETCH_RESOURCE_VS_OFFSET 0x000000b0 +#define EG_FETCH_RESOURCE_STRIDE 8 + +#define EG_SET_BOOL_CONST_OFFSET 0x3A500 +#define EG_SET_BOOL_CONST_END 0x3A506 + + +#endif //_EVERGREEN_DIFF_H_ diff --git a/src/mesa/drivers/dri/r600/evergreen_fragprog.c b/src/mesa/drivers/dri/r600/evergreen_fragprog.c 
new file mode 100644 index 00000000000..b53ff424a01 --- /dev/null +++ b/src/mesa/drivers/dri/r600/evergreen_fragprog.c @@ -0,0 +1,817 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +/* + * Authors: + * Richard Li <[email protected]>, <[email protected]> + * CooperYuan <[email protected]>, <[email protected]> + */ + +#include <stdio.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> + +#include "main/imports.h" + +#include "program/prog_parameter.h" +#include "program/prog_statevars.h" +#include "program/program.h" + +#include "r600_context.h" +#include "r600_cmdbuf.h" +#include "r600_emit.h" + +#include "evergreen_vertprog.h" +#include "evergreen_fragprog.h" + +void evergreen_insert_wpos_code(GLcontext *ctx, struct gl_fragment_program *fprog) +{ + static const gl_state_index winstate[STATE_LENGTH] + = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0}; + struct prog_instruction *newInst, *inst; + GLint win_size; /* state reference */ + GLuint wpos_temp; /* temp register */ + int i, j; + + /* PARAM win_size = STATE_FB_SIZE */ + win_size = _mesa_add_state_reference(fprog->Base.Parameters, winstate); + + wpos_temp = fprog->Base.NumTemporaries++; + + /* scan program where WPOS is used and replace with wpos_temp */ + inst = fprog->Base.Instructions; + for (i = 0; i < fprog->Base.NumInstructions; i++) { + for (j=0; j < 3; j++) { + if(inst->SrcReg[j].File == PROGRAM_INPUT && + inst->SrcReg[j].Index == FRAG_ATTRIB_WPOS) { + inst->SrcReg[j].File = PROGRAM_TEMPORARY; + inst->SrcReg[j].Index = wpos_temp; + } + } + inst++; + } + + _mesa_insert_instructions(&(fprog->Base), 0, 1); + + newInst = fprog->Base.Instructions; + /* invert wpos.y + * wpos_temp.xyzw = wpos.x-yzw + winsize.0y00 */ + newInst[0].Opcode = OPCODE_ADD; + newInst[0].DstReg.File = PROGRAM_TEMPORARY; + newInst[0].DstReg.Index = wpos_temp; + newInst[0].DstReg.WriteMask = WRITEMASK_XYZW; + + newInst[0].SrcReg[0].File = PROGRAM_INPUT; + newInst[0].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + newInst[0].SrcReg[0].Swizzle = SWIZZLE_XYZW; + newInst[0].SrcReg[0].Negate = NEGATE_Y; + + newInst[0].SrcReg[1].File = PROGRAM_STATE_VAR; + newInst[0].SrcReg[1].Index = win_size; + 
newInst[0].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ZERO); + +} + +//TODO : Validate FP input with VP output. +void evergreen_Map_Fragment_Program(r700_AssemblerBase *pAsm, + struct gl_fragment_program *mesa_fp, + GLcontext *ctx) +{ + unsigned int unBit; + unsigned int i; + GLuint ui; + + /* match fp inputs with vp exports. */ + struct evergreen_vertex_program_cont *vpc = + (struct evergreen_vertex_program_cont *)ctx->VertexProgram._Current; + GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten; + + pAsm->number_used_registers = 0; + +//Input mapping : mesa_fp->Base.InputsRead set the flag, set in + //The flags parsed in parse_attrib_binding. FRAG_ATTRIB_COLx, FRAG_ATTRIB_TEXx, ... + //MUST match order in Map_Vertex_Output + unBit = 1 << FRAG_ATTRIB_WPOS; + if(mesa_fp->Base.InputsRead & unBit) + { + pAsm->uiFP_AttributeMap[FRAG_ATTRIB_WPOS] = pAsm->number_used_registers++; + } + + unBit = 1 << VERT_RESULT_COL0; + if(OutputsWritten & unBit) + { + pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0] = pAsm->number_used_registers++; + } + + unBit = 1 << VERT_RESULT_COL1; + if(OutputsWritten & unBit) + { + pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1] = pAsm->number_used_registers++; + } + + unBit = 1 << VERT_RESULT_FOGC; + if(OutputsWritten & unBit) + { + pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC] = pAsm->number_used_registers++; + } + + for(i=0; i<8; i++) + { + unBit = 1 << (VERT_RESULT_TEX0 + i); + if(OutputsWritten & unBit) + { + pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i] = pAsm->number_used_registers++; + } + } + +/* order has been taken care of */ +#if 1 + for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++) + { + unBit = 1 << i; + if(OutputsWritten & unBit) + { + pAsm->uiFP_AttributeMap[i-VERT_RESULT_VAR0+FRAG_ATTRIB_VAR0] = pAsm->number_used_registers++; + } + } +#else + if( (mesa_fp->Base.InputsRead >> FRAG_ATTRIB_VAR0) > 0 ) + { + struct evergreen_vertex_program_cont *vpc = + (struct evergreen_vertex_program_cont 
*)ctx->VertexProgram._Current; + struct gl_program_parameter_list * VsVarying = vpc->mesa_program.Base.Varying; + struct gl_program_parameter_list * PsVarying = mesa_fp->Base.Varying; + struct gl_program_parameter * pVsParam; + struct gl_program_parameter * pPsParam; + GLuint j, k; + GLuint unMaxVarying = 0; + + for(i=0; i<VsVarying->NumParameters; i++) + { + pAsm->uiFP_AttributeMap[i + FRAG_ATTRIB_VAR0] = 0; + } + + for(i=FRAG_ATTRIB_VAR0; i<FRAG_ATTRIB_MAX; i++) + { + unBit = 1 << i; + if(mesa_fp->Base.InputsRead & unBit) + { + j = i - FRAG_ATTRIB_VAR0; + pPsParam = PsVarying->Parameters + j; + + for(k=0; k<VsVarying->NumParameters; k++) + { + pVsParam = VsVarying->Parameters + k; + + if( strcmp(pPsParam->Name, pVsParam->Name) == 0) + { + pAsm->uiFP_AttributeMap[i] = pAsm->number_used_registers + k; + if(k > unMaxVarying) + { + unMaxVarying = k; + } + break; + } + } + } + } + + pAsm->number_used_registers += unMaxVarying + 1; + } +#endif + unBit = 1 << FRAG_ATTRIB_FACE; + if(mesa_fp->Base.InputsRead & unBit) + { + pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE] = pAsm->number_used_registers++; + } + + unBit = 1 << FRAG_ATTRIB_PNTC; + if(mesa_fp->Base.InputsRead & unBit) + { + pAsm->uiFP_AttributeMap[FRAG_ATTRIB_PNTC] = pAsm->number_used_registers++; + } + + pAsm->uIIns = pAsm->number_used_registers; + +/* Map temporary registers (GPRs) */ + pAsm->starting_temp_register_number = pAsm->number_used_registers; + + if(mesa_fp->Base.NumNativeTemporaries >= mesa_fp->Base.NumTemporaries) + { + pAsm->number_used_registers += mesa_fp->Base.NumNativeTemporaries; + } + else + { + pAsm->number_used_registers += mesa_fp->Base.NumTemporaries; + } + +/* Output mapping */ + pAsm->number_of_exports = 0; + pAsm->number_of_colorandz_exports = 0; /* don't include stencil and mask out. 
*/ + pAsm->starting_export_register_number = pAsm->number_used_registers; + unBit = 1 << FRAG_RESULT_COLOR; + if(mesa_fp->Base.OutputsWritten & unBit) + { + pAsm->uiFP_OutputMap[FRAG_RESULT_COLOR] = pAsm->number_used_registers++; + pAsm->number_of_exports++; + pAsm->number_of_colorandz_exports++; + } + unBit = 1 << FRAG_RESULT_DEPTH; + if(mesa_fp->Base.OutputsWritten & unBit) + { + pAsm->depth_export_register_number = pAsm->number_used_registers; + pAsm->uiFP_OutputMap[FRAG_RESULT_DEPTH] = pAsm->number_used_registers++; + pAsm->number_of_exports++; + pAsm->number_of_colorandz_exports++; + pAsm->pR700Shader->depthIsExported = 1; + } + + pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports); + for(ui=0; ui<pAsm->number_of_exports; ui++) + { + pAsm->pucOutMask[ui] = 0x0; + } + + pAsm->flag_reg_index = pAsm->number_used_registers++; + + pAsm->uFirstHelpReg = pAsm->number_used_registers; +} + +GLboolean evergreen_Find_Instruction_Dependencies_fp(struct evergreen_fragment_program *fp, + struct gl_fragment_program *mesa_fp) +{ + GLuint i, j; + GLint * puiTEMPwrites; + GLint * puiTEMPreads; + struct prog_instruction * pILInst; + InstDeps *pInstDeps; + struct prog_instruction * texcoord_DepInst; + GLint nDepInstID; + + puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries); + puiTEMPreads = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries); + + for(i=0; i<mesa_fp->Base.NumTemporaries; i++) + { + puiTEMPwrites[i] = -1; + puiTEMPreads[i] = -1; + } + + pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_fp->Base.NumInstructions); + + for(i=0; i<mesa_fp->Base.NumInstructions; i++) + { + pInstDeps[i].nDstDep = -1; + pILInst = &(mesa_fp->Base.Instructions[i]); + + //Dst + if(pILInst->DstReg.File == PROGRAM_TEMPORARY) + { + //Set lastwrite for the temp + puiTEMPwrites[pILInst->DstReg.Index] = i; + } + + //Src + for(j=0; j<3; j++) + { + if(pILInst->SrcReg[j].File == PROGRAM_TEMPORARY) + { + //Set dep. 
+ pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index]; + //Set first read + if(puiTEMPreads[pILInst->SrcReg[j].Index] < 0 ) + { + puiTEMPreads[pILInst->SrcReg[j].Index] = i; + } + } + else + { + pInstDeps[i].nSrcDeps[j] = -1; + } + } + } + + fp->r700AsmCode.pInstDeps = pInstDeps; + + //Find dep for tex inst + for(i=0; i<mesa_fp->Base.NumInstructions; i++) + { + pILInst = &(mesa_fp->Base.Instructions[i]); + + if(GL_TRUE == IsTex(pILInst->Opcode)) + { //src0 is the tex coord register, src1 is texunit, src2 is textype + nDepInstID = pInstDeps[i].nSrcDeps[0]; + if(nDepInstID >= 0) + { + texcoord_DepInst = &(mesa_fp->Base.Instructions[nDepInstID]); + if(GL_TRUE == IsAlu(texcoord_DepInst->Opcode) ) + { + pInstDeps[nDepInstID].nDstDep = i; + pInstDeps[i].nDstDep = i; + } + else if(GL_TRUE == IsTex(texcoord_DepInst->Opcode) ) + { + pInstDeps[i].nDstDep = i; + } + else + { //... other deps? + } + } + // make sure that we dont overwrite src used earlier + nDepInstID = puiTEMPreads[pILInst->DstReg.Index]; + if(nDepInstID < i) + { + pInstDeps[i].nDstDep = puiTEMPreads[pILInst->DstReg.Index]; + texcoord_DepInst = &(mesa_fp->Base.Instructions[nDepInstID]); + if(GL_TRUE == IsAlu(texcoord_DepInst->Opcode) ) + { + pInstDeps[nDepInstID].nDstDep = i; + } + + } + + } + } + + FREE(puiTEMPwrites); + FREE(puiTEMPreads); + + return GL_TRUE; +} + +GLboolean evergreenTranslateFragmentShader(struct evergreen_fragment_program *fp, + struct gl_fragment_program *mesa_fp, + GLcontext *ctx) +{ + GLuint number_of_colors_exported; + GLboolean z_enabled = GL_FALSE; + GLuint unBit, shadow_unit; + int i; + struct prog_instruction *inst; + gl_state_index shadow_ambient[STATE_LENGTH] + = { STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0}; + + //Init_Program + Init_r700_AssemblerBase( SPT_FP, &(fp->r700AsmCode), &(fp->r700Shader) ); + + fp->constbo0 = NULL; + fp->r700AsmCode.bUseMemConstant = GL_TRUE; + fp->r700AsmCode.unAsic = 8; + + if(mesa_fp->Base.InputsRead & FRAG_BIT_WPOS) + { + 
evergreen_insert_wpos_code(ctx, mesa_fp); + } + + /* add/map consts for ARB_shadow_ambient */ + if(mesa_fp->Base.ShadowSamplers) + { + inst = mesa_fp->Base.Instructions; + for (i = 0; i < mesa_fp->Base.NumInstructions; i++) + { + if(inst->TexShadow == 1) + { + shadow_unit = inst->TexSrcUnit; + shadow_ambient[2] = shadow_unit; + fp->r700AsmCode.shadow_regs[shadow_unit] = + _mesa_add_state_reference(mesa_fp->Base.Parameters, shadow_ambient); + } + inst++; + } + } + + evergreen_Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp, ctx); + + if( GL_FALSE == evergreen_Find_Instruction_Dependencies_fp(fp, mesa_fp) ) + { + return GL_FALSE; + } + + InitShaderProgram(&(fp->r700AsmCode)); + + for(i=0; i < MAX_SAMPLERS; i++) + { + fp->r700AsmCode.SamplerUnits[i] = fp->mesa_program.Base.SamplerUnits[i]; + } + + fp->r700AsmCode.unCurNumILInsts = mesa_fp->Base.NumInstructions; + + if( GL_FALSE == AssembleInstr(0, + 0, + mesa_fp->Base.NumInstructions, + &(mesa_fp->Base.Instructions[0]), + &(fp->r700AsmCode)) ) + { + return GL_FALSE; + } + + if(GL_FALSE == Process_Fragment_Exports(&(fp->r700AsmCode), mesa_fp->Base.OutputsWritten) ) + { + return GL_FALSE; + } + + if( GL_FALSE == RelocProgram(&(fp->r700AsmCode), &(mesa_fp->Base)) ) + { + return GL_FALSE; + } + + fp->r700Shader.nRegs = (fp->r700AsmCode.number_used_registers == 0) ? 
0 + : (fp->r700AsmCode.number_used_registers - 1); + + fp->r700Shader.nParamExports = fp->r700AsmCode.number_of_exports; + + number_of_colors_exported = fp->r700AsmCode.number_of_colorandz_exports; + + unBit = 1 << FRAG_RESULT_DEPTH; + if(mesa_fp->Base.OutputsWritten & unBit) + { + z_enabled = GL_TRUE; + number_of_colors_exported--; + } + + /* illegal to set this to 0 */ + if(number_of_colors_exported || z_enabled) + { + fp->r700Shader.exportMode = number_of_colors_exported << 1 | z_enabled; + } + else + { + fp->r700Shader.exportMode = (1 << 1); + } + + fp->translated = GL_TRUE; + + return GL_TRUE; +} + +void evergreenSelectFragmentShader(GLcontext *ctx) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + struct evergreen_fragment_program *fp = (struct evergreen_fragment_program *) + (ctx->FragmentProgram._Current); + if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) + { + fp->r700AsmCode.bR6xx = 1; + } + + if (GL_FALSE == fp->translated) + evergreenTranslateFragmentShader(fp, &(fp->mesa_program), ctx); +} + +void * evergreenGetActiveFpShaderBo(GLcontext * ctx) +{ + struct evergreen_fragment_program *fp = (struct evergreen_fragment_program *) + (ctx->FragmentProgram._Current); + + return fp->shaderbo; +} + +void * evergreenGetActiveFpShaderConstBo(GLcontext * ctx) +{ + struct evergreen_fragment_program *fp = (struct evergreen_fragment_program *) + (ctx->FragmentProgram._Current); + + return fp->constbo0; +} + +GLboolean evergreenSetupFragmentProgram(GLcontext * ctx) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + struct evergreen_fragment_program *fp = (struct evergreen_fragment_program *) + (ctx->FragmentProgram._Current); + r700_AssemblerBase *pAsm = &(fp->r700AsmCode); + struct gl_fragment_program *mesa_fp = &(fp->mesa_program); + unsigned int ui, i; + unsigned int unNumOfReg; + unsigned int unBit; + GLuint exportCount; + GLboolean point_sprite = GL_FALSE; + + if(GL_FALSE == 
fp->loaded) + { + if(fp->r700Shader.bNeedsAssembly == GL_TRUE) + { + Assemble( &(fp->r700Shader) ); + } + + r600EmitShader(ctx, + &(fp->shaderbo), + (GLvoid *)(fp->r700Shader.pProgram), + fp->r700Shader.uShaderBinaryDWORDSize, + "FS"); + + fp->loaded = GL_TRUE; + } + + /* TODO : enable this after MemUse fixed *= + (context->chipobj.MemUse)(context, fp->shadercode.buf->id); + */ + + EVERGREEN_STATECHANGE(context, sq); + + evergreen->SQ_PGM_RESOURCES_PS.u32All = 0; + SETbit(evergreen->SQ_PGM_RESOURCES_PS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit); + + evergreen->ps.SQ_ALU_CONST_CACHE_PS_0.u32All = 0; + evergreen->ps.SQ_PGM_START_PS.u32All = 0; + + EVERGREEN_STATECHANGE(context, spi); + + unNumOfReg = fp->r700Shader.nRegs + 1; + + ui = (evergreen->SPI_PS_IN_CONTROL_0.u32All & NUM_INTERP_mask) / (1 << NUM_INTERP_shift); + + /* PS uses fragment.position */ + if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) + { + ui += 1; + SETfield(evergreen->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask); + SETfield(evergreen->SPI_PS_IN_CONTROL_0.u32All, CENTERS_ONLY, BARYC_SAMPLE_CNTL_shift, BARYC_SAMPLE_CNTL_mask); + SETbit(evergreen->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit); + SETbit(evergreen->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit); + } + else + { + CLEARbit(evergreen->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit); + CLEARbit(evergreen->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit); + } + + if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_FACE)) + { + ui += 1; + SETfield(evergreen->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask); + SETbit(evergreen->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit); + SETbit(evergreen->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ALL_BITS_bit); + SETfield(evergreen->SPI_PS_IN_CONTROL_1.u32All, pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE], FRONT_FACE_ADDR_shift, FRONT_FACE_ADDR_mask); + } + else + { + CLEARbit(evergreen->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit); + } + + /* see if we need any 
point_sprite replacements */ + for (i = VERT_RESULT_TEX0; i<= VERT_RESULT_TEX7; i++) + { + if(ctx->Point.CoordReplace[i - VERT_RESULT_TEX0] == GL_TRUE) + point_sprite = GL_TRUE; + } + + if ((mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_PNTC)) || point_sprite) + { + /* for FRAG_ATTRIB_PNTC we need to increase num_interp */ + if(mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_PNTC)) + { + ui++; + SETfield(evergreen->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask); + } + SETbit(evergreen->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit); + SETfield(evergreen->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_S, PNT_SPRITE_OVRD_X_shift, PNT_SPRITE_OVRD_X_mask); + SETfield(evergreen->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_T, PNT_SPRITE_OVRD_Y_shift, PNT_SPRITE_OVRD_Y_mask); + SETfield(evergreen->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_0, PNT_SPRITE_OVRD_Z_shift, PNT_SPRITE_OVRD_Z_mask); + SETfield(evergreen->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_1, PNT_SPRITE_OVRD_W_shift, PNT_SPRITE_OVRD_W_mask); + if(ctx->Point.SpriteOrigin == GL_LOWER_LEFT) + SETbit(evergreen->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_TOP_1_bit); + else + CLEARbit(evergreen->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_TOP_1_bit); + } + else + { + CLEARbit(evergreen->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit); + } + + + ui = (unNumOfReg < ui) ? ui : unNumOfReg; + + SETfield(evergreen->SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask); + + CLEARbit(evergreen->SQ_PGM_RESOURCES_PS.u32All, UNCACHED_FIRST_INST_bit); + + if(fp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. 
*/ + { + SETfield(evergreen->SQ_PGM_RESOURCES_PS.u32All, fp->r700Shader.uStackSize, + STACK_SIZE_shift, STACK_SIZE_mask); + } + + SETfield(evergreen->SQ_PGM_EXPORTS_PS.u32All, fp->r700Shader.exportMode, + EXPORT_MODE_shift, EXPORT_MODE_mask); + + // emit ps input map + struct evergreen_vertex_program_cont *vpc = + (struct evergreen_vertex_program_cont *)ctx->VertexProgram._Current; + GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten; + + for(ui = 0; ui < EVERGREEN_MAX_SHADER_EXPORTS; ui++) + evergreen->SPI_PS_INPUT_CNTL[ui].u32All = 0; + + unBit = 1 << FRAG_ATTRIB_WPOS; + if(mesa_fp->Base.InputsRead & unBit) + { + ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_WPOS]; + SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); + SETfield(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, ui, + SEMANTIC_shift, SEMANTIC_mask); + if (evergreen->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) + SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + else + CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + } + + unBit = 1 << VERT_RESULT_COL0; + if(OutputsWritten & unBit) + { + ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0]; + SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); + SETfield(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, ui, + SEMANTIC_shift, SEMANTIC_mask); + if (evergreen->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) + SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + else + CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + } + + unBit = 1 << VERT_RESULT_COL1; + if(OutputsWritten & unBit) + { + ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1]; + SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); + SETfield(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, ui, + SEMANTIC_shift, SEMANTIC_mask); + if (evergreen->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) + SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + else + 
CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + } + + unBit = 1 << VERT_RESULT_FOGC; + if(OutputsWritten & unBit) + { + ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC]; + SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); + SETfield(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, ui, + SEMANTIC_shift, SEMANTIC_mask); + if (evergreen->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) + SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + else + CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + } + + for(i=0; i<8; i++) + { + unBit = 1 << (VERT_RESULT_TEX0 + i); + if(OutputsWritten & unBit) + { + ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i]; + SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); + SETfield(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, ui, + SEMANTIC_shift, SEMANTIC_mask); + CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + /* ARB_point_sprite */ + if(ctx->Point.CoordReplace[i] == GL_TRUE) + { + SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit); + } + } + } + + unBit = 1 << FRAG_ATTRIB_FACE; + if(mesa_fp->Base.InputsRead & unBit) + { + ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE]; + SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); + SETfield(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, ui, + SEMANTIC_shift, SEMANTIC_mask); + if (evergreen->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) + SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + else + CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + } + unBit = 1 << FRAG_ATTRIB_PNTC; + if(mesa_fp->Base.InputsRead & unBit) + { + ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_PNTC]; + SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); + SETfield(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, ui, + SEMANTIC_shift, SEMANTIC_mask); + if (evergreen->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) + 
SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + else + CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit); + } + + + + + for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++) + { + unBit = 1 << i; + if(OutputsWritten & unBit) + { + ui = pAsm->uiFP_AttributeMap[i-VERT_RESULT_VAR0+FRAG_ATTRIB_VAR0]; + SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); + SETfield(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, ui, + SEMANTIC_shift, SEMANTIC_mask); + if (evergreen->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) + SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + else + CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + } + } + + exportCount = (evergreen->SQ_PGM_EXPORTS_PS.u32All & EXPORT_MODE_mask) / (1 << EXPORT_MODE_shift); + + return GL_TRUE; +} + +GLboolean evergreenSetupFPconstants(GLcontext * ctx) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + struct evergreen_fragment_program *fp = (struct evergreen_fragment_program *) + (ctx->FragmentProgram._Current); + r700_AssemblerBase *pAsm = &(fp->r700AsmCode); + + struct gl_program_parameter_list *paramList; + unsigned int unNumParamData; + unsigned int ui; + + /* sent out shader constants. 
*/ + paramList = fp->mesa_program.Base.Parameters; + + if(NULL != paramList) + { + _mesa_load_state_parameters(ctx, paramList); + + if (paramList->NumParameters > EVERGREEN_MAX_DX9_CONSTS) + return GL_FALSE; + + EVERGREEN_STATECHANGE(context, sq); + + evergreen->ps.num_consts = paramList->NumParameters; + + unNumParamData = paramList->NumParameters; + + for(ui=0; ui<unNumParamData; ui++) { + evergreen->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; + evergreen->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; + evergreen->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; + evergreen->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + } + + /* Load fp constants to gpu */ + if(unNumParamData > 0) + { + radeonAllocDmaRegion(&context->radeon, + &context->fp_Constbo, + &context->fp_bo_offset, + 256, + 256); + r600EmitShaderConsts(ctx, + context->fp_Constbo, + context->fp_bo_offset, + (GLvoid *)&(evergreen->ps.consts[0][0]), + unNumParamData * 4 * 4); + } + } else + evergreen->ps.num_consts = 0; + + COMPILED_SUB * pCompiledSub; + GLuint uj; + GLuint unConstOffset = evergreen->ps.num_consts; + for(ui=0; ui<pAsm->unNumPresub; ui++) + { + pCompiledSub = pAsm->presubs[ui].pCompiledSub; + + evergreen->ps.num_consts += pCompiledSub->NumParameters; + + for(uj=0; uj<pCompiledSub->NumParameters; uj++) + { + evergreen->ps.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0]; + evergreen->ps.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1]; + evergreen->ps.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2]; + evergreen->ps.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3]; + } + unConstOffset += pCompiledSub->NumParameters; + } +}
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r600/evergreen_fragprog.h b/src/mesa/drivers/dri/r600/evergreen_fragprog.h new file mode 100644 index 00000000000..0e200bf3833 --- /dev/null +++ b/src/mesa/drivers/dri/r600/evergreen_fragprog.h @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +/* + * Authors: + * Richard Li <[email protected]>, <[email protected]> + */ + +#ifndef _EVERGREEN_FRAGPROG_H_ +#define _EVERGREEN_FRAGPROG_H_ + +#include "r600_context.h" +#include "r700_assembler.h" + +struct evergreen_fragment_program +{ + struct gl_fragment_program mesa_program; + + r700_AssemblerBase r700AsmCode; + R700_Shader r700Shader; + + GLboolean translated; + GLboolean loaded; + GLboolean error; + + void * shaderbo; + + GLuint k0used; + void * constbo0; + + GLboolean WritesDepth; + GLuint optimization; +}; + +/* Internal */ +void evergreen_insert_wpos_code(GLcontext *ctx, struct gl_fragment_program *fprog); + +void evergreen_Map_Fragment_Program(r700_AssemblerBase *pAsm, + struct gl_fragment_program *mesa_fp, + GLcontext *ctx); +GLboolean evergreen_Find_Instruction_Dependencies_fp(struct evergreen_fragment_program *fp, + struct gl_fragment_program *mesa_fp); + +GLboolean evergreenTranslateFragmentShader(struct evergreen_fragment_program *fp, + struct gl_fragment_program *mesa_vp, + GLcontext *ctx); + +/* Interface */ +extern void evergreenSelectFragmentShader(GLcontext *ctx); + +extern GLboolean evergreenSetupFragmentProgram(GLcontext * ctx); + +extern GLboolean evergreenSetupFPconstants(GLcontext * ctx); + +extern void * evergreenGetActiveFpShaderBo(GLcontext * ctx); + +extern void * evergreenGetActiveFpShaderConstBo(GLcontext * ctx); + +#endif /*_EVERGREEN_FRAGPROG_H_*/ diff --git a/src/mesa/drivers/dri/r600/evergreen_ioctl.c b/src/mesa/drivers/dri/r600/evergreen_ioctl.c new file mode 100644 index 00000000000..5c1270790df --- /dev/null +++ b/src/mesa/drivers/dri/r600/evergreen_ioctl.c @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +/* + * Authors: + * Richard Li <[email protected]>, <[email protected]> + */ + +#include <sched.h> +#include <errno.h> + +#include "main/glheader.h" +#include "main/imports.h" +#include "main/macros.h" +#include "main/context.h" +#include "main/simple_list.h" + +#include "radeon_common.h" +#include "r600_context.h" + +#include "evergreen_ioctl.h" + +#include "r700_clear.h" + +/* Clear entry point: forwards ctx and mask unchanged to r700Clear(). */ +void evergreenClear(GLcontext * ctx, GLbitfield mask) +{ + r700Clear(ctx, mask); +} + +/* Install the Clear, Finish and Flush hooks into the given driver function table. */ +void evergreenInitIoctlFuncs(struct dd_function_table *functions) +{ + functions->Clear = evergreenClear; + functions->Finish = radeonFinish; + functions->Flush = radeonFlush; +} diff --git a/src/mesa/drivers/dri/r600/evergreen_ioctl.h b/src/mesa/drivers/dri/r600/evergreen_ioctl.h new file mode 100644 index 00000000000..3c663a7083a --- /dev/null +++ b/src/mesa/drivers/dri/r600/evergreen_ioctl.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2008-2010 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <[email protected]>, <[email protected]> + */ + +#ifndef _EVERGREEN_IOCTL_H_ +#define _EVERGREEN_IOCTL_H_ + +#include "r600_context.h" +#include "radeon_drm.h" + +extern void evergreenClear(GLcontext * ctx, GLbitfield mask); +extern void evergreenInitIoctlFuncs(struct dd_function_table *functions); + +#endif /* _EVERGREEN_IOCTL_H_ */ diff --git a/src/mesa/drivers/dri/r600/evergreen_off.h b/src/mesa/drivers/dri/r600/evergreen_off.h new file mode 100644 index 00000000000..8c250699ec6 --- /dev/null +++ b/src/mesa/drivers/dri/r600/evergreen_off.h @@ -0,0 +1,881 @@ +/* + * Copyright (C) 2008-2010 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <[email protected]>, <[email protected]> + */ + +#ifndef _EVERGREEN_OFF_H_ +#define _EVERGREEN_OFF_H_ + +enum +{ +/* Registers from PA block: */ + EG_PA_SC_SCREEN_SCISSOR_TL = 0x28030, // DIFF + EG_PA_SC_SCREEN_SCISSOR_BR = 0x28034, // DIFF + EG_PA_SC_WINDOW_OFFSET = 0x28200, // DIFF + EG_PA_SC_WINDOW_SCISSOR_TL = 0x28204, // DIFF + EG_PA_SC_WINDOW_SCISSOR_BR = 0x28208, // DIFF + EG_PA_SC_CLIPRECT_RULE = 0x2820C, // SAME + EG_PA_SC_CLIPRECT_0_TL = 0x28210, // DIFF + EG_PA_SC_CLIPRECT_0_BR = 0x28214, // DIFF + EG_PA_SC_CLIPRECT_1_TL = 0x28218, // DIFF + EG_PA_SC_CLIPRECT_1_BR = 0x2821C, // DIFF + EG_PA_SC_CLIPRECT_2_TL = 0x28220, // DIFF + EG_PA_SC_CLIPRECT_2_BR = 0x28224, // DIFF + EG_PA_SC_CLIPRECT_3_TL = 0x28228, // DIFF + EG_PA_SC_CLIPRECT_3_BR = 0x2822C, // DIFF + EG_PA_SC_EDGERULE = 0x28230, // SAME + EG_PA_SU_HARDWARE_SCREEN_OFFSET = 0x28234, // + EG_PA_SC_GENERIC_SCISSOR_TL = 0x28240, // DIFF + EG_PA_SC_GENERIC_SCISSOR_BR = 0x28244, // DIFF + EG_PA_SC_VPORT_SCISSOR_0_TL = 0x28250, // DIFF + EG_PA_SC_VPORT_SCISSOR_0_BR = 0x28254, // DIFF + EG_PA_SC_VPORT_SCISSOR_1_TL = 0x28258, // DIFF + EG_PA_SC_VPORT_SCISSOR_1_BR = 0x2825C, // DIFF + EG_PA_SC_VPORT_SCISSOR_2_TL = 0x28260, // DIFF + EG_PA_SC_VPORT_SCISSOR_2_BR = 0x28264, // DIFF + EG_PA_SC_VPORT_SCISSOR_3_TL = 0x28268, // DIFF + EG_PA_SC_VPORT_SCISSOR_3_BR = 0x2826C, // DIFF + EG_PA_SC_VPORT_SCISSOR_4_TL = 0x28270, // DIFF + EG_PA_SC_VPORT_SCISSOR_4_BR = 0x28274, // DIFF + EG_PA_SC_VPORT_SCISSOR_5_TL = 0x28278, // DIFF + EG_PA_SC_VPORT_SCISSOR_5_BR = 0x2827C, // DIFF + EG_PA_SC_VPORT_SCISSOR_6_TL = 0x28280, // DIFF + EG_PA_SC_VPORT_SCISSOR_6_BR = 0x28284, // DIFF + EG_PA_SC_VPORT_SCISSOR_7_TL = 0x28288, // DIFF + 
EG_PA_SC_VPORT_SCISSOR_7_BR = 0x2828C, // DIFF + EG_PA_SC_VPORT_SCISSOR_8_TL = 0x28290, // DIFF + EG_PA_SC_VPORT_SCISSOR_8_BR = 0x28294, // DIFF + EG_PA_SC_VPORT_SCISSOR_9_TL = 0x28298, // DIFF + EG_PA_SC_VPORT_SCISSOR_9_BR = 0x2829C, // DIFF + EG_PA_SC_VPORT_SCISSOR_10_TL = 0x282A0, // DIFF + EG_PA_SC_VPORT_SCISSOR_10_BR = 0x282A4, // DIFF + EG_PA_SC_VPORT_SCISSOR_11_TL = 0x282A8, // DIFF + EG_PA_SC_VPORT_SCISSOR_11_BR = 0x282AC, // DIFF + EG_PA_SC_VPORT_SCISSOR_12_TL = 0x282B0, // DIFF + EG_PA_SC_VPORT_SCISSOR_12_BR = 0x282B4, // DIFF + EG_PA_SC_VPORT_SCISSOR_13_TL = 0x282B8, // DIFF + EG_PA_SC_VPORT_SCISSOR_13_BR = 0x282BC, // DIFF + EG_PA_SC_VPORT_SCISSOR_14_TL = 0x282C0, // DIFF + EG_PA_SC_VPORT_SCISSOR_14_BR = 0x282C4, // DIFF + EG_PA_SC_VPORT_SCISSOR_15_TL = 0x282C8, // DIFF + EG_PA_SC_VPORT_SCISSOR_15_BR = 0x282CC, // DIFF + EG_PA_SC_VPORT_ZMIN_0 = 0x282D0, // SAME + EG_PA_SC_VPORT_ZMAX_0 = 0x282D4, // SAME + EG_PA_SC_VPORT_ZMIN_1 = 0x282D8, // SAME + EG_PA_SC_VPORT_ZMAX_1 = 0x282DC, // SAME + EG_PA_SC_VPORT_ZMIN_2 = 0x282E0, // SAME + EG_PA_SC_VPORT_ZMAX_2 = 0x282E4, // SAME + EG_PA_SC_VPORT_ZMIN_3 = 0x282E8, // SAME + EG_PA_SC_VPORT_ZMAX_3 = 0x282EC, // SAME + EG_PA_SC_VPORT_ZMIN_4 = 0x282F0, // SAME + EG_PA_SC_VPORT_ZMAX_4 = 0x282F4, // SAME + EG_PA_SC_VPORT_ZMIN_5 = 0x282F8, // SAME + EG_PA_SC_VPORT_ZMAX_5 = 0x282FC, // SAME + EG_PA_SC_VPORT_ZMIN_6 = 0x28300, // SAME + EG_PA_SC_VPORT_ZMAX_6 = 0x28304, // SAME + EG_PA_SC_VPORT_ZMIN_7 = 0x28308, // SAME + EG_PA_SC_VPORT_ZMAX_7 = 0x2830C, // SAME + EG_PA_SC_VPORT_ZMIN_8 = 0x28310, // SAME + EG_PA_SC_VPORT_ZMAX_8 = 0x28314, // SAME + EG_PA_SC_VPORT_ZMIN_9 = 0x28318, // SAME + EG_PA_SC_VPORT_ZMAX_9 = 0x2831C, // SAME + EG_PA_SC_VPORT_ZMIN_10 = 0x28320, // SAME + EG_PA_SC_VPORT_ZMAX_10 = 0x28324, // SAME + EG_PA_SC_VPORT_ZMIN_11 = 0x28328, // SAME + EG_PA_SC_VPORT_ZMAX_11 = 0x2832C, // SAME + EG_PA_SC_VPORT_ZMIN_12 = 0x28330, // SAME + EG_PA_SC_VPORT_ZMAX_12 = 0x28334, // SAME + EG_PA_SC_VPORT_ZMIN_13 = 
0x28338, // SAME + EG_PA_SC_VPORT_ZMAX_13 = 0x2833C, // SAME + EG_PA_SC_VPORT_ZMIN_14 = 0x28340, // SAME + EG_PA_SC_VPORT_ZMAX_14 = 0x28344, // SAME + EG_PA_SC_VPORT_ZMIN_15 = 0x28348, // SAME + EG_PA_SC_VPORT_ZMAX_15 = 0x2834C, // SAME + EG_PA_CL_VPORT_XSCALE = 0x2843C, // SAME + EG_PA_CL_VPORT_XOFFSET = 0x28440, // SAME + EG_PA_CL_VPORT_YSCALE = 0x28444, // SAME + EG_PA_CL_VPORT_YOFFSET = 0x28448, // SAME + EG_PA_CL_VPORT_ZSCALE = 0x2844C, // SAME + EG_PA_CL_VPORT_ZOFFSET = 0x28450, // SAME + EG_PA_CL_VPORT_XSCALE_1 = 0x28454, // SAME + EG_PA_CL_VPORT_XOFFSET_1 = 0x28458, // SAME + EG_PA_CL_VPORT_YSCALE_1 = 0x2845C, // SAME + EG_PA_CL_VPORT_YOFFSET_1 = 0x28460, // SAME + EG_PA_CL_VPORT_ZSCALE_1 = 0x28464, // SAME + EG_PA_CL_VPORT_ZOFFSET_1 = 0x28468, // SAME + EG_PA_CL_VPORT_XSCALE_2 = 0x2846C, // SAME + EG_PA_CL_VPORT_XOFFSET_2 = 0x28470, // SAME + EG_PA_CL_VPORT_YSCALE_2 = 0x28474, // SAME + EG_PA_CL_VPORT_YOFFSET_2 = 0x28478, // SAME + EG_PA_CL_VPORT_ZSCALE_2 = 0x2847C, // SAME + EG_PA_CL_VPORT_ZOFFSET_2 = 0x28480, // SAME + EG_PA_CL_VPORT_XSCALE_3 = 0x28484, // SAME + EG_PA_CL_VPORT_XOFFSET_3 = 0x28488, // SAME + EG_PA_CL_VPORT_YSCALE_3 = 0x2848C, // SAME + EG_PA_CL_VPORT_YOFFSET_3 = 0x28490, // SAME + EG_PA_CL_VPORT_ZSCALE_3 = 0x28494, // SAME + EG_PA_CL_VPORT_ZOFFSET_3 = 0x28498, // SAME + EG_PA_CL_VPORT_XSCALE_4 = 0x2849C, // SAME + EG_PA_CL_VPORT_XOFFSET_4 = 0x284A0, // SAME + EG_PA_CL_VPORT_YSCALE_4 = 0x284A4, // SAME + EG_PA_CL_VPORT_YOFFSET_4 = 0x284A8, // SAME + EG_PA_CL_VPORT_ZSCALE_4 = 0x284AC, // SAME + EG_PA_CL_VPORT_ZOFFSET_4 = 0x284B0, // SAME + EG_PA_CL_VPORT_XSCALE_5 = 0x284B4, // SAME + EG_PA_CL_VPORT_XOFFSET_5 = 0x284B8, // SAME + EG_PA_CL_VPORT_YSCALE_5 = 0x284BC, // SAME + EG_PA_CL_VPORT_YOFFSET_5 = 0x284C0, // SAME + EG_PA_CL_VPORT_ZSCALE_5 = 0x284C4, // SAME + EG_PA_CL_VPORT_ZOFFSET_5 = 0x284C8, // SAME + EG_PA_CL_VPORT_XSCALE_6 = 0x284CC, // SAME + EG_PA_CL_VPORT_XOFFSET_6 = 0x284D0, // SAME + EG_PA_CL_VPORT_YSCALE_6 = 0x284D4, // SAME 
+ EG_PA_CL_VPORT_YOFFSET_6 = 0x284D8, // SAME + EG_PA_CL_VPORT_ZSCALE_6 = 0x284DC, // SAME + EG_PA_CL_VPORT_ZOFFSET_6 = 0x284E0, // SAME + EG_PA_CL_VPORT_XSCALE_7 = 0x284E4, // SAME + EG_PA_CL_VPORT_XOFFSET_7 = 0x284E8, // SAME + EG_PA_CL_VPORT_YSCALE_7 = 0x284EC, // SAME + EG_PA_CL_VPORT_YOFFSET_7 = 0x284F0, // SAME + EG_PA_CL_VPORT_ZSCALE_7 = 0x284F4, // SAME + EG_PA_CL_VPORT_ZOFFSET_7 = 0x284F8, // SAME + EG_PA_CL_VPORT_XSCALE_8 = 0x284FC, // SAME + EG_PA_CL_VPORT_XOFFSET_8 = 0x28500, // SAME + EG_PA_CL_VPORT_YSCALE_8 = 0x28504, // SAME + EG_PA_CL_VPORT_YOFFSET_8 = 0x28508, // SAME + EG_PA_CL_VPORT_ZSCALE_8 = 0x2850C, // SAME + EG_PA_CL_VPORT_ZOFFSET_8 = 0x28510, // SAME + EG_PA_CL_VPORT_XSCALE_9 = 0x28514, // SAME + EG_PA_CL_VPORT_XOFFSET_9 = 0x28518, // SAME + EG_PA_CL_VPORT_YSCALE_9 = 0x2851C, // SAME + EG_PA_CL_VPORT_YOFFSET_9 = 0x28520, // SAME + EG_PA_CL_VPORT_ZSCALE_9 = 0x28524, // SAME + EG_PA_CL_VPORT_ZOFFSET_9 = 0x28528, // SAME + EG_PA_CL_VPORT_XSCALE_10 = 0x2852C, // SAME + EG_PA_CL_VPORT_XOFFSET_10 = 0x28530, // SAME + EG_PA_CL_VPORT_YSCALE_10 = 0x28534, // SAME + EG_PA_CL_VPORT_YOFFSET_10 = 0x28538, // SAME + EG_PA_CL_VPORT_ZSCALE_10 = 0x2853C, // SAME + EG_PA_CL_VPORT_ZOFFSET_10 = 0x28540, // SAME + EG_PA_CL_VPORT_XSCALE_11 = 0x28544, // SAME + EG_PA_CL_VPORT_XOFFSET_11 = 0x28548, // SAME + EG_PA_CL_VPORT_YSCALE_11 = 0x2854C, // SAME + EG_PA_CL_VPORT_YOFFSET_11 = 0x28550, // SAME + EG_PA_CL_VPORT_ZSCALE_11 = 0x28554, // SAME + EG_PA_CL_VPORT_ZOFFSET_11 = 0x28558, // SAME + EG_PA_CL_VPORT_XSCALE_12 = 0x2855C, // SAME + EG_PA_CL_VPORT_XOFFSET_12 = 0x28560, // SAME + EG_PA_CL_VPORT_YSCALE_12 = 0x28564, // SAME + EG_PA_CL_VPORT_YOFFSET_12 = 0x28568, // SAME + EG_PA_CL_VPORT_ZSCALE_12 = 0x2856C, // SAME + EG_PA_CL_VPORT_ZOFFSET_12 = 0x28570, // SAME + EG_PA_CL_VPORT_XSCALE_13 = 0x28574, // SAME + EG_PA_CL_VPORT_XOFFSET_13 = 0x28578, // SAME + EG_PA_CL_VPORT_YSCALE_13 = 0x2857C, // SAME + EG_PA_CL_VPORT_YOFFSET_13 = 0x28580, // SAME + 
EG_PA_CL_VPORT_ZSCALE_13 = 0x28584, // SAME + EG_PA_CL_VPORT_ZOFFSET_13 = 0x28588, // SAME + EG_PA_CL_VPORT_XSCALE_14 = 0x2858C, // SAME + EG_PA_CL_VPORT_XOFFSET_14 = 0x28590, // SAME + EG_PA_CL_VPORT_YSCALE_14 = 0x28594, // SAME + EG_PA_CL_VPORT_YOFFSET_14 = 0x28598, // SAME + EG_PA_CL_VPORT_ZSCALE_14 = 0x2859C, // SAME + EG_PA_CL_VPORT_ZOFFSET_14 = 0x285A0, // SAME + EG_PA_CL_VPORT_XSCALE_15 = 0x285A4, // SAME + EG_PA_CL_VPORT_XOFFSET_15 = 0x285A8, // SAME + EG_PA_CL_VPORT_YSCALE_15 = 0x285AC, // SAME + EG_PA_CL_VPORT_YOFFSET_15 = 0x285B0, // SAME + EG_PA_CL_VPORT_ZSCALE_15 = 0x285B4, // SAME + EG_PA_CL_VPORT_ZOFFSET_15 = 0x285B8, // SAME + EG_PA_CL_UCP_0_X = 0x285BC, // SAME 0x28E20 + EG_PA_CL_UCP_0_Y = 0x285C0, // SAME 0x28E24 + EG_PA_CL_UCP_0_Z = 0x285C4, // SAME 0x28E28 + EG_PA_CL_UCP_0_W = 0x285C8, // SAME 0x28E2C + EG_PA_CL_UCP_1_X = 0x285CC, // SAME 0x28E30 + EG_PA_CL_UCP_1_Y = 0x285D0, // SAME 0x28E34 + EG_PA_CL_UCP_1_Z = 0x285D4, // SAME 0x28E38 + EG_PA_CL_UCP_1_W = 0x285D8, // SAME 0x28E3C + EG_PA_CL_UCP_2_X = 0x285DC, // SAME 0x28E40 + EG_PA_CL_UCP_2_Y = 0x285E0, // SAME 0x28E44 + EG_PA_CL_UCP_2_Z = 0x285E4, // SAME 0x28E48 + EG_PA_CL_UCP_2_W = 0x285E8, // SAME 0x28E4C + EG_PA_CL_UCP_3_X = 0x285EC, // SAME 0x28E50 + EG_PA_CL_UCP_3_Y = 0x285F0, // SAME 0x28E54 + EG_PA_CL_UCP_3_Z = 0x285F4, // SAME 0x28E58 + EG_PA_CL_UCP_3_W = 0x285F8, // SAME 0x28E5C + EG_PA_CL_UCP_4_X = 0x285FC, // SAME 0x28E60 + EG_PA_CL_UCP_4_Y = 0x28600, // SAME 0x28E64 + EG_PA_CL_UCP_4_Z = 0x28604, // SAME 0x28E68 + EG_PA_CL_UCP_4_W = 0x28608, // SAME 0x28E6C + EG_PA_CL_UCP_5_X = 0x2860C, // SAME 0x28E70 + EG_PA_CL_UCP_5_Y = 0x28610, // SAME 0x28E74 + EG_PA_CL_UCP_5_Z = 0x28614, // SAME 0x28E78 + EG_PA_CL_UCP_5_W = 0x28618, // SAME 0x28E7C + EG_PA_CL_POINT_X_RAD = 0x287D4, // SAME 0x28E10 + EG_PA_CL_POINT_Y_RAD = 0x287D8, // SAME 0x28E14 + EG_PA_CL_POINT_SIZE = 0x287DC, // SAME 0x28E18 + EG_PA_CL_POINT_CULL_RAD = 0x287E0, // SAME 0x28E1C + EG_PA_CL_CLIP_CNTL = 0x28810, // SAME + 
EG_PA_SU_SC_MODE_CNTL = 0x28814, // SAME + EG_PA_CL_VTE_CNTL = 0x28818, // SAME + EG_PA_CL_VS_OUT_CNTL = 0x2881C, // SAME + EG_PA_CL_NANINF_CNTL = 0x28820, // SAME + EG_PA_SU_LINE_STIPPLE_CNTL = 0x28824, // + EG_PA_SU_LINE_STIPPLE_SCALE = 0x28828, // + EG_PA_SU_PRIM_FILTER_CNTL = 0x2882C, // + EG_PA_SU_POINT_SIZE = 0x28A00, // SAME + EG_PA_SU_POINT_MINMAX = 0x28A04, // SAME + EG_PA_SU_LINE_CNTL = 0x28A08, // SAME + EG_PA_SC_LINE_STIPPLE = 0x28A0C, // SAME + EG_PA_SC_MODE_CNTL_0 = 0x28A48, // + EG_PA_SC_MODE_CNTL_1 = 0x28A4C, // + EG_PA_SU_POLY_OFFSET_DB_FMT_CNTL = 0x28B78, // SAME 0x28DF8 + EG_PA_SU_POLY_OFFSET_CLAMP = 0x28B7C, // SAME 0x28DFC + EG_PA_SU_POLY_OFFSET_FRONT_SCALE = 0x28B80, // SAME 0x28E00 + EG_PA_SU_POLY_OFFSET_FRONT_OFFSET = 0x28B84, // SAME 0x28E04 + EG_PA_SU_POLY_OFFSET_BACK_SCALE = 0x28B88, // SAME 0x28E08 + EG_PA_SU_POLY_OFFSET_BACK_OFFSET = 0x28B8C, // SAME 0x28E0C + EG_PA_SC_LINE_CNTL = 0x28C00, // DIFF + EG_PA_SC_AA_CONFIG = 0x28C04, // SAME + EG_PA_SU_VTX_CNTL = 0x28C08, // SAME + EG_PA_CL_GB_VERT_CLIP_ADJ = 0x28C0C, // SAME + EG_PA_CL_GB_VERT_DISC_ADJ = 0x28C10, // SAME + EG_PA_CL_GB_HORZ_CLIP_ADJ = 0x28C14, // SAME + EG_PA_CL_GB_HORZ_DISC_ADJ = 0x28C18, // SAME + EG_PA_SC_AA_SAMPLE_LOCS_0 = 0x28C1C, // + EG_PA_SC_AA_SAMPLE_LOCS_1 = 0x28C20, // + EG_PA_SC_AA_SAMPLE_LOCS_2 = 0x28C24, // + EG_PA_SC_AA_SAMPLE_LOCS_3 = 0x28C28, // + EG_PA_SC_AA_SAMPLE_LOCS_4 = 0x28C2C, // + EG_PA_SC_AA_SAMPLE_LOCS_5 = 0x28C30, // + EG_PA_SC_AA_SAMPLE_LOCS_6 = 0x28C34, // + EG_PA_SC_AA_SAMPLE_LOCS_7 = 0x28C38, // + EG_PA_SC_AA_MASK = 0x28C3C, // SAME 0x28C48 + +/* Registers from VGT block: */ + EG_VGT_INDEX_TYPE = 0x895C, //? config space + EG_VGT_PRIMITIVE_TYPE = 0x8958, //? 
config space + + EG_VGT_MAX_VTX_INDX = 0x28400, // SAME + EG_VGT_MIN_VTX_INDX = 0x28404, // SAME + EG_VGT_INDX_OFFSET = 0x28408, // SAME + EG_VGT_MULTI_PRIM_IB_RESET_INDX = 0x2840C, // SAME + EG_CS_COPY_STATE = 0x287CC, // + EG_GFX_COPY_STATE = 0x287D0, // SAME + EG_VGT_DMA_BASE_HI = 0x287E4, // SAME + EG_VGT_DMA_BASE = 0x287E8, // SAME + EG_VGT_DRAW_INITIATOR = 0x287F0, // SAME + EG_VGT_IMMED_DATA = 0x287F4, // SAME + EG_VGT_EVENT_ADDRESS_REG = 0x287F8, // SAME + EG_VGT_OUTPUT_PATH_CNTL = 0x28A10, // DIFF + EG_VGT_HOS_CNTL = 0x28A14, // SAME + EG_VGT_HOS_MAX_TESS_LEVEL = 0x28A18, // SAME + EG_VGT_HOS_MIN_TESS_LEVEL = 0x28A1C, // SAME + EG_VGT_HOS_REUSE_DEPTH = 0x28A20, // SAME + EG_VGT_GROUP_PRIM_TYPE = 0x28A24, // SAME + EG_VGT_GROUP_FIRST_DECR = 0x28A28, // SAME + EG_VGT_GROUP_DECR = 0x28A2C, // SAME + EG_VGT_GROUP_VECT_0_CNTL = 0x28A30, // SAME + EG_VGT_GROUP_VECT_1_CNTL = 0x28A34, // SAME + EG_VGT_GROUP_VECT_0_FMT_CNTL = 0x28A38, // SAME + EG_VGT_GROUP_VECT_1_FMT_CNTL = 0x28A3C, // SAME + EG_VGT_GS_MODE = 0x28A40, // DIFF + EG_VGT_ENHANCE = 0x28A50, // DIFF + EG_VGT_GS_PER_ES = 0x28A54, // DIFF 0x88C8 + EG_VGT_ES_PER_GS = 0x28A58, // DIFF 0x88CC + EG_VGT_GS_PER_VS = 0x28A5C, // SAME 0x88E8 + EG_VGT_GS_OUT_PRIM_TYPE = 0x28A6C, // SAME + EG_VGT_DMA_SIZE = 0x28A74, // SAME + EG_VGT_DMA_MAX_SIZE = 0x28A78, // SAME + EG_VGT_DMA_INDEX_TYPE = 0x28A7C, // SAME + EG_VGT_PRIMITIVEID_EN = 0x28A84, // SAME + EG_VGT_DMA_NUM_INSTANCES = 0x28A88, // SAME + EG_VGT_EVENT_INITIATOR = 0x28A90, // SAME + EG_VGT_MULTI_PRIM_IB_RESET_EN = 0x28A94, // SAME + EG_VGT_INSTANCE_STEP_RATE_0 = 0x28AA0, // SAME + EG_VGT_INSTANCE_STEP_RATE_1 = 0x28AA4, // SAME + EG_VGT_REUSE_OFF = 0x28AB4, // SAME + EG_VGT_VTX_CNT_EN = 0x28AB8, // SAME + EG_VGT_STRMOUT_BUFFER_SIZE_0 = 0x28AD0, // SAME + EG_VGT_STRMOUT_VTX_STRIDE_0 = 0x28AD4, // SAME + EG_VGT_STRMOUT_BUFFER_BASE_0 = 0x28AD8, // SAME + EG_VGT_STRMOUT_BUFFER_OFFSET_0 = 0x28ADC, // SAME + EG_VGT_STRMOUT_BUFFER_SIZE_1 = 0x28AE0, // SAME + 
EG_VGT_STRMOUT_VTX_STRIDE_1 = 0x28AE4, // SAME + EG_VGT_STRMOUT_BUFFER_BASE_1 = 0x28AE8, // SAME + EG_VGT_STRMOUT_BUFFER_OFFSET_1 = 0x28AEC, // SAME + EG_VGT_STRMOUT_BUFFER_SIZE_2 = 0x28AF0, // SAME + EG_VGT_STRMOUT_VTX_STRIDE_2 = 0x28AF4, // SAME + EG_VGT_STRMOUT_BUFFER_BASE_2 = 0x28AF8, // SAME + EG_VGT_STRMOUT_BUFFER_OFFSET_2 = 0x28AFC, // SAME + EG_VGT_STRMOUT_BUFFER_SIZE_3 = 0x28B00, // SAME + EG_VGT_STRMOUT_VTX_STRIDE_3 = 0x28B04, // SAME + EG_VGT_STRMOUT_BUFFER_BASE_3 = 0x28B08, // SAME + EG_VGT_STRMOUT_BUFFER_OFFSET_3 = 0x28B0C, // SAME + EG_VGT_STRMOUT_BASE_OFFSET_0 = 0x28B10, // SAME + EG_VGT_STRMOUT_BASE_OFFSET_1 = 0x28B14, // SAME + EG_VGT_STRMOUT_BASE_OFFSET_2 = 0x28B18, // SAME + EG_VGT_STRMOUT_BASE_OFFSET_3 = 0x28B1C, // SAME + EG_VGT_STRMOUT_DRAW_OPAQUE_OFFSET = 0x28B28, // SAME + EG_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE = 0x28B2C, // SAME + EG_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE = 0x28B30, // DIFF + EG_VGT_GS_MAX_VERT_OUT = 0x28B38, // SAME + EG_VGT_STRMOUT_BASE_OFFSET_HI_0 = 0x28B44, // SAME + EG_VGT_STRMOUT_BASE_OFFSET_HI_1 = 0x28B48, // SAME + EG_VGT_STRMOUT_BASE_OFFSET_HI_2 = 0x28B4C, // SAME + EG_VGT_STRMOUT_BASE_OFFSET_HI_3 = 0x28B50, // SAME + EG_VGT_SHADER_STAGES_EN = 0x28B54, // + EG_VGT_LS_HS_CONFIG = 0x28B58, // + EG_VGT_LS_SIZE = 0x28B5C, // + EG_VGT_HS_SIZE = 0x28B60, // + EG_VGT_LS_HS_ALLOC = 0x28B64, // + EG_VGT_HS_PATCH_CONST = 0x28B68, // + EG_VGT_TF_PARAM = 0x28B6C, // + EG_VGT_DISPATCH_INITIATOR = 0x28B74, // + EG_VGT_GS_INSTANCE_CNT = 0x28B90, // + EG_VGT_STRMOUT_CONFIG = 0x28B94, // + EG_VGT_STRMOUT_BUFFER_CONFIG = 0x28B98, // + EG_VGT_VERTEX_REUSE_BLOCK_CNTL = 0x28C58, // SAME + EG_VGT_OUT_DEALLOC_CNTL = 0x28C5C, // SAME + +/* Registers from TP block: */ + EG_GDS_ADDR_BASE = 0x28720, // + EG_GDS_ADDR_SIZE = 0x28724, // + EG_GDS_ORDERED_WAVE_PER_SE = 0x28728, // + EG_GDS_APPEND_CONSUME_UAV0 = 0x2872C, // + EG_GDS_APPEND_CONSUME_UAV1 = 0x28730, // + EG_GDS_APPEND_CONSUME_UAV2 = 0x28734, // + EG_GDS_APPEND_CONSUME_UAV3 = 
0x28738, // + EG_GDS_APPEND_CONSUME_UAV4 = 0x2873C, // + EG_GDS_APPEND_CONSUME_UAV5 = 0x28740, // + EG_GDS_APPEND_CONSUME_UAV6 = 0x28744, // + EG_GDS_APPEND_CONSUME_UAV7 = 0x28748, // + EG_GDS_APPEND_CONSUME_UAV8 = 0x2874C, // + EG_GDS_APPEND_CONSUME_UAV9 = 0x28750, // + EG_GDS_APPEND_CONSUME_UAV10 = 0x28754, // + EG_GDS_APPEND_CONSUME_UAV11 = 0x28758, // + +/* Registers from SQ block: */ + EG_SQ_LOOP_CONST_0 = 0x3A200, // 0x3E200 + EG_SQ_ALU_CONST_BUFFER_SIZE_VS_0 = 0x28180, // ? + EG_SQ_VTX_SEMANTIC_0 = 0x28380, // SAME + EG_SQ_VTX_SEMANTIC_1 = 0x28384, // SAME + EG_SQ_VTX_SEMANTIC_2 = 0x28388, // SAME + EG_SQ_VTX_SEMANTIC_3 = 0x2838C, // SAME + EG_SQ_VTX_SEMANTIC_4 = 0x28390, // SAME + EG_SQ_VTX_SEMANTIC_5 = 0x28394, // SAME + EG_SQ_VTX_SEMANTIC_6 = 0x28398, // SAME + EG_SQ_VTX_SEMANTIC_7 = 0x2839C, // SAME + EG_SQ_VTX_SEMANTIC_8 = 0x283A0, // SAME + EG_SQ_VTX_SEMANTIC_9 = 0x283A4, // SAME + EG_SQ_VTX_SEMANTIC_10 = 0x283A8, // SAME + EG_SQ_VTX_SEMANTIC_11 = 0x283AC, // SAME + EG_SQ_VTX_SEMANTIC_12 = 0x283B0, // SAME + EG_SQ_VTX_SEMANTIC_13 = 0x283B4, // SAME + EG_SQ_VTX_SEMANTIC_14 = 0x283B8, // SAME + EG_SQ_VTX_SEMANTIC_15 = 0x283BC, // SAME + EG_SQ_VTX_SEMANTIC_16 = 0x283C0, // SAME + EG_SQ_VTX_SEMANTIC_17 = 0x283C4, // SAME + EG_SQ_VTX_SEMANTIC_18 = 0x283C8, // SAME + EG_SQ_VTX_SEMANTIC_19 = 0x283CC, // SAME + EG_SQ_VTX_SEMANTIC_20 = 0x283D0, // SAME + EG_SQ_VTX_SEMANTIC_21 = 0x283D4, // SAME + EG_SQ_VTX_SEMANTIC_22 = 0x283D8, // SAME + EG_SQ_VTX_SEMANTIC_23 = 0x283DC, // SAME + EG_SQ_VTX_SEMANTIC_24 = 0x283E0, // SAME + EG_SQ_VTX_SEMANTIC_25 = 0x283E4, // SAME + EG_SQ_VTX_SEMANTIC_26 = 0x283E8, // SAME + EG_SQ_VTX_SEMANTIC_27 = 0x283EC, // SAME + EG_SQ_VTX_SEMANTIC_28 = 0x283F0, // SAME + EG_SQ_VTX_SEMANTIC_29 = 0x283F4, // SAME + EG_SQ_VTX_SEMANTIC_30 = 0x283F8, // SAME + EG_SQ_VTX_SEMANTIC_31 = 0x283FC, // SAME + EG_SQ_LSTMP_RING_ITEMSIZE = 0x28830, // + EG_SQ_HSTMP_RING_ITEMSIZE = 0x28834, // + EG_SQ_DYN_GPR_RESOURCE_LIMIT_1 = 0x28838, // + 
EG_SQ_PGM_START_PS = 0x28840, // SAME + EG_SQ_PGM_RESOURCES_PS = 0x28844, // DIFF 0x28850 + EG_SQ_PGM_RESOURCES_2_PS = 0x28848, // + EG_SQ_PGM_EXPORTS_PS = 0x2884C, // SAME 0x28854 + EG_SQ_PGM_START_VS = 0x2885C, // SAME 0x28858 + EG_SQ_PGM_RESOURCES_VS = 0x28860, // DIFF 0x28868 + EG_SQ_PGM_RESOURCES_2_VS = 0x28864, // + EG_SQ_PGM_START_GS = 0x28874, // SAME 0x2886C + EG_SQ_PGM_RESOURCES_GS = 0x28878, // DIFF 0x2887C + EG_SQ_PGM_RESOURCES_2_GS = 0x2887C, // + EG_SQ_PGM_START_ES = 0x2888C, // SAME 0x28880 + EG_SQ_PGM_RESOURCES_ES = 0x28890, // DIFF + EG_SQ_PGM_RESOURCES_2_ES = 0x28894, // + EG_SQ_PGM_START_FS = 0x288A4, // SAME 0x28894 + EG_SQ_PGM_RESOURCES_FS = 0x288A8, // DIFF 0x288A4 + EG_SQ_PGM_START_HS = 0x288B8, // + EG_SQ_PGM_RESOURCES_HS = 0x288BC, // + EG_SQ_PGM_RESOURCES_2_HS = 0x288C0, // + EG_SQ_PGM_START_LS = 0x288D0, // + EG_SQ_PGM_RESOURCES_LS = 0x288D4, // + EG_SQ_PGM_RESOURCES_2_LS = 0x288D8, // + EG_SQ_THREAD_TRACE_USERDATA = 0x288DC, // + EG_SQ_LDS_ALLOC = 0x288E8, // + EG_SQ_LDS_ALLOC_PS = 0x288EC, // + EG_SQ_VTX_SEMANTIC_CLEAR = 0x288F0, // SAME 0x288E0 + EG_SQ_THREAD_TRACE_CTRL = 0x288F8, // + EG_SQ_ESGS_RING_ITEMSIZE = 0x28900, // SAME 0x288A8 + EG_SQ_GSVS_RING_ITEMSIZE = 0x28904, // SAME 0x288AC + EG_SQ_ESTMP_RING_ITEMSIZE = 0x28908, // SAME 0x288B0 + EG_SQ_GSTMP_RING_ITEMSIZE = 0x2890C, // SAME 0x288B4 + EG_SQ_VSTMP_RING_ITEMSIZE = 0x28910, // SAME 0x288B8 + EG_SQ_PSTMP_RING_ITEMSIZE = 0x28914, // SAME 0x288BC + EG_SQ_GS_VERT_ITEMSIZE = 0x2891C, // SAME 0x288C8 + EG_SQ_GS_VERT_ITEMSIZE_1 = 0x28920, // + EG_SQ_GS_VERT_ITEMSIZE_2 = 0x28924, // + EG_SQ_GS_VERT_ITEMSIZE_3 = 0x28928, // + EG_SQ_GSVS_RING_OFFSET_1 = 0x2892C, // + EG_SQ_GSVS_RING_OFFSET_2 = 0x28930, // + EG_SQ_GSVS_RING_OFFSET_3 = 0x28934, // + EG_SQ_ALU_CONST_CACHE_PS_0 = 0x28940, // SAME + EG_SQ_ALU_CONST_CACHE_PS_1 = 0x28944, // SAME + EG_SQ_ALU_CONST_CACHE_PS_2 = 0x28948, // SAME + EG_SQ_ALU_CONST_CACHE_PS_3 = 0x2894C, // SAME + EG_SQ_ALU_CONST_CACHE_PS_4 = 0x28950, // SAME + 
EG_SQ_ALU_CONST_CACHE_PS_5 = 0x28954, // SAME + EG_SQ_ALU_CONST_CACHE_PS_6 = 0x28958, // SAME + EG_SQ_ALU_CONST_CACHE_PS_7 = 0x2895C, // SAME + EG_SQ_ALU_CONST_CACHE_PS_8 = 0x28960, // SAME + EG_SQ_ALU_CONST_CACHE_PS_9 = 0x28964, // SAME + EG_SQ_ALU_CONST_CACHE_PS_10 = 0x28968, // SAME + EG_SQ_ALU_CONST_CACHE_PS_11 = 0x2896C, // SAME + EG_SQ_ALU_CONST_CACHE_PS_12 = 0x28970, // SAME + EG_SQ_ALU_CONST_CACHE_PS_13 = 0x28974, // SAME + EG_SQ_ALU_CONST_CACHE_PS_14 = 0x28978, // SAME + EG_SQ_ALU_CONST_CACHE_PS_15 = 0x2897C, // SAME + EG_SQ_ALU_CONST_CACHE_VS_0 = 0x28980, // SAME + EG_SQ_ALU_CONST_CACHE_VS_1 = 0x28984, // SAME + EG_SQ_ALU_CONST_CACHE_VS_2 = 0x28988, // SAME + EG_SQ_ALU_CONST_CACHE_VS_3 = 0x2898C, // SAME + EG_SQ_ALU_CONST_CACHE_VS_4 = 0x28990, // SAME + EG_SQ_ALU_CONST_CACHE_VS_5 = 0x28994, // SAME + EG_SQ_ALU_CONST_CACHE_VS_6 = 0x28998, // SAME + EG_SQ_ALU_CONST_CACHE_VS_7 = 0x2899C, // SAME + EG_SQ_ALU_CONST_CACHE_VS_8 = 0x289A0, // SAME + EG_SQ_ALU_CONST_CACHE_VS_9 = 0x289A4, // SAME + EG_SQ_ALU_CONST_CACHE_VS_10 = 0x289A8, // SAME + EG_SQ_ALU_CONST_CACHE_VS_11 = 0x289AC, // SAME + EG_SQ_ALU_CONST_CACHE_VS_12 = 0x289B0, // SAME + EG_SQ_ALU_CONST_CACHE_VS_13 = 0x289B4, // SAME + EG_SQ_ALU_CONST_CACHE_VS_14 = 0x289B8, // SAME + EG_SQ_ALU_CONST_CACHE_VS_15 = 0x289BC, // SAME + EG_SQ_ALU_CONST_CACHE_GS_0 = 0x289C0, // SAME + EG_SQ_ALU_CONST_CACHE_GS_1 = 0x289C4, // SAME + EG_SQ_ALU_CONST_CACHE_GS_2 = 0x289C8, // SAME + EG_SQ_ALU_CONST_CACHE_GS_3 = 0x289CC, // SAME + EG_SQ_ALU_CONST_CACHE_GS_4 = 0x289D0, // SAME + EG_SQ_ALU_CONST_CACHE_GS_5 = 0x289D4, // SAME + EG_SQ_ALU_CONST_CACHE_GS_6 = 0x289D8, // SAME + EG_SQ_ALU_CONST_CACHE_GS_7 = 0x289DC, // SAME + EG_SQ_ALU_CONST_CACHE_GS_8 = 0x289E0, // SAME + EG_SQ_ALU_CONST_CACHE_GS_9 = 0x289E4, // SAME + EG_SQ_ALU_CONST_CACHE_GS_10 = 0x289E8, // SAME + EG_SQ_ALU_CONST_CACHE_GS_11 = 0x289EC, // SAME + EG_SQ_ALU_CONST_CACHE_GS_12 = 0x289F0, // SAME + EG_SQ_ALU_CONST_CACHE_GS_13 = 0x289F4, // SAME + 
EG_SQ_ALU_CONST_CACHE_GS_14 = 0x289F8, // SAME + EG_SQ_ALU_CONST_CACHE_GS_15 = 0x289FC, // SAME + EG_SQ_ALU_CONST_CACHE_HS_0 = 0x28F00, // + EG_SQ_ALU_CONST_CACHE_HS_1 = 0x28F04, // + EG_SQ_ALU_CONST_CACHE_HS_2 = 0x28F08, // + EG_SQ_ALU_CONST_CACHE_HS_3 = 0x28F0C, // + EG_SQ_ALU_CONST_CACHE_HS_4 = 0x28F10, // + EG_SQ_ALU_CONST_CACHE_HS_5 = 0x28F14, // + EG_SQ_ALU_CONST_CACHE_HS_6 = 0x28F18, // + EG_SQ_ALU_CONST_CACHE_HS_7 = 0x28F1C, // + EG_SQ_ALU_CONST_CACHE_HS_8 = 0x28F20, // + EG_SQ_ALU_CONST_CACHE_HS_9 = 0x28F24, // + EG_SQ_ALU_CONST_CACHE_HS_10 = 0x28F28, // + EG_SQ_ALU_CONST_CACHE_HS_11 = 0x28F2C, // + EG_SQ_ALU_CONST_CACHE_HS_12 = 0x28F30, // + EG_SQ_ALU_CONST_CACHE_HS_13 = 0x28F34, // + EG_SQ_ALU_CONST_CACHE_HS_14 = 0x28F38, // + EG_SQ_ALU_CONST_CACHE_HS_15 = 0x28F3C, // + EG_SQ_ALU_CONST_CACHE_LS_0 = 0x28F40, // + EG_SQ_ALU_CONST_CACHE_LS_1 = 0x28F44, // + EG_SQ_ALU_CONST_CACHE_LS_2 = 0x28F48, // + EG_SQ_ALU_CONST_CACHE_LS_3 = 0x28F4C, // + EG_SQ_ALU_CONST_CACHE_LS_4 = 0x28F50, // + EG_SQ_ALU_CONST_CACHE_LS_5 = 0x28F54, // + EG_SQ_ALU_CONST_CACHE_LS_6 = 0x28F58, // + EG_SQ_ALU_CONST_CACHE_LS_7 = 0x28F5C, // + EG_SQ_ALU_CONST_CACHE_LS_8 = 0x28F60, // + EG_SQ_ALU_CONST_CACHE_LS_9 = 0x28F64, // + EG_SQ_ALU_CONST_CACHE_LS_10 = 0x28F68, // + EG_SQ_ALU_CONST_CACHE_LS_11 = 0x28F6C, // + EG_SQ_ALU_CONST_CACHE_LS_12 = 0x28F70, // + EG_SQ_ALU_CONST_CACHE_LS_13 = 0x28F74, // + EG_SQ_ALU_CONST_CACHE_LS_14 = 0x28F78, // + EG_SQ_ALU_CONST_CACHE_LS_15 = 0x28F7C, // + EG_SQ_ALU_CONST_BUFFER_SIZE_PS_0 = 0x28140, + EG_SQ_ALU_CONST_BUFFER_SIZE_HS_0 = 0x28F80, // + EG_SQ_ALU_CONST_BUFFER_SIZE_HS_1 = 0x28F84, // + EG_SQ_ALU_CONST_BUFFER_SIZE_HS_2 = 0x28F88, // + EG_SQ_ALU_CONST_BUFFER_SIZE_HS_3 = 0x28F8C, // + EG_SQ_ALU_CONST_BUFFER_SIZE_HS_4 = 0x28F90, // + EG_SQ_ALU_CONST_BUFFER_SIZE_HS_5 = 0x28F94, // + EG_SQ_ALU_CONST_BUFFER_SIZE_HS_6 = 0x28F98, // + EG_SQ_ALU_CONST_BUFFER_SIZE_HS_7 = 0x28F9C, // + EG_SQ_ALU_CONST_BUFFER_SIZE_HS_8 = 0x28FA0, // + 
EG_SQ_ALU_CONST_BUFFER_SIZE_HS_9 = 0x28FA4, // + EG_SQ_ALU_CONST_BUFFER_SIZE_HS_10 = 0x28FA8, // + EG_SQ_ALU_CONST_BUFFER_SIZE_HS_11 = 0x28FAC, // + EG_SQ_ALU_CONST_BUFFER_SIZE_HS_12 = 0x28FB0, // + EG_SQ_ALU_CONST_BUFFER_SIZE_HS_13 = 0x28FB4, // + EG_SQ_ALU_CONST_BUFFER_SIZE_HS_14 = 0x28FB8, // + EG_SQ_ALU_CONST_BUFFER_SIZE_HS_15 = 0x28FBC, // + EG_SQ_ALU_CONST_BUFFER_SIZE_LS_0 = 0x28FC0, // + EG_SQ_ALU_CONST_BUFFER_SIZE_LS_1 = 0x28FC4, // + EG_SQ_ALU_CONST_BUFFER_SIZE_LS_2 = 0x28FC8, // + EG_SQ_ALU_CONST_BUFFER_SIZE_LS_3 = 0x28FCC, // + EG_SQ_ALU_CONST_BUFFER_SIZE_LS_4 = 0x28FD0, // + EG_SQ_ALU_CONST_BUFFER_SIZE_LS_5 = 0x28FD4, // + EG_SQ_ALU_CONST_BUFFER_SIZE_LS_6 = 0x28FD8, // + EG_SQ_ALU_CONST_BUFFER_SIZE_LS_7 = 0x28FDC, // + EG_SQ_ALU_CONST_BUFFER_SIZE_LS_8 = 0x28FE0, // + EG_SQ_ALU_CONST_BUFFER_SIZE_LS_9 = 0x28FE4, // + EG_SQ_ALU_CONST_BUFFER_SIZE_LS_10 = 0x28FE8, // + EG_SQ_ALU_CONST_BUFFER_SIZE_LS_11 = 0x28FEC, // + EG_SQ_ALU_CONST_BUFFER_SIZE_LS_12 = 0x28FF0, // + EG_SQ_ALU_CONST_BUFFER_SIZE_LS_13 = 0x28FF4, // + EG_SQ_ALU_CONST_BUFFER_SIZE_LS_14 = 0x28FF8, // + EG_SQ_ALU_CONST_BUFFER_SIZE_LS_15 = 0x28FFC, // + +/* Registers from SPI block: */ + EG_SPI_VS_OUT_ID_0 = 0x2861C, // SAME 0x28614 + EG_SPI_VS_OUT_ID_1 = 0x28620, // SAME 0x28618 + EG_SPI_VS_OUT_ID_2 = 0x28624, // SAME 0x2861C + EG_SPI_VS_OUT_ID_3 = 0x28628, // SAME 0x28620 + EG_SPI_VS_OUT_ID_4 = 0x2862C, // SAME 0x28624 + EG_SPI_VS_OUT_ID_5 = 0x28630, // SAME 0x28628 + EG_SPI_VS_OUT_ID_6 = 0x28634, // SAME 0x2862C + EG_SPI_VS_OUT_ID_7 = 0x28638, // SAME 0x28630 + EG_SPI_VS_OUT_ID_8 = 0x2863C, // SAME 0x28634 + EG_SPI_VS_OUT_ID_9 = 0x28640, // SAME 0x28638 + EG_SPI_PS_INPUT_CNTL_0 = 0x28644, // SAME + EG_SPI_PS_INPUT_CNTL_1 = 0x28648, // SAME + EG_SPI_PS_INPUT_CNTL_2 = 0x2864C, // SAME + EG_SPI_PS_INPUT_CNTL_3 = 0x28650, // SAME + EG_SPI_PS_INPUT_CNTL_4 = 0x28654, // SAME + EG_SPI_PS_INPUT_CNTL_5 = 0x28658, // SAME + EG_SPI_PS_INPUT_CNTL_6 = 0x2865C, // SAME + EG_SPI_PS_INPUT_CNTL_7 = 0x28660, // 
SAME + EG_SPI_PS_INPUT_CNTL_8 = 0x28664, // SAME + EG_SPI_PS_INPUT_CNTL_9 = 0x28668, // SAME + EG_SPI_PS_INPUT_CNTL_10 = 0x2866C, // SAME + EG_SPI_PS_INPUT_CNTL_11 = 0x28670, // SAME + EG_SPI_PS_INPUT_CNTL_12 = 0x28674, // SAME + EG_SPI_PS_INPUT_CNTL_13 = 0x28678, // SAME + EG_SPI_PS_INPUT_CNTL_14 = 0x2867C, // SAME + EG_SPI_PS_INPUT_CNTL_15 = 0x28680, // SAME + EG_SPI_PS_INPUT_CNTL_16 = 0x28684, // SAME + EG_SPI_PS_INPUT_CNTL_17 = 0x28688, // SAME + EG_SPI_PS_INPUT_CNTL_18 = 0x2868C, // SAME + EG_SPI_PS_INPUT_CNTL_19 = 0x28690, // SAME + EG_SPI_PS_INPUT_CNTL_20 = 0x28694, // SAME + EG_SPI_PS_INPUT_CNTL_21 = 0x28698, // SAME + EG_SPI_PS_INPUT_CNTL_22 = 0x2869C, // SAME + EG_SPI_PS_INPUT_CNTL_23 = 0x286A0, // SAME + EG_SPI_PS_INPUT_CNTL_24 = 0x286A4, // SAME + EG_SPI_PS_INPUT_CNTL_25 = 0x286A8, // SAME + EG_SPI_PS_INPUT_CNTL_26 = 0x286AC, // SAME + EG_SPI_PS_INPUT_CNTL_27 = 0x286B0, // SAME + EG_SPI_PS_INPUT_CNTL_28 = 0x286B4, // SAME + EG_SPI_PS_INPUT_CNTL_29 = 0x286B8, // SAME + EG_SPI_PS_INPUT_CNTL_30 = 0x286BC, // SAME + EG_SPI_PS_INPUT_CNTL_31 = 0x286C0, // SAME + EG_SPI_VS_OUT_CONFIG = 0x286C4, // SAME + EG_SPI_THREAD_GROUPING = 0x286C8, // DIFF + EG_SPI_PS_IN_CONTROL_0 = 0x286CC, // SAME + EG_SPI_PS_IN_CONTROL_1 = 0x286D0, // SAME + EG_SPI_INTERP_CONTROL_0 = 0x286D4, // SAME + EG_SPI_INPUT_Z = 0x286D8, // SAME + EG_SPI_FOG_CNTL = 0x286DC, // SAME + EG_SPI_BARYC_CNTL = 0x286E0, // + EG_SPI_PS_IN_CONTROL_2 = 0x286E4, // + EG_SPI_COMPUTE_INPUT_CNTL = 0x286E8, // + EG_SPI_COMPUTE_NUM_THREAD_X = 0x286EC, // + EG_SPI_COMPUTE_NUM_THREAD_Y = 0x286F0, // + EG_SPI_COMPUTE_NUM_THREAD_Z = 0x286F4, // + +/* Registers from SX block: */ + EG_SX_MISC = 0x28350, // SAME + EG_SX_SURFACE_SYNC = 0x28354, // DIFF + EG_SX_ALPHA_TEST_CONTROL = 0x28410, // SAME + EG_SX_ALPHA_REF = 0x28438, // SAME + +/* Registers from DB block: */ + EG_DB_RENDER_CONTROL = 0x28000, // DIFF 0x28D0C + EG_DB_COUNT_CONTROL = 0x28004, // + EG_DB_DEPTH_VIEW = 0x28008, // DIFF 0x28004 + 
EG_DB_RENDER_OVERRIDE = 0x2800C, // DIFF 0x28D10 + EG_DB_RENDER_OVERRIDE2 = 0x28010, // + EG_DB_HTILE_DATA_BASE = 0x28014, // SAME + + EG_DB_STENCIL_CLEAR = 0x28028, // SAME + EG_DB_DEPTH_CLEAR = 0x2802C, // SAME + + EG_DB_Z_INFO = 0x28040, // + EG_DB_STENCIL_INFO = 0x28044, // + EG_DB_Z_READ_BASE = 0x28048, // + EG_DB_STENCIL_READ_BASE = 0x2804C, // + EG_DB_Z_WRITE_BASE = 0x28050, // + EG_DB_STENCIL_WRITE_BASE = 0x28054, // + EG_DB_DEPTH_SIZE = 0x28058, // DIFF 0x28000 + EG_DB_DEPTH_SLICE = 0x2805C, // + + EG_DB_STENCILREFMASK = 0x28430, // SAME + EG_DB_STENCILREFMASK_BF = 0x28434, // SAME + EG_DB_DEPTH_CONTROL = 0x28800, // SAME + EG_DB_SHADER_CONTROL = 0x2880C, // DIFF + EG_DB_HTILE_SURFACE = 0x28ABC, // SAME 0x28D24 + EG_DB_SRESULTS_COMPARE_STATE0 = 0x28AC0, // SAME 0x28D28 + EG_DB_SRESULTS_COMPARE_STATE1 = 0x28AC4, // SAME 0x28D2C + EG_DB_PRELOAD_CONTROL = 0x28AC8, // SAME 0x28D30 + EG_DB_ALPHA_TO_MASK = 0x28B70, // SAME 0x28D44 + +/* Registers from CB block: */ + EG_CB_TARGET_MASK = 0x28238, // SAME + EG_CB_SHADER_MASK = 0x2823C, // SAME + EG_CB_BLEND_RED = 0x28414, // SAME + EG_CB_BLEND_GREEN = 0x28418, // SAME + EG_CB_BLEND_BLUE = 0x2841C, // SAME + EG_CB_BLEND_ALPHA = 0x28420, // SAME + EG_CB_BLEND0_CONTROL = 0x28780, // DIFF + EG_CB_BLEND1_CONTROL = 0x28784, // DIFF + EG_CB_BLEND2_CONTROL = 0x28788, // DIFF + EG_CB_BLEND3_CONTROL = 0x2878C, // DIFF + EG_CB_BLEND4_CONTROL = 0x28790, // DIFF + EG_CB_BLEND5_CONTROL = 0x28794, // DIFF + EG_CB_BLEND6_CONTROL = 0x28798, // DIFF + EG_CB_BLEND7_CONTROL = 0x2879C, // DIFF + EG_CB_COLOR_CONTROL = 0x28808, // DIFF + EG_CB_IMMED0_BASE = 0x28B9C, // + EG_CB_IMMED1_BASE = 0x28BA0, // + EG_CB_IMMED2_BASE = 0x28BA4, // + EG_CB_IMMED3_BASE = 0x28BA8, // + EG_CB_IMMED4_BASE = 0x28BAC, // + EG_CB_IMMED5_BASE = 0x28BB0, // + EG_CB_IMMED6_BASE = 0x28BB4, // + EG_CB_IMMED7_BASE = 0x28BB8, // + EG_CB_IMMED8_BASE = 0x28BBC, // + EG_CB_IMMED9_BASE = 0x28BC0, // + EG_CB_IMMED10_BASE = 0x28BC4, // + EG_CB_IMMED11_BASE = 0x28BC8, // 
+ EG_CB_CLRCMP_CONTROL = 0x28C40, // SAME 0x28C30 + EG_CB_CLRCMP_SRC = 0x28C44, // SAME 0x28C34 + EG_CB_CLRCMP_DST = 0x28C48, // SAME 0x28C38 + EG_CB_CLRCMP_MSK = 0x28C4C, // SAME 0x28C3C + EG_CB_COLOR0_BASE = 0x28C60, // SAME 0x28040 + EG_CB_COLOR0_PITCH = 0x28C64, // + EG_CB_COLOR0_SLICE = 0x28C68, // + EG_CB_COLOR0_VIEW = 0x28C6C, // SAME 0x28080 + EG_CB_COLOR0_INFO = 0x28C70, // DIFF 0x280A0 + EG_CB_COLOR0_ATTRIB = 0x28C74, // + EG_CB_COLOR0_DIM = 0x28C78, // + EG_CB_COLOR0_CMASK = 0x28C7C, // + EG_CB_COLOR0_CMASK_SLICE = 0x28C80, // + EG_CB_COLOR0_FMASK = 0x28C84, // + EG_CB_COLOR0_FMASK_SLICE = 0x28C88, // + EG_CB_COLOR0_CLEAR_WORD0 = 0x28C8C, // + EG_CB_COLOR0_CLEAR_WORD1 = 0x28C90, // + EG_CB_COLOR0_CLEAR_WORD2 = 0x28C94, // + EG_CB_COLOR0_CLEAR_WORD3 = 0x28C98, // + EG_CB_COLOR1_BASE = 0x28C9C, // SAME 0x28044 + EG_CB_COLOR1_PITCH = 0x28CA0, // + EG_CB_COLOR1_SLICE = 0x28CA4, // + EG_CB_COLOR1_VIEW = 0x28CA8, // SAME 0x28084 + EG_CB_COLOR1_INFO = 0x28CAC, // DIFF 0x280A4 + EG_CB_COLOR1_ATTRIB = 0x28CB0, // + EG_CB_COLOR1_DIM = 0x28CB4, // + EG_CB_COLOR1_CMASK = 0x28CB8, // + EG_CB_COLOR1_CMASK_SLICE = 0x28CBC, // + EG_CB_COLOR1_FMASK = 0x28CC0, // + EG_CB_COLOR1_FMASK_SLICE = 0x28CC4, // + EG_CB_COLOR1_CLEAR_WORD0 = 0x28CC8, // + EG_CB_COLOR1_CLEAR_WORD1 = 0x28CCC, // + EG_CB_COLOR1_CLEAR_WORD2 = 0x28CD0, // + EG_CB_COLOR1_CLEAR_WORD3 = 0x28CD4, // + EG_CB_COLOR2_BASE = 0x28CD8, // SAME 0x28048 + EG_CB_COLOR2_PITCH = 0x28CDC, // + EG_CB_COLOR2_SLICE = 0x28CE0, // + EG_CB_COLOR2_VIEW = 0x28CE4, // SAME 0x28088 + EG_CB_COLOR2_INFO = 0x28CE8, // DIFF 0x280A8 + EG_CB_COLOR2_ATTRIB = 0x28CEC, // + EG_CB_COLOR2_DIM = 0x28CF0, // + EG_CB_COLOR2_CMASK = 0x28CF4, // + EG_CB_COLOR2_CMASK_SLICE = 0x28CF8, // + EG_CB_COLOR2_FMASK = 0x28CFC, // + EG_CB_COLOR2_FMASK_SLICE = 0x28D00, // + EG_CB_COLOR2_CLEAR_WORD0 = 0x28D04, // + EG_CB_COLOR2_CLEAR_WORD1 = 0x28D08, // + EG_CB_COLOR2_CLEAR_WORD2 = 0x28D0C, // + EG_CB_COLOR2_CLEAR_WORD3 = 0x28D10, // + EG_CB_COLOR3_BASE = 
0x28D14, // SAME 0x2804C + EG_CB_COLOR3_PITCH = 0x28D18, // + EG_CB_COLOR3_SLICE = 0x28D1C, // + EG_CB_COLOR3_VIEW = 0x28D20, // SAME 0x2808C + EG_CB_COLOR3_INFO = 0x28D24, // DIFF 0x280AC + EG_CB_COLOR3_ATTRIB = 0x28D28, // + EG_CB_COLOR3_DIM = 0x28D2C, // + EG_CB_COLOR3_CMASK = 0x28D30, // + EG_CB_COLOR3_CMASK_SLICE = 0x28D34, // + EG_CB_COLOR3_FMASK = 0x28D38, // + EG_CB_COLOR3_FMASK_SLICE = 0x28D3C, // + EG_CB_COLOR3_CLEAR_WORD0 = 0x28D40, // + EG_CB_COLOR3_CLEAR_WORD1 = 0x28D44, // + EG_CB_COLOR3_CLEAR_WORD2 = 0x28D48, // + EG_CB_COLOR3_CLEAR_WORD3 = 0x28D4C, // + EG_CB_COLOR4_BASE = 0x28D50, // SAME 0x28050 + EG_CB_COLOR4_PITCH = 0x28D54, // + EG_CB_COLOR4_SLICE = 0x28D58, // + EG_CB_COLOR4_VIEW = 0x28D5C, // SAME 0x28090 + EG_CB_COLOR4_INFO = 0x28D60, // DIFF 0x280B0 + EG_CB_COLOR4_ATTRIB = 0x28D64, // + EG_CB_COLOR4_DIM = 0x28D68, // + EG_CB_COLOR4_CMASK = 0x28D6C, // + EG_CB_COLOR4_CMASK_SLICE = 0x28D70, // + EG_CB_COLOR4_FMASK = 0x28D74, // + EG_CB_COLOR4_FMASK_SLICE = 0x28D78, // + EG_CB_COLOR4_CLEAR_WORD0 = 0x28D7C, // + EG_CB_COLOR4_CLEAR_WORD1 = 0x28D80, // + EG_CB_COLOR4_CLEAR_WORD2 = 0x28D84, // + EG_CB_COLOR4_CLEAR_WORD3 = 0x28D88, // + EG_CB_COLOR5_BASE = 0x28D8C, // SAME 0x28054 + EG_CB_COLOR5_PITCH = 0x28D90, // + EG_CB_COLOR5_SLICE = 0x28D94, // + EG_CB_COLOR5_VIEW = 0x28D98, // SAME 0x28094 + EG_CB_COLOR5_INFO = 0x28D9C, // DIFF 0x280B4 + EG_CB_COLOR5_ATTRIB = 0x28DA0, // + EG_CB_COLOR5_DIM = 0x28DA4, // + EG_CB_COLOR5_CMASK = 0x28DA8, // + EG_CB_COLOR5_CMASK_SLICE = 0x28DAC, // + EG_CB_COLOR5_FMASK = 0x28DB0, // + EG_CB_COLOR5_FMASK_SLICE = 0x28DB4, // + EG_CB_COLOR5_CLEAR_WORD0 = 0x28DB8, // + EG_CB_COLOR5_CLEAR_WORD1 = 0x28DBC, // + EG_CB_COLOR5_CLEAR_WORD2 = 0x28DC0, // + EG_CB_COLOR5_CLEAR_WORD3 = 0x28DC4, // + EG_CB_COLOR6_BASE = 0x28DC8, // SAME 0x28058 + EG_CB_COLOR6_PITCH = 0x28DCC, // + EG_CB_COLOR6_SLICE = 0x28DD0, // + EG_CB_COLOR6_VIEW = 0x28DD4, // SAME 0x28098 + EG_CB_COLOR6_INFO = 0x28DD8, // DIFF 0x280B8 + EG_CB_COLOR6_ATTRIB 
= 0x28DDC, // + EG_CB_COLOR6_DIM = 0x28DE0, // + EG_CB_COLOR6_CMASK = 0x28DE4, // + EG_CB_COLOR6_CMASK_SLICE = 0x28DE8, // + EG_CB_COLOR6_FMASK = 0x28DEC, // + EG_CB_COLOR6_FMASK_SLICE = 0x28DF0, // + EG_CB_COLOR6_CLEAR_WORD0 = 0x28DF4, // + EG_CB_COLOR6_CLEAR_WORD1 = 0x28DF8, // + EG_CB_COLOR6_CLEAR_WORD2 = 0x28DFC, // + EG_CB_COLOR6_CLEAR_WORD3 = 0x28E00, // + EG_CB_COLOR7_BASE = 0x28E04, // SAME 0x2805C + EG_CB_COLOR7_PITCH = 0x28E08, // + EG_CB_COLOR7_SLICE = 0x28E0C, // + EG_CB_COLOR7_VIEW = 0x28E10, // SAME 0x2809C + EG_CB_COLOR7_INFO = 0x28E14, // DIFF 0x280BC + EG_CB_COLOR7_ATTRIB = 0x28E18, // + EG_CB_COLOR7_DIM = 0x28E1C, // + EG_CB_COLOR7_CMASK = 0x28E20, // + EG_CB_COLOR7_CMASK_SLICE = 0x28E24, // + EG_CB_COLOR7_FMASK = 0x28E28, // + EG_CB_COLOR7_FMASK_SLICE = 0x28E2C, // + EG_CB_COLOR7_CLEAR_WORD0 = 0x28E30, // + EG_CB_COLOR7_CLEAR_WORD1 = 0x28E34, // + EG_CB_COLOR7_CLEAR_WORD2 = 0x28E38, // + EG_CB_COLOR7_CLEAR_WORD3 = 0x28E3C, // + EG_CB_COLOR8_BASE = 0x28E40, // + EG_CB_COLOR8_PITCH = 0x28E44, // + EG_CB_COLOR8_SLICE = 0x28E48, // + EG_CB_COLOR8_VIEW = 0x28E4C, // + EG_CB_COLOR8_INFO = 0x28E50, // + EG_CB_COLOR8_ATTRIB = 0x28E54, // + EG_CB_COLOR8_DIM = 0x28E58, // + EG_CB_COLOR9_BASE = 0x28E5C, // + EG_CB_COLOR9_PITCH = 0x28E60, // + EG_CB_COLOR9_SLICE = 0x28E64, // + EG_CB_COLOR9_VIEW = 0x28E68, // + EG_CB_COLOR9_INFO = 0x28E6C, // + EG_CB_COLOR9_ATTRIB = 0x28E70, // + EG_CB_COLOR9_DIM = 0x28E74, // + EG_CB_COLOR10_BASE = 0x28E78, // + EG_CB_COLOR10_PITCH = 0x28E7C, // + EG_CB_COLOR10_SLICE = 0x28E80, // + EG_CB_COLOR10_VIEW = 0x28E84, // + EG_CB_COLOR10_INFO = 0x28E88, // + EG_CB_COLOR10_ATTRIB = 0x28E8C, // + EG_CB_COLOR10_DIM = 0x28E90, // + EG_CB_COLOR11_BASE = 0x28E94, // + EG_CB_COLOR11_PITCH = 0x28E98, // + EG_CB_COLOR11_SLICE = 0x28E9C, // + EG_CB_COLOR11_VIEW = 0x28EA0, // + EG_CB_COLOR11_INFO = 0x28EA4, // + EG_CB_COLOR11_ATTRIB = 0x28EA8, // + EG_CB_COLOR11_DIM = 0x28EAC, // + +/* Registers from CP block: */ + EG_COHER_DEST_BASE_0 = 
0x28248, // SAME + EG_COHER_DEST_BASE_1 = 0x2824C, // SAME + EG_CP_PERFMON_CNTX_CNTL = 0x28358, // + +/* Config: */ + EG_SPI_CONFIG_CNTL = 0x9100, // DIFF + EG_SPI_CONFIG_CNTL_1 = 0x913C, // DIFF + EG_CP_PERFMON_CNTL = 0x87FC, // SAME + EG_SQ_MS_FIFO_SIZES = 0x8CF0, // SAME + EG_SQ_CONFIG = 0x8C00, // DIFF + EG_SQ_GPR_RESOURCE_MGMT_1 = 0x8C04, // SAME + EG_SQ_GPR_RESOURCE_MGMT_2 = 0x8C08, // SAME + EG_SQ_THREAD_RESOURCE_MGMT = 0x8C18, // SAME 0x8C0C, + EG_SQ_STACK_RESOURCE_MGMT_1 = 0x8C20, // SAME 0x8C10, + EG_SQ_STACK_RESOURCE_MGMT_2 = 0x8C24, // SAME 0x8C14, + EG_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ = 0x8D8C, // DIFF + EG_SQ_LDS_RESOURCE_MGMT = 0x8E2C, // + EG_SQ_GPR_RESOURCE_MGMT_3 = 0x8C0C, // + EG_SQ_STACK_RESOURCE_MGMT_3 = 0x8C28, // + EG_SQ_THREAD_RESOURCE_MGMT_2 = 0x8C1C, // + EG_VGT_CACHE_INVALIDATION = 0x88C4, // DIFF + EG_VGT_GS_VERTEX_REUSE = 0x88D4, // SAME + EG_PA_SC_FORCE_EOV_MAX_CNTS = 0x8B24, // SAME + EG_PA_SC_LINE_STIPPLE_STATE = 0x8B10, // SAME + EG_PA_CL_ENHANCE = 0x8A14, // SAME + +/* Tex border color */ + EG_TD_PS_BORDER_COLOR_RED = 0xA404, + EG_TD_PS_BORDER_COLOR_GREEN = 0xA408, + EG_TD_PS_BORDER_COLOR_BLUE = 0xA40C, + EG_TD_PS_BORDER_COLOR_ALPHA = 0xA410, + +/* const */ + EG_SQ_VTX_CONSTANT_WORD0_0 = 0x30000, // 0x38000 +}; + +#endif /* _EVERGREEN_OFF_H_ */
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/evergreen_oglprog.c b/src/mesa/drivers/dri/r600/evergreen_oglprog.c
new file mode 100644
index 00000000000..9fe523234cc
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/evergreen_oglprog.c
@@ -0,0 +1,233 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <[email protected]>, <[email protected]>
+ */
+
+#include <string.h>
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "program/program.h"
+
+#include "tnl/tnl.h"
+
+#include "r600_context.h"
+#include "r600_emit.h"
+
+#include "evergreen_oglprog.h"
+#include "evergreen_fragprog.h"
+#include "evergreen_vertprog.h"
+
+
+/*
+ * Walk the list at cache->progs and free every entry: shaderbo (and
+ * constbo0 when set) go to r600DeleteShader(), the assembler and shader
+ * state are cleaned up, the entry's mesa_program reference is cleared and
+ * the entry itself is freed.  cache->progs is not reset here; callers that
+ * keep the container must do that themselves.
+ *
+ * Takes the evergreen container type, which is what both callers pass.
+ */
+static void evergreen_freeVertProgCache(GLcontext *ctx, struct evergreen_vertex_program_cont *cache)
+{
+    struct evergreen_vertex_program *tmp, *vp = cache->progs;
+
+    while (vp) {
+        tmp = vp->next;
+        /* Release DMA region */
+        r600DeleteShader(ctx, vp->shaderbo);
+
+        if(NULL != vp->constbo0)
+        {
+            r600DeleteShader(ctx, vp->constbo0);
+        }
+
+        /* Clean up */
+        Clean_Up_Assembler(&(vp->r700AsmCode));
+        Clean_Up_Shader(&(vp->r700Shader));
+
+        _mesa_reference_vertprog(ctx, &vp->mesa_program, NULL);
+        free(vp);
+        vp = tmp;
+    }
+}
+
+/*
+ * NewProgram hook: allocate the driver wrapper for a vertex or fragment
+ * program and initialize the Mesa program embedded in it.
+ *
+ * Returns NULL for an unsupported target or when the allocation fails.
+ */
+static struct gl_program *evergreenNewProgram(GLcontext * ctx,
+                                              GLenum target,
+                                              GLuint id)
+{
+    struct gl_program *pProgram = NULL;
+
+    struct evergreen_vertex_program_cont *vpc;
+    struct evergreen_fragment_program *fp;
+
+    radeon_print(RADEON_SHADER, RADEON_VERBOSE,
+                 "%s %u, %u\n", __func__, target, id);
+
+    switch (target)
+    {
+    case GL_VERTEX_STATE_PROGRAM_NV:
+    case GL_VERTEX_PROGRAM_ARB:
+        vpc = CALLOC_STRUCT(evergreen_vertex_program_cont);
+        if (NULL == vpc)
+        {
+            /* Allocation failed: report it through the NULL return. */
+            break;
+        }
+        pProgram = _mesa_init_vertex_program(ctx,
+                                             &vpc->mesa_program,
+                                             target,
+                                             id);
+
+        break;
+    case GL_FRAGMENT_PROGRAM_NV:
+    case GL_FRAGMENT_PROGRAM_ARB:
+        fp = CALLOC_STRUCT(evergreen_fragment_program);
+        if (NULL == fp)
+        {
+            /* Allocation failed: the field stores below need a valid fp. */
+            break;
+        }
+        pProgram = _mesa_init_fragment_program(ctx,
+                                               &fp->mesa_program,
+                                               target,
+                                               id);
+        fp->translated = GL_FALSE;
+        fp->loaded = GL_FALSE;
+
+        fp->shaderbo = NULL;
+
+        fp->constbo0 = NULL;
+
+        break;
+    default:
+        _mesa_problem(ctx, "Bad target in evergreenNewProgram");
+    }
+
+    return pProgram;
+}
+
+/*
+ * DeleteProgram hook: release the driver-side state attached to prog for
+ * its target, then pass prog to _mesa_delete_program().
+ */
+static void evergreenDeleteProgram(GLcontext * ctx, struct gl_program *prog)
+{
+    struct evergreen_vertex_program_cont *vpc = (struct evergreen_vertex_program_cont *)prog;
+    struct evergreen_fragment_program * fp;
+
+    radeon_print(RADEON_SHADER, RADEON_VERBOSE,
+                 "%s %p\n", __func__, prog);
+
+    switch (prog->Target)
+    {
+    case GL_VERTEX_STATE_PROGRAM_NV:
+    case GL_VERTEX_PROGRAM_ARB:
+        evergreen_freeVertProgCache(ctx, vpc);
+        break;
+    case GL_FRAGMENT_PROGRAM_NV:
+    case GL_FRAGMENT_PROGRAM_ARB:
+        fp = (struct evergreen_fragment_program*)prog;
+        /* Release DMA region */
+
+        r600DeleteShader(ctx, fp->shaderbo);
+
+        if(NULL != fp->constbo0)
+        {
+            r600DeleteShader(ctx, fp->constbo0);
+        }
+
+        /* Clean up */
+        Clean_Up_Assembler(&(fp->r700AsmCode));
+        Clean_Up_Shader(&(fp->r700Shader));
+        break;
+    default:
+        _mesa_problem(ctx, "Bad target in evergreenDeleteProgram");
+    }
+
+    _mesa_delete_program(ctx, prog);
+}
+
+/*
+ * ProgramStringNotify hook: free the vertex program list, or the fragment
+ * program's shader objects and assembler state, and clear the fragment
+ * translated/loaded flags.
+ *
+ * Only the two ARB targets are handled; any other target is left alone.
+ * Always returns GL_TRUE.
+ */
+static GLboolean
+evergreenProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog)
+{
+    struct evergreen_vertex_program_cont *vpc = (struct evergreen_vertex_program_cont *)prog;
+    struct evergreen_fragment_program * fp = (struct evergreen_fragment_program*)prog;
+
+    switch (target) {
+    case GL_VERTEX_PROGRAM_ARB:
+        evergreen_freeVertProgCache(ctx, vpc);
+        vpc->progs = NULL;
+        break;
+    case GL_FRAGMENT_PROGRAM_ARB:
+        r600DeleteShader(ctx, fp->shaderbo);
+
+        if(NULL != fp->constbo0)
+        {
+            r600DeleteShader(ctx, fp->constbo0);
+            fp->constbo0 = NULL;
+        }
+
+        Clean_Up_Assembler(&(fp->r700AsmCode));
+        Clean_Up_Shader(&(fp->r700Shader));
+        fp->translated = GL_FALSE;
+        fp->loaded = GL_FALSE;
+        fp->shaderbo = NULL;
+        break;
+    default:
+        break;
+    }
+
+    /* XXX check if program is legal, within limits */
+    return GL_TRUE;
+}
+
+/* IsProgramNative hook: reports GL_TRUE for every program. */
+static GLboolean evergreenIsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog)
+{
+    return GL_TRUE;
+}
+
+/* Install the evergreen program hooks into the driver function table. */
+void evergreenInitShaderFuncs(struct dd_function_table *functions)
+{
+    functions->NewProgram = evergreenNewProgram;
+    functions->DeleteProgram = evergreenDeleteProgram;
+    functions->ProgramStringNotify = evergreenProgramStringNotify;
+    functions->IsProgramNative = evergreenIsProgramNative;
+}
diff --git a/src/mesa/drivers/dri/r600/evergreen_oglprog.h b/src/mesa/drivers/dri/r600/evergreen_oglprog.h
new file mode 100644
index 00000000000..1cf3e79d05c
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/evergreen_oglprog.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */ + +/* + * Authors: + * Richard Li <[email protected]>, <[email protected]> + */ + +#ifndef _EVERGREEN_OGLPROG_H_ +#define _EVERGREEN_OGLPROG_H_ +#include "r600_context.h" + +extern void evergreenInitShaderFuncs(struct dd_function_table *functions); + +#endif /*_EVERGREEN_OGLPROG_H_*/ diff --git a/src/mesa/drivers/dri/r600/evergreen_render.c b/src/mesa/drivers/dri/r600/evergreen_render.c new file mode 100644 index 00000000000..85b2f9d6ab7 --- /dev/null +++ b/src/mesa/drivers/dri/r600/evergreen_render.c @@ -0,0 +1,937 @@ +/* + * Copyright (C) 2008-2010 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +/* + * Authors: + * Richard Li <[email protected]>, <[email protected]> + */ + +#include "main/glheader.h" +#include "main/state.h" +#include "main/imports.h" +#include "main/enums.h" +#include "main/macros.h" +#include "main/context.h" +#include "main/dd.h" +#include "main/simple_list.h" +#include "main/api_arrayelt.h" +#include "swrast/swrast.h" +#include "swrast_setup/swrast_setup.h" +#include "vbo/vbo.h" + +#include "tnl/tnl.h" +#include "tnl/t_vp_build.h" +#include "tnl/t_context.h" +#include "tnl/t_vertex.h" +#include "vbo/vbo_context.h" + +#include "r600_context.h" +#include "r600_cmdbuf.h" + +#include "evergreen_fragprog.h" +#include "evergreen_vertprog.h" + +#include "evergreen_state.h" +#include "evergreen_tex.h" + +#include "radeon_buffer_objects.h" +#include "radeon_common_context.h" + +static unsigned int evergreenPrimitiveType(int prim) //same +{ + switch (prim & PRIM_MODE_MASK) + { + case GL_POINTS: + return DI_PT_POINTLIST; + break; + case GL_LINES: + return DI_PT_LINELIST; + break; + case GL_LINE_STRIP: + return DI_PT_LINESTRIP; + break; + case GL_LINE_LOOP: + return DI_PT_LINELOOP; + break; + case GL_TRIANGLES: + return DI_PT_TRILIST; + break; + case GL_TRIANGLE_STRIP: + return DI_PT_TRISTRIP; + break; + case GL_TRIANGLE_FAN: + return DI_PT_TRIFAN; + break; + case GL_QUADS: + return DI_PT_QUADLIST; + break; + case GL_QUAD_STRIP: + return DI_PT_QUADSTRIP; + break; + case GL_POLYGON: + return DI_PT_POLYGON; + break; + default: + assert(0); + return -1; + break; + } +} + +static int evergreenNumVerts(int num_verts, int prim) //same +{ + int verts_off = 0; + + switch (prim & PRIM_MODE_MASK) { + case GL_POINTS: + verts_off = 0; + break; + case GL_LINES: + verts_off = num_verts % 2; + break; + case GL_LINE_STRIP: + if (num_verts < 2) + verts_off = num_verts; + break; + case GL_LINE_LOOP: + if (num_verts < 2) + verts_off = num_verts; + break; + case GL_TRIANGLES: + verts_off = num_verts % 3; + break; + case GL_TRIANGLE_STRIP: + if (num_verts < 3) 
+ verts_off = num_verts; + break; + case GL_TRIANGLE_FAN: + if (num_verts < 3) + verts_off = num_verts; + break; + case GL_QUADS: + verts_off = num_verts % 4; + break; + case GL_QUAD_STRIP: + if (num_verts < 4) + verts_off = num_verts; + else + verts_off = num_verts % 2; + break; + case GL_POLYGON: + if (num_verts < 3) + verts_off = num_verts; + break; + default: + assert(0); + return -1; + break; + } + + return num_verts - verts_off; +} + +static void evergreenRunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) //same +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + BATCH_LOCALS(&context->radeon); + int type, total_emit; + int num_indices; + uint32_t vgt_draw_initiator = 0; + uint32_t vgt_index_type = 0; + uint32_t vgt_primitive_type = 0; + uint32_t vgt_num_indices = 0; + + type = evergreenPrimitiveType(prim); + num_indices = evergreenNumVerts(end - start, prim); + + radeon_print(RADEON_RENDER, RADEON_TRACE, + "%s type %x num_indices %d\n", + __func__, type, num_indices); + + if (type < 0 || num_indices <= 0) + return; + + SETfield(vgt_primitive_type, type, + VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask); + + SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); + + if(GL_TRUE != context->ind_buf.is_32bit) + { + SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); + } + + vgt_num_indices = num_indices; + SETfield(vgt_draw_initiator, DI_SRC_SEL_DMA, SOURCE_SELECT_shift, SOURCE_SELECT_mask); + SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask); + + total_emit = 3 /* VGT_PRIMITIVE_TYPE */ + + 2 /* VGT_INDEX_TYPE */ + + 2 /* NUM_INSTANCES */ + + 5 + 2; /* DRAW_INDEX */ + + BEGIN_BATCH_NO_AUTOSTATE(total_emit); + // prim + R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1); + R600_OUT_BATCH(vgt_primitive_type); + // index type + R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); + R600_OUT_BATCH(vgt_index_type); + // num instances + 
R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); + R600_OUT_BATCH(1); + // draw packet + R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX, 3)); + R600_OUT_BATCH(context->ind_buf.bo_offset); + R600_OUT_BATCH(0); + R600_OUT_BATCH(vgt_num_indices); + R600_OUT_BATCH(vgt_draw_initiator); + R600_OUT_BATCH_RELOC(context->ind_buf.bo_offset, + context->ind_buf.bo, + context->ind_buf.bo_offset, + RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); + COMMIT_BATCH(); +} + +static void evergreenRunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end, int prim) //same +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + BATCH_LOCALS(&context->radeon); + int type, i; + uint32_t num_indices, total_emit = 0; + uint32_t vgt_draw_initiator = 0; + uint32_t vgt_index_type = 0; + uint32_t vgt_primitive_type = 0; + uint32_t vgt_num_indices = 0; + + type = evergreenPrimitiveType(prim); + num_indices = evergreenNumVerts(end - start, prim); + + radeon_print(RADEON_RENDER, RADEON_TRACE, + "%s type %x num_indices %d\n", + __func__, type, num_indices); + + if (type < 0 || num_indices <= 0) + return; + + SETfield(vgt_primitive_type, type, + VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask); + + if (num_indices > 0xffff) + { + SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); + } + else + { + SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); + } + + vgt_num_indices = num_indices; + SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask); + + if (start == 0) + { + SETfield(vgt_draw_initiator, DI_SRC_SEL_AUTO_INDEX, SOURCE_SELECT_shift, SOURCE_SELECT_mask); + } + else + { + if (num_indices > 0xffff) + { + total_emit += num_indices; + } + else + { + total_emit += (num_indices + 1) / 2; + } + SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask); + } + + total_emit += 3 /* VGT_PRIMITIVE_TYPE */ + + 2 /* VGT_INDEX_TYPE */ + + 2 /* NUM_INSTANCES */ + 
+ 3; /* DRAW */ + + BEGIN_BATCH_NO_AUTOSTATE(total_emit); + // prim + R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1); + R600_OUT_BATCH(vgt_primitive_type); + // index type + R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); + R600_OUT_BATCH(vgt_index_type); + // num instances + R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); + R600_OUT_BATCH(1); + // draw packet + if(start == 0) + { + R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1)); + R600_OUT_BATCH(vgt_num_indices); + R600_OUT_BATCH(vgt_draw_initiator); + } + else + { + if (num_indices > 0xffff) + { + R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1))); + R600_OUT_BATCH(vgt_num_indices); + R600_OUT_BATCH(vgt_draw_initiator); + for (i = start; i < (start + num_indices); i++) + { + R600_OUT_BATCH(i); + } + } + else + { + R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (((num_indices + 1) / 2) + 1))); + R600_OUT_BATCH(vgt_num_indices); + R600_OUT_BATCH(vgt_draw_initiator); + for (i = start; i < (start + num_indices); i += 2) + { + if ((i + 1) == (start + num_indices)) + { + R600_OUT_BATCH(i); + } + else + { + R600_OUT_BATCH(((i + 1) << 16) | (i)); + } + } + } + } + + END_BATCH(); + COMMIT_BATCH(); +} + +#define CONVERT( TYPE, MACRO ) do { \ + GLuint i, j, sz; \ + sz = input->Size; \ + if (input->Normalized) { \ + for (i = 0; i < count; i++) { \ + const TYPE *in = (TYPE *)src_ptr; \ + for (j = 0; j < sz; j++) { \ + *dst_ptr++ = MACRO(*in); \ + in++; \ + } \ + src_ptr += stride; \ + } \ + } else { \ + for (i = 0; i < count; i++) { \ + const TYPE *in = (TYPE *)src_ptr; \ + for (j = 0; j < sz; j++) { \ + *dst_ptr++ = (GLfloat)(*in); \ + in++; \ + } \ + src_ptr += stride; \ + } \ + } \ +} while (0) + +/** + * Convert attribute data type to float + * If the attribute uses named buffer object replace the bo with newly allocated bo + */ +static void evergreenConvertAttrib(GLcontext *ctx, int count, + const struct gl_client_array *input, + struct StreamDesc *attr) +{ + context_t *context = 
R700_CONTEXT(ctx); + const GLvoid *src_ptr; + GLboolean mapped_named_bo = GL_FALSE; + GLfloat *dst_ptr; + GLuint stride; + + stride = (input->StrideB == 0) ? evergreen_getTypeSize(input->Type) * input->Size : input->StrideB; + + /* Convert value for first element only */ + if (input->StrideB == 0) + { + count = 1; + } + + if (input->BufferObj->Name) + { + if (!input->BufferObj->Pointer) + { + ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + mapped_named_bo = GL_TRUE; + } + + src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr); + } + else + { + src_ptr = input->Ptr; + } + + radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, + sizeof(GLfloat) * input->Size * count, 32); + + radeon_bo_map(attr->bo, 1); + + dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset); + + assert(src_ptr != NULL); + + switch (input->Type) + { + case GL_DOUBLE: + CONVERT(GLdouble, (GLfloat)); + break; + case GL_UNSIGNED_INT: + CONVERT(GLuint, UINT_TO_FLOAT); + break; + case GL_INT: + CONVERT(GLint, INT_TO_FLOAT); + break; + case GL_UNSIGNED_SHORT: + CONVERT(GLushort, USHORT_TO_FLOAT); + break; + case GL_SHORT: + CONVERT(GLshort, SHORT_TO_FLOAT); + break; + case GL_UNSIGNED_BYTE: + assert(input->Format != GL_BGRA); + CONVERT(GLubyte, UBYTE_TO_FLOAT); + break; + case GL_BYTE: + CONVERT(GLbyte, BYTE_TO_FLOAT); + break; + default: + assert(0); + break; + } + + radeon_bo_unmap(attr->bo); + + if (mapped_named_bo) + { + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + } +} + +static void evergreenFixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + GLvoid *src_ptr; + GLuint *out; + int i; + GLboolean mapped_named_bo = GL_FALSE; + + if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) + { + ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + mapped_named_bo = GL_TRUE; + 
assert(mesa_ind_buf->obj->Pointer != NULL); + } + src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); + + if (mesa_ind_buf->type == GL_UNSIGNED_BYTE) + { + GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); + GLubyte *in = (GLubyte *)src_ptr; + + radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, + &context->ind_buf.bo_offset, size, 4); + + radeon_bo_map(context->ind_buf.bo, 1); + assert(context->ind_buf.bo->ptr != NULL); + out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); + + for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) + { + *out++ = in[i] | in[i + 1] << 16; + } + + if (i < mesa_ind_buf->count) + { + *out++ = in[i]; + } + + radeon_bo_unmap(context->ind_buf.bo); +#if MESA_BIG_ENDIAN + } + else + { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */ + GLushort *in = (GLushort *)src_ptr; + GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); + + radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, + &context->ind_buf.bo_offset, size, 4); + + radeon_bo_map(context->ind_buf.bo, 1); + assert(context->ind_buf.bo->ptr != NULL); + out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); + + for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) + { + *out++ = in[i] | in[i + 1] << 16; + } + + if (i < mesa_ind_buf->count) + { + *out++ = in[i]; + } + radeon_bo_unmap(context->ind_buf.bo); +#endif + } + + context->ind_buf.is_32bit = GL_FALSE; + context->ind_buf.count = mesa_ind_buf->count; + + if (mapped_named_bo) + { + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + } +} + +static GLboolean evergreen_check_fallbacks(GLcontext *ctx) //same +{ + if (ctx->RenderMode != GL_RENDER) + return GL_TRUE; + + return GL_FALSE; +} + +/* start 3d, idle, cb/db flush */ +#define PRE_EMIT_STATE_BUFSZ 5 + 5 + 14 + +static GLuint evergreenPredictRenderSize(GLcontext* ctx, + const struct _mesa_prim *prim, + const struct _mesa_index_buffer *ib, + 
GLuint nr_prims) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + GLboolean flushed; + GLuint dwords, i; + GLuint state_size; + + dwords = PRE_EMIT_STATE_BUFSZ; + if (ib) + dwords += nr_prims * 14; + else { + for (i = 0; i < nr_prims; ++i) + { + if (prim[i].start == 0) + dwords += 10; + else if (prim[i].count > 0xffff) + dwords += prim[i].count + 10; + else + dwords += ((prim[i].count + 1) / 2) + 10; + } + } + + state_size = radeonCountStateEmitSize(&context->radeon); + flushed = rcommonEnsureCmdBufSpace(&context->radeon, + dwords + state_size, + __FUNCTION__); + if (flushed) + dwords += radeonCountStateEmitSize(&context->radeon); + else + dwords += state_size; + + radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s: total prediction size is %d.\n", __FUNCTION__, dwords); + return dwords; + +} + +static void evergreenSetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + + if (!mesa_ind_buf) { + context->ind_buf.bo = NULL; + return; + } + +#if MESA_BIG_ENDIAN + if (mesa_ind_buf->type == GL_UNSIGNED_INT) +#else + if (mesa_ind_buf->type != GL_UNSIGNED_BYTE) +#endif + { + const GLvoid *src_ptr; + GLvoid *dst_ptr; + GLboolean mapped_named_bo = GL_FALSE; + + if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) + { + ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + assert(mesa_ind_buf->obj->Pointer != NULL); + mapped_named_bo = GL_TRUE; + } + + src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); + + const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type); + + radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, + &context->ind_buf.bo_offset, size, 4); + radeon_bo_map(context->ind_buf.bo, 1); + assert(context->ind_buf.bo->ptr != NULL); + dst_ptr = ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); + + memcpy(dst_ptr, src_ptr, size); + + radeon_bo_unmap(context->ind_buf.bo); + 
context->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT); + context->ind_buf.count = mesa_ind_buf->count; + + if (mapped_named_bo) + { + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + } + } + else + { + evergreenFixupIndexBuffer(ctx, mesa_ind_buf); + } +} + +static void evergreenAlignDataToDword(GLcontext *ctx, + const struct gl_client_array *input, + int count, + struct StreamDesc *attr) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + const int dst_stride = (input->StrideB + 3) & ~3; + const int size = getTypeSize(input->Type) * input->Size * count; + GLboolean mapped_named_bo = GL_FALSE; + + radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, size, 32); + + radeon_bo_map(attr->bo, 1); + + if (!input->BufferObj->Pointer) + { + ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + mapped_named_bo = GL_TRUE; + } + + { + GLvoid *src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr); + GLvoid *dst_ptr = ADD_POINTERS(attr->bo->ptr, attr->bo_offset); + int i; + + for (i = 0; i < count; ++i) + { + memcpy(dst_ptr, src_ptr, input->StrideB); + src_ptr += input->StrideB; + dst_ptr += dst_stride; + } + } + + radeon_bo_unmap(attr->bo); + if (mapped_named_bo) + { + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + } + + attr->stride = dst_stride; +} + +static void evergreenSetupStreams(GLcontext *ctx, const struct gl_client_array *input[], int count) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + GLuint stride; + int ret; + int i, index; + + EVERGREEN_STATECHANGE(context, vtx); + + for(index = 0; index < context->nNumActiveAos; index++) + { + struct radeon_aos *aos = &context->radeon.tcl.aos[index]; + i = context->stream_desc[index].element; + + stride = (input[i]->StrideB == 0) ? 
getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB; + + if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT || +#if MESA_BIG_ENDIAN + getTypeSize(input[i]->Type) != 4 || +#endif + stride < 4) + { + evergreenConvertAttrib(ctx, count, input[i], &context->stream_desc[index]); + } + else + { + if (input[i]->BufferObj->Name) + { + if (stride % 4 != 0) + { + assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0); + evergreenAlignDataToDword(ctx, input[i], count, &context->stream_desc[index]); + context->stream_desc[index].is_named_bo = GL_FALSE; + } + else + { + context->stream_desc[index].stride = input[i]->StrideB; + context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr; + context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo; + context->stream_desc[index].is_named_bo = GL_TRUE; + } + } + else + { + int size; + int local_count = count; + uint32_t *dst; + + if (input[i]->StrideB == 0) + { + size = getTypeSize(input[i]->Type) * input[i]->Size; + local_count = 1; + } + else + { + size = getTypeSize(input[i]->Type) * input[i]->Size * local_count; + } + + radeonAllocDmaRegion(&context->radeon, &context->stream_desc[index].bo, + &context->stream_desc[index].bo_offset, size, 32); + + radeon_bo_map(context->stream_desc[index].bo, 1); + assert(context->stream_desc[index].bo->ptr != NULL); + + + dst = (uint32_t *)ADD_POINTERS(context->stream_desc[index].bo->ptr, + context->stream_desc[index].bo_offset); + + switch (context->stream_desc[index].dwords) + { + case 1: + radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count); + break; + case 2: + radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count); + break; + case 3: + radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count); + break; + case 4: + radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count); + break; + default: + assert(0); + break; + } + + 
radeon_bo_unmap(context->stream_desc[index].bo); + } + } + + aos->count = context->stream_desc[index].stride == 0 ? 1 : count; + aos->stride = context->stream_desc[index].stride / sizeof(float); + aos->components = context->stream_desc[index].dwords; + aos->bo = context->stream_desc[index].bo; + aos->offset = context->stream_desc[index].bo_offset; + + if(context->stream_desc[index].is_named_bo) + { + radeon_cs_space_add_persistent_bo(context->radeon.cmdbuf.cs, + context->stream_desc[index].bo, + RADEON_GEM_DOMAIN_GTT, 0); + } + } + + ret = radeon_cs_space_check_with_bo(context->radeon.cmdbuf.cs, + first_elem(&context->radeon.dma.reserved)->bo, + RADEON_GEM_DOMAIN_GTT, 0); +} + +static void evergreenFreeData(GLcontext *ctx) +{ + /* Need to zero tcl.aos[n].bo and tcl.elt_dma_bo + * to prevent double unref in radeonReleaseArrays + * called during context destroy + */ + context_t *context = EVERGREEN_CONTEXT(ctx); + + int i; + + for (i = 0; i < context->nNumActiveAos; i++) + { + if (!context->stream_desc[i].is_named_bo) + { + radeon_bo_unref(context->stream_desc[i].bo); + } + context->radeon.tcl.aos[i].bo = NULL; + } + + if(context->vp_Constbo != NULL) + { + radeon_bo_unref(context->vp_Constbo); + context->vp_Constbo = NULL; + } + if(context->fp_Constbo != NULL) + { + radeon_bo_unref(context->fp_Constbo); + context->fp_Constbo = NULL; + } + + if (context->ind_buf.bo != NULL) + { + radeon_bo_unref(context->ind_buf.bo); + } +} + +static GLboolean evergreenTryDrawPrims(GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLuint min_index, + GLuint max_index ) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + radeonContextPtr radeon = &context->radeon; + GLuint i, id = 0; + struct radeon_renderbuffer *rrb; + + if (ctx->NewState) + _mesa_update_state( ctx ); + + if (evergreen_check_fallbacks(ctx)) + return GL_FALSE; + + _tnl_UpdateFixedFunctionProgram(ctx); + 
evergreenSetVertexFormat(ctx, arrays, max_index + 1); + + + /* shaders need to be updated before buffers are validated */ + evergreenUpdateShaders(ctx); + if (!evergreenValidateBuffers(ctx)) + return GL_FALSE; + + /* always emit CB base to prevent + * lock ups on some chips. + */ + EVERGREEN_STATECHANGE(context, cb); + /* mark vtx as dirty since it changes per-draw */ + EVERGREEN_STATECHANGE(context, vtx); + + evergreenSetScissor(context); + + evergreenSetupVertexProgram(ctx); + evergreenSetupFragmentProgram(ctx); + evergreenUpdateShaderStates(ctx); + + GLuint emit_end = evergreenPredictRenderSize(ctx, prim, ib, nr_prims) + + context->radeon.cmdbuf.cs->cdw; + + /* evergreenPredictRenderSize will call radeonReleaseDmaRegions, so update VP/FP const buf after it. */ + evergreenSetupVPconstants(ctx); + evergreenSetupFPconstants(ctx); + + evergreenSetupIndexBuffer(ctx, ib); + + evergreenSetupStreams(ctx, arrays, max_index + 1); + + radeonEmitState(radeon); + + radeon_debug_add_indent(); + + for (i = 0; i < nr_prims; ++i) + { + if (context->ind_buf.bo) + evergreenRunRenderPrimitive(ctx, + prim[i].start, + prim[i].start + prim[i].count, + prim[i].mode); + else + evergreenRunRenderPrimitiveImmediate(ctx, + prim[i].start, + prim[i].start + prim[i].count, + prim[i].mode); + } + + radeon_debug_remove_indent(); + + /* Flush render op cached for last several quads. */ + /* XXX drm should handle this in fence submit */ + + //evergreeWaitForIdleClean(context); + + rrb = radeon_get_colorbuffer(&context->radeon); + if (rrb && rrb->bo) + r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM, + CB_ACTION_ENA_bit | (1 << (id + 6))); + + rrb = radeon_get_depthbuffer(&context->radeon); + if (rrb && rrb->bo) + r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM, + DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit); + + evergreenFreeData(ctx); + + if (emit_end < context->radeon.cmdbuf.cs->cdw) + { + WARN_ONCE("Rendering was %d commands larger than predicted size." 
+ " We might overflow command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end); + } + + return GL_TRUE; +} + +static void evergreenDrawPrims(GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index) +{ + GLboolean retval = GL_FALSE; + + /* This check should get folded into just the places that + * min/max index are really needed. + */ + if (!index_bounds_valid) { + vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + } + + if (min_index) { + vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, evergreenDrawPrims ); + return; + } + + /* Make an attempt at drawing */ + retval = evergreenTryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); + + /* If failed run tnl pipeline - it should take care of fallbacks */ + if (!retval) { + _swsetup_Wakeup(ctx); + _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); + } +} + +void evergreenInitDraw(GLcontext *ctx) +{ + struct vbo_context *vbo = vbo_context(ctx); + + /* to be enabled */ + vbo->draw_prims = evergreenDrawPrims; +} + + diff --git a/src/mesa/drivers/dri/r600/evergreen_sq.h b/src/mesa/drivers/dri/r600/evergreen_sq.h new file mode 100644 index 00000000000..b1a536e76f6 --- /dev/null +++ b/src/mesa/drivers/dri/r600/evergreen_sq.h @@ -0,0 +1,735 @@ +/* + * Copyright (C) 2008-2010 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +/* + * Authors: + * Richard Li <[email protected]>, <[email protected]> + */ + +#ifndef _EVERGREEN_SQ_H_ +#define _EVERGREEN_SQ_H_ + +enum{ +//CF + EG_CF_WORD0__ADDR_shift = 0, + EG_CF_WORD0__ADDR_mask = 0xFFFFFF, + EG_CF_WORD0__JUMPTABLE_SEL_shift = 24, + EG_CF_WORD0__JUMPTABLE_SEL_mask = 0x7 << 24, + + EG_CF_WORD1__POP_COUNT_shift = 0, //3 bits + EG_CF_WORD1__POP_COUNT_mask = 0x7, + EG_CF_WORD1__CF_CONST_shift = 3, //5 bits + EG_CF_WORD1__CF_CONST_mask = 0x1F << 3, + EG_CF_WORD1__COND_shift = 8, //2 bits + EG_CF_WORD1__COND_mask = 0x3 << 8, + EG_CF_WORD1__COUNT_shift = 10,//6 bits + EG_CF_WORD1__COUNT_mask = 0x3F << 10, + EG_CF_WORD1__reserved_shift = 16,//4 bits + EG_CF_WORD1__VPM_shift = 20,//1 bit + EG_CF_WORD1__VPM_bit = 1 << 20, + EG_CF_WORD1__EOP_shift = 21,//1 bit + EG_CF_WORD1__EOP_bit = 1 << 21, + EG_CF_WORD1__CF_INST_shift = 22,//8 bits + EG_CF_WORD1__CF_INST_mask = 0xFF << 22, + EG_CF_WORD1__WQM_shift = 30,//1 bit + EG_CF_WORD1__WQM_bit = 1 << 30, + EG_CF_WORD1__BARRIER_shift = 31,//1 bit + EG_CF_WORD1__BARRIER_bit = 1 << 31, + + EG_CF_INST_NOP = 0, + EG_CF_INST_TC = 1, + EG_CF_INST_VC = 2, + EG_CF_INST_GDS = 3, + EG_CF_INST_LOOP_START = 4, + EG_CF_INST_LOOP_END = 5, + EG_CF_INST_LOOP_START_DX10 = 6, + EG_CF_INST_LOOP_START_NO_AL = 7, + EG_CF_INST_LOOP_CONTINUE = 8, + EG_CF_INST_LOOP_BREAK = 9, + EG_CF_INST_JUMP = 10, + EG_CF_INST_PUSH = 11, + EG_CF_INST_Reserved_12 = 12, + EG_CF_INST_ELSE = 13, + EG_CF_INST_POP = 14, + EG_CF_INST_Reserved_15 = 15, + EG_CF_INST_Reserved_16 = 16, + EG_CF_INST_Reserved_17 = 17, + EG_CF_INST_CALL = 18, + EG_CF_INST_CALL_FS = 19, + EG_CF_INST_RETURN = 20, + EG_CF_INST_EMIT_VERTEX = 21, + EG_CF_INST_EMIT_CUT_VERTEX = 22, + EG_CF_INST_CUT_VERTEX = 23, + EG_CF_INST_KILL = 24, + EG_CF_INST_Reserved_25 = 25, + EG_CF_INST_WAIT_ACK = 26, + EG_CF_INST_TC_ACK = 27, + EG_CF_INST_VC_ACK = 28, + EG_CF_INST_JUMPTABLE = 29, + EG_CF_INST_GLOBAL_WAVE_SYNC = 30, + EG_CF_INST_HALT = 31, + +//TEX + EG_TEX_WORD0__TEX_INST_shift = 0, 
//5 bits + EG_TEX_WORD0__TEX_INST_mask = 0x1F, + EG_TEX_WORD0__INST_MOD_shift = 5, //2 bits + EG_TEX_WORD0__INST_MOD_mask = 0x3 << 5, + EG_TEX_WORD0__FWQ_shift = 7, //1 bit + EG_TEX_WORD0__FWQ_bit = 1 << 7, + EG_TEX_WORD0__RESOURCE_ID_shift = 8, //8 bits + EG_TEX_WORD0__RESOURCE_ID_mask = 0xFF << 8, + EG_TEX_WORD0__SRC_GPR_shift = 16,//7 bits + EG_TEX_WORD0__SRC_GPR_mask = 0x7F << 16, + EG_TEX_WORD0__SRC_REL_shift = 23,//1 bit + EG_TEX_WORD0__SRC_REL_bit = 1 << 23, + EG_TEX_WORD0__ALT_CONST_shift = 24,//1 bit + EG_TEX_WORD0__ALT_CONST_bit = 1 << 24, + EG_TEX_WORD0__RIM_shift = 25,//2 bits + EG_TEX_WORD0__RIM_mask = 0x3 << 25, + EG_TEX_WORD0__SIM_shift = 27,//2 bits + EG_TEX_WORD0__SIM_mask = 0x3 << 27, + EG_TEX_WORD0__Reserved_shift = 29,//3 bits + EG_TEX_WORD0__Reserved_mask = 0x7 << 29, + + EG_TEX_INST_Reserved_0 = 0, + EG_TEX_INST_Reserved_1 = 1, + EG_TEX_INST_Reserved_2 = 2, + EG_TEX_INST_LD = 3, + EG_TEX_INST_GET_TEXTURE_RESINFO = 4, + EG_TEX_INST_GET_NUMBER_OF_SAMPLES= 5, + EG_TEX_INST_GET_COMP_TEX_LOD = 6, + EG_TEX_INST_GET_GRADIENTS_H = 7, + EG_TEX_INST_GET_GRADIENTS_V = 8, + EG_TEX_INST_SET_TEXTURE_OFFSETS = 9, + EG_TEX_INST_KEEP_GRADIENTS = 10, + EG_TEX_INST_SET_GRADIENTS_H = 11, + EG_TEX_INST_SET_GRADIENTS_V = 12, + EG_TEX_INST_Reserved_13 = 13, + EG_TEX_INST_Reserved_14 = 14, + EG_TEX_INST_Reserved_15 = 15, + EG_TEX_INST_SAMPLE = 16, + EG_TEX_INST_SAMPLE_L = 17, + EG_TEX_INST_SAMPLE_LB = 18, + EG_TEX_INST_SAMPLE_LZ = 19, + EG_TEX_INST_SAMPLE_G = 20, + EG_TEX_INST_GATHER4 = 21, + EG_TEX_INST_SAMPLE_G_LB = 22, + EG_TEX_INST_GATHER4_O = 23, + EG_TEX_INST_SAMPLE_C = 24, + EG_TEX_INST_SAMPLE_C_L = 25, + EG_TEX_INST_SAMPLE_C_LB = 26, + EG_TEX_INST_SAMPLE_C_LZ = 27, + EG_TEX_INST_SAMPLE_C_G = 28, + EG_TEX_INST_GATHER4_C = 29, + EG_TEX_INST_SAMPLE_C_G_LB = 30, + EG_TEX_INST_GATHER4_C_O = 31, + + EG_TEX_WORD1__DST_GPR_shift = 0, //7 bits + EG_TEX_WORD1__DST_GPR_mask = 0x7F, + EG_TEX_WORD1__DST_REL_shift = 7, //1 bit + EG_TEX_WORD1__DST_REL_bit = 1 << 7, + 
EG_TEX_WORD1__Reserved_shift = 8, //1 bit + EG_TEX_WORD1__Reserved_bit = 1 << 8, + EG_TEX_WORD1__DST_SEL_X_shift = 9, //3 bits + EG_TEX_WORD1__DST_SEL_X_mask = 0x7 << 9, + EG_TEX_WORD1__DST_SEL_Y_shift = 12,//3 bits + EG_TEX_WORD1__DST_SEL_Y_mask = 0x7 << 12, + EG_TEX_WORD1__DST_SEL_Z_shift = 15,//3 bits + EG_TEX_WORD1__DST_SEL_Z_mask = 0x7 << 15, + EG_TEX_WORD1__DST_SEL_W_shift = 18,//3 bits + EG_TEX_WORD1__DST_SEL_W_mask = 0x7 << 18, + EG_TEX_WORD1__LOD_BIAS_shift = 21,//7 bits + EG_TEX_WORD1__LOD_BIAS_mask = 0x7F << 21, + EG_TEX_WORD1__COORD_TYPE_X_shift = 28,//1 bit + EG_TEX_WORD1__COORD_TYPE_X_bit = 1 << 28, + EG_TEX_WORD1__COORD_TYPE_Y_shift = 29,//1 bit + EG_TEX_WORD1__COORD_TYPE_Y_bit = 1 << 29, + EG_TEX_WORD1__COORD_TYPE_Z_shift = 30,//1 bit + EG_TEX_WORD1__COORD_TYPE_Z_bit = 1 << 30, + EG_TEX_WORD1__COORD_TYPE_W_shift = 31,//1 bit + EG_TEX_WORD1__COORD_TYPE_W_bit = 1 << 31, + + EG_TEX_WORD2__OFFSET_X_shift = 0, //5 bits + EG_TEX_WORD2__OFFSET_X_mask = 0x1F, + EG_TEX_WORD2__OFFSET_Y_shift = 5, //5 bits + EG_TEX_WORD2__OFFSET_Y_mask = 0x1F << 5, + EG_TEX_WORD2__OFFSET_Z_shift = 10,//5 bits + EG_TEX_WORD2__OFFSET_Z_mask = 0x1F << 10, + EG_TEX_WORD2__SAMPLER_ID_shift = 15,//5 bits + EG_TEX_WORD2__SAMPLER_ID_mask = 0x1F << 15, + EG_TEX_WORD2__SRC_SEL_X_shift = 20,//3 bits + EG_TEX_WORD2__SRC_SEL_X_mask = 0x7 << 20, + EG_TEX_WORD2__SRC_SEL_Y_shift = 23,//3 bits + EG_TEX_WORD2__SRC_SEL_Y_mask = 0x7 << 23, + EG_TEX_WORD2__SRC_SEL_Z_shift = 26,//3 bits + EG_TEX_WORD2__SRC_SEL_Z_mask = 0x7 << 26, + EG_TEX_WORD2__SRC_SEL_W_shift = 29,//3 bits + EG_TEX_WORD2__SRC_SEL_W_mask = 0x7 << 29, + +//VTX + EG_VTX_WORD0__VC_INST_shift = 0, //5 bits + EG_VTX_WORD0__VC_INST_mask = 0x1F, + EG_VTX_WORD0__FETCH_TYPE_shift = 5, //2 bits + EG_VTX_WORD0__FETCH_TYPE_mask = 0x3 << 5, + EG_VTX_WORD0__FWQ_shift = 7, //1 bit + EG_VTX_WORD0__FWQ_bit = 1 << 7, + EG_VTX_WORD0__BUFFER_ID_shift = 8, //8 bits + EG_VTX_WORD0__BUFFER_ID_mask = 0xFF << 8, + EG_VTX_WORD0__SRC_GPR_shift = 16,//7 bits 
+ EG_VTX_WORD0__SRC_GPR_mask = 0x7F << 16, + EG_VTX_WORD0__SRC_REL_shift = 23,//1 bit + EG_VTX_WORD0__SRC_REL_bit = 1 << 23, + EG_VTX_WORD0__SRC_SEL_X_shift = 24,//2 bits + EG_VTX_WORD0__SRC_SEL_X_mask = 0x3 << 24, + EG_VTX_WORD0__MFC_shift = 26,//6 bits + EG_VTX_WORD0__MFC_mask = 0x3F << 26, + + EG_VC_INST_FETCH = 0, + EG_VC_INST_SEMANTIC = 1, + EG_VC_INST_Reserved_2 = 2, + EG_VC_INST_Reserved_3 = 3, + EG_VC_INST_Reserved_4 = 4, + EG_VC_INST_Reserved_5 = 5, + EG_VC_INST_Reserved_6 = 6, + EG_VC_INST_Reserved_7 = 7, + EG_VC_INST_Reserved_8 = 8, + EG_VC_INST_Reserved_9 = 9, + EG_VC_INST_Reserved_10 = 10, + EG_VC_INST_Reserved_11 = 11, + EG_VC_INST_Reserved_12 = 12, + EG_VC_INST_Reserved_13 = 13, + EG_VC_INST_GET_BUFFER_RESINFO = 14, + + EG_VTX_FETCH_VERTEX_DATA = 0, + EG_VTX_FETCH_INSTANCE_DATA = 1, + EG_VTX_FETCH_NO_INDEX_OFFSET = 2, + + EG_VTX_WORD1_SEM__SEMANTIC_ID_shift = 0, //8 bits + EG_VTX_WORD1_SEM__SEMANTIC_ID_mask = 0xFF, + EG_VTX_WORD1_GPR__DST_GPR_shift = 0, //7 bits + EG_VTX_WORD1_GPR__DST_GPR_mask = 0x7F, + EG_VTX_WORD1_GPR__DST_REL_shift = 7, //1 bit + EG_VTX_WORD1_GPR__DST_REL_bit = 1 << 7, + EG_VTX_WORD1__Reserved_shift = 8, //1 bit + EG_VTX_WORD1__Reserved_bit = 1 << 8, + EG_VTX_WORD1__DST_SEL_X_shift = 9, //3 bits + EG_VTX_WORD1__DST_SEL_X_mask = 0x7 << 9, + EG_VTX_WORD1__DST_SEL_Y_shift = 12,//3 bits + EG_VTX_WORD1__DST_SEL_Y_mask = 0x7 << 12, + EG_VTX_WORD1__DST_SEL_Z_shift = 15,//3 bits + EG_VTX_WORD1__DST_SEL_Z_mask = 0x7 << 15, + EG_VTX_WORD1__DST_SEL_W_shift = 18,//3 bits + EG_VTX_WORD1__DST_SEL_W_mask = 0x7 << 18, + EG_VTX_WORD1__UCF_shift = 21,//1 bit + EG_VTX_WORD1__UCF_bit = 1 << 21, + EG_VTX_WORD1__DATA_FORMAT_shift = 22,//6 bits + EG_VTX_WORD1__DATA_FORMAT_mask = 0x3F << 22, + EG_VTX_WORD1__NFA_shift = 28,//2 bits + EG_VTX_WORD1__NFA_mask = 0x3 << 28, + EG_VTX_WORD1__FCA_shift = 30,//1 bit + EG_VTX_WORD1__FCA_bit = 1 << 30, + EG_VTX_WORD1__SMA_shift = 31,//1 bit + EG_VTX_WORD1__SMA_bit = 1 << 31, + + EG_VTX_WORD2__OFFSET_shift = 0, //16 
bits + EG_VTX_WORD2__OFFSET_mask = 0xFFFF, + EG_VTX_WORD2__ENDIAN_SWAP_shift = 16,//2 bits + EG_VTX_WORD2__ENDIAN_SWAP_mask = 0x3 << 16, + EG_VTX_WORD2__CBNS_shift = 18,//1 bit + EG_VTX_WORD2__CBNS_bit = 1 << 18, + EG_VTX_WORD2__MEGA_FETCH_shift = 19,//1 bit + EG_VTX_WORD2__MEGA_FETCH_mask = 1 << 19, + EG_VTX_WORD2__ALT_CONST_shift = 20,//1 bit + EG_VTX_WORD2__ALT_CONST_mask = 1 << 20, + EG_VTX_WORD2__BIM_shift = 21,//2 bits + EG_VTX_WORD2__BIM_mask = 0x3 << 21, + EG_VTX_WORD2__Reserved_shift = 23,//9 bits + EG_VTX_WORD2__Reserved_mask = 0x1FF << 23, + +//CF_ALU + EG_CF_ALU_WORD0__ADDR_shift = 0, //22 bits + EG_CF_ALU_WORD0__ADDR_mask = 0x3FFFFF, + EG_CF_ALU_WORD0__KCACHE_BANK0_shift = 22,//4 bits + EG_CF_ALU_WORD0__KCACHE_BANK0_mask = 0xF << 22, + EG_CF_ALU_WORD0__KCACHE_BANK1_shift = 26,//4 bits + EG_CF_ALU_WORD0__KCACHE_BANK1_mask = 0xF << 26, + EG_CF_ALU_WORD0__KCACHE_MODE0_shift = 30,//2 bits + EG_CF_ALU_WORD0__KCACHE_MODE0_mask = 0x3 << 30, + + EG_CF_ALU_WORD1__KCACHE_MODE1_shift = 0, //2 bits + EG_CF_ALU_WORD1__KCACHE_MODE1_mask = 0x3, + EG_CF_ALU_WORD1__KCACHE_ADDR0_shift = 2, //8 bits + EG_CF_ALU_WORD1__KCACHE_ADDR0_mask = 0xFF << 2, + EG_CF_ALU_WORD1__KCACHE_ADDR1_shift = 10, //8 bits + EG_CF_ALU_WORD1__KCACHE_ADDR1_mask = 0xFF << 10, + EG_CF_ALU_WORD1__COUNT_shift = 18, //7 bits + EG_CF_ALU_WORD1__COUNT_mask = 0x7F << 18, + EG_CF_ALU_WORD1__ALT_CONST_shift = 25, //1 bit + EG_CF_ALU_WORD1__ALT_CONST_bit = 1 << 25, + EG_CF_ALU_WORD1__CF_INST_shift = 26, //4 bits + EG_CF_ALU_WORD1__CF_INST_mask = 0xF << 26, + EG_CF_ALU_WORD1__WQM_shift = 30, //1 bit + EG_CF_ALU_WORD1__WQM_bit = 1 << 30, + EG_CF_ALU_WORD1__BARRIER_shift = 31, //1 bit + EG_CF_ALU_WORD1__BARRIER_bit = 1 << 31, + + EG_CF_INST_ALU = 8, + EG_CF_INST_ALU_PUSH_BEFORE = 9, + EG_CF_INST_ALU_POP_AFTER = 10, + EG_CF_INST_ALU_POP2_AFTER = 11, + EG_CF_INST_ALU_EXTENDED = 12, + EG_CF_INST_ALU_CONTINUE = 13, + EG_CF_INST_ALU_BREAK = 14, + EG_CF_INST_ALU_ELSE_AFTER = 15, + + 
EG_CF_ALU_WORD0_EXT__Reserved0_shift = 0, //4 bits + EG_CF_ALU_WORD0_EXT__Reserved0_mask = 0xF, + EG_CF_ALU_WORD0_EXT__KBIM0_shift = 4, //2 bits + EG_CF_ALU_WORD0_EXT__KBIM0_mask = 0x3 << 4, + EG_CF_ALU_WORD0_EXT__KBIM1_shift = 6, //2 bits + EG_CF_ALU_WORD0_EXT__KBIM1_mask = 0x3 << 6, + EG_CF_ALU_WORD0_EXT__KBIM2_shift = 8, //2 bits + EG_CF_ALU_WORD0_EXT__KBIM2_mask = 0x3 << 8, + EG_CF_ALU_WORD0_EXT__KBIM3_shift = 10,//2 bits + EG_CF_ALU_WORD0_EXT__KBIM3_mask = 0x3 << 10, + EG_CF_ALU_WORD0_EXT__Reserved12_shift = 12,//10 bits + EG_CF_ALU_WORD0_EXT__Reserved12_mask = 0x3FF << 12, + EG_CF_ALU_WORD0_EXT__KCACHE_BANK2_shift = 22,//4 bits + EG_CF_ALU_WORD0_EXT__KCACHE_BANK2_mask = 0xF << 22, + EG_CF_ALU_WORD0_EXT__KCACHE_BANK3_shift = 26,//4 bits + EG_CF_ALU_WORD0_EXT__KCACHE_BANK3_mask = 0xF << 26, + EG_CF_ALU_WORD0_EXT__KCACHE_MODE2_shift = 30,//2 btis + EG_CF_ALU_WORD0_EXT__KCACHE_MODE2_mask = 0x3 << 30, + + EG_CF_ALU_WORD1_EXT__KCACHE_MODE3_shift = 0, //2 bits + EG_CF_ALU_WORD1_EXT__KCACHE_MODE3_mask = 0x3, + EG_CF_ALU_WORD1_EXT__KCACHE_ADDR2_shift = 2, //8 bits + EG_CF_ALU_WORD1_EXT__KCACHE_ADDR2_mask = 0xFF << 2, + EG_CF_ALU_WORD1_EXT__KCACHE_ADDR3_shift = 10, //8 bits + EG_CF_ALU_WORD1_EXT__KCACHE_ADDR3_mask = 0xFF << 10, + EG_CF_ALU_WORD1_EXT__Reserved18_shift = 18, //8 bits + EG_CF_ALU_WORD1_EXT__Reserved18_mask = 0xFF << 18, + EG_CF_ALU_WORD1_EXT__CF_INST_shift = 26, //4 bits + EG_CF_ALU_WORD1_EXT__CF_INST_mask = 0xF << 26, + EG_CF_ALU_WORD1_EXT__Reserved30_shift = 30, //1 bit + EG_CF_ALU_WORD1_EXT__Reserved30_bit = 1 << 30, + EG_CF_ALU_WORD1_EXT__BARRIER_shift = 31, //1 bit + EG_CF_ALU_WORD1_EXT__BARRIER_bit = 1 << 31, + +//ALU + EG_ALU_WORD0__SRC0_SEL_shift = 0, //9 bits + EG_ALU_WORD0__SRC0_SEL_mask = 0x1FF, + EG_ALU_WORD0__SRC1_SEL_shift = 13,//9 bits + EG_ALU_WORD0__SRC1_SEL_mask = 0x1FF << 13, + EG_ALU_WORD0__SRC0_REL_shift = 9, //1 bit + EG_ALU_WORD0__SRC0_REL_bit = 1 << 9, + EG_ALU_WORD0__SRC1_REL_shift = 22,//1 bit + EG_ALU_WORD0__SRC1_REL_bit = 1 << 
22, + EG_ALU_WORD0__SRC0_CHAN_shift = 10,//2 bits + EG_ALU_WORD0__SRC0_CHAN_mask = 0x3 << 10, + EG_ALU_WORD0__SRC1_CHAN_shift = 23,//2 bits + EG_ALU_WORD0__SRC1_CHAN_mask = 0x3 << 23, + EG_ALU_WORD0__SRC0_NEG_shift = 12,//1 bit + EG_ALU_WORD0__SRC0_NEG_bit = 1 << 12, + EG_ALU_WORD0__SRC1_NEG_shift = 25,//1 bit + EG_ALU_WORD0__SRC1_NEG_bit = 1 << 25, + EG_ALU_WORD0__INDEX_MODE_shift = 26,//3 bits + EG_ALU_WORD0__INDEX_MODE_mask = 0x7 << 26, + EG_ALU_WORD0__PRED_SEL_shift = 29,//2 bits + EG_ALU_WORD0__PRED_SEL_mask = 0x3 << 29, + EG_ALU_WORD0__LAST_shift = 31,//1 bit + EG_ALU_WORD0__LAST_bit = 1 << 31, + + EG_ALU_WORD1_OP2__SRC0_ABS_shift = 0, //1 bit + EG_ALU_WORD1_OP2__SRC0_ABS_bit = 1, + EG_ALU_WORD1_OP2__SRC1_ABS_shift = 1, //1 bit + EG_ALU_WORD1_OP2__SRC1_ABS_bit = 1 << 1, + EG_ALU_WORD1_OP2__UEM_shift = 2, //1 bit + EG_ALU_WORD1_OP2__UEM_bit = 1 << 2, + EG_ALU_WORD1_OP2__UPDATE_PRED_shift = 3, //1 bit + EG_ALU_WORD1_OP2__UPDATE_PRED_bit = 1 << 3, + EG_ALU_WORD1_OP2__WRITE_MASK_shift = 4, //1 bit + EG_ALU_WORD1_OP2__WRITE_MASK_bit = 1 << 4, + EG_ALU_WORD1_OP2__OMOD_shift = 5, //2 bits + EG_ALU_WORD1_OP2__OMOD_mask = 0x3 << 5, + EG_ALU_WORD1_OP2__ALU_INST_shift = 7, //11 bits + EG_ALU_WORD1_OP2__ALU_INST_mask = 0x7FF << 7, + + EG_ALU_WORD1__BANK_SWIZZLE_shift = 18,//3 bits + EG_ALU_WORD1__BANK_SWIZZLE_mask = 0x7 << 18, + EG_ALU_WORD1__DST_GPR_shift = 21,//7 bits + EG_ALU_WORD1__DST_GPR_mask = 0x7F << 21, + EG_ALU_WORD1__DST_REL_shift = 28,//1 bit + EG_ALU_WORD1__DST_REL_mask = 1 << 28, + EG_ALU_WORD1__DST_CHAN_shift = 29,//2 bits + EG_ALU_WORD1__DST_CHAN_mask = 0x3 << 29, + EG_ALU_WORD1__CLAMP_shift = 31,//1 bits + EG_ALU_WORD1__CLAMP_mask = 1 << 31, + + EG_ALU_WORD1_OP3__SRC2_SEL_shift = 0, //9 bits + EG_ALU_WORD1_OP3__SRC2_SEL_mask = 0x1FF, + EG_ALU_WORD1_OP3__SRC2_REL_shift = 9, //1 bit + EG_ALU_WORD1_OP3__SRC2_REL_bit = 1 << 9, + EG_ALU_WORD1_OP3__SRC2_CHAN_shift = 10,//2 bits + EG_ALU_WORD1_OP3__SRC2_CHAN_mask = 0x3 << 10, + EG_ALU_WORD1_OP3__SRC2_NEG_shift 
= 12,//1 bit + EG_ALU_WORD1_OP3__SRC2_NEG_bit = 1 << 12, + EG_ALU_WORD1_OP3__ALU_INST_shift = 13,//5 bits + EG_ALU_WORD1_OP3__ALU_INST_mask = 0x1F << 13, + + EG_OP3_INST_BFE_UINT = 4, + EG_OP3_INST_BFE_INT = 5, + EG_OP3_INST_BFI_INT = 6, + EG_OP3_INST_FMA = 7, + EG_OP3_INST_CNDNE_64 = 9, + EG_OP3_INST_FMA_64 = 10, + EG_OP3_INST_LERP_UINT = 11, + EG_OP3_INST_BIT_ALIGN_INT = 12, + EG_OP3_INST_BYTE_ALIGN_INT = 13, + EG_OP3_INST_SAD_ACCUM_UINT = 14, + EG_OP3_INST_SAD_ACCUM_HI_UINT = 15, + EG_OP3_INST_MULADD_UINT24 = 16, + EG_OP3_INST_LDS_IDX_OP = 17, + EG_OP3_INST_MULADD = 20, + EG_OP3_INST_MULADD_M2 = 21, + EG_OP3_INST_MULADD_M4 = 22, + EG_OP3_INST_MULADD_D2 = 23, + EG_OP3_INST_MULADD_IEEE = 24, + EG_OP3_INST_CNDE = 25, + EG_OP3_INST_CNDGT = 26, + EG_OP3_INST_CNDGE = 27, + EG_OP3_INST_CNDE_INT = 28, + EG_OP3_INST_CMNDGT_INT = 29, + EG_OP3_INST_CMNDGE_INT = 30, + EG_OP3_INST_MUL_LIT = 31, + + EG_OP2_INST_ADD = 0, + EG_OP2_INST_MUL = 1, + EG_OP2_INST_MUL_IEEE = 2, + EG_OP2_INST_MAX = 3, + EG_OP2_INST_MIN = 4, + EG_OP2_INST_MAX_DX10 = 5, + EG_OP2_INST_MIN_DX10 = 6, + EG_OP2_INST_SETE = 8, + EG_OP2_INST_SETGT = 9, + EG_OP2_INST_SETGE = 10, + EG_OP2_INST_SETNE = 11, + EG_OP2_INST_SETE_DX10 = 12, + EG_OP2_INST_SETGT_DX10 = 13, + EG_OP2_INST_SETGE_DX10 = 14, + EG_OP2_INST_SETNE_DX10 = 15, + EG_OP2_INST_FRACT = 16, + EG_OP2_INST_TRUNC = 17, + EG_OP2_INST_CEIL = 18, + EG_OP2_INST_RNDNE = 19, + EG_OP2_INST_FLOOR = 20, + EG_OP2_INST_ASHR_INT = 21, + EG_OP2_INST_LSHR_INT = 22, + EG_OP2_INST_LSHL_INT = 23, + EG_OP2_INST_MOV = 25, + EG_OP2_INST_NOP = 26, + EG_OP2_INST_MUL_64 = 27, + EG_OP2_INST_FLT64_TO_FLT32 = 28, + EG_OP2_INST_FLT32_TO_FLT64 = 29, + EG_OP2_INST_PRED_SETGT_UINT = 30, + EG_OP2_INST_PRED_SETGE_UINT = 31, + EG_OP2_INST_PRED_SETE = 32, + EG_OP2_INST_PRED_SETGT = 33, + EG_OP2_INST_PRED_SETGE = 34, + EG_OP2_INST_PRED_SETNE = 35, + EG_OP2_INST_PRED_SET_INV = 36, + EG_OP2_INST_PRED_SET_POP = 37, + EG_OP2_INST_PRED_SET_CLR = 38, + EG_OP2_INST_PRED_SET_RESTORE = 39, + 
EG_OP2_INST_PRED_SETE_PUSH = 40, + EG_OP2_INST_PRED_SETGT_PUSH = 41, + EG_OP2_INST_PRED_SETGE_PUSH = 42, + EG_OP2_INST_PRED_SETNE_PUSH = 43, + EG_OP2_INST_KILLE = 44, + EG_OP2_INST_KILLGT = 45, + EG_OP2_INST_KILLGE = 46, + EG_OP2_INST_KILLNE = 47, + EG_OP2_INST_AND_INT = 48, + EG_OP2_INST_OR_INT = 49, + EG_OP2_INST_XOR_INT = 50, + EG_OP2_INST_NOT_INT = 51, + EG_OP2_INST_ADD_INT = 52, + EG_OP2_INST_SUB_INT = 53, + EG_OP2_INST_MAX_INT = 54, + EG_OP2_INST_MIN_INT = 55, + EG_OP2_INST_MAX_UINT = 56, + EG_OP2_INST_MIN_UINT = 57, + EG_OP2_INST_SETE_INT = 58, + EG_OP2_INST_SETGT_INT = 59, + EG_OP2_INST_SETGE_INT = 60, + EG_OP2_INST_SETNE_INT = 61, + EG_OP2_INST_SETGT_UINT = 62, + EG_OP2_INST_SETGE_UINT = 63, + EG_OP2_INST_KILLGT_UINT = 64, + EG_OP2_INST_KILLGE_UINT = 65, + EG_OP2_INST_PREDE_INT = 66, + EG_OP2_INST_PRED_SETGT_INT = 67, + EG_OP2_INST_PRED_SETGE_INT = 68, + EG_OP2_INST_PRED_SETNE_INT = 69, + EG_OP2_INST_KILLE_INT = 70, + EG_OP2_INST_KILLGT_INT = 71, + EG_OP2_INST_KILLGE_INT = 72, + EG_OP2_INST_KILLNE_INT = 73, + EG_OP2_INST_PRED_SETE_PUSH_INT = 74, + EG_OP2_INST_PRED_SETGT_PUSH_INT = 75, + EG_OP2_INST_PRED_SETGE_PUSH_INT = 76, + EG_OP2_INST_PRED_SETNE_PUSH_INT = 77, + EG_OP2_INST_PRED_SETLT_PUSH_INT = 78, + EG_OP2_INST_PRED_SETLE_PUSH_INT = 79, + EG_OP2_INST_FLT_TO_INT = 80, + EG_OP2_INST_BFREV_INT = 81, + EG_OP2_INST_ADDC_UINT = 82, + EG_OP2_INST_SUBB_UINT = 83, + EG_OP2_INST_GROUP_BARRIER = 84, + EG_OP2_INST_GROUP_SEQ_BEGIN = 85, + EG_OP2_INST_GROUP_SEQ_END = 86, + EG_OP2_INST_SET_MODE = 87, + EG_OP2_INST_SET_CF_IDX0 = 88, + EG_OP2_INST_SET_CF_IDX1 = 89, + EG_OP2_INST_SET_LDS_SIZE = 90, + EG_OP2_INST_EXP_IEEE = 129, + EG_OP2_INST_LOG_CLAMPED = 130, + EG_OP2_INST_LOG_IEEE = 131, + EG_OP2_INST_RECIP_CLAMPED = 132, + EG_OP2_INST_RECIP_FF = 133, + EG_OP2_INST_RECIP_IEEE = 134, + EG_OP2_INST_RECIPSQRT_CLAMPED = 135, + EG_OP2_INST_RECIPSQRT_FF = 136, + EG_OP2_INST_RECIPSQRT_IEEE = 137, + EG_OP2_INST_SQRT_IEEE = 138, + EG_OP2_INST_SIN = 141, + EG_OP2_INST_COS = 
142, + EG_OP2_INST_MULLO_INT = 143, + EG_OP2_INST_MULHI_INT = 144, + EG_OP2_INST_MULLO_UINT = 145, + EG_OP2_INST_MULHI_UINT = 146, + EG_OP2_INST_RECIP_INT = 147, + EG_OP2_INST_RECIP_UINT = 148, + EG_OP2_INST_RECIP_64 = 149, + EG_OP2_INST_RECIP_CLAMPED_64 = 150, + EG_OP2_INST_RECIPSQRT_64 = 151, + EG_OP2_INST_RECIPSQRT_CLAMPED_64 = 152, + EG_OP2_INST_SQRT_64 = 153, + EG_OP2_INST_FLT_TO_UINT = 154, + EG_OP2_INST_INT_TO_FLT = 155, + EG_OP2_INST_UINT_TO_FLT = 156, + EG_OP2_INST_BFM_INT = 160, + EG_OP2_INST_FLT32_TO_FLT16 = 162, + EG_OP2_INST_FLT16_TO_FLT32 = 163, + EG_OP2_INST_UBYTE0_FLT = 164, + EG_OP2_INST_UBYTE1_FLT = 165, + EG_OP2_INST_UBYTE2_FLT = 166, + EG_OP2_INST_UBYTE3_FLT = 167, + EG_OP2_INST_BCNT_INT = 170, + EG_OP2_INST_FFBH_UINT = 171, + EG_OP2_INST_FFBL_INT = 172, + EG_OP2_INST_FFBH_INT = 173, + EG_OP2_INST_FLT_TO_UINT4 = 174, + EG_OP2_INST_DOT_IEEE = 175, + EG_OP2_INST_FLT_TO_INT_RPI = 176, + EG_OP2_INST_FLT_TO_INT_FLOOR = 177, + EG_OP2_INST_MULHI_UINT24 = 178, + EG_OP2_INST_MBCNT_32HI_INT = 179, + EG_OP2_INST_OFFSET_TO_FLT = 180, + EG_OP2_INST_MUL_UINT24 = 181, + EG_OP2_INST_BCNT_ACCUM_PREV_INT = 182, + EG_OP2_INST_MBCNT_32LO_ACCUM_PREV_INT = 183, + EG_OP2_INST_SETE_64 = 184, + EG_OP2_INST_SETNE_64 = 185, + EG_OP2_INST_SETGT_64 = 186, + EG_OP2_INST_SETGE_64 = 187, + EG_OP2_INST_MIN_64 = 188, + EG_OP2_INST_MAX_64 = 189, + EG_OP2_INST_DOT4 = 190, + EG_OP2_INST_DOT4_IEEE = 191, + EG_OP2_INST_CUBE = 192, + EG_OP2_INST_MAX4 = 193, + EG_OP2_INST_FREXP_64 = 196, + EG_OP2_INST_LDEXP_64 = 197, + EG_OP2_INST_FRACT_64 = 198, + EG_OP2_INST_PRED_SETGT_64 = 199, + EG_OP2_INST_PRED_SETE_64 = 200, + EG_OP2_INST_PRED_SETGE_64 = 201, + EG_OP2_INST_MUL_64_2 = 202, //same as prev? + EG_OP2_INST_ADD_64 = 203, + EG_OP2_INST_MOVA_INT = 204, + EG_OP2_INST_FLT64_TO_FLT32_2 = 205, //same as prev? + EG_OP2_INST_FLT32_TO_FLT64_2 = 206, //same as prev? 
+ EG_OP2_INST_SAD_ACCUM_PREV_UINT = 207, + EG_OP2_INST_DOT = 208, + EG_OP2_INST_MUL_PREV = 209, + EG_OP2_INST_MUL_IEEE_PREV = 210, + EG_OP2_INST_ADD_PREV = 211, + EG_OP2_INST_MULADD_PREV = 212, + EG_OP2_INST_MULADD_IEEE_PREV = 213, + EG_OP2_INST_INTERP_XY = 214, + EG_OP2_INST_INTERP_ZW = 215, + EG_OP2_INST_INTERP_X = 216, + EG_OP2_INST_INTERP_Z = 217, + EG_OP2_INST_STORE_FLAGS = 218, + EG_OP2_INST_LOAD_STORE_FLAGS = 219, + EG_OP2_INST_LDS_1A = 220, + EG_OP2_INST_LDS_1A1D = 221, + EG_OP2_INST_LDS_2A = 223, + EG_OP2_INST_INTERP_LOAD_P0 = 224, + EG_OP2_INST_INTERP_LOAD_P10 = 225, + EG_OP2_INST_INTERP_LOAD_P20 = 226, + + EG_SRC_SEL__GPR_start = 0, + EG_SRC_SEL__GPR_end = 127, + EG_SRC_SEL__KCONST_BANK0_start = 128, + EG_SRC_SEL__KCONST_BANK0_end = 159, + EG_SRC_SEL__KCONST_BANK1_start = 160, + EG_SRC_SEL__KCONST_BANK1_end = 191, + EG_SRC_SEL__INLINE_satrt = 192, + EG_SRC_SEL__INLINE_end = 255, + EG_SRC_SEL__KCONST_BANK2_start = 256, + EG_SRC_SEL__KCONST_BANK2_end = 287, + EG_SRC_SEL__KCONST_BANK3_start = 288, + EG_SRC_SEL__KCONST_BANK3_end = 319, + EG_SRC_SEL__ALU_SRC_LDS_OQ_A = 219, + EG_SRC_SEL__ALU_SRC_LDS_OQ_B = 220, + EG_SRC_SEL__ALU_SRC_LDS_OQ_A_POP = 221, + EG_SRC_SEL__ALU_SRC_LDS_OQ_B_POP = 222, + EG_SRC_SEL__ALU_SRC_LDS_DIRECT_A = 223, + EG_SRC_SEL__ALU_SRC_LDS_DIRECT_B = 224, + EG_SRC_SEL__ALU_SRC_TIME_HI = 227, + EG_SRC_SEL__ALU_SRC_TIME_LO = 228, + EG_SRC_SEL__ALU_SRC_MASK_HI = 229, + EG_SRC_SEL__ALU_SRC_MASK_LO = 230, + EG_SRC_SEL__ALU_SRC_HW_WAVE_ID = 231, + EG_SRC_SEL__ALU_SRC_SIMD_ID = 232, + EG_SRC_SEL__ALU_SRC_SE_ID = 233, + EG_SRC_SEL__ALU_SRC_HW_THREADGRP_ID = 234, + EG_SRC_SEL__ALU_SRC_WAVE_ID_IN_GRP = 235, + EG_SRC_SEL__ALU_SRC_NUM_THREADGRP_WAVES = 236, + EG_SRC_SEL__ALU_SRC_HW_ALU_ODD = 237, + EG_SRC_SEL__ALU_SRC_LOOP_IDX = 238, + EG_SRC_SEL__ALU_SRC_PARAM_BASE_ADDR = 240, + EG_SRC_SEL__ALU_SRC_NEW_PRIM_MASK = 241, + EG_SRC_SEL__ALU_SRC_PRIM_MASK_HI = 242, + EG_SRC_SEL__ALU_SRC_PRIM_MASK_LO = 243, + EG_SRC_SEL__ALU_SRC_1_DBL_L = 244, + 
EG_SRC_SEL__ALU_SRC_1_DBL_M = 245, + EG_SRC_SEL__ALU_SRC_0_5_DBL_L = 246, + EG_SRC_SEL__ALU_SRC_0_5_DBL_M = 247, + EG_SRC_SEL__ALU_SRC_0 = 248, + EG_SRC_SEL__ALU_SRC_1 = 249, + EG_SRC_SEL__ALU_SRC_1_INT = 250, + EG_SRC_SEL__ALU_SRC_M_1_INT = 251, + EG_SRC_SEL__ALU_SRC_0_5 = 252, + EG_SRC_SEL__ALU_SRC_LITERAL = 253, + EG_SRC_SEL__ALU_SRC_PV = 254, + EG_SRC_SEL__ALU_SRC_PS = 255, + +//ALLOC_EXPORT + EG_CF_ALLOC_EXPORT_WORD0__ARRAY_BASE_shift = 0, //13 bits + EG_CF_ALLOC_EXPORT_WORD0__ARRAY_BASE_mask = 0x1FFF, + EG_CF_ALLOC_EXPORT_WORD0__TYPE_shift = 13,//2 bits + EG_CF_ALLOC_EXPORT_WORD0__TYPE_mask = 0x3 << 13, + EG_CF_ALLOC_EXPORT_WORD0__RW_GPR_shift = 15,//7 bits + EG_CF_ALLOC_EXPORT_WORD0__RW_GPR_mask = 0x7F << 15, + EG_CF_ALLOC_EXPORT_WORD0__RW_REL_shift = 22,//1 bit + EG_CF_ALLOC_EXPORT_WORD0__RW_REL_bit = 1 << 22, + EG_CF_ALLOC_EXPORT_WORD0__INDEX_GPR_shift = 23,//7 bits + EG_CF_ALLOC_EXPORT_WORD0__INDEX_GPR_mask = 0x7F << 23, + EG_CF_ALLOC_EXPORT_WORD0__ELEM_SIZE_shift = 30,//2 bits + EG_CF_ALLOC_EXPORT_WORD0__ELEM_SIZE_mask = 0x3 << 30, + + EG_CF_ALLOC_EXPORT_WORD1_BUF__ARRAY_SIZE_shift = 0, //12 bits + EG_CF_ALLOC_EXPORT_WORD1_BUF__ARRAY_SIZE_mask = 0xFFF, + EG_CF_ALLOC_EXPORT_WORD1_BUF__COMP_MASK_shift = 12, //4 bits + EG_CF_ALLOC_EXPORT_WORD1_BUF__COMP_MASK_mask = 0xF << 12, + + EG_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_X_shift = 0, //3 bits + EG_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_X_mask = 0x7, + EG_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_Y_shift = 3, //3 bits + EG_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_Y_mask = 0x7 << 3, + EG_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_Z_shift = 6, //3 bits + EG_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_Z_mask = 0x7 << 6, + EG_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_W_shift = 9, //3 bits + EG_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_W_mask = 0x7 << 9, + EG_CF_ALLOC_EXPORT_WORD1_SWIZ__Resreve_shift = 12,//4 bits + EG_CF_ALLOC_EXPORT_WORD1_SWIZ__Resreve_mask = 0xF << 12, + + EG_CF_ALLOC_EXPORT_WORD1__BURST_COUNT_shift = 16, //4 bits + EG_CF_ALLOC_EXPORT_WORD1__BURST_COUNT_mask = 0xF << 
16, + EG_CF_ALLOC_EXPORT_WORD1__VPM_shift = 20, //1 bit + EG_CF_ALLOC_EXPORT_WORD1__VPM_bit = 1 << 20, + EG_CF_ALLOC_EXPORT_WORD1__EOP_shift = 21, //1 bit + EG_CF_ALLOC_EXPORT_WORD1__EOP_bit = 1 << 21, + EG_CF_ALLOC_EXPORT_WORD1__CF_INST_shift = 22, //8 bits + EG_CF_ALLOC_EXPORT_WORD1__CF_INST_mask = 0xFF << 22, + EG_CF_ALLOC_EXPORT_WORD1__MARK_shift = 30, //1 bit + EG_CF_ALLOC_EXPORT_WORD1__MARK_bit = 1 << 30, + EG_CF_ALLOC_EXPORT_WORD1__BARRIER_shift = 31, //1 bit + EG_CF_ALLOC_EXPORT_WORD1__BARRIER_bit = 1 << 31, + + EG_CF_INST_MEM_STREAM0_BUF0 = 64 , + EG_CF_INST_MEM_STREAM0_BUF1 = 65, + EG_CF_INST_MEM_STREAM0_BUF2 = 66, + EG_CF_INST_MEM_STREAM0_BUF3 = 67, + EG_CF_INST_MEM_STREAM1_BUF0 = 68, + EG_CF_INST_MEM_STREAM1_BUF1 = 69, + EG_CF_INST_MEM_STREAM1_BUF2 = 70, + EG_CF_INST_MEM_STREAM1_BUF3 = 71, + EG_CF_INST_MEM_STREAM2_BUF0 = 72, + EG_CF_INST_MEM_STREAM2_BUF1 = 73, + EG_CF_INST_MEM_STREAM2_BUF2 = 74, + EG_CF_INST_MEM_STREAM2_BUF3 = 75, + EG_CF_INST_MEM_STREAM3_BUF0 = 76, + EG_CF_INST_MEM_STREAM3_BUF1 = 77, + EG_CF_INST_MEM_STREAM3_BUF2 = 78, + EG_CF_INST_MEM_STREAM3_BUF3 = 79, + EG_CF_INST_MEM_WR_SCRATCH = 80, + EG_CF_INST_MEM_RING = 82, + EG_CF_INST_EXPORT = 83, + EG_CF_INST_EXPORT_DONE = 84, + EG_CF_INST_MEM_EXPORT = 85, + EG_CF_INST_MEM_RAT = 86, + EG_CF_INST_MEM_RAT_CACHELESS = 87, + EG_CF_INST_MEM_RING1 = 88, + EG_CF_INST_MEM_RING2 = 89, + EG_CF_INST_MEM_RING3 = 90, + EG_CF_INST_MEM_EXPORT_COMBINED = 91, + EG_CF_INST_MEM_RAT_COMBINED_CACHELESS = 92, + + EG_EXPORT_PIXEL = 0, + EG_EXPORT_WRITE = 0, + EG_EXPORT_POS = 1, + EG_EXPORT_WRITE_IND = 1, + EG_EXPORT_PARAM = 2, + EG_EXPORT_WRITE_ACK = 2, + EG_EXPORT_WRITE_IND_ACK = 3, + + /* PS interp param source */ + EG_ALU_SRC_PARAM_BASE = 0x000001c0, + EG_ALU_SRC_PARAM_SIZE = 0x00000021, +}; + +#endif //_EVERGREEN_SQ_H_ + + diff --git a/src/mesa/drivers/dri/r600/evergreen_state.c b/src/mesa/drivers/dri/r600/evergreen_state.c new file mode 100644 index 00000000000..931478caa5a --- /dev/null +++ 
b/src/mesa/drivers/dri/r600/evergreen_state.c @@ -0,0 +1,1878 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +/* + * Authors: + * Richard Li <[email protected]>, <[email protected]> + */ + +#include "main/glheader.h" +#include "main/mtypes.h" +#include "main/imports.h" +#include "main/enums.h" +#include "main/macros.h" +#include "main/context.h" +#include "main/dd.h" +#include "main/simple_list.h" + +#include "tnl/tnl.h" +#include "tnl/t_pipeline.h" +#include "swrast/swrast.h" +#include "swrast_setup/swrast_setup.h" +#include "main/api_arrayelt.h" +#include "main/framebuffer.h" +#include "drivers/common/meta.h" +#include "program/prog_parameter.h" +#include "program/prog_statevars.h" + +#include "vbo/vbo.h" + +#include "r600_context.h" + +#include "evergreen_state.h" +#include "evergreen_diff.h" +#include "evergreen_vertprog.h" +#include "evergreen_fragprog.h" +#include "evergreen_tex.h" + +void evergreenUpdateStateParameters(GLcontext * ctx, GLuint new_state); //same + +void evergreenUpdateShaders(GLcontext * ctx) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + + /* should only happenen once, just after context is created */ + /* TODO: shouldn't we fallback to sw here? */ + if (!ctx->FragmentProgram._Current) { + fprintf(stderr, "No ctx->FragmentProgram._Current!!\n"); + return; + } + + evergreenSelectFragmentShader(ctx); + + evergreenSelectVertexShader(ctx); + evergreenUpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + context->radeon.NewGLState = 0; +} + +void evergreeUpdateShaders(GLcontext * ctx) +{ + context_t *context = R700_CONTEXT(ctx); + + /* should only happenen once, just after context is created */ + /* TODO: shouldn't we fallback to sw here? 
*/ + if (!ctx->FragmentProgram._Current) { + fprintf(stderr, "No ctx->FragmentProgram._Current!!\n"); + return; + } + + evergreenSelectFragmentShader(ctx); + + evergreenSelectVertexShader(ctx); + evergreenUpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + context->radeon.NewGLState = 0; +} + +/* + * To correctly position primitives: + */ +void evergreenUpdateViewportOffset(GLcontext * ctx) //------------------ +{ + context_t *context = R700_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + __DRIdrawable *dPriv = radeon_get_drawable(&context->radeon); + GLfloat xoffset = (GLfloat) dPriv->x; + GLfloat yoffset = (GLfloat) dPriv->y + dPriv->h; + const GLfloat *v = ctx->Viewport._WindowMap.m; + int id = 0; + + GLfloat tx = v[MAT_TX] + xoffset; + GLfloat ty = (-v[MAT_TY]) + yoffset; + + if (evergreen->viewport[id].PA_CL_VPORT_XOFFSET.f32All != tx || + evergreen->viewport[id].PA_CL_VPORT_YOFFSET.f32All != ty) { + /* Note: this should also modify whatever data the context reset + * code uses... + */ + EVERGREEN_STATECHANGE(context, pa); + evergreen->viewport[id].PA_CL_VPORT_XOFFSET.f32All = tx; + evergreen->viewport[id].PA_CL_VPORT_YOFFSET.f32All = ty; + } + + radeonUpdateScissor(ctx); +} + +void evergreenUpdateStateParameters(GLcontext * ctx, GLuint new_state) //same +{ + struct evergreen_fragment_program *fp = + (struct evergreen_fragment_program *)ctx->FragmentProgram._Current; + struct gl_program_parameter_list *paramList; + + if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS))) + return; + + if (!ctx->FragmentProgram._Current || !fp) + return; + + paramList = ctx->FragmentProgram._Current->Base.Parameters; + + if (!paramList) + return; + + _mesa_load_state_parameters(ctx, paramList); + +} + +/** + * Called by Mesa after an internal state update. 
+ */ +static void evergreenInvalidateState(GLcontext * ctx, GLuint new_state) //same +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + + _swrast_InvalidateState(ctx, new_state); + _swsetup_InvalidateState(ctx, new_state); + _vbo_InvalidateState(ctx, new_state); + _tnl_InvalidateState(ctx, new_state); + _ae_invalidate_state(ctx, new_state); + + if (new_state & _NEW_BUFFERS) { + _mesa_update_framebuffer(ctx); + /* this updates the DrawBuffer's Width/Height if it's a FBO */ + _mesa_update_draw_buffer_bounds(ctx); + + EVERGREEN_STATECHANGE(context, cb); + EVERGREEN_STATECHANGE(context, db); + } + + if (new_state & (_NEW_LIGHT)) { + EVERGREEN_STATECHANGE(context, pa); + if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION) + SETbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, PROVOKING_VTX_LAST_bit); + else + CLEARbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, PROVOKING_VTX_LAST_bit); + } + + evergreenUpdateStateParameters(ctx, new_state); + + EVERGREEN_STATECHANGE(context, pa); + EVERGREEN_STATECHANGE(context, spi); + + if(GL_TRUE == evergreen->bEnablePerspective) + { + /* Do scale XY and Z by 1/W0 for perspective correction on pos. For orthogonal case, set both to one. */ + CLEARbit(evergreen->PA_CL_VTE_CNTL.u32All, VTX_XY_FMT_bit); + CLEARbit(evergreen->PA_CL_VTE_CNTL.u32All, VTX_Z_FMT_bit); + + SETbit(evergreen->PA_CL_VTE_CNTL.u32All, VTX_W0_FMT_bit); + + SETbit(evergreen->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit); + CLEARbit(evergreen->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit); + + SETfield(evergreen->SPI_BARYC_CNTL.u32All, 1, + EG_SPI_BARYC_CNTL__PERSP_CENTROID_ENA_shift, + EG_SPI_BARYC_CNTL__PERSP_CENTROID_ENA_mask); + } + else + { + /* For orthogonal case. 
*/ + SETbit(evergreen->PA_CL_VTE_CNTL.u32All, VTX_XY_FMT_bit); + SETbit(evergreen->PA_CL_VTE_CNTL.u32All, VTX_Z_FMT_bit); + + SETbit(evergreen->PA_CL_VTE_CNTL.u32All, VTX_W0_FMT_bit); + + CLEARbit(evergreen->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit); + SETbit(evergreen->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit); + + SETfield(evergreen->SPI_BARYC_CNTL.u32All, 1, + EG_SPI_BARYC_CNTL__LINEAR_CENTROID_ENA_shift, + EG_SPI_BARYC_CNTL__LINEAR_CENTROID_ENA_mask); + } + + context->radeon.NewGLState |= new_state; +} + +static void evergreenSetAlphaState(GLcontext * ctx) //same +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + uint32_t alpha_func = REF_ALWAYS; + GLboolean really_enabled = ctx->Color.AlphaEnabled; + + EVERGREEN_STATECHANGE(context, sx); + + switch (ctx->Color.AlphaFunc) { + case GL_NEVER: + alpha_func = REF_NEVER; + break; + case GL_LESS: + alpha_func = REF_LESS; + break; + case GL_EQUAL: + alpha_func = REF_EQUAL; + break; + case GL_LEQUAL: + alpha_func = REF_LEQUAL; + break; + case GL_GREATER: + alpha_func = REF_GREATER; + break; + case GL_NOTEQUAL: + alpha_func = REF_NOTEQUAL; + break; + case GL_GEQUAL: + alpha_func = REF_GEQUAL; + break; + case GL_ALWAYS: + /*alpha_func = REF_ALWAYS; */ + really_enabled = GL_FALSE; + break; + } + + if (really_enabled) { + SETfield(evergreen->SX_ALPHA_TEST_CONTROL.u32All, alpha_func, + ALPHA_FUNC_shift, ALPHA_FUNC_mask); + SETbit(evergreen->SX_ALPHA_TEST_CONTROL.u32All, ALPHA_TEST_ENABLE_bit); + evergreen->SX_ALPHA_REF.f32All = ctx->Color.AlphaRef; + } else { + CLEARbit(evergreen->SX_ALPHA_TEST_CONTROL.u32All, ALPHA_TEST_ENABLE_bit); + } +} + +static void evergreenAlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref) //same +{ + (void)func; + (void)ref; + evergreenSetAlphaState(ctx); +} + +static void evergreenBlendColor(GLcontext * ctx, const GLfloat cf[4]) //same +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + 
EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + + EVERGREEN_STATECHANGE(context, cb); + + evergreen->CB_BLEND_RED.f32All = cf[0]; + evergreen->CB_BLEND_GREEN.f32All = cf[1]; + evergreen->CB_BLEND_BLUE.f32All = cf[2]; + evergreen->CB_BLEND_ALPHA.f32All = cf[3]; +} + +static int evergreenblend_factor(GLenum factor, GLboolean is_src) //same +{ + switch (factor) { + case GL_ZERO: + return BLEND_ZERO; + break; + case GL_ONE: + return BLEND_ONE; + break; + case GL_DST_COLOR: + return BLEND_DST_COLOR; + break; + case GL_ONE_MINUS_DST_COLOR: + return BLEND_ONE_MINUS_DST_COLOR; + break; + case GL_SRC_COLOR: + return BLEND_SRC_COLOR; + break; + case GL_ONE_MINUS_SRC_COLOR: + return BLEND_ONE_MINUS_SRC_COLOR; + break; + case GL_SRC_ALPHA: + return BLEND_SRC_ALPHA; + break; + case GL_ONE_MINUS_SRC_ALPHA: + return BLEND_ONE_MINUS_SRC_ALPHA; + break; + case GL_DST_ALPHA: + return BLEND_DST_ALPHA; + break; + case GL_ONE_MINUS_DST_ALPHA: + return BLEND_ONE_MINUS_DST_ALPHA; + break; + case GL_SRC_ALPHA_SATURATE: + return (is_src) ? BLEND_SRC_ALPHA_SATURATE : BLEND_ZERO; + break; + case GL_CONSTANT_COLOR: + return BLEND_CONSTANT_COLOR; + break; + case GL_ONE_MINUS_CONSTANT_COLOR: + return BLEND_ONE_MINUS_CONSTANT_COLOR; + break; + case GL_CONSTANT_ALPHA: + return BLEND_CONSTANT_ALPHA; + break; + case GL_ONE_MINUS_CONSTANT_ALPHA: + return BLEND_ONE_MINUS_CONSTANT_ALPHA; + break; + default: + fprintf(stderr, "unknown blend factor %x\n", factor); + return (is_src) ? 
BLEND_ONE : BLEND_ZERO; + break; + } +} + +static void evergreenSetBlendState(GLcontext * ctx) //diff : CB_COLOR_CONTROL, CB_BLEND0_CONTROL bits +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + uint32_t blend_reg = 0, eqn, eqnA; + + EVERGREEN_STATECHANGE(context, cb); + + if (RGBA_LOGICOP_ENABLED(ctx) || !ctx->Color.BlendEnabled) { + SETfield(blend_reg, + BLEND_ONE, COLOR_SRCBLEND_shift, COLOR_SRCBLEND_mask); + SETfield(blend_reg, + BLEND_ZERO, COLOR_DESTBLEND_shift, COLOR_DESTBLEND_mask); + SETfield(blend_reg, + COMB_DST_PLUS_SRC, COLOR_COMB_FCN_shift, COLOR_COMB_FCN_mask); + SETfield(blend_reg, + BLEND_ONE, ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask); + SETfield(blend_reg, + BLEND_ZERO, ALPHA_DESTBLEND_shift, ALPHA_DESTBLEND_mask); + SETfield(blend_reg, + COMB_DST_PLUS_SRC, ALPHA_COMB_FCN_shift, ALPHA_COMB_FCN_mask); + //if (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_R600) + // evergreen->CB_BLEND_CONTROL.u32All = blend_reg; + //else + evergreen->CB_BLEND0_CONTROL.u32All = blend_reg; + return; + } + + SETfield(blend_reg, + evergreenblend_factor(ctx->Color.BlendSrcRGB, GL_TRUE), + COLOR_SRCBLEND_shift, COLOR_SRCBLEND_mask); + SETfield(blend_reg, + evergreenblend_factor(ctx->Color.BlendDstRGB, GL_FALSE), + COLOR_DESTBLEND_shift, COLOR_DESTBLEND_mask); + + switch (ctx->Color.BlendEquationRGB) { + case GL_FUNC_ADD: + eqn = COMB_DST_PLUS_SRC; + break; + case GL_FUNC_SUBTRACT: + eqn = COMB_SRC_MINUS_DST; + break; + case GL_FUNC_REVERSE_SUBTRACT: + eqn = COMB_DST_MINUS_SRC; + break; + case GL_MIN: + eqn = COMB_MIN_DST_SRC; + SETfield(blend_reg, + BLEND_ONE, + COLOR_SRCBLEND_shift, COLOR_SRCBLEND_mask); + SETfield(blend_reg, + BLEND_ONE, + COLOR_DESTBLEND_shift, COLOR_DESTBLEND_mask); + break; + case GL_MAX: + eqn = COMB_MAX_DST_SRC; + SETfield(blend_reg, + BLEND_ONE, + COLOR_SRCBLEND_shift, COLOR_SRCBLEND_mask); + SETfield(blend_reg, + BLEND_ONE, + COLOR_DESTBLEND_shift, 
COLOR_DESTBLEND_mask); + break; + + default: + fprintf(stderr, + "[%s:%u] Invalid RGB blend equation (0x%04x).\n", + __FUNCTION__, __LINE__, ctx->Color.BlendEquationRGB); + return; + } + SETfield(blend_reg, + eqn, COLOR_COMB_FCN_shift, COLOR_COMB_FCN_mask); + + SETfield(blend_reg, + evergreenblend_factor(ctx->Color.BlendSrcA, GL_TRUE), + ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask); + SETfield(blend_reg, + evergreenblend_factor(ctx->Color.BlendDstA, GL_FALSE), + ALPHA_DESTBLEND_shift, ALPHA_DESTBLEND_mask); + + switch (ctx->Color.BlendEquationA) { + case GL_FUNC_ADD: + eqnA = COMB_DST_PLUS_SRC; + break; + case GL_FUNC_SUBTRACT: + eqnA = COMB_SRC_MINUS_DST; + break; + case GL_FUNC_REVERSE_SUBTRACT: + eqnA = COMB_DST_MINUS_SRC; + break; + case GL_MIN: + eqnA = COMB_MIN_DST_SRC; + SETfield(blend_reg, + BLEND_ONE, + ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask); + SETfield(blend_reg, + BLEND_ONE, + ALPHA_DESTBLEND_shift, ALPHA_DESTBLEND_mask); + break; + case GL_MAX: + eqnA = COMB_MAX_DST_SRC; + SETfield(blend_reg, + BLEND_ONE, + ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask); + SETfield(blend_reg, + BLEND_ONE, + ALPHA_DESTBLEND_shift, ALPHA_DESTBLEND_mask); + break; + default: + fprintf(stderr, + "[%s:%u] Invalid A blend equation (0x%04x).\n", + __FUNCTION__, __LINE__, ctx->Color.BlendEquationA); + return; + } + + SETfield(blend_reg, + eqnA, ALPHA_COMB_FCN_shift, ALPHA_COMB_FCN_mask); + + SETbit(blend_reg, SEPARATE_ALPHA_BLEND_bit); + + SETbit(blend_reg, EG_CB_BLENDX_CONTROL_ENABLE_bit); + + evergreen->CB_BLEND0_CONTROL.u32All = blend_reg; +} + +static void evergreenBlendEquationSeparate(GLcontext * ctx, + GLenum modeRGB, GLenum modeA) //same +{ + evergreenSetBlendState(ctx); +} + +static void evergreenBlendFuncSeparate(GLcontext * ctx, + GLenum sfactorRGB, GLenum dfactorRGB, + GLenum sfactorA, GLenum dfactorA) //same +{ + evergreenSetBlendState(ctx); +} + +static GLuint evergreen_translate_logicop(GLenum logicop) //same +{ + switch (logicop) { + case GL_CLEAR: + return 0x00; 
+ case GL_SET: + return 0xff; + case GL_COPY: + return 0xcc; + case GL_COPY_INVERTED: + return 0x33; + case GL_NOOP: + return 0xaa; + case GL_INVERT: + return 0x55; + case GL_AND: + return 0x88; + case GL_NAND: + return 0x77; + case GL_OR: + return 0xee; + case GL_NOR: + return 0x11; + case GL_XOR: + return 0x66; + case GL_EQUIV: + return 0x99; + case GL_AND_REVERSE: + return 0x44; + case GL_AND_INVERTED: + return 0x22; + case GL_OR_REVERSE: + return 0xdd; + case GL_OR_INVERTED: + return 0xbb; + default: + fprintf(stderr, "unknown blend logic operation %x\n", logicop); + return 0xcc; + } +} + +static void evergreenSetLogicOpState(GLcontext *ctx) //diff : CB_COLOR_CONTROL.ROP3 is actually same bits. +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + + EVERGREEN_STATECHANGE(context, cb); + + if (RGBA_LOGICOP_ENABLED(ctx)) + SETfield(evergreen->CB_COLOR_CONTROL.u32All, + evergreen_translate_logicop(ctx->Color.LogicOp), + EG_CB_COLOR_CONTROL__ROP3_shift, + EG_CB_COLOR_CONTROL__ROP3_mask); + else + SETfield(evergreen->CB_COLOR_CONTROL.u32All, 0xCC, + EG_CB_COLOR_CONTROL__ROP3_shift, + EG_CB_COLOR_CONTROL__ROP3_mask); +} + +static void evergreenClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq ) //same , but PA_CL_UCP_0_ offset diff +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + GLint p; + GLint *ip; + + p = (GLint) plane - (GLint) GL_CLIP_PLANE0; + ip = (GLint *)ctx->Transform._ClipUserPlane[p]; + + EVERGREEN_STATECHANGE(context, pa); + + evergreen->ucp[p].PA_CL_UCP_0_X.u32All = ip[0]; + evergreen->ucp[p].PA_CL_UCP_0_Y.u32All = ip[1]; + evergreen->ucp[p].PA_CL_UCP_0_Z.u32All = ip[2]; + evergreen->ucp[p].PA_CL_UCP_0_W.u32All = ip[3]; +} + +static void evergreenSetClipPlaneState(GLcontext * ctx, GLenum cap, GLboolean state) //diff in func calls +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT 
*evergreen = GET_EVERGREEN_CHIP(context); + GLuint p; + + p = cap - GL_CLIP_PLANE0; + + EVERGREEN_STATECHANGE(context, pa); + + if (state) { + evergreen->PA_CL_CLIP_CNTL.u32All |= (UCP_ENA_0_bit << p); + evergreen->ucp[p].enabled = GL_TRUE; + evergreenClipPlane(ctx, cap, NULL); + } else { + evergreen->PA_CL_CLIP_CNTL.u32All &= ~(UCP_ENA_0_bit << p); + evergreen->ucp[p].enabled = GL_FALSE; + } +} + +static void evergreenSetDBRenderState(GLcontext * ctx) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + struct evergreen_fragment_program *fp = + (struct evergreen_fragment_program *)(ctx->FragmentProgram._Current); + + EVERGREEN_STATECHANGE(context, db); + + SETbit(evergreen->DB_SHADER_CONTROL.u32All, + DUAL_EXPORT_ENABLE_bit); + SETfield(evergreen->DB_SHADER_CONTROL.u32All, EARLY_Z_THEN_LATE_Z, + Z_ORDER_shift, + Z_ORDER_mask); + /* XXX need to enable htile for hiz/s */ + SETfield(evergreen->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, + FORCE_HIZ_ENABLE_shift, + FORCE_HIZ_ENABLE_mask); + SETfield(evergreen->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, + FORCE_HIS_ENABLE0_shift, + FORCE_HIS_ENABLE0_mask); + SETfield(evergreen->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, + FORCE_HIS_ENABLE1_shift, + FORCE_HIS_ENABLE1_mask); + + if (context->radeon.query.current) + { + SETbit(evergreen->DB_RENDER_OVERRIDE.u32All, NOOP_CULL_DISABLE_bit); + SETbit(evergreen->DB_COUNT_CONTROL.u32All, + EG_DB_COUNT_CONTROL__PERFECT_ZPASS_COUNTS_bit); + } + else + { + CLEARbit(evergreen->DB_RENDER_OVERRIDE.u32All, NOOP_CULL_DISABLE_bit); + CLEARbit(evergreen->DB_COUNT_CONTROL.u32All, + EG_DB_COUNT_CONTROL__PERFECT_ZPASS_COUNTS_bit); + } + + if (fp) + { + if (fp->r700Shader.killIsUsed) + { + SETbit(evergreen->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit); + } + else + { + CLEARbit(evergreen->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit); + } + + if (fp->r700Shader.depthIsExported) + { + SETbit(evergreen->DB_SHADER_CONTROL.u32All, 
Z_EXPORT_ENABLE_bit); + } + else + { + CLEARbit(evergreen->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit); + } + } +} + +void evergreenUpdateShaderStates(GLcontext * ctx) +{ + evergreenSetDBRenderState(ctx); + evergreenUpdateTextureState(ctx); +} + +static void evergreenSetDepthState(GLcontext * ctx) //same +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + + EVERGREEN_STATECHANGE(context, db); + + if (ctx->Depth.Test) + { + SETbit(evergreen->DB_DEPTH_CONTROL.u32All, Z_ENABLE_bit); + if (ctx->Depth.Mask) + { + SETbit(evergreen->DB_DEPTH_CONTROL.u32All, Z_WRITE_ENABLE_bit); + } + else + { + CLEARbit(evergreen->DB_DEPTH_CONTROL.u32All, Z_WRITE_ENABLE_bit); + } + + switch (ctx->Depth.Func) + { + case GL_NEVER: + SETfield(evergreen->DB_DEPTH_CONTROL.u32All, FRAG_NEVER, + ZFUNC_shift, ZFUNC_mask); + break; + case GL_LESS: + SETfield(evergreen->DB_DEPTH_CONTROL.u32All, FRAG_LESS, + ZFUNC_shift, ZFUNC_mask); + break; + case GL_EQUAL: + SETfield(evergreen->DB_DEPTH_CONTROL.u32All, FRAG_EQUAL, + ZFUNC_shift, ZFUNC_mask); + break; + case GL_LEQUAL: + SETfield(evergreen->DB_DEPTH_CONTROL.u32All, FRAG_LEQUAL, + ZFUNC_shift, ZFUNC_mask); + break; + case GL_GREATER: + SETfield(evergreen->DB_DEPTH_CONTROL.u32All, FRAG_GREATER, + ZFUNC_shift, ZFUNC_mask); + break; + case GL_NOTEQUAL: + SETfield(evergreen->DB_DEPTH_CONTROL.u32All, FRAG_NOTEQUAL, + ZFUNC_shift, ZFUNC_mask); + break; + case GL_GEQUAL: + SETfield(evergreen->DB_DEPTH_CONTROL.u32All, FRAG_GEQUAL, + ZFUNC_shift, ZFUNC_mask); + break; + case GL_ALWAYS: + SETfield(evergreen->DB_DEPTH_CONTROL.u32All, FRAG_ALWAYS, + ZFUNC_shift, ZFUNC_mask); + break; + default: + SETfield(evergreen->DB_DEPTH_CONTROL.u32All, FRAG_ALWAYS, + ZFUNC_shift, ZFUNC_mask); + break; + } + } + else + { + CLEARbit(evergreen->DB_DEPTH_CONTROL.u32All, Z_ENABLE_bit); + CLEARbit(evergreen->DB_DEPTH_CONTROL.u32All, Z_WRITE_ENABLE_bit); + } +} + +static void evergreenSetStencilState(GLcontext 
* ctx, GLboolean state) //same +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + GLboolean hw_stencil = GL_FALSE; + + if (ctx->DrawBuffer) { + struct radeon_renderbuffer *rrbStencil + = radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL); + hw_stencil = (rrbStencil && rrbStencil->bo); + } + + if (hw_stencil) { + EVERGREEN_STATECHANGE(context, db); + if (state) { + SETbit(evergreen->DB_DEPTH_CONTROL.u32All, STENCIL_ENABLE_bit); + SETbit(evergreen->DB_DEPTH_CONTROL.u32All, BACKFACE_ENABLE_bit); + } else + CLEARbit(evergreen->DB_DEPTH_CONTROL.u32All, STENCIL_ENABLE_bit); + } +} + +static void evergreenUpdateCulling(GLcontext * ctx) //same +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + + EVERGREEN_STATECHANGE(context, pa); + + CLEARbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, FACE_bit); + CLEARbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit); + CLEARbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit); + + if (ctx->Polygon.CullFlag) + { + switch (ctx->Polygon.CullFaceMode) + { + case GL_FRONT: + SETbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit); + CLEARbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit); + break; + case GL_BACK: + CLEARbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit); + SETbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit); + break; + case GL_FRONT_AND_BACK: + SETbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit); + SETbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit); + break; + default: + CLEARbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit); + CLEARbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit); + break; + } + } + + switch (ctx->Polygon.FrontFace) + { + case GL_CW: + SETbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, FACE_bit); + break; + case GL_CCW: + CLEARbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, FACE_bit); + break; + default: + 
CLEARbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, FACE_bit); /* default: ccw */ + break; + } + + /* Winding is inverted when rendering to FBO */ + if (ctx->DrawBuffer && ctx->DrawBuffer->Name) + evergreen->PA_SU_SC_MODE_CNTL.u32All ^= FACE_bit; +} + +static void evergreenSetPolygonOffsetState(GLcontext * ctx, GLboolean state) //same +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + + EVERGREEN_STATECHANGE(context, pa); + + if (state) { + SETbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_FRONT_ENABLE_bit); + SETbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_BACK_ENABLE_bit); + SETbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_PARA_ENABLE_bit); + } else { + CLEARbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_FRONT_ENABLE_bit); + CLEARbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_BACK_ENABLE_bit); + CLEARbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_PARA_ENABLE_bit); + } +} + +static void evergreenUpdateLineStipple(GLcontext * ctx) //diff +{ + /* TODO */ +} + +void evergreenSetScissor(context_t *context) //diff +{ + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + unsigned x1, y1, x2, y2; + int id = 0; + struct radeon_renderbuffer *rrb; + + rrb = radeon_get_colorbuffer(&context->radeon); + if (!rrb || !rrb->bo) { + return; + } + if (context->radeon.state.scissor.enabled) { + x1 = context->radeon.state.scissor.rect.x1; + y1 = context->radeon.state.scissor.rect.y1; + x2 = context->radeon.state.scissor.rect.x2; + y2 = context->radeon.state.scissor.rect.y2; + /* r600 has exclusive BR scissors */ + if (context->radeon.radeonScreen->kernel_mm) { + x2++; + y2++; + } + } else { + if (context->radeon.radeonScreen->driScreen->dri2.enabled) { + x1 = 0; + y1 = 0; + x2 = rrb->base.Width; + y2 = rrb->base.Height; + } else { + x1 = rrb->dPriv->x; + y1 = rrb->dPriv->y; + x2 = rrb->dPriv->x + rrb->dPriv->w; + y2 = rrb->dPriv->y + rrb->dPriv->h; + } + } + + 
EVERGREEN_STATECHANGE(context, pa); + + /* screen */ + /* TODO : check WINDOW_OFFSET_DISABLE */ + //SETbit(evergreen->PA_SC_SCREEN_SCISSOR_TL.u32All, WINDOW_OFFSET_DISABLE_bit); + SETfield(evergreen->PA_SC_SCREEN_SCISSOR_TL.u32All, x1, + PA_SC_SCREEN_SCISSOR_TL__TL_X_shift, EG_PA_SC_SCREEN_SCISSOR_TL__TL_X_mask); + SETfield(evergreen->PA_SC_SCREEN_SCISSOR_TL.u32All, y1, + PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift, EG_PA_SC_SCREEN_SCISSOR_TL__TL_Y_mask); + + SETfield(evergreen->PA_SC_SCREEN_SCISSOR_BR.u32All, x2, + PA_SC_SCREEN_SCISSOR_BR__BR_X_shift, EG_PA_SC_SCREEN_SCISSOR_BR__BR_X_mask); + SETfield(evergreen->PA_SC_SCREEN_SCISSOR_BR.u32All, y2, + PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift, EG_PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask); + + /* window */ + SETbit(evergreen->PA_SC_WINDOW_SCISSOR_TL.u32All, WINDOW_OFFSET_DISABLE_bit); + SETfield(evergreen->PA_SC_WINDOW_SCISSOR_TL.u32All, x1, + PA_SC_WINDOW_SCISSOR_TL__TL_X_shift, EG_PA_SC_WINDOW_SCISSOR_TL__TL_X_mask); + SETfield(evergreen->PA_SC_WINDOW_SCISSOR_TL.u32All, y1, + PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift, EG_PA_SC_WINDOW_SCISSOR_TL__TL_Y_mask); + + SETfield(evergreen->PA_SC_WINDOW_SCISSOR_BR.u32All, x2, + PA_SC_WINDOW_SCISSOR_BR__BR_X_shift, EG_PA_SC_WINDOW_SCISSOR_BR__BR_X_mask); + SETfield(evergreen->PA_SC_WINDOW_SCISSOR_BR.u32All, y2, + PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift, EG_PA_SC_WINDOW_SCISSOR_BR__BR_Y_mask); + + + SETfield(evergreen->PA_SC_CLIPRECT_0_TL.u32All, x1, + PA_SC_CLIPRECT_0_TL__TL_X_shift, EG_PA_SC_CLIPRECT_0_TL__TL_X_mask); + SETfield(evergreen->PA_SC_CLIPRECT_0_TL.u32All, y1, + PA_SC_CLIPRECT_0_TL__TL_Y_shift, EG_PA_SC_CLIPRECT_0_TL__TL_Y_mask); + SETfield(evergreen->PA_SC_CLIPRECT_0_BR.u32All, x2, + PA_SC_CLIPRECT_0_BR__BR_X_shift, EG_PA_SC_CLIPRECT_0_BR__BR_X_mask); + SETfield(evergreen->PA_SC_CLIPRECT_0_BR.u32All, y2, + PA_SC_CLIPRECT_0_BR__BR_Y_shift, EG_PA_SC_CLIPRECT_0_BR__BR_Y_mask); + + evergreen->PA_SC_CLIPRECT_1_TL.u32All = evergreen->PA_SC_CLIPRECT_0_TL.u32All; + 
evergreen->PA_SC_CLIPRECT_1_BR.u32All = evergreen->PA_SC_CLIPRECT_0_BR.u32All; + evergreen->PA_SC_CLIPRECT_2_TL.u32All = evergreen->PA_SC_CLIPRECT_0_TL.u32All; + evergreen->PA_SC_CLIPRECT_2_BR.u32All = evergreen->PA_SC_CLIPRECT_0_BR.u32All; + evergreen->PA_SC_CLIPRECT_3_TL.u32All = evergreen->PA_SC_CLIPRECT_0_TL.u32All; + evergreen->PA_SC_CLIPRECT_3_BR.u32All = evergreen->PA_SC_CLIPRECT_0_BR.u32All; + + /* more....2d clip */ + SETbit(evergreen->PA_SC_GENERIC_SCISSOR_TL.u32All, WINDOW_OFFSET_DISABLE_bit); + SETfield(evergreen->PA_SC_GENERIC_SCISSOR_TL.u32All, x1, + PA_SC_GENERIC_SCISSOR_TL__TL_X_shift, EG_PA_SC_GENERIC_SCISSOR_TL__TL_X_mask); + SETfield(evergreen->PA_SC_GENERIC_SCISSOR_TL.u32All, y1, + PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift, EG_PA_SC_GENERIC_SCISSOR_TL__TL_Y_mask); + SETfield(evergreen->PA_SC_GENERIC_SCISSOR_BR.u32All, x2, + PA_SC_GENERIC_SCISSOR_BR__BR_X_shift, EG_PA_SC_GENERIC_SCISSOR_BR__BR_X_mask); + SETfield(evergreen->PA_SC_GENERIC_SCISSOR_BR.u32All, y2, + PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift, EG_PA_SC_GENERIC_SCISSOR_BR__BR_Y_mask); + + SETbit(evergreen->viewport[id].PA_SC_VPORT_SCISSOR_0_TL.u32All, WINDOW_OFFSET_DISABLE_bit); + SETfield(evergreen->viewport[id].PA_SC_VPORT_SCISSOR_0_TL.u32All, x1, + PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift, EG_PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask); + SETfield(evergreen->viewport[id].PA_SC_VPORT_SCISSOR_0_TL.u32All, y1, + PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift, EG_PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask); + SETfield(evergreen->viewport[id].PA_SC_VPORT_SCISSOR_0_BR.u32All, x2, + PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift, EG_PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask); + SETfield(evergreen->viewport[id].PA_SC_VPORT_SCISSOR_0_BR.u32All, y2, + PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift, EG_PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask); + + id = 1; + SETbit(evergreen->viewport[id].PA_SC_VPORT_SCISSOR_0_TL.u32All, WINDOW_OFFSET_DISABLE_bit); + SETfield(evergreen->viewport[id].PA_SC_VPORT_SCISSOR_0_TL.u32All, x1, + PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift, 
EG_PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask); + SETfield(evergreen->viewport[id].PA_SC_VPORT_SCISSOR_0_TL.u32All, y1, + PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift, EG_PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask); + SETfield(evergreen->viewport[id].PA_SC_VPORT_SCISSOR_0_BR.u32All, x2, + PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift, EG_PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask); + SETfield(evergreen->viewport[id].PA_SC_VPORT_SCISSOR_0_BR.u32All, y2, + PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift, EG_PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask); + + evergreen->viewport[id].enabled = GL_TRUE; +} + +static void evergreenUpdateWindow(GLcontext * ctx, int id) //diff in calling evergreenSetScissor +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + __DRIdrawable *dPriv = radeon_get_drawable(&context->radeon); + GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0; + GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0; + const GLfloat *v = ctx->Viewport._WindowMap.m; + const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF; + const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); + GLfloat y_scale, y_bias; + + if (render_to_fbo) { + y_scale = 1.0; + y_bias = 0; + } else { + y_scale = -1.0; + y_bias = yoffset; + } + + GLfloat sx = v[MAT_SX]; + GLfloat tx = v[MAT_TX] + xoffset; + GLfloat sy = v[MAT_SY] * y_scale; + GLfloat ty = (v[MAT_TY] * y_scale) + y_bias; + GLfloat sz = v[MAT_SZ] * depthScale; + GLfloat tz = v[MAT_TZ] * depthScale; + + EVERGREEN_STATECHANGE(context, pa); + + + evergreen->viewport[id].PA_CL_VPORT_XSCALE.f32All = sx; + evergreen->viewport[id].PA_CL_VPORT_XOFFSET.f32All = tx; + + evergreen->viewport[id].PA_CL_VPORT_YSCALE.f32All = sy; + evergreen->viewport[id].PA_CL_VPORT_YOFFSET.f32All = ty; + + evergreen->viewport[id].PA_CL_VPORT_ZSCALE.f32All = sz; + evergreen->viewport[id].PA_CL_VPORT_ZOFFSET.f32All = tz; + + if (ctx->Transform.DepthClamp) { + evergreen->viewport[id].PA_SC_VPORT_ZMIN_0.f32All = MIN2(ctx->Viewport.Near, 
ctx->Viewport.Far); + evergreen->viewport[id].PA_SC_VPORT_ZMAX_0.f32All = MAX2(ctx->Viewport.Near, ctx->Viewport.Far); + SETbit(evergreen->PA_CL_CLIP_CNTL.u32All, ZCLIP_NEAR_DISABLE_bit); + SETbit(evergreen->PA_CL_CLIP_CNTL.u32All, ZCLIP_FAR_DISABLE_bit); + } else { + evergreen->viewport[id].PA_SC_VPORT_ZMIN_0.f32All = 0.0; + evergreen->viewport[id].PA_SC_VPORT_ZMAX_0.f32All = 1.0; + CLEARbit(evergreen->PA_CL_CLIP_CNTL.u32All, ZCLIP_NEAR_DISABLE_bit); + CLEARbit(evergreen->PA_CL_CLIP_CNTL.u32All, ZCLIP_FAR_DISABLE_bit); + } + + evergreen->viewport[id].enabled = GL_TRUE; + + evergreenSetScissor(context); +} + +static void evergreenEnable(GLcontext * ctx, GLenum cap, GLboolean state) //diff in func calls +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + + switch (cap) { + case GL_TEXTURE_1D: + case GL_TEXTURE_2D: + case GL_TEXTURE_3D: + /* empty */ + break; + case GL_FOG: + /* empty */ + break; + case GL_ALPHA_TEST: + evergreenSetAlphaState(ctx); + break; + case GL_COLOR_LOGIC_OP: + evergreenSetLogicOpState(ctx); + /* fall-through, because logic op overrides blending */ + case GL_BLEND: + evergreenSetBlendState(ctx); + break; + case GL_CLIP_PLANE0: + case GL_CLIP_PLANE1: + case GL_CLIP_PLANE2: + case GL_CLIP_PLANE3: + case GL_CLIP_PLANE4: + case GL_CLIP_PLANE5: + evergreenSetClipPlaneState(ctx, cap, state); + break; + case GL_DEPTH_TEST: + evergreenSetDepthState(ctx); + break; + case GL_STENCIL_TEST: + evergreenSetStencilState(ctx, state); + break; + case GL_CULL_FACE: + evergreenUpdateCulling(ctx); + break; + case GL_POLYGON_OFFSET_POINT: + case GL_POLYGON_OFFSET_LINE: + case GL_POLYGON_OFFSET_FILL: + evergreenSetPolygonOffsetState(ctx, state); + break; + case GL_SCISSOR_TEST: + radeon_firevertices(&context->radeon); + context->radeon.state.scissor.enabled = state; + radeonUpdateScissor(ctx); + break; + case GL_LINE_STIPPLE: + evergreenUpdateLineStipple(ctx); + break; + case GL_DEPTH_CLAMP: + evergreenUpdateWindow(ctx, 0); + break; + default: + break; + } + +} + 
+static void evergreenColorMask(GLcontext * ctx, + GLboolean r, GLboolean g, GLboolean b, GLboolean a) //same +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + unsigned int mask = ((r ? 1 : 0) | + (g ? 2 : 0) | + (b ? 4 : 0) | + (a ? 8 : 0)); + + if (mask != evergreen->CB_TARGET_MASK.u32All) { + EVERGREEN_STATECHANGE(context, cb); + SETfield(evergreen->CB_TARGET_MASK.u32All, mask, TARGET0_ENABLE_shift, TARGET0_ENABLE_mask); + } +} + +static void evergreenDepthFunc(GLcontext * ctx, GLenum func) //same +{ + evergreenSetDepthState(ctx); +} + +static void evergreenDepthMask(GLcontext * ctx, GLboolean mask) //same +{ + evergreenSetDepthState(ctx); +} + +static void evergreenCullFace(GLcontext * ctx, GLenum mode) //same +{ + evergreenUpdateCulling(ctx); +} + +static void evergreenFogfv(GLcontext * ctx, GLenum pname, const GLfloat * param) //same +{ +} + +static void evergreenUpdatePolygonMode(GLcontext * ctx) //same +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + + EVERGREEN_STATECHANGE(context, pa); + + SETfield(evergreen->PA_SU_SC_MODE_CNTL.u32All, X_DISABLE_POLY_MODE, POLY_MODE_shift, POLY_MODE_mask); + + /* Only do something if a polygon mode is wanted, default is GL_FILL */ + if (ctx->Polygon.FrontMode != GL_FILL || + ctx->Polygon.BackMode != GL_FILL) { + GLenum f, b; + + /* Handle GL_CW (clock wise and GL_CCW (counter clock wise) + * correctly by selecting the correct front and back face + */ + f = ctx->Polygon.FrontMode; + b = ctx->Polygon.BackMode; + + /* Enable polygon mode */ + SETfield(evergreen->PA_SU_SC_MODE_CNTL.u32All, X_DUAL_MODE, POLY_MODE_shift, POLY_MODE_mask); + + switch (f) { + case GL_LINE: + SETfield(evergreen->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_LINES, + POLYMODE_FRONT_PTYPE_shift, POLYMODE_FRONT_PTYPE_mask); + break; + case GL_POINT: + SETfield(evergreen->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_POINTS, + 
POLYMODE_FRONT_PTYPE_shift, POLYMODE_FRONT_PTYPE_mask); + break; + case GL_FILL: + SETfield(evergreen->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_TRIANGLES, + POLYMODE_FRONT_PTYPE_shift, POLYMODE_FRONT_PTYPE_mask); + break; + } + + switch (b) { + case GL_LINE: + SETfield(evergreen->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_LINES, + POLYMODE_BACK_PTYPE_shift, POLYMODE_BACK_PTYPE_mask); + break; + case GL_POINT: + SETfield(evergreen->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_POINTS, + POLYMODE_BACK_PTYPE_shift, POLYMODE_BACK_PTYPE_mask); + break; + case GL_FILL: + SETfield(evergreen->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_TRIANGLES, + POLYMODE_BACK_PTYPE_shift, POLYMODE_BACK_PTYPE_mask); + break; + } + } +} + +static void evergreenFrontFace(GLcontext * ctx, GLenum mode) //same +{ + evergreenUpdateCulling(ctx); + evergreenUpdatePolygonMode(ctx); +} + +static void evergreenShadeModel(GLcontext * ctx, GLenum mode) //same +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + + EVERGREEN_STATECHANGE(context, spi); + + /* also need to set/clear FLAT_SHADE bit per param in SPI_PS_INPUT_CNTL_[0-31] */ + switch (mode) { + case GL_FLAT: + SETbit(evergreen->SPI_INTERP_CONTROL_0.u32All, FLAT_SHADE_ENA_bit); + break; + case GL_SMOOTH: + CLEARbit(evergreen->SPI_INTERP_CONTROL_0.u32All, FLAT_SHADE_ENA_bit); + break; + default: + return; + } +} + +static void evergreenLogicOpcode(GLcontext *ctx, GLenum logicop) //diff +{ + if (RGBA_LOGICOP_ENABLED(ctx)) + evergreenSetLogicOpState(ctx); +} + +static void evergreenPointSize(GLcontext * ctx, GLfloat size) //same +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + + EVERGREEN_STATECHANGE(context, pa); + + /* We need to clamp to user defined range here, because + * the HW clamping happens only for per vertex point size. 
*/ + size = CLAMP(size, ctx->Point.MinSize, ctx->Point.MaxSize); + + /* same size limits for AA, non-AA points */ + size = CLAMP(size, ctx->Const.MinPointSize, ctx->Const.MaxPointSize); + + /* format is 12.4 fixed point */ + SETfield(evergreen->PA_SU_POINT_SIZE.u32All, (int)(size * 8.0), + PA_SU_POINT_SIZE__HEIGHT_shift, PA_SU_POINT_SIZE__HEIGHT_mask); + SETfield(evergreen->PA_SU_POINT_SIZE.u32All, (int)(size * 8.0), + PA_SU_POINT_SIZE__WIDTH_shift, PA_SU_POINT_SIZE__WIDTH_mask); + +} + +static void evergreenPointParameter(GLcontext * ctx, GLenum pname, const GLfloat * param) //same +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + + EVERGREEN_STATECHANGE(context, pa); + + /* format is 12.4 fixed point */ + switch (pname) { + case GL_POINT_SIZE_MIN: + SETfield(evergreen->PA_SU_POINT_MINMAX.u32All, (int)(ctx->Point.MinSize * 8.0), + MIN_SIZE_shift, MIN_SIZE_mask); + evergreenPointSize(ctx, ctx->Point.Size); + break; + case GL_POINT_SIZE_MAX: + SETfield(evergreen->PA_SU_POINT_MINMAX.u32All, (int)(ctx->Point.MaxSize * 8.0), + MAX_SIZE_shift, MAX_SIZE_mask); + evergreenPointSize(ctx, ctx->Point.Size); + break; + case GL_POINT_DISTANCE_ATTENUATION: + break; + case GL_POINT_FADE_THRESHOLD_SIZE: + break; + default: + break; + } +} + +static int evergreen_translate_stencil_func(int func) //same +{ + switch (func) { + case GL_NEVER: + return REF_NEVER; + case GL_LESS: + return REF_LESS; + case GL_EQUAL: + return REF_EQUAL; + case GL_LEQUAL: + return REF_LEQUAL; + case GL_GREATER: + return REF_GREATER; + case GL_NOTEQUAL: + return REF_NOTEQUAL; + case GL_GEQUAL: + return REF_GEQUAL; + case GL_ALWAYS: + return REF_ALWAYS; + } + return 0; +} + +static void evergreenStencilFuncSeparate(GLcontext * ctx, GLenum face, + GLenum func, GLint ref, GLuint mask) //same +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + const unsigned back = 
ctx->Stencil._BackFace; + + + EVERGREEN_STATECHANGE(context, db); + + //front + SETfield(evergreen->DB_STENCILREFMASK.u32All, ctx->Stencil.Ref[0], + STENCILREF_shift, STENCILREF_mask); + SETfield(evergreen->DB_STENCILREFMASK.u32All, ctx->Stencil.ValueMask[0], + STENCILMASK_shift, STENCILMASK_mask); + + SETfield(evergreen->DB_DEPTH_CONTROL.u32All, evergreen_translate_stencil_func(ctx->Stencil.Function[0]), + STENCILFUNC_shift, STENCILFUNC_mask); + + //back + SETfield(evergreen->DB_STENCILREFMASK_BF.u32All, ctx->Stencil.Ref[back], + STENCILREF_BF_shift, STENCILREF_BF_mask); + SETfield(evergreen->DB_STENCILREFMASK_BF.u32All, ctx->Stencil.ValueMask[back], + STENCILMASK_BF_shift, STENCILMASK_BF_mask); + + SETfield(evergreen->DB_DEPTH_CONTROL.u32All, evergreen_translate_stencil_func(ctx->Stencil.Function[back]), + STENCILFUNC_BF_shift, STENCILFUNC_BF_mask); +} + +static void evergreenStencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask) //same +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + const unsigned back = ctx->Stencil._BackFace; + + EVERGREEN_STATECHANGE(context, db); + + // front + SETfield(evergreen->DB_STENCILREFMASK.u32All, ctx->Stencil.WriteMask[0], + STENCILWRITEMASK_shift, STENCILWRITEMASK_mask); + + // back + SETfield(evergreen->DB_STENCILREFMASK_BF.u32All, ctx->Stencil.WriteMask[back], + STENCILWRITEMASK_BF_shift, STENCILWRITEMASK_BF_mask); + +} + +static int evergreen_translate_stencil_op(int op) //same +{ + switch (op) { + case GL_KEEP: + return STENCIL_KEEP; + case GL_ZERO: + return STENCIL_ZERO; + case GL_REPLACE: + return STENCIL_REPLACE; + case GL_INCR: + return STENCIL_INCR_CLAMP; + case GL_DECR: + return STENCIL_DECR_CLAMP; + case GL_INCR_WRAP_EXT: + return STENCIL_INCR_WRAP; + case GL_DECR_WRAP_EXT: + return STENCIL_DECR_WRAP; + case GL_INVERT: + return STENCIL_INVERT; + default: + WARN_ONCE("Do not know how to translate stencil op"); + return STENCIL_KEEP; + } + 
return 0; +} + +static void evergreenStencilOpSeparate(GLcontext * ctx, GLenum face, + GLenum fail, GLenum zfail, GLenum zpass) //same +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + const unsigned back = ctx->Stencil._BackFace; + + EVERGREEN_STATECHANGE(context, db); + + SETfield(evergreen->DB_DEPTH_CONTROL.u32All, evergreen_translate_stencil_op(ctx->Stencil.FailFunc[0]), + STENCILFAIL_shift, STENCILFAIL_mask); + SETfield(evergreen->DB_DEPTH_CONTROL.u32All, evergreen_translate_stencil_op(ctx->Stencil.ZFailFunc[0]), + STENCILZFAIL_shift, STENCILZFAIL_mask); + SETfield(evergreen->DB_DEPTH_CONTROL.u32All, evergreen_translate_stencil_op(ctx->Stencil.ZPassFunc[0]), + STENCILZPASS_shift, STENCILZPASS_mask); + + SETfield(evergreen->DB_DEPTH_CONTROL.u32All, evergreen_translate_stencil_op(ctx->Stencil.FailFunc[back]), + STENCILFAIL_BF_shift, STENCILFAIL_BF_mask); + SETfield(evergreen->DB_DEPTH_CONTROL.u32All, evergreen_translate_stencil_op(ctx->Stencil.ZFailFunc[back]), + STENCILZFAIL_BF_shift, STENCILZFAIL_BF_mask); + SETfield(evergreen->DB_DEPTH_CONTROL.u32All, evergreen_translate_stencil_op(ctx->Stencil.ZPassFunc[back]), + STENCILZPASS_BF_shift, STENCILZPASS_BF_mask); +} + +static void evergreenViewport(GLcontext * ctx, + GLint x, + GLint y, + GLsizei width, + GLsizei height) //diff in evergreenUpdateWindow +{ + evergreenUpdateWindow(ctx, 0); + + radeon_viewport(ctx, x, y, width, height); +} + +static void evergreenDepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval) //diff in evergreenUpdateWindow +{ + evergreenUpdateWindow(ctx, 0); +} + +static void evergreenLineWidth(GLcontext * ctx, GLfloat widthf) //same +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + uint32_t lineWidth = (uint32_t)((widthf * 0.5) * (1 << 4)); + + EVERGREEN_STATECHANGE(context, pa); + + if (lineWidth > 0xFFFF) + lineWidth = 0xFFFF; + 
SETfield(evergreen->PA_SU_LINE_CNTL.u32All,(uint16_t)lineWidth, + PA_SU_LINE_CNTL__WIDTH_shift, PA_SU_LINE_CNTL__WIDTH_mask); +} + +static void evergreenLineStipple(GLcontext *ctx, GLint factor, GLushort pattern) //same +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + + EVERGREEN_STATECHANGE(context, pa); + + SETfield(evergreen->PA_SC_LINE_STIPPLE.u32All, pattern, LINE_PATTERN_shift, LINE_PATTERN_mask); + SETfield(evergreen->PA_SC_LINE_STIPPLE.u32All, (factor-1), REPEAT_COUNT_shift, REPEAT_COUNT_mask); + SETfield(evergreen->PA_SC_LINE_STIPPLE.u32All, 1, AUTO_RESET_CNTL_shift, AUTO_RESET_CNTL_mask); +} + +static void evergreenPolygonOffset(GLcontext * ctx, GLfloat factor, GLfloat units) //diff : + //all register here offset diff, bits same +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + GLfloat constant = units; + GLchar depth = 0; + + EVERGREEN_STATECHANGE(context, pa); + + switch (ctx->Visual.depthBits) { + case 16: + constant *= 4.0; + depth = -16; + break; + case 24: + constant *= 2.0; + depth = -24; + break; + } + + factor *= 12.0; + SETfield(evergreen->PA_SU_POLY_OFFSET_DB_FMT_CNTL.u32All, depth, + POLY_OFFSET_NEG_NUM_DB_BITS_shift, POLY_OFFSET_NEG_NUM_DB_BITS_mask); + //evergreen->PA_SU_POLY_OFFSET_CLAMP.f32All = constant; //??? + evergreen->PA_SU_POLY_OFFSET_FRONT_SCALE.f32All = factor; + evergreen->PA_SU_POLY_OFFSET_FRONT_OFFSET.f32All = constant; + evergreen->PA_SU_POLY_OFFSET_BACK_SCALE.f32All = factor; + evergreen->PA_SU_POLY_OFFSET_BACK_OFFSET.f32All = constant; +} + +static void evergreenPolygonMode(GLcontext * ctx, GLenum face, GLenum mode) //same +{ + (void)face; + (void)mode; + + evergreenUpdatePolygonMode(ctx); +} + +static void evergreenRenderMode(GLcontext * ctx, GLenum mode) //same +{ +} + +//TODO : move to kernel. 
/*
 * Fill evergreen->evergreen_config with the fixed SQ/SPI/VGT/PA configuration
 * values used by this driver.
 *
 * A per-chip-family switch selects the CF-instruction multiplier, the
 * VC_ENABLE setting, the GPR budget, the PS thread count, the total thread
 * count and the stack-entry budget.  From those the function derives how
 * GPRs, threads and stack entries are split between the shader stages and
 * packs the results into the corresponding register shadows.
 *
 * Called from evergreenInitState().
 */
static void evergreenInitSQConfig(GLcontext * ctx)
{
    context_t *context = EVERGREEN_CONTEXT(ctx);
    EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);

    uint32_t uSqNumCfInsts, uMaxGPRs, uMaxThreads, uMaxStackEntries, uPSThreadCount, uOtherThreadCount;
    uint32_t NUM_PS_GPRS, NUM_VS_GPRS, NUM_GS_GPRS, NUM_ES_GPRS, NUM_HS_GPRS, NUM_LS_GPRS, NUM_CLAUSE_TEMP_GPRS;
    GLboolean bVC_ENABLE = GL_TRUE;

    /* NOTE(review): the state callbacks in this file use EVERGREEN_STATECHANGE;
     * this one uses R600_STATECHANGE — confirm the two are equivalent. */
    R600_STATECHANGE(context, sq);

    /* Per-family limits. Cedar is the only family with VC_ENABLE off and a
     * smaller thread budget; Juniper, Cypress, Hemlock and the default case
     * are identical, and Redwood differs from them only in stack entries. */
    switch (context->radeon.radeonScreen->chip_family)
    {
    case CHIP_FAMILY_CEDAR:
        uSqNumCfInsts = 1;
        bVC_ENABLE = GL_FALSE;
        uMaxGPRs = 256;
        uPSThreadCount = 96;
        uMaxThreads = 192;
        uMaxStackEntries = 256;
        break;
    case CHIP_FAMILY_REDWOOD:
        uSqNumCfInsts = 2;
        bVC_ENABLE = GL_TRUE;
        uMaxGPRs = 256;
        uPSThreadCount = 128;
        uMaxThreads = 248;
        uMaxStackEntries = 256;
        break;
    case CHIP_FAMILY_JUNIPER:
        uSqNumCfInsts = 2;
        bVC_ENABLE = GL_TRUE;
        uMaxGPRs = 256;
        uPSThreadCount = 128;
        uMaxThreads = 248;
        uMaxStackEntries = 512;
        break;
    case CHIP_FAMILY_CYPRESS:
        uSqNumCfInsts = 2;
        bVC_ENABLE = GL_TRUE;
        uMaxGPRs = 256;
        uPSThreadCount = 128;
        uMaxThreads = 248;
        uMaxStackEntries = 512;
        break;
    case CHIP_FAMILY_HEMLOCK:
        uSqNumCfInsts = 2;//?
        bVC_ENABLE = GL_TRUE;
        uMaxGPRs = 256;
        uPSThreadCount = 128;
        uMaxThreads = 248;
        uMaxStackEntries = 512;
        break;
    default:
        uSqNumCfInsts = 2;
        bVC_ENABLE = GL_TRUE;
        uMaxGPRs = 256;
        uPSThreadCount = 128;
        uMaxThreads = 248;
        uMaxStackEntries = 512;
        break;
    }

    evergreen->evergreen_config.SQ_DYN_GPR_CNTL_PS_FLUSH_REQ.u32All = 0;

    evergreen->evergreen_config.SPI_CONFIG_CNTL.u32All = 0;
    evergreen->evergreen_config.SPI_CONFIG_CNTL_1.u32All = 0;
    SETfield(evergreen->evergreen_config.SPI_CONFIG_CNTL_1.u32All, 4,
             EG_SPI_CONFIG_CNTL_1__VTX_DONE_DELAY_shift,
             EG_SPI_CONFIG_CNTL_1__VTX_DONE_DELAY_mask);

    evergreen->evergreen_config.CP_PERFMON_CNTL.u32All = 0;

    /* FIFO sizing: the cache FIFO size scales with the per-family multiplier. */
    evergreen->evergreen_config.SQ_MS_FIFO_SIZES.u32All = 0;
    SETfield(evergreen->evergreen_config.SQ_MS_FIFO_SIZES.u32All, 16 * uSqNumCfInsts,
             EG_SQ_MS_FIFO_SIZES__CACHE_FIFO_SIZE_shift,
             EG_SQ_MS_FIFO_SIZES__CACHE_FIFO_SIZE_mask);
    SETfield(evergreen->evergreen_config.SQ_MS_FIFO_SIZES.u32All, 0x4,
             EG_SQ_MS_FIFO_SIZES__FETCH_FIFO_HIWATER_shift,
             EG_SQ_MS_FIFO_SIZES__FETCH_FIFO_HIWATER_mask);
    SETfield(evergreen->evergreen_config.SQ_MS_FIFO_SIZES.u32All, 0xE0,
             EG_SQ_MS_FIFO_SIZES__DONE_FIFO_HIWATER_shift,
             EG_SQ_MS_FIFO_SIZES__DONE_FIFO_HIWATER_mask);
    SETfield(evergreen->evergreen_config.SQ_MS_FIFO_SIZES.u32All, 0x8,
             EG_SQ_MS_FIFO_SIZES__ALU_UPDATE_FIFO_HIWATER_shift,
             EG_SQ_MS_FIFO_SIZES__ALU_UPDATE_FIFO_HIWATER_mask);

    /* SQ_CONFIG is not cleared here; evergreenInitState() memsets the whole
     * chip context before calling this function. */
    if(bVC_ENABLE == GL_TRUE)
    {
        SETbit(evergreen->evergreen_config.SQ_CONFIG.u32All,
               EG_SQ_CONFIG__VC_ENABLE_bit);
    }
    else
    {
        CLEARbit(evergreen->evergreen_config.SQ_CONFIG.u32All,
                 EG_SQ_CONFIG__VC_ENABLE_bit);
    }
    SETbit(evergreen->evergreen_config.SQ_CONFIG.u32All,
           EG_SQ_CONFIG__EXPORT_SRC_C_bit);
    /* Stage priority fields: PS = 0, VS = 1, GS = 2, ES = 3. */
    SETfield(evergreen->evergreen_config.SQ_CONFIG.u32All, 0,
             EG_SQ_CONFIG__PS_PRIO_shift,
             EG_SQ_CONFIG__PS_PRIO_mask);
    SETfield(evergreen->evergreen_config.SQ_CONFIG.u32All, 1,
             EG_SQ_CONFIG__VS_PRIO_shift,
             EG_SQ_CONFIG__VS_PRIO_mask);
    SETfield(evergreen->evergreen_config.SQ_CONFIG.u32All, 2,
             EG_SQ_CONFIG__GS_PRIO_shift,
             EG_SQ_CONFIG__GS_PRIO_mask);
    SETfield(evergreen->evergreen_config.SQ_CONFIG.u32All, 3,
             EG_SQ_CONFIG__ES_PRIO_shift,
             EG_SQ_CONFIG__ES_PRIO_mask);

    /* GPR split: 2 * 4 registers are held back, and the rest is divided in
     * 32nds — 12 PS, 6 VS, 4 GS, 4 ES, 3 HS, 3 LS (integer division; the
     * trailing numbers are the results for uMaxGPRs == 256). */
    NUM_CLAUSE_TEMP_GPRS = 4;
    NUM_PS_GPRS = ((uMaxGPRs-(4*2))*12/32); // 93
    NUM_VS_GPRS = ((uMaxGPRs-(4*2))*6/32); // 46
    NUM_GS_GPRS = ((uMaxGPRs-(4*2))*4/32); // 31
    NUM_ES_GPRS = ((uMaxGPRs-(4*2))*4/32); // 31
    NUM_HS_GPRS = ((uMaxGPRs-(4*2))*3/32); // 23
    NUM_LS_GPRS = ((uMaxGPRs-(4*2))*3/32); // 23

    evergreen->evergreen_config.SQ_GPR_RESOURCE_MGMT_1.u32All = 0;
    evergreen->evergreen_config.SQ_GPR_RESOURCE_MGMT_2.u32All = 0;
    evergreen->evergreen_config.SQ_GPR_RESOURCE_MGMT_3.u32All = 0;

    SETfield(evergreen->evergreen_config.SQ_GPR_RESOURCE_MGMT_1.u32All, NUM_PS_GPRS,
             NUM_PS_GPRS_shift, NUM_PS_GPRS_mask);
    SETfield(evergreen->evergreen_config.SQ_GPR_RESOURCE_MGMT_1.u32All, NUM_VS_GPRS,
             NUM_VS_GPRS_shift, NUM_VS_GPRS_mask);
    SETfield(evergreen->evergreen_config.SQ_GPR_RESOURCE_MGMT_1.u32All, NUM_CLAUSE_TEMP_GPRS,
             NUM_CLAUSE_TEMP_GPRS_shift, NUM_CLAUSE_TEMP_GPRS_mask);
    SETfield(evergreen->evergreen_config.SQ_GPR_RESOURCE_MGMT_2.u32All, NUM_GS_GPRS,
             NUM_GS_GPRS_shift, NUM_GS_GPRS_mask);
    SETfield(evergreen->evergreen_config.SQ_GPR_RESOURCE_MGMT_2.u32All, NUM_ES_GPRS,
             NUM_ES_GPRS_shift, NUM_ES_GPRS_mask);
    /* NOTE(review): the HS/LS counts are packed with the PS/VS shift and mask
     * macros — presumably the fields occupy the same bit positions in
     * *_MGMT_3; confirm against the register header. */
    SETfield(evergreen->evergreen_config.SQ_GPR_RESOURCE_MGMT_3.u32All, NUM_HS_GPRS,
             NUM_PS_GPRS_shift, NUM_PS_GPRS_mask);
    SETfield(evergreen->evergreen_config.SQ_GPR_RESOURCE_MGMT_3.u32All, NUM_LS_GPRS,
             NUM_VS_GPRS_shift, NUM_VS_GPRS_mask);

    /* Threads left after the PS share are divided by six and rounded down to
     * a multiple of 8; every non-PS slot gets that same count. */
    uOtherThreadCount = (((uMaxThreads-uPSThreadCount)/6)/8)*8;
    evergreen->evergreen_config.SQ_THREAD_RESOURCE_MGMT.u32All = 0;
    evergreen->evergreen_config.SQ_THREAD_RESOURCE_MGMT_2.u32All = 0;
    SETfield(evergreen->evergreen_config.SQ_THREAD_RESOURCE_MGMT.u32All, uPSThreadCount,
             NUM_PS_THREADS_shift, NUM_PS_THREADS_mask);
    SETfield(evergreen->evergreen_config.SQ_THREAD_RESOURCE_MGMT.u32All, uOtherThreadCount,
             NUM_VS_THREADS_shift, NUM_VS_THREADS_mask);
    SETfield(evergreen->evergreen_config.SQ_THREAD_RESOURCE_MGMT.u32All, uOtherThreadCount,
             NUM_GS_THREADS_shift, NUM_GS_THREADS_mask);
    SETfield(evergreen->evergreen_config.SQ_THREAD_RESOURCE_MGMT.u32All, uOtherThreadCount,
             NUM_ES_THREADS_shift, NUM_ES_THREADS_mask);
    /* NOTE(review): *_MGMT_2 again reuses the PS/VS field macros — confirm. */
    SETfield(evergreen->evergreen_config.SQ_THREAD_RESOURCE_MGMT_2.u32All, uOtherThreadCount,
             NUM_PS_THREADS_shift, NUM_PS_THREADS_mask);
    SETfield(evergreen->evergreen_config.SQ_THREAD_RESOURCE_MGMT_2.u32All, uOtherThreadCount,
             NUM_VS_THREADS_shift, NUM_VS_THREADS_mask);

    /* One sixth of the stack-entry budget for each of the six slots below. */
    uMaxStackEntries = ((uMaxStackEntries*1)/6);
    evergreen->evergreen_config.SQ_STACK_RESOURCE_MGMT_1.u32All = 0;
    evergreen->evergreen_config.SQ_STACK_RESOURCE_MGMT_2.u32All = 0;
    evergreen->evergreen_config.SQ_STACK_RESOURCE_MGMT_3.u32All = 0;
    SETfield(evergreen->evergreen_config.SQ_STACK_RESOURCE_MGMT_1.u32All, uMaxStackEntries,
             NUM_PS_STACK_ENTRIES_shift, NUM_PS_STACK_ENTRIES_mask);
    SETfield(evergreen->evergreen_config.SQ_STACK_RESOURCE_MGMT_1.u32All, uMaxStackEntries,
             NUM_VS_STACK_ENTRIES_shift, NUM_VS_STACK_ENTRIES_mask);
    SETfield(evergreen->evergreen_config.SQ_STACK_RESOURCE_MGMT_2.u32All, uMaxStackEntries,
             NUM_GS_STACK_ENTRIES_shift, NUM_GS_STACK_ENTRIES_mask);
    SETfield(evergreen->evergreen_config.SQ_STACK_RESOURCE_MGMT_2.u32All, uMaxStackEntries,
             NUM_ES_STACK_ENTRIES_shift, NUM_ES_STACK_ENTRIES_mask);
    SETfield(evergreen->evergreen_config.SQ_STACK_RESOURCE_MGMT_3.u32All, uMaxStackEntries,
             NUM_PS_STACK_ENTRIES_shift, NUM_PS_STACK_ENTRIES_mask);
    SETfield(evergreen->evergreen_config.SQ_STACK_RESOURCE_MGMT_3.u32All, uMaxStackEntries,
             NUM_VS_STACK_ENTRIES_shift, NUM_VS_STACK_ENTRIES_mask);

    evergreen->evergreen_config.PA_SC_FORCE_EOV_MAX_CNTS.u32All = 0;
    SETfield(evergreen->evergreen_config.PA_SC_FORCE_EOV_MAX_CNTS.u32All, 4095,
             EG_PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_CLK_CNT_shift,
             EG_PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_CLK_CNT_mask);
    SETfield(evergreen->evergreen_config.PA_SC_FORCE_EOV_MAX_CNTS.u32All, 255,
             EG_PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_REZ_CNT_shift,
             EG_PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_REZ_CNT_mask);

    evergreen->evergreen_config.VGT_CACHE_INVALIDATION.u32All = 0;
    SETfield(evergreen->evergreen_config.VGT_CACHE_INVALIDATION.u32All, 2,
             EG_VGT_CACHE_INVALIDATION__CACHE_INVALIDATION_shift,
             EG_VGT_CACHE_INVALIDATION__CACHE_INVALIDATION_mask);

    evergreen->evergreen_config.VGT_GS_VERTEX_REUSE.u32All = 0;
    SETfield(evergreen->evergreen_config.VGT_GS_VERTEX_REUSE.u32All, 16,
             VERT_REUSE_shift,
             VERT_REUSE_mask);

    evergreen->evergreen_config.PA_SC_LINE_STIPPLE_STATE.u32All = 0;

    evergreen->evergreen_config.PA_CL_ENHANCE.u32All = 0;
    SETbit(evergreen->evergreen_config.PA_CL_ENHANCE.u32All,
           CLIP_VTX_REORDER_ENA_bit);
    SETfield(evergreen->evergreen_config.PA_CL_ENHANCE.u32All, 3,
             NUM_CLIP_SEQ_shift,
             NUM_CLIP_SEQ_mask);
}

/*
 * Reset the evergreen chip context to its default state.
 *
 * The whole EVERGREEN_CHIP_CONTEXT is zeroed first, then default values are
 * written into the register shadows (window/scissor, viewport Z range,
 * colour and shader masks, vertex index limits, alpha test, depth/stencil
 * surface info, blending, clipping, point/line setup, colour compare,
 * anti-aliasing and render target 0).  Finally evergreenInitSQConfig() is
 * called and context->radeon.hw.all_dirty is set to GL_TRUE.
 */
void evergreenInitState(GLcontext * ctx) //diff
{
    /* NOTE(review): other functions in this file obtain the context with
     * EVERGREEN_CONTEXT(); this one uses R700_CONTEXT() — confirm equivalent. */
    context_t *context = R700_CONTEXT(ctx);
    EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);

    /* Index of the single viewport / render target initialised below. */
    int id = 0;

    //calloc should have done this
    memset(evergreen, 0, sizeof(EVERGREEN_CHIP_CONTEXT));

    // Disable window clipping and offset:
    SETfield(evergreen->PA_SC_WINDOW_OFFSET.u32All, 0,
             EG_PA_SC_WINDOW_OFFSET__WINDOW_X_OFFSET_shift, EG_PA_SC_WINDOW_OFFSET__WINDOW_X_OFFSET_mask);
    SETfield(evergreen->PA_SC_WINDOW_OFFSET.u32All, 0,
             EG_PA_SC_WINDOW_OFFSET__WINDOW_Y_OFFSET_shift, EG_PA_SC_WINDOW_OFFSET__WINDOW_Y_OFFSET_mask);

    SETbit(evergreen->PA_SC_WINDOW_SCISSOR_TL.u32All, WINDOW_OFFSET_DISABLE_bit);

    evergreen->PA_SC_CLIPRECT_RULE.u32All = 0x0000FFFF;

    evergreen->PA_SC_EDGERULE.u32All = 0xAAAAAAAA;

    // Set up Z min/max:
    evergreen->viewport[id].PA_SC_VPORT_ZMIN_0.f32All = 0.0;
    evergreen->viewport[id].PA_SC_VPORT_ZMAX_0.f32All = 1.0;

    // Enable all four channels of colour target 0 / shader output 0:
    SETfield(evergreen->CB_TARGET_MASK.u32All, 0xF, TARGET0_ENABLE_shift, TARGET0_ENABLE_mask);
    SETfield(evergreen->CB_SHADER_MASK.u32All, 0xF, OUTPUT0_ENABLE_shift, OUTPUT0_ENABLE_mask);

    SETfield(evergreen->SPI_BARYC_CNTL.u32All, 1,
             EG_SPI_BARYC_CNTL__PERSP_CENTROID_ENA_shift,
             EG_SPI_BARYC_CNTL__PERSP_CENTROID_ENA_mask);
    SETfield(evergreen->SPI_BARYC_CNTL.u32All, 1,
             EG_SPI_BARYC_CNTL__LINEAR_CENTROID_ENA_shift,
             EG_SPI_BARYC_CNTL__LINEAR_CENTROID_ENA_mask);

    // Turn off vgt reuse:
    evergreen->VGT_REUSE_OFF.u32All = 0;
    SETbit(evergreen->VGT_REUSE_OFF.u32All, REUSE_OFF_bit);

    // Specify offsetting and clamp values for vertices:
    evergreen->VGT_MAX_VTX_INDX.u32All = 0xFFFFFF;
    evergreen->VGT_MIN_VTX_INDX.u32All = 0;
    evergreen->VGT_INDX_OFFSET.u32All = 0;

    evergreen->VGT_DMA_NUM_INSTANCES.u32All = 1;

    // Alpha test off (enable bit cleared, function set to REF_NEVER):
    SETfield(evergreen->SX_ALPHA_TEST_CONTROL.u32All, REF_NEVER,
             ALPHA_FUNC_shift, ALPHA_FUNC_mask);
    CLEARbit(evergreen->SX_ALPHA_TEST_CONTROL.u32All, ALPHA_TEST_ENABLE_bit);

    evergreen->SPI_VS_OUT_ID_0.u32All = 0x03020100;
    evergreen->SPI_VS_OUT_ID_1.u32All = 0x07060504;

    evergreen->SPI_PS_INPUT_CNTL[0].u32All = 0x00000800;
    evergreen->SPI_PS_INPUT_CNTL[1].u32All = 0x00000801;
    evergreen->SPI_PS_INPUT_CNTL[2].u32All = 0x00000802;


    // Depth buffer currently disabled (only Z write enable and
    // ZFUNC = FRAG_ALWAYS are set in DB_DEPTH_CONTROL):
    evergreen->DB_DEPTH_CONTROL.u32All = 0;
    SETbit(evergreen->DB_DEPTH_CONTROL.u32All, Z_WRITE_ENABLE_bit);
    SETfield(evergreen->DB_DEPTH_CONTROL.u32All, FRAG_ALWAYS,
             ZFUNC_shift, ZFUNC_mask);

    evergreen->DB_Z_READ_BASE.u32All = 0;
    evergreen->DB_Z_WRITE_BASE.u32All = 0;

    evergreen->DB_DEPTH_CLEAR.f32All = 1.0;

    evergreen->DB_DEPTH_VIEW.u32All = 0;

    evergreen->DB_SHADER_CONTROL.u32All = 0;
    SETbit(evergreen->DB_SHADER_CONTROL.u32All, EG_DB_SHADER_CONTROL__DUAL_EXPORT_ENABLE_bit);

    // Default depth surface layout:
    evergreen->DB_Z_INFO.u32All = 0;
    SETfield(evergreen->DB_Z_INFO.u32All , ARRAY_1D_TILED_THIN1,
             EG_DB_Z_INFO__ARRAY_MODE_shift, EG_DB_Z_INFO__ARRAY_MODE_mask);
    SETfield(evergreen->DB_Z_INFO.u32All , EG_Z_24,
             EG_DB_Z_INFO__FORMAT_shift, EG_DB_Z_INFO__FORMAT_mask);
    SETfield(evergreen->DB_Z_INFO.u32All , EG_ADDR_SURF_TILE_SPLIT_256B,
             EG_DB_Z_INFO__TILE_SPLIT_shift, EG_DB_Z_INFO__TILE_SPLIT_mask);
    SETfield(evergreen->DB_Z_INFO.u32All , EG_ADDR_SURF_8_BANK,
             EG_DB_Z_INFO__NUM_BANKS_shift, EG_DB_Z_INFO__NUM_BANKS_mask);
    SETfield(evergreen->DB_Z_INFO.u32All , EG_ADDR_SURF_BANK_WIDTH_1,
             EG_DB_Z_INFO__BANK_WIDTH_shift, EG_DB_Z_INFO__BANK_WIDTH_mask);
    SETfield(evergreen->DB_Z_INFO.u32All , EG_ADDR_SURF_BANK_HEIGHT_1,
             EG_DB_Z_INFO__BANK_HEIGHT_shift, EG_DB_Z_INFO__BANK_HEIGHT_mask);

    evergreen->DB_STENCIL_INFO.u32All = 0;
    CLEARbit(evergreen->DB_STENCIL_INFO.u32All, EG_DB_STENCIL_INFO__FORMAT_bit);
    SETfield(evergreen->DB_STENCIL_INFO.u32All, EG_ADDR_SURF_TILE_SPLIT_256B,
             EG_DB_STENCIL_INFO__TILE_SPLIT_shift, EG_DB_STENCIL_INFO__TILE_SPLIT_mask);

    evergreen->DB_RENDER_CONTROL.u32All = 0;

    // Force the HIZ / HIS overrides to FORCE_DISABLE:
    evergreen->DB_RENDER_OVERRIDE.u32All = 0;
    SETfield(evergreen->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIZ_ENABLE_shift, FORCE_HIZ_ENABLE_mask);
    SETfield(evergreen->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE0_shift, FORCE_HIS_ENABLE0_mask);
    SETfield(evergreen->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE1_shift, FORCE_HIS_ENABLE1_mask);

    // Disable ROP3 modes by setting src to dst copy:
    SETfield(evergreen->CB_COLOR_CONTROL.u32All, 0xCC,
             EG_CB_COLOR_CONTROL__ROP3_shift,
             EG_CB_COLOR_CONTROL__ROP3_mask);
    SETfield(evergreen->CB_COLOR_CONTROL.u32All, EG_CB_NORMAL,
             EG_CB_COLOR_CONTROL__MODE_shift,
             EG_CB_COLOR_CONTROL__MODE_mask);

    // Source blend factors for colour and alpha default to BLEND_ONE:
    SETfield(evergreen->CB_BLEND0_CONTROL.u32All,
             BLEND_ONE, COLOR_SRCBLEND_shift, COLOR_SRCBLEND_mask);

    SETfield(evergreen->CB_BLEND0_CONTROL.u32All,
             BLEND_ONE, ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask);

    //evergreen->PA_CL_CLIP_CNTL.CLIP_DISABLE = 1;

    SETbit(evergreen->PA_CL_CLIP_CNTL.u32All, DX_LINEAR_ATTR_CLIP_ENA_bit);

    // Set up the culling control register:
    SETfield(evergreen->PA_SU_SC_MODE_CNTL.u32All, 2,
             POLYMODE_FRONT_PTYPE_shift, POLYMODE_FRONT_PTYPE_mask); // draw using triangles
    SETfield(evergreen->PA_SU_SC_MODE_CNTL.u32All, 2,
             POLYMODE_BACK_PTYPE_shift, POLYMODE_BACK_PTYPE_mask); // draw using triangles

    // Do scale XY or X by 1/W0. eg:
    evergreen->bEnablePerspective = GL_TRUE;

    CLEARbit(evergreen->PA_CL_VTE_CNTL.u32All, VTX_XY_FMT_bit);
    CLEARbit(evergreen->PA_CL_VTE_CNTL.u32All, VTX_Z_FMT_bit);
    SETbit(evergreen->PA_CL_VTE_CNTL.u32All, VTX_W0_FMT_bit);

    // Enable viewport scale and offset for all three axes:
    SETbit(evergreen->PA_CL_VTE_CNTL.u32All, VPORT_X_SCALE_ENA_bit);
    SETbit(evergreen->PA_CL_VTE_CNTL.u32All, VPORT_X_OFFSET_ENA_bit);
    SETbit(evergreen->PA_CL_VTE_CNTL.u32All, VPORT_Y_SCALE_ENA_bit);
    SETbit(evergreen->PA_CL_VTE_CNTL.u32All, VPORT_Y_OFFSET_ENA_bit);
    SETbit(evergreen->PA_CL_VTE_CNTL.u32All, VPORT_Z_SCALE_ENA_bit);
    SETbit(evergreen->PA_CL_VTE_CNTL.u32All, VPORT_Z_OFFSET_ENA_bit);

    // Set up point sizes and min/max values (and the default line width):
    SETfield(evergreen->PA_SU_POINT_SIZE.u32All, 0x8,
             PA_SU_POINT_SIZE__HEIGHT_shift, PA_SU_POINT_SIZE__HEIGHT_mask);
    SETfield(evergreen->PA_SU_POINT_SIZE.u32All, 0x8,
             PA_SU_POINT_SIZE__WIDTH_shift, PA_SU_POINT_SIZE__WIDTH_mask);
    CLEARfield(evergreen->PA_SU_POINT_MINMAX.u32All, MIN_SIZE_mask);
    SETfield(evergreen->PA_SU_POINT_MINMAX.u32All, 0x8000, MAX_SIZE_shift, MAX_SIZE_mask);
    SETfield(evergreen->PA_SU_LINE_CNTL.u32All,0x8,
             PA_SU_LINE_CNTL__WIDTH_shift, PA_SU_LINE_CNTL__WIDTH_mask);

    // Set up line control:
    evergreen->PA_SC_LINE_CNTL.u32All = 0;
    CLEARbit(evergreen->PA_SC_LINE_CNTL.u32All, EXPAND_LINE_WIDTH_bit);
    SETbit(evergreen->PA_SC_LINE_CNTL.u32All, LAST_PIXEL_bit);

    // Set up vertex control:
    evergreen->PA_SU_VTX_CNTL.u32All = 0;
    CLEARfield(evergreen->PA_SU_VTX_CNTL.u32All, QUANT_MODE_mask);
    SETbit(evergreen->PA_SU_VTX_CNTL.u32All, PIX_CENTER_bit);
    SETfield(evergreen->PA_SU_VTX_CNTL.u32All, X_ROUND_TO_EVEN,
             PA_SU_VTX_CNTL__ROUND_MODE_shift, PA_SU_VTX_CNTL__ROUND_MODE_mask);

    // Guard-band adjust factors set to 1.0 = no guard band
    // (0x3F800000 is the bit pattern of the float 1.0):
    evergreen->PA_CL_GB_VERT_CLIP_ADJ.u32All = 0x3F800000; // 1.0
    evergreen->PA_CL_GB_VERT_DISC_ADJ.u32All = 0x3F800000; // 1.0
    evergreen->PA_CL_GB_HORZ_CLIP_ADJ.u32All = 0x3F800000; // 1.0
    evergreen->PA_CL_GB_HORZ_DISC_ADJ.u32All = 0x3F800000; // 1.0

    // Disable color compares:
    SETfield(evergreen->CB_CLRCMP_CONTROL.u32All, CLRCMP_DRAW_ALWAYS,
             CLRCMP_FCN_SRC_shift, CLRCMP_FCN_SRC_mask);
    SETfield(evergreen->CB_CLRCMP_CONTROL.u32All, CLRCMP_DRAW_ALWAYS,
             CLRCMP_FCN_DST_shift, CLRCMP_FCN_DST_mask);
    SETfield(evergreen->CB_CLRCMP_CONTROL.u32All, CLRCMP_SEL_SRC,
             CLRCMP_FCN_SEL_shift, CLRCMP_FCN_SEL_mask);

    // Zero out source:
    evergreen->CB_CLRCMP_SRC.u32All = 0x00000000;

    // Put a compare color in for error checking:
    evergreen->CB_CLRCMP_DST.u32All = 0x000000FF;

    // Set up color compare mask:
    evergreen->CB_CLRCMP_MSK.u32All = 0xFFFFFFFF;

    // Enable all samples for multi-sample anti-aliasing:
    evergreen->PA_SC_AA_MASK.u32All = 0xFFFFFFFF;
    // Turn off AA:
    evergreen->PA_SC_AA_CONFIG.u32All = 0;

    SETfield(evergreen->VGT_OUT_DEALLOC_CNTL.u32All, 16,
             DEALLOC_DIST_shift, DEALLOC_DIST_mask);
    SETfield(evergreen->VGT_VERTEX_REUSE_BLOCK_CNTL.u32All, 14,
             VTX_REUSE_DEPTH_shift, VTX_REUSE_DEPTH_mask);

    evergreen->SX_MISC.u32All = 0;

    // Defaults for render target `id` (colour buffer 0):
    SETfield(evergreen->render_target[id].CB_COLOR0_INFO.u32All, 1,
             EG_CB_COLOR0_INFO__SOURCE_FORMAT_shift, EG_CB_COLOR0_INFO__SOURCE_FORMAT_mask);
    SETbit(evergreen->render_target[id].CB_COLOR0_INFO.u32All, EG_CB_COLOR0_INFO__BLEND_CLAMP_bit);
    SETfield(evergreen->render_target[id].CB_COLOR0_INFO.u32All, 0,
             EG_CB_COLOR0_INFO__NUMBER_TYPE_shift, EG_CB_COLOR0_INFO__NUMBER_TYPE_mask);

    SETfield(evergreen->render_target[id].CB_COLOR0_INFO.u32All, SWAP_STD,
             EG_CB_COLOR0_INFO__COMP_SWAP_shift, EG_CB_COLOR0_INFO__COMP_SWAP_mask);

    evergreen->render_target[id].CB_COLOR0_VIEW.u32All = 0;
    evergreen->render_target[id].CB_COLOR0_CMASK.u32All = 0;
    evergreen->render_target[id].CB_COLOR0_FMASK.u32All = 0;
    evergreen->render_target[id].CB_COLOR0_FMASK_SLICE.u32All = 0;

    evergreenInitSQConfig(ctx);

    // Everything above only touched the shadow copy; flag it all for emission.
    context->radeon.hw.all_dirty = GL_TRUE;
}

/*
 * Install the evergreen state callbacks into a dd_function_table.
 *
 * Most entries point at evergreen* functions; Scissor, DrawBuffer and
 * ReadBuffer use the shared radeon* implementations.  The pixel-path hooks
 * (CopyPixels, DrawPixels, ReadPixels) are only installed when
 * radeon->radeonScreen->kernel_mm is set.
 */
void evergreenInitStateFuncs(radeonContextPtr radeon, struct dd_function_table *functions)
{
    functions->UpdateState = evergreenInvalidateState;
    functions->AlphaFunc = evergreenAlphaFunc;
    functions->BlendColor = evergreenBlendColor;
    functions->BlendEquationSeparate = evergreenBlendEquationSeparate;
    functions->BlendFuncSeparate = evergreenBlendFuncSeparate;
    functions->Enable = evergreenEnable;
    functions->ColorMask = evergreenColorMask;
    functions->DepthFunc = evergreenDepthFunc;
    functions->DepthMask = evergreenDepthMask;
    functions->CullFace = evergreenCullFace;
    functions->Fogfv = evergreenFogfv;
    functions->FrontFace = evergreenFrontFace;
    functions->ShadeModel = evergreenShadeModel;
    functions->LogicOpcode = evergreenLogicOpcode;

    /* ARB_point_parameters */
    functions->PointParameterfv = evergreenPointParameter;

    /* Stencil related */
    functions->StencilFuncSeparate = evergreenStencilFuncSeparate;
    functions->StencilMaskSeparate = evergreenStencilMaskSeparate;
    functions->StencilOpSeparate = evergreenStencilOpSeparate;

    /* Viewport related */
    functions->Viewport = evergreenViewport;
    functions->DepthRange = evergreenDepthRange;
    functions->PointSize = evergreenPointSize;
    functions->LineWidth = evergreenLineWidth;
    functions->LineStipple = evergreenLineStipple;

    functions->PolygonOffset = evergreenPolygonOffset;
    functions->PolygonMode = evergreenPolygonMode;

    functions->RenderMode = evergreenRenderMode;

    functions->ClipPlane = evergreenClipPlane;

    functions->Scissor = radeonScissor;

    functions->DrawBuffer = radeonDrawBuffer;
    functions->ReadBuffer = radeonReadBuffer;

    if (radeon->radeonScreen->kernel_mm) {
        functions->CopyPixels = _mesa_meta_CopyPixels;
        functions->DrawPixels = _mesa_meta_DrawPixels;
        functions->ReadPixels = radeonReadPixels;
    }
}
/* Public interface of the evergreen state code (evergreen_state.h). */
#ifndef _EVERGREEN_STATE_H_
#define _EVERGREEN_STATE_H_

#include "main/mtypes.h"

#include "r600_context.h"

/* State/shader update entry points. */
extern void evergreenUpdateStateParameters(GLcontext * ctx, GLuint new_state);
extern void evergreenUpdateShaders(GLcontext * ctx);
extern void evergreenUpdateShaderStates(GLcontext * ctx);

/* NOTE(review): "evergree" (missing 'n') looks like a misspelled duplicate of
 * evergreenUpdateShaders above — confirm whether any definition or caller
 * uses this name before removing it. */
extern void evergreeUpdateShaders(GLcontext * ctx);

extern void evergreenUpdateViewportOffset(GLcontext * ctx);

/* Default-state setup and dd_function_table registration. */
extern void evergreenInitState(GLcontext * ctx);
extern void evergreenInitStateFuncs (radeonContextPtr radeon, struct dd_function_table *functions);

extern void evergreenSetScissor(context_t *context);

#endif /* _EVERGREEN_STATE_H_ */
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <[email protected]>, <[email protected]> + */ + +#include "main/glheader.h" +#include "main/imports.h" +#include "main/colormac.h" +#include "main/context.h" +#include "main/enums.h" +#include "main/image.h" +#include "main/teximage.h" +#include "main/mipmap.h" +#include "main/simple_list.h" +#include "main/texstore.h" +#include "main/texobj.h" + +#include "texmem.h" + +#include "r600_context.h" +#include "radeon_mipmap_tree.h" +#include "evergreen_diff.h" +#include "evergreen_tex.h" +#include "evergreen_fragprog.h" +#include "evergreen_vertprog.h" + +#include "r600_tex.h" + +static unsigned int evergreen_translate_wrap_mode(GLenum wrapmode) +{ + switch(wrapmode) { + case GL_REPEAT: return SQ_TEX_WRAP; + case GL_CLAMP: return SQ_TEX_CLAMP_HALF_BORDER; + case GL_CLAMP_TO_EDGE: return SQ_TEX_CLAMP_LAST_TEXEL; + case GL_CLAMP_TO_BORDER: return SQ_TEX_CLAMP_BORDER; + case GL_MIRRORED_REPEAT: return SQ_TEX_MIRROR; + case GL_MIRROR_CLAMP_EXT: return SQ_TEX_MIRROR_ONCE_HALF_BORDER; + case GL_MIRROR_CLAMP_TO_EDGE_EXT: return SQ_TEX_MIRROR_ONCE_LAST_TEXEL; + case GL_MIRROR_CLAMP_TO_BORDER_EXT: return SQ_TEX_MIRROR_ONCE_BORDER; + default: + radeon_error("bad wrap mode in %s", __FUNCTION__); + return 0; + } +} + +static GLboolean evergreenGetTexFormat(struct gl_texture_object *tObj, gl_format mesa_format) +{ + radeonTexObj *t = radeon_tex_obj(tObj); + + CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + CLEARbit(t->SQ_TEX_RESOURCE4, 
SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED, + FORMAT_COMP_X_shift, + FORMAT_COMP_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED, + FORMAT_COMP_Y_shift, + FORMAT_COMP_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED, + FORMAT_COMP_Z_shift, + FORMAT_COMP_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED, + FORMAT_COMP_W_shift, + FORMAT_COMP_W_mask); + + SETfield(t->SQ_TEX_RESOURCE1, ARRAY_LINEAR_GENERAL, + EG_SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift, + EG_SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_mask); + + switch (mesa_format) /* This is mesa format. */ + { + case MESA_FORMAT_RGBA8888: + case MESA_FORMAT_SIGNED_RGBA8888: + SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8_8_8, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + if (mesa_format == MESA_FORMAT_SIGNED_RGBA8888) { + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_X_shift, FORMAT_COMP_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_W_shift, FORMAT_COMP_W_mask); + } + break; + case MESA_FORMAT_RGBA8888_REV: + case MESA_FORMAT_SIGNED_RGBA8888_REV: + SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8_8_8, + 
EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + if (mesa_format == MESA_FORMAT_SIGNED_RGBA8888_REV) { + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_X_shift, FORMAT_COMP_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_W_shift, FORMAT_COMP_W_mask); + } + break; + case MESA_FORMAT_ARGB8888: + SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8_8_8, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_XRGB8888: + SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8_8_8, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_XRGB8888_REV: + SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8_8_8, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_ARGB8888_REV: + SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8_8_8, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_RGB888: + SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8_8, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + 
EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_RGB565: + SETfield(t->SQ_TEX_RESOURCE7, FMT_5_6_5, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_RGB565_REV: + SETfield(t->SQ_TEX_RESOURCE7, FMT_5_6_5, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_ARGB4444: + 
SETfield(t->SQ_TEX_RESOURCE7, FMT_4_4_4_4, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_ARGB4444_REV: + SETfield(t->SQ_TEX_RESOURCE7, FMT_4_4_4_4, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_ARGB1555: + SETfield(t->SQ_TEX_RESOURCE7, FMT_1_5_5_5, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_ARGB1555_REV: + SETfield(t->SQ_TEX_RESOURCE7, FMT_1_5_5_5, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_AL88: + case MESA_FORMAT_AL88_REV: /* TODO : Check this. */ + SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_RGB332: + SETfield(t->SQ_TEX_RESOURCE7, FMT_3_3_2, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, 
SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_A8: /* ZERO, ZERO, ZERO, X */ + SETfield(t->SQ_TEX_RESOURCE7, FMT_8, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_L8: /* X, X, X, ONE */ + SETfield(t->SQ_TEX_RESOURCE7, FMT_8, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_I8: /* X, X, X, X */ + case MESA_FORMAT_CI8: + SETfield(t->SQ_TEX_RESOURCE7, FMT_8, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + 
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_RGB_DXT1: /* not supported yet */ + case MESA_FORMAT_RGBA_DXT1: /* not supported yet */ + case MESA_FORMAT_RGBA_DXT3: /* not supported yet */ + case MESA_FORMAT_RGBA_DXT5: /* not supported yet */ + return GL_FALSE; + + case MESA_FORMAT_RGBA_FLOAT32: + SETfield(t->SQ_TEX_RESOURCE7, FMT_32_32_32_32_FLOAT, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_RGBA_FLOAT16: + SETfield(t->SQ_TEX_RESOURCE7, FMT_16_16_16_16_FLOAT, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_RGB_FLOAT32: /* X, Y, Z, ONE */ + SETfield(t->SQ_TEX_RESOURCE7, FMT_32_32_32_FLOAT, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_RGB_FLOAT16: + SETfield(t->SQ_TEX_RESOURCE7, FMT_16_16_16_FLOAT, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_ALPHA_FLOAT32: /* ZERO, ZERO, ZERO, X */ + SETfield(t->SQ_TEX_RESOURCE7, FMT_32_FLOAT, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0, + 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_ALPHA_FLOAT16: /* ZERO, ZERO, ZERO, X */ + SETfield(t->SQ_TEX_RESOURCE7, FMT_16_FLOAT, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_LUMINANCE_FLOAT32: /* X, X, X, ONE */ + SETfield(t->SQ_TEX_RESOURCE7, FMT_32_FLOAT, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_LUMINANCE_FLOAT16: /* X, X, X, ONE */ + SETfield(t->SQ_TEX_RESOURCE7, FMT_16_FLOAT, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32: + SETfield(t->SQ_TEX_RESOURCE7, FMT_32_32_FLOAT, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16: + SETfield(t->SQ_TEX_RESOURCE7, FMT_16_16_FLOAT, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_INTENSITY_FLOAT32: /* X, X, X, X */ + SETfield(t->SQ_TEX_RESOURCE7, FMT_32_FLOAT, + 
EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_INTENSITY_FLOAT16: /* X, X, X, X */ + SETfield(t->SQ_TEX_RESOURCE7, FMT_16_FLOAT, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_Z16: + case MESA_FORMAT_X8_Z24: + case MESA_FORMAT_S8_Z24: + case MESA_FORMAT_Z24_S8: + case MESA_FORMAT_Z32: + case MESA_FORMAT_S8: + CLEARbit(t->SQ_TEX_RESOURCE0, EG_SQ_TEX_RESOURCE_WORD0_0__NDTO_bit); + SETfield(t->SQ_TEX_RESOURCE1, ARRAY_1D_TILED_THIN1, + EG_SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift, + EG_SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_mask); + switch (mesa_format) { + case MESA_FORMAT_Z16: + SETfield(t->SQ_TEX_RESOURCE7, FMT_16, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + break; + case MESA_FORMAT_X8_Z24: + case MESA_FORMAT_S8_Z24: + SETfield(t->SQ_TEX_RESOURCE7, 
FMT_8_24, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + break; + case MESA_FORMAT_Z24_S8: + SETfield(t->SQ_TEX_RESOURCE7, FMT_24_8, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + break; + case MESA_FORMAT_Z32: + SETfield(t->SQ_TEX_RESOURCE7, FMT_32, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + break; + case MESA_FORMAT_S8: + SETfield(t->SQ_TEX_RESOURCE7, FMT_8, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + break; + default: + break; + }; + switch (tObj->DepthMode) { + case GL_LUMINANCE: /* X, X, X, ONE */ + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case GL_INTENSITY: /* X, X, X, X */ + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case GL_ALPHA: /* ZERO, ZERO, ZERO, X */ + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + 
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + default: + return GL_FALSE; + } + break; + /* EXT_texture_sRGB */ + case MESA_FORMAT_SRGBA8: + SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8_8_8, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + SETbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit); + break; + case MESA_FORMAT_SLA8: + SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + SETbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit); + break; + case 
MESA_FORMAT_SL8: /* X, X, X, ONE */ + SETfield(t->SQ_TEX_RESOURCE7, FMT_8, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + SETbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit); + break; + default: + /* Not supported format */ + return GL_FALSE; + }; + + return GL_TRUE; +} +

/**
 * Map a GL depth-compare function onto the matching
 * SQ_TEX_DEPTH_COMPARE_* value.
 *
 * \param func  one of GL_NEVER .. GL_ALWAYS.
 * \return the SQ_TEX_DEPTH_COMPARE_* constant for \p func; an
 *         unrecognised value emits a one-time warning and yields 0.
 */
static GLuint evergreen_translate_shadow_func(GLenum func)
{
	switch (func) {
	case GL_NEVER:
		return SQ_TEX_DEPTH_COMPARE_NEVER;
	case GL_LESS:
		return SQ_TEX_DEPTH_COMPARE_LESS;
	case GL_LEQUAL:
		return SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
	case GL_GREATER:
		return SQ_TEX_DEPTH_COMPARE_GREATER;
	case GL_GEQUAL:
		return SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
	case GL_NOTEQUAL:
		return SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
	case GL_EQUAL:
		return SQ_TEX_DEPTH_COMPARE_EQUAL;
	case GL_ALWAYS:
		return SQ_TEX_DEPTH_COMPARE_ALWAYS;
	default:
		WARN_ONCE("Unknown shadow compare function! %d", func);
		return 0;
	}
}

/**
 * Program the CLAMP_X/Y/Z fields of SQ_TEX_SAMPLER0 from the texture
 * object's WrapS/WrapT/WrapR settings.
 *
 * CLAMP_X is always written.  CLAMP_Y is written for every target except
 * GL_TEXTURE_1D, and CLAMP_Z only for GL_TEXTURE_3D.
 */
static void evergreenUpdateTexWrap(radeonTexObjPtr t)
{
	struct gl_texture_object *tObj = &t->base;

	SETfield(t->SQ_TEX_SAMPLER0, evergreen_translate_wrap_mode(tObj->WrapS),
		 EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift,
		 EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask);

	if (tObj->Target != GL_TEXTURE_1D) {
		SETfield(t->SQ_TEX_SAMPLER0, evergreen_translate_wrap_mode(tObj->WrapT),
			 EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_Y_shift,
			 EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_Y_mask);

		if (tObj->Target == GL_TEXTURE_3D) {
			SETfield(t->SQ_TEX_SAMPLER0, evergreen_translate_wrap_mode(tObj->WrapR),
				 EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_Z_shift,
				 EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_Z_mask);
		}
	}
}

/**
 * Reset the cached resource words (SQ_TEX_RESOURCE0..7) and sampler words
 * (SQ_TEX_SAMPLER0..2) of a texture object to their initial values.
 *
 * Every word is zeroed first and then the individual default fields are
 * filled in, so the order of the statements below matters only in that the
 * zeroing precedes the field writes for the same word.
 */
static void evergreenSetTexDefaultState(radeonTexObjPtr t)
{
	/* Init text object to default states. */
	t->SQ_TEX_RESOURCE0 = 0;
	t->SQ_TEX_RESOURCE1 = 0;
	t->SQ_TEX_RESOURCE2 = 0;
	t->SQ_TEX_RESOURCE3 = 0;
	t->SQ_TEX_RESOURCE4 = 0;
	t->SQ_TEX_RESOURCE5 = 0;
	t->SQ_TEX_RESOURCE6 = 0;
	t->SQ_TEX_RESOURCE7 = 0;

	SETfield(t->SQ_TEX_RESOURCE0, SQ_TEX_DIM_2D,
		 EG_SQ_TEX_RESOURCE_WORD0_0__DIM_shift,
		 EG_SQ_TEX_RESOURCE_WORD0_0__DIM_mask);

	CLEARbit(t->SQ_TEX_RESOURCE0, EG_SQ_TEX_RESOURCE_WORD0_0__NDTO_bit);

	SETfield(t->SQ_TEX_RESOURCE1, ARRAY_LINEAR_GENERAL,
		 EG_SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift,
		 EG_SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_mask);

	/* All four components default to SQ_FORMAT_COMP_UNSIGNED. */
	SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED,
		 FORMAT_COMP_X_shift, FORMAT_COMP_X_mask);
	SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED,
		 FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask);
	SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED,
		 FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask);
	SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED,
		 FORMAT_COMP_W_shift, FORMAT_COMP_W_mask);
	SETfield(t->SQ_TEX_RESOURCE4, SQ_NUM_FORMAT_NORM,
		 SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift,
		 SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_mask);
	CLEARbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit);
	CLEARbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
	SETfield(t->SQ_TEX_RESOURCE4, SQ_ENDIAN_NONE,
		 SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift,
		 SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_mask);

	/* Identity destination swizzle: X, Y, Z, W. */
	SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
		 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift,
		 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
	SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
		 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift,
		 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
	SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
		 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift,
		 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
	SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
		 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift,
		 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
	SETfield(t->SQ_TEX_RESOURCE4, 0,
		 BASE_LEVEL_shift,
		 BASE_LEVEL_mask); /* mip-maps */

	SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8_8_8,
		 EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
		 EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
	SETfield(t->SQ_TEX_RESOURCE7, SQ_TEX_VTX_VALID_TEXTURE,
		 EG_SQ_TEX_RESOURCE_WORD7_0__TYPE_shift,
		 EG_SQ_TEX_RESOURCE_WORD7_0__TYPE_mask);

	/* Initialize sampler registers */
	t->SQ_TEX_SAMPLER0 = 0;
	/* Default wrap mode for each of the three coordinates (X, Y and Z);
	 * these are the same fields evergreenUpdateTexWrap() programs. */
	SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_WRAP,
		 EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift,
		 EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask);
	SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_WRAP,
		 EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_Y_shift,
		 EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_Y_mask);
	SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_WRAP,
		 EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_Z_shift,
		 EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_Z_mask);
	SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_XY_FILTER_POINT,
		 EG_SQ_TEX_SAMPLER_WORD0_0__XY_MAG_FILTER_shift,
		 EG_SQ_TEX_SAMPLER_WORD0_0__XY_MAG_FILTER_mask);
	SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_XY_FILTER_POINT,
		 EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_shift,
		 EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_mask);
	SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_Z_FILTER_NONE,
		 EG_SQ_TEX_SAMPLER_WORD0_0__Z_FILTER_shift,
		 EG_SQ_TEX_SAMPLER_WORD0_0__Z_FILTER_mask);
	/* NOTE(review): the MIP_FILTER field is loaded with a Z-filter
	 * enumerator; presumably it has the same value as the "no mip filter"
	 * setting -- confirm against the register definitions. */
	SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_Z_FILTER_NONE,
		 EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_shift,
		 EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_mask);
	SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_BORDER_COLOR_TRANS_BLACK,
		 EG_SQ_TEX_SAMPLER_WORD0_0__BORDER_COLOR_TYPE_shift,
		 EG_SQ_TEX_SAMPLER_WORD0_0__BORDER_COLOR_TYPE_mask);

	t->SQ_TEX_SAMPLER1 = 0;
	SETfield(t->SQ_TEX_SAMPLER1, 0x7ff,
		 EG_SQ_TEX_SAMPLER_WORD1_0__MAX_LOD_shift,
		 EG_SQ_TEX_SAMPLER_WORD1_0__MAX_LOD_mask);

	t->SQ_TEX_SAMPLER2 = 0;
	SETbit(t->SQ_TEX_SAMPLER2, EG_SQ_TEX_SAMPLER_WORD2_0__TYPE_bit);
}

/**
 * Program the XY_MIN_FILTER, MIP_FILTER and XY_MAG_FILTER fields of
 * SQ_TEX_SAMPLER0 from the GL minification/magnification filters.
 *
 * The texture is always marked as not validated.  When \p anisotropy is at
 * least 2.0 and neither filter is GL_NEAREST, the function only logs a
 * message and returns: no filter field is written on that path.
 * Filter values not listed in the switches leave the fields unchanged.
 */
static void evergreenSetTexFilter(radeonTexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy)
{
	/* Force revalidation to account for switches from/to mipmapping. */
	t->validated = GL_FALSE;

	/* Note that EXT_texture_filter_anisotropic is extremely vague about
	 * how anisotropic filtering interacts with the "normal" filter modes.
	 * When anisotropic filtering is enabled, we override min and mag
	 * filter settings completely.  This includes driconf's settings.
	 */
	if (anisotropy >= 2.0 && (minf != GL_NEAREST) && (magf != GL_NEAREST)) {
		/*t->pp_txfilter |= R300_TX_MAG_FILTER_ANISO
			| R300_TX_MIN_FILTER_ANISO
			| R300_TX_MIN_FILTER_MIP_LINEAR
			| aniso_filter(anisotropy);*/
		radeon_print(RADEON_TEXTURE, RADEON_NORMAL, "Using maximum anisotropy of %f\n", anisotropy);
		return;
	}

	switch (minf) {
	case GL_NEAREST:
		SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Point,
			 EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_shift,
			 EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_mask);
		SETfield(t->SQ_TEX_SAMPLER0, TEX_MipFilter_None,
			 EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_shift,
			 EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_mask);
		break;
	case GL_LINEAR:
		SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Linear,
			 EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_shift,
			 EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_mask);
		SETfield(t->SQ_TEX_SAMPLER0, TEX_MipFilter_None,
			 EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_shift,
			 EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_mask);
		break;
	case GL_NEAREST_MIPMAP_NEAREST:
		SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Point,
			 EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_shift,
			 EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_mask);
		SETfield(t->SQ_TEX_SAMPLER0, TEX_MipFilter_Point,
			 EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_shift,
			 EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_mask);
		break;
	case GL_NEAREST_MIPMAP_LINEAR:
		SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Point,
			 EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_shift,
			 EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_mask);
		SETfield(t->SQ_TEX_SAMPLER0, TEX_MipFilter_Linear,
			 EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_shift,
			 EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_mask);
		break;
	case GL_LINEAR_MIPMAP_NEAREST:
		SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Linear,
			 EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_shift,
			 EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_mask);
		SETfield(t->SQ_TEX_SAMPLER0, TEX_MipFilter_Point,
			 EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_shift,
			 EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_mask);
		break;
	case GL_LINEAR_MIPMAP_LINEAR:
		SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Linear,
			 EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_shift,
			 EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_mask);
		SETfield(t->SQ_TEX_SAMPLER0, TEX_MipFilter_Linear,
			 EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_shift,
			 EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_mask);
		break;
	default:
		/* Unlisted minification filter: leave the fields as they are. */
		break;
	}

	/* Note we don't have 3D mipmaps so only use the mag filter setting
	 * to set the 3D texture filter mode.
	 */
	switch (magf) {
	case GL_NEAREST:
		SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Point,
			 EG_SQ_TEX_SAMPLER_WORD0_0__XY_MAG_FILTER_shift,
			 EG_SQ_TEX_SAMPLER_WORD0_0__XY_MAG_FILTER_mask);
		break;
	case GL_LINEAR:
		SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Linear,
			 EG_SQ_TEX_SAMPLER_WORD0_0__XY_MAG_FILTER_shift,
			 EG_SQ_TEX_SAMPLER_WORD0_0__XY_MAG_FILTER_mask);
		break;
	default:
		/* Unlisted magnification filter: leave the field as it is. */
		break;
	}
}

/**
 * Return the object representation of a float as a 32-bit word.
 *
 * Goes through a union rather than a pointer cast so the read does not
 * violate the strict-aliasing rules.
 */
static uint32_t evergreen_float_bits(GLfloat value)
{
	union {
		GLfloat f;
		uint32_t u;
	} pun;

	pun.f = value;
	return pun.u;
}

/**
 * Store the raw bit patterns of the four border colour floats into the
 * TD_PS_SAMPLER0_BORDER_* words and switch the sampler's
 * BORDER_COLOR_TYPE to SQ_TEX_BORDER_COLOR_REGISTER.
 *
 * \param t      texture object whose cached registers are updated.
 * \param color  four floats; element 3 goes to ALPHA, 2 to RED, 1 to GREEN
 *               and 0 to BLUE.
 */
static void evergreenSetTexBorderColor(radeonTexObjPtr t, const GLfloat color[4])
{
	/* NOTE(review): RED is taken from color[2] and BLUE from color[0];
	 * the mapping is kept exactly as it was -- confirm it matches the
	 * component order the caller supplies. */
	t->TD_PS_SAMPLER0_BORDER_ALPHA = evergreen_float_bits(color[3]);
	t->TD_PS_SAMPLER0_BORDER_RED = evergreen_float_bits(color[2]);
	t->TD_PS_SAMPLER0_BORDER_GREEN = evergreen_float_bits(color[1]);
	t->TD_PS_SAMPLER0_BORDER_BLUE = evergreen_float_bits(color[0]);

	SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_BORDER_COLOR_REGISTER,
		 EG_SQ_TEX_SAMPLER_WORD0_0__BORDER_COLOR_TYPE_shift,
		 EG_SQ_TEX_SAMPLER_WORD0_0__BORDER_COLOR_TYPE_mask);
}

/**
 * Re-run the texture format translation for the base-level image of
 * \p tObj and mark the texture as not validated if the translation fails.
 *
 * Does nothing when \p tObj is NULL or when no image exists at
 * Image[0][BaseLevel].
 */
static void evergreenSetDepthTexMode(struct gl_texture_object *tObj)
{
	const struct gl_texture_image *baseImage;
	radeonTexObjPtr t;

	if (!tObj)
		return;

	/* Without a base-level image there is no format to translate. */
	baseImage = tObj->Image[0][tObj->BaseLevel];
	if (!baseImage)
		return;

	t = radeon_tex_obj(tObj);

	if (!evergreenGetTexFormat(tObj, baseImage->TexFormat))
		t->validated = GL_FALSE;
}

/**
 * Scale \p value by 2^frac_bits and convert the product to uint32_t
 * (the float-to-integer conversion discards the fractional part).
 *
 * \p value must be non-negative and the scaled result must fit in 32 bits;
 * \p frac_bits must be less than 32.
 */
static INLINE uint32_t
EG_S_FIXED(float value, uint32_t frac_bits)
{
	/* Unsigned shift so that large frac_bits cannot overflow a signed int. */
	return (uint32_t)(value * (float)(1u << frac_bits));
}

+static GLboolean evergreen_setup_hardware_state(GLcontext * ctx, struct gl_texture_object *texObj, int unit) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + radeonTexObj *t = radeon_tex_obj(texObj); + const
struct gl_texture_image *firstImage; + GLuint uTexelPitch, row_align; + + if (context->radeon.radeonScreen->driScreen->dri2.enabled && + t->image_override && + t->bo) + return GL_TRUE; + + firstImage = t->base.Image[0][t->minLod]; + + if (!t->image_override) { + if (!evergreenGetTexFormat(texObj, firstImage->TexFormat)) { + radeon_warning("unsupported texture format in %s\n", + __FUNCTION__); + return GL_FALSE; + } + } + + switch (texObj->Target) + { + case GL_TEXTURE_1D: + SETfield(t->SQ_TEX_RESOURCE0, SQ_TEX_DIM_1D, + EG_SQ_TEX_RESOURCE_WORD0_0__DIM_shift, + EG_SQ_TEX_RESOURCE_WORD0_0__DIM_mask); + SETfield(t->SQ_TEX_RESOURCE1, 0, + EG_SQ_TEX_RESOURCE_WORD1_0__TEX_DEPTH_shift, + EG_SQ_TEX_RESOURCE_WORD1_0__TEX_DEPTH_mask); + break; + case GL_TEXTURE_2D: + case GL_TEXTURE_RECTANGLE_NV: + SETfield(t->SQ_TEX_RESOURCE0, SQ_TEX_DIM_2D, + EG_SQ_TEX_RESOURCE_WORD0_0__DIM_shift, + EG_SQ_TEX_RESOURCE_WORD0_0__DIM_mask); + SETfield(t->SQ_TEX_RESOURCE1, 0, + EG_SQ_TEX_RESOURCE_WORD1_0__TEX_DEPTH_shift, + EG_SQ_TEX_RESOURCE_WORD1_0__TEX_DEPTH_mask); + break; + case GL_TEXTURE_3D: + SETfield(t->SQ_TEX_RESOURCE0, SQ_TEX_DIM_3D, + EG_SQ_TEX_RESOURCE_WORD0_0__DIM_shift, + EG_SQ_TEX_RESOURCE_WORD0_0__DIM_mask); + SETfield(t->SQ_TEX_RESOURCE1, (firstImage->Depth - 1), // ??? 
+ EG_SQ_TEX_RESOURCE_WORD1_0__TEX_DEPTH_shift, + EG_SQ_TEX_RESOURCE_WORD1_0__TEX_DEPTH_mask); + break; + case GL_TEXTURE_CUBE_MAP: + SETfield(t->SQ_TEX_RESOURCE0, SQ_TEX_DIM_CUBEMAP, + EG_SQ_TEX_RESOURCE_WORD0_0__DIM_shift, + EG_SQ_TEX_RESOURCE_WORD0_0__DIM_mask); + SETfield(t->SQ_TEX_RESOURCE1, 0, + EG_SQ_TEX_RESOURCE_WORD1_0__TEX_DEPTH_shift, + EG_SQ_TEX_RESOURCE_WORD1_0__TEX_DEPTH_mask); + break; + default: + radeon_error("unexpected texture target type in %s\n", __FUNCTION__); + return GL_FALSE; + } + + row_align = context->radeon.texture_row_align - 1; + uTexelPitch = (_mesa_format_row_stride(firstImage->TexFormat, firstImage->Width) + row_align) & ~row_align; + uTexelPitch = uTexelPitch / _mesa_get_format_bytes(firstImage->TexFormat); + uTexelPitch = (uTexelPitch + R700_TEXEL_PITCH_ALIGNMENT_MASK) + & ~R700_TEXEL_PITCH_ALIGNMENT_MASK; + + /* min pitch is 8 */ + if (uTexelPitch < 8) + uTexelPitch = 8; + + SETfield(t->SQ_TEX_RESOURCE0, (uTexelPitch/8)-1, + EG_SQ_TEX_RESOURCE_WORD0_0__PITCH_shift, + EG_SQ_TEX_RESOURCE_WORD0_0__PITCH_mask); + SETfield(t->SQ_TEX_RESOURCE0, firstImage->Width - 1, + EG_SQ_TEX_RESOURCE_WORD0_0__TEX_WIDTH_shift, + EG_SQ_TEX_RESOURCE_WORD0_0__TEX_WIDTH_mask); + SETfield(t->SQ_TEX_RESOURCE1, firstImage->Height - 1, + EG_SQ_TEX_RESOURCE_WORD1_0__TEX_HEIGHT_shift, + EG_SQ_TEX_RESOURCE_WORD1_0__TEX_HEIGHT_mask); + + t->SQ_TEX_RESOURCE2 = get_base_teximage_offset(t) / 256; + + t->SQ_TEX_RESOURCE3 = radeon_miptree_image_offset(t->mt, 0, t->minLod + 1) / 256; + + SETfield(t->SQ_TEX_RESOURCE4, 0, BASE_LEVEL_shift, BASE_LEVEL_mask); + SETfield(t->SQ_TEX_RESOURCE5, t->maxLod - t->minLod, LAST_LEVEL_shift, LAST_LEVEL_mask); + + SETfield(t->SQ_TEX_SAMPLER1, + EG_S_FIXED(CLAMP(t->base.MinLod - t->minLod, 0, 15), 6), + EG_SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_shift, + EG_SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_mask); + SETfield(t->SQ_TEX_SAMPLER1, + EG_S_FIXED(CLAMP(t->base.MaxLod - t->minLod, 0, 15), 6), + EG_SQ_TEX_SAMPLER_WORD1_0__MAX_LOD_shift, + 
EG_SQ_TEX_SAMPLER_WORD1_0__MAX_LOD_mask); + SETfield(t->SQ_TEX_SAMPLER2, + EG_S_FIXED(CLAMP(ctx->Texture.Unit[unit].LodBias + t->base.LodBias, -16, 16), 6), + EG_SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_shift, + EG_SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_mask); + + if(texObj->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) + { + SETfield(t->SQ_TEX_SAMPLER0, evergreen_translate_shadow_func(texObj->CompareFunc), + EG_SQ_TEX_SAMPLER_WORD0_0__DCF_shift, + EG_SQ_TEX_SAMPLER_WORD0_0__DCF_mask); + } + else + { + CLEARfield(t->SQ_TEX_SAMPLER0, EG_SQ_TEX_SAMPLER_WORD0_0__DCF_mask); + } + + return GL_TRUE; +} + +void evergreenSetTexOffset(__DRIcontext * pDRICtx, GLint texname, + unsigned long long offset, GLint depth, GLuint pitch) +{ + context_t *rmesa = pDRICtx->driverPrivate; + struct gl_texture_object *tObj = + _mesa_lookup_texture(rmesa->radeon.glCtx, texname); + radeonTexObjPtr t = radeon_tex_obj(tObj); + const struct gl_texture_image *firstImage; + uint32_t pitch_val, size, row_align; + + if (!tObj) + return; + + t->image_override = GL_TRUE; + + if (!offset) + return; + + firstImage = t->base.Image[0][t->minLod]; + row_align = rmesa->radeon.texture_row_align - 1; + size = ((_mesa_format_row_stride(firstImage->TexFormat, firstImage->Width) + row_align) & ~row_align) * firstImage->Height; + if (t->bo) { + radeon_bo_unref(t->bo); + t->bo = NULL; + } + t->bo = radeon_legacy_bo_alloc_fake(rmesa->radeon.radeonScreen->bom, size, offset); + t->override_offset = offset; + pitch_val = pitch; + switch (depth) { + case 32: + SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8_8_8, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + pitch_val /= 4; + break; + case 24: + default: + SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8_8_8, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + pitch_val /= 4; + break; + case 16: + SETfield(t->SQ_TEX_RESOURCE7, FMT_5_6_5, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + pitch_val /= 2; + break; + } + + pitch_val = (pitch_val + R700_TEXEL_PITCH_ALIGNMENT_MASK) + & ~R700_TEXEL_PITCH_ALIGNMENT_MASK; + + /* min pitch is 8 */ + if (pitch_val < 8) + pitch_val = 8; + + SETfield(t->SQ_TEX_RESOURCE0, (pitch_val/8)-1, + EG_SQ_TEX_RESOURCE_WORD0_0__PITCH_shift, + EG_SQ_TEX_RESOURCE_WORD0_0__PITCH_mask); +} + +void evergreenSetTexBuffer(__DRIcontext *pDRICtx, 
GLint target, GLint glx_texture_format, __DRIdrawable *dPriv) +{ + struct gl_texture_unit *texUnit; + struct gl_texture_object *texObj; + struct gl_texture_image *texImage; + struct radeon_renderbuffer *rb; + radeon_texture_image *rImage; + radeonContextPtr radeon; + context_t *rmesa; + struct radeon_framebuffer *rfb; + radeonTexObjPtr t; + uint32_t pitch_val; + uint32_t internalFormat, type, format; + + type = GL_BGRA; + format = GL_UNSIGNED_BYTE; + internalFormat = (glx_texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4); + + radeon = pDRICtx->driverPrivate; + rmesa = pDRICtx->driverPrivate; + + rfb = dPriv->driverPrivate; + texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit]; + texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target); + texImage = _mesa_get_tex_image(radeon->glCtx, texObj, target, 0); + + rImage = get_radeon_texture_image(texImage); + t = radeon_tex_obj(texObj); + if (t == NULL) { + return; + } + + radeon_update_renderbuffers(pDRICtx, dPriv, GL_TRUE); + rb = rfb->color_rb[0]; + if (rb->bo == NULL) { + /* Failed to BO for the buffer */ + return; + } + + _mesa_lock_texture(radeon->glCtx, texObj); + if (t->bo) { + radeon_bo_unref(t->bo); + t->bo = NULL; + } + if (rImage->bo) { + radeon_bo_unref(rImage->bo); + rImage->bo = NULL; + } + + radeon_miptree_unreference(&t->mt); + radeon_miptree_unreference(&rImage->mt); + + _mesa_init_teximage_fields(radeon->glCtx, target, texImage, + rb->base.Width, rb->base.Height, 1, 0, rb->cpp); + texImage->RowStride = rb->pitch / rb->cpp; + + rImage->bo = rb->bo; + radeon_bo_ref(rImage->bo); + t->bo = rb->bo; + radeon_bo_ref(t->bo); + t->image_override = GL_TRUE; + t->override_offset = 0; + pitch_val = rb->pitch; + switch (rb->cpp) { + case 4: + if (glx_texture_format == __DRI_TEXTURE_FORMAT_RGB) { + SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8_8_8, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + } else { + SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8_8_8, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + } + pitch_val /= 4; + break; + case 3: + default: + // FMT_8_8_8 ??? 
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8_8_8, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + pitch_val /= 4; + break; + case 2: + SETfield(t->SQ_TEX_RESOURCE7, FMT_5_6_5, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift, + EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + pitch_val /= 2; + break; + } + + pitch_val = (pitch_val + R700_TEXEL_PITCH_ALIGNMENT_MASK) + & ~R700_TEXEL_PITCH_ALIGNMENT_MASK; + + /* min pitch is 8 */ + if (pitch_val < 8) + pitch_val = 8; + + SETfield(t->SQ_TEX_RESOURCE0, (pitch_val/8)-1, + EG_SQ_TEX_RESOURCE_WORD0_0__PITCH_shift, + EG_SQ_TEX_RESOURCE_WORD0_0__PITCH_mask); + SETfield(t->SQ_TEX_RESOURCE0, rb->base.Width - 1, + EG_SQ_TEX_RESOURCE_WORD0_0__TEX_WIDTH_shift, + EG_SQ_TEX_RESOURCE_WORD0_0__TEX_WIDTH_mask); + SETfield(t->SQ_TEX_RESOURCE1, rb->base.Height - 1, + EG_SQ_TEX_RESOURCE_WORD1_0__TEX_HEIGHT_shift, + EG_SQ_TEX_RESOURCE_WORD1_0__TEX_HEIGHT_mask); + + t->validated = 
GL_TRUE; + _mesa_unlock_texture(radeon->glCtx, texObj); + return; +} + +void evergreenUpdateTextureState(GLcontext * ctx) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT * evergreen = GET_EVERGREEN_CHIP(context); + struct gl_texture_unit *texUnit; + struct radeon_tex_obj *t; + GLuint unit; + + EVERGREEN_STATECHANGE(context, tx); + + for (unit = 0; unit < R700_MAX_TEXTURE_UNITS; unit++) { + texUnit = &ctx->Texture.Unit[unit]; + t = radeon_tex_obj(ctx->Texture.Unit[unit]._Current); + evergreen->textures[unit] = NULL; + if (texUnit->_ReallyEnabled) { + if (!t) + continue; + evergreen->textures[unit] = t; + } + } +} + +static GLboolean evergreen_validate_texture(GLcontext * ctx, struct gl_texture_object *texObj, int unit) +{ + radeonTexObj *t = radeon_tex_obj(texObj); + + if (!radeon_validate_texture_miptree(ctx, texObj)) + return GL_FALSE; + + /* Configure the hardware registers (more precisely, the cached version + * of the hardware registers). */ + if (!evergreen_setup_hardware_state(ctx, texObj, unit)) + return GL_FALSE; + + t->validated = GL_TRUE; + return GL_TRUE; +} + +GLboolean evergreenValidateBuffers(GLcontext * ctx) +{ + context_t *rmesa = EVERGREEN_CONTEXT(ctx); + struct radeon_renderbuffer *rrb; + struct radeon_bo *pbo; + int i; + int ret; + + radeon_cs_space_reset_bos(rmesa->radeon.cmdbuf.cs); + + rrb = radeon_get_colorbuffer(&rmesa->radeon); + /* color buffer */ + if (rrb && rrb->bo) { + radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, + rrb->bo, 0, + RADEON_GEM_DOMAIN_VRAM); + } + + /* depth buffer */ + rrb = radeon_get_depthbuffer(&rmesa->radeon); + if (rrb && rrb->bo) { + radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, + rrb->bo, 0, + RADEON_GEM_DOMAIN_VRAM); + } + + for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) { + radeonTexObj *t; + + if (!ctx->Texture.Unit[i]._ReallyEnabled) + continue; + + if (!evergreen_validate_texture(ctx, ctx->Texture.Unit[i]._Current, i)) { + radeon_warning("failed to 
validate texture for unit %d.\n", i); + } + t = radeon_tex_obj(ctx->Texture.Unit[i]._Current); + if (t->image_override && t->bo) + radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, + t->bo, + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); + else if (t->mt->bo) + radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, + t->mt->bo, + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); + } + + pbo = (struct radeon_bo *)evergreenGetActiveFpShaderBo(ctx); + if (pbo) { + radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, pbo, + RADEON_GEM_DOMAIN_GTT, 0); + } + + pbo = (struct radeon_bo *)evergreenGetActiveVpShaderBo(ctx); + if (pbo) { + radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, pbo, + RADEON_GEM_DOMAIN_GTT, 0); + } + + pbo = (struct radeon_bo *)evergreenGetActiveFpShaderConstBo(ctx); + if (pbo) { + radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, pbo, + RADEON_GEM_DOMAIN_GTT, 0); + } + + pbo = (struct radeon_bo *)evergreenGetActiveVpShaderConstBo(ctx); + if (pbo) { + radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, pbo, + RADEON_GEM_DOMAIN_GTT, 0); + } + + ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, first_elem(&rmesa->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0); + if (ret) + return GL_FALSE; + return GL_TRUE; +} + +static struct gl_texture_object *evergreenNewTextureObject(GLcontext * ctx, + GLuint name, + GLenum target) +{ + context_t* rmesa = EVERGREEN_CONTEXT(ctx); + radeonTexObj * t = CALLOC_STRUCT(radeon_tex_obj); + + + radeon_print(RADEON_STATE | RADEON_TEXTURE, RADEON_NORMAL, + "%s( %p (target = %s) )\n", __FUNCTION__, + t, _mesa_lookup_enum_by_nr(target)); + + _mesa_initialize_texture_object(&t->base, name, target); + t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy; + + evergreenSetTexDefaultState(t); + evergreenUpdateTexWrap(t); + evergreenSetTexFilter(t, t->base.MinFilter, t->base.MagFilter, t->base.MaxAnisotropy); + evergreenSetTexBorderColor(t, 
t->base.BorderColor.f); + + return &t->base; +} + +static void evergreenDeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) +{ + context_t * rmesa = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT * evergreen = GET_EVERGREEN_CHIP(rmesa); + radeonTexObj* t = radeon_tex_obj(texObj); + + radeon_print(RADEON_STATE | RADEON_TEXTURE, RADEON_NORMAL, + "%s( %p (target = %s) )\n", __FUNCTION__, + (void *)texObj, + _mesa_lookup_enum_by_nr(texObj->Target)); + + if (rmesa) { + int i; + radeon_firevertices(&rmesa->radeon); + + for(i = 0; i < R700_MAX_TEXTURE_UNITS; ++i) + if (evergreen->textures[i] == t) + evergreen->textures[i] = 0; + } + + if (t->bo) { + radeon_bo_unref(t->bo); + t->bo = NULL; + } + + radeon_miptree_unreference(&t->mt); + + _mesa_delete_texture_object(ctx, texObj); +} + +static void evergreenTexParameter(GLcontext * ctx, GLenum target, + struct gl_texture_object *texObj, + GLenum pname, const GLfloat * params) +{ + radeonTexObj* t = radeon_tex_obj(texObj); + GLenum baseFormat; + + radeon_print(RADEON_STATE | RADEON_TEXTURE, RADEON_VERBOSE, + "%s( %s )\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(pname)); + + switch (pname) { + case GL_TEXTURE_MIN_FILTER: + case GL_TEXTURE_MAG_FILTER: + case GL_TEXTURE_MAX_ANISOTROPY_EXT: + evergreenSetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy); + break; + + case GL_TEXTURE_WRAP_S: + case GL_TEXTURE_WRAP_T: + case GL_TEXTURE_WRAP_R: + evergreenUpdateTexWrap(t); + break; + + case GL_TEXTURE_BORDER_COLOR: + evergreenSetTexBorderColor(t, texObj->BorderColor.f); + break; + + case GL_TEXTURE_BASE_LEVEL: + case GL_TEXTURE_MAX_LEVEL: + case GL_TEXTURE_MIN_LOD: + case GL_TEXTURE_MAX_LOD: + t->validated = GL_FALSE; + break; + + case GL_DEPTH_TEXTURE_MODE: + if (!texObj->Image[0][texObj->BaseLevel]) + return; + baseFormat = texObj->Image[0][texObj->BaseLevel]->_BaseFormat; + if (baseFormat == GL_DEPTH_COMPONENT || + baseFormat == GL_DEPTH_STENCIL) { + evergreenSetDepthTexMode(texObj); + break; 
+ } else { + /* If the texture isn't a depth texture, changing this + * state won't cause any changes to the hardware. + * Don't force a flush of texture state. + */ + return; + } + + default: + return; + } +} + +void evergreenInitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions) +{ + /* Note: we only plug in the functions we implement in the driver + * since _mesa_init_driver_functions() was already called. + */ + functions->NewTextureImage = radeonNewTextureImage; + functions->FreeTexImageData = radeonFreeTexImageData; + functions->MapTexture = radeonMapTexture; + functions->UnmapTexture = radeonUnmapTexture; + + functions->ChooseTextureFormat = radeonChooseTextureFormat_mesa; + functions->TexImage1D = radeonTexImage1D; + functions->TexImage2D = radeonTexImage2D; + functions->TexImage3D = radeonTexImage3D; + functions->TexSubImage1D = radeonTexSubImage1D; + functions->TexSubImage2D = radeonTexSubImage2D; + functions->TexSubImage3D = radeonTexSubImage3D; + functions->GetTexImage = radeonGetTexImage; + functions->GetCompressedTexImage = radeonGetCompressedTexImage; + functions->NewTextureObject = evergreenNewTextureObject; + functions->DeleteTexture = evergreenDeleteTexture; + functions->IsTextureResident = driIsTextureResident; + + functions->TexParameter = evergreenTexParameter; + + functions->CompressedTexImage2D = radeonCompressedTexImage2D; + functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; + + if (radeon->radeonScreen->kernel_mm) { + functions->CopyTexImage2D = radeonCopyTexImage2D; + functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; + } + + functions->GenerateMipmap = radeonGenerateMipmap; + + driInitTextureFormats(); +} diff --git a/src/mesa/drivers/dri/r600/evergreen_tex.h b/src/mesa/drivers/dri/r600/evergreen_tex.h new file mode 100644 index 00000000000..b43508a9eab --- /dev/null +++ b/src/mesa/drivers/dri/r600/evergreen_tex.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2008-2010 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +/* + * Authors: + * Richard Li <[email protected]>, <[email protected]> + */ + +#ifndef _EVERGREEN_TEX_H_ +#define _EVERGREEN_TEX_H_ + +extern GLboolean evergreenValidateBuffers(GLcontext * ctx); + +extern void evergreenUpdateTextureState(GLcontext * ctx); +extern void evergreenInitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions); +extern void evergreenSetTexOffset(__DRIcontext * pDRICtx, GLint texname, + unsigned long long offset, GLint depth, GLuint pitch); +extern void evergreenSetTexBuffer(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format, __DRIdrawable *dPriv); + +#endif /* _EVERGREEN_TEX_H_ */ diff --git a/src/mesa/drivers/dri/r600/evergreen_vertprog.c b/src/mesa/drivers/dri/r600/evergreen_vertprog.c new file mode 100644 index 00000000000..4f3db00c7d2 --- /dev/null +++ b/src/mesa/drivers/dri/r600/evergreen_vertprog.c @@ -0,0 +1,736 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <[email protected]>, <[email protected]> + */ + + +#include <stdio.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> + +#include "main/imports.h" +#include "main/mtypes.h" + +#include "tnl/t_context.h" +#include "program/program.h" +#include "program/prog_parameter.h" +#include "program/prog_statevars.h" + +#include "radeon_debug.h" +#include "r600_context.h" +#include "r600_cmdbuf.h" +#include "r600_emit.h" +#include "program/programopt.h" + +#include "evergreen_vertprog.h" + +unsigned int evergreen_Map_Vertex_Output(r700_AssemblerBase *pAsm, + struct gl_vertex_program *mesa_vp, + unsigned int unStart) +{ + unsigned int i; + unsigned int unBit; + unsigned int unTotal = unStart; + + //!!!!!!! THE ORDER MATCH FS INPUT + + unBit = 1 << VERT_RESULT_HPOS; + if(mesa_vp->Base.OutputsWritten & unBit) + { + pAsm->ucVP_OutputMap[VERT_RESULT_HPOS] = unTotal++; + } + + unBit = 1 << VERT_RESULT_COL0; + if(mesa_vp->Base.OutputsWritten & unBit) + { + pAsm->ucVP_OutputMap[VERT_RESULT_COL0] = unTotal++; + } + + unBit = 1 << VERT_RESULT_COL1; + if(mesa_vp->Base.OutputsWritten & unBit) + { + pAsm->ucVP_OutputMap[VERT_RESULT_COL1] = unTotal++; + } + + //TODO : dealing back face. + unBit = 1 << VERT_RESULT_BFC0; + if(mesa_vp->Base.OutputsWritten & unBit) + { + pAsm->ucVP_OutputMap[VERT_RESULT_BFC0] = unTotal++; + } + + unBit = 1 << VERT_RESULT_BFC1; + if(mesa_vp->Base.OutputsWritten & unBit) + { + pAsm->ucVP_OutputMap[VERT_RESULT_BFC1] = unTotal++; + } + + //TODO : dealing fog. + unBit = 1 << VERT_RESULT_FOGC; + if(mesa_vp->Base.OutputsWritten & unBit) + { + pAsm->ucVP_OutputMap[VERT_RESULT_FOGC] = unTotal++; + } + + //TODO : dealing point size. 
+ unBit = 1 << VERT_RESULT_PSIZ; + if(mesa_vp->Base.OutputsWritten & unBit) + { + pAsm->ucVP_OutputMap[VERT_RESULT_PSIZ] = unTotal++; + } + + for(i=0; i<8; i++) + { + unBit = 1 << (VERT_RESULT_TEX0 + i); + if(mesa_vp->Base.OutputsWritten & unBit) + { + pAsm->ucVP_OutputMap[VERT_RESULT_TEX0 + i] = unTotal++; + } + } + + for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++) + { + unBit = 1 << i; + if(mesa_vp->Base.OutputsWritten & unBit) + { + pAsm->ucVP_OutputMap[i] = unTotal++; + } + } + + return (unTotal - unStart); +} + +unsigned int evergreen_Map_Vertex_Input(r700_AssemblerBase *pAsm, + struct gl_vertex_program *mesa_vp, + unsigned int unStart) +{ + int i; + unsigned int unBit; + unsigned int unTotal = unStart; + for(i=0; i<VERT_ATTRIB_MAX; i++) + { + unBit = 1 << i; + if(mesa_vp->Base.InputsRead & unBit) + { + pAsm->ucVP_AttributeMap[i] = unTotal++; + } + } + return (unTotal - unStart); +} + +GLboolean evergreen_Process_Vertex_Program_Vfetch_Instructions( + struct evergreen_vertex_program *vp, + struct gl_vertex_program *mesa_vp) +{ + int i; + unsigned int unBit; + VTX_FETCH_METHOD vtxFetchMethod; + vtxFetchMethod.bEnableMini = GL_FALSE; + vtxFetchMethod.mega_fetch_remainder = 0; + + for(i=0; i<VERT_ATTRIB_MAX; i++) + { + unBit = 1 << i; + if(mesa_vp->Base.InputsRead & unBit) + { + assemble_vfetch_instruction(&vp->r700AsmCode, + i, + vp->r700AsmCode.ucVP_AttributeMap[i], + vp->aos_desc[i].size, + vp->aos_desc[i].type, + &vtxFetchMethod); + } + } + + return GL_TRUE; +} + +GLboolean evergreen_Process_Vertex_Program_Vfetch_Instructions2( + GLcontext *ctx, + struct evergreen_vertex_program *vp, + struct gl_vertex_program *mesa_vp) +{ + int i; + context_t *context = R700_CONTEXT(ctx); + + VTX_FETCH_METHOD vtxFetchMethod; + vtxFetchMethod.bEnableMini = GL_FALSE; + vtxFetchMethod.mega_fetch_remainder = 0; + + for(i=0; i<context->nNumActiveAos; i++) + { + EG_assemble_vfetch_instruction(&vp->r700AsmCode, + vp->r700AsmCode.ucVP_AttributeMap[context->stream_desc[i].element], + 
context->stream_desc[i].type, + context->stream_desc[i].size, + context->stream_desc[i].element, + context->stream_desc[i]._signed, + context->stream_desc[i].normalize, + context->stream_desc[i].format, + &vtxFetchMethod); + } + + return GL_TRUE; +} + +void evergreen_Map_Vertex_Program(GLcontext *ctx, + struct evergreen_vertex_program *vp, + struct gl_vertex_program *mesa_vp) +{ + GLuint ui; + r700_AssemblerBase *pAsm = &(vp->r700AsmCode); + unsigned int num_inputs; + + // R0 will always be used for index into vertex buffer + pAsm->number_used_registers = 1; + pAsm->starting_vfetch_register_number = pAsm->number_used_registers; + + // Map Inputs: Add 1 to mapping since R0 is used for index + num_inputs = evergreen_Map_Vertex_Input(pAsm, mesa_vp, pAsm->number_used_registers); + pAsm->number_used_registers += num_inputs; + + // Create VFETCH instructions for inputs + if (GL_TRUE != evergreen_Process_Vertex_Program_Vfetch_Instructions2(ctx, vp, mesa_vp) ) + { + radeon_error("Calling evergreen_Process_Vertex_Program_Vfetch_Instructions2 return error. 
\n"); + return; + } + + // Map Outputs + pAsm->number_of_exports = evergreen_Map_Vertex_Output(pAsm, mesa_vp, pAsm->number_used_registers); + + pAsm->starting_export_register_number = pAsm->number_used_registers; + + pAsm->number_used_registers += pAsm->number_of_exports; + + pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports); + + for(ui=0; ui<pAsm->number_of_exports; ui++) + { + pAsm->pucOutMask[ui] = 0x0; + } + + /* Map temporary registers (GPRs) */ + pAsm->starting_temp_register_number = pAsm->number_used_registers; + + if(mesa_vp->Base.NumNativeTemporaries >= mesa_vp->Base.NumTemporaries) + { /* arb uses NumNativeTemporaries */ + pAsm->number_used_registers += mesa_vp->Base.NumNativeTemporaries; + } + else + { /* fix func t_vp uses NumTemporaries */ + pAsm->number_used_registers += mesa_vp->Base.NumTemporaries; + } + + pAsm->flag_reg_index = pAsm->number_used_registers++; + + pAsm->uFirstHelpReg = pAsm->number_used_registers; +} + +GLboolean evergreen_Find_Instruction_Dependencies_vp(struct evergreen_vertex_program *vp, + struct gl_vertex_program *mesa_vp) +{ + GLuint i, j; + GLint * puiTEMPwrites; + struct prog_instruction *pILInst; + InstDeps *pInstDeps; + + puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_vp->Base.NumTemporaries); + for(i=0; i<mesa_vp->Base.NumTemporaries; i++) + { + puiTEMPwrites[i] = -1; + } + + pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_vp->Base.NumInstructions); + + for(i=0; i<mesa_vp->Base.NumInstructions; i++) + { + pInstDeps[i].nDstDep = -1; + pILInst = &(mesa_vp->Base.Instructions[i]); + + //Dst + if(pILInst->DstReg.File == PROGRAM_TEMPORARY) + { + //Set lastwrite for the temp + puiTEMPwrites[pILInst->DstReg.Index] = i; + } + + //Src + for(j=0; j<3; j++) + { + if(pILInst->SrcReg[j].File == PROGRAM_TEMPORARY) + { + //Set dep. 
+ pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index]; + } + else + { + pInstDeps[i].nSrcDeps[j] = -1; + } + } + } + + vp->r700AsmCode.pInstDeps = pInstDeps; + + FREE(puiTEMPwrites); + + return GL_TRUE; +} + +struct evergreen_vertex_program* evergreenTranslateVertexShader(GLcontext *ctx, + struct gl_vertex_program *mesa_vp) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + + struct evergreen_vertex_program *vp; + unsigned int i; + + vp = calloc(1, sizeof(*vp)); + vp->mesa_program = _mesa_clone_vertex_program(ctx, mesa_vp); + + vp->constbo0 = NULL; + + if (mesa_vp->IsPositionInvariant) + { + _mesa_insert_mvp_code(ctx, vp->mesa_program); + } + + for(i=0; i<context->nNumActiveAos; i++) + { + vp->aos_desc[i].size = context->stream_desc[i].size; + vp->aos_desc[i].stride = context->stream_desc[i].stride; + vp->aos_desc[i].type = context->stream_desc[i].type; + vp->aos_desc[i].format = context->stream_desc[i].format; + } + + if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) + { + vp->r700AsmCode.bR6xx = 1; + } + + //Init_Program + Init_r700_AssemblerBase(SPT_VP, &(vp->r700AsmCode), &(vp->r700Shader) ); + + vp->r700AsmCode.bUseMemConstant = GL_TRUE; + vp->r700AsmCode.unAsic = 8; + + evergreen_Map_Vertex_Program(ctx, vp, vp->mesa_program ); + + if(GL_FALSE == evergreen_Find_Instruction_Dependencies_vp(vp, vp->mesa_program)) + { + return NULL; + } + + InitShaderProgram(&(vp->r700AsmCode)); + + for(i=0; i < MAX_SAMPLERS; i++) + { + vp->r700AsmCode.SamplerUnits[i] = vp->mesa_program->Base.SamplerUnits[i]; + } + + vp->r700AsmCode.unCurNumILInsts = vp->mesa_program->Base.NumInstructions; + + if(GL_FALSE == AssembleInstr(0, + 0, + vp->mesa_program->Base.NumInstructions, + &(vp->mesa_program->Base.Instructions[0]), + &(vp->r700AsmCode)) ) + { + return NULL; + } + + if(GL_FALSE == Process_Vertex_Exports(&(vp->r700AsmCode), vp->mesa_program->Base.OutputsWritten) ) + { + return NULL; + } + + if( GL_FALSE == RelocProgram(&(vp->r700AsmCode), 
&(vp->mesa_program->Base)) ) + { + return GL_FALSE; + } + + vp->r700Shader.nRegs = (vp->r700AsmCode.number_used_registers == 0) ? 0 + : (vp->r700AsmCode.number_used_registers - 1); + + vp->r700Shader.nParamExports = vp->r700AsmCode.number_of_exports; + + vp->translated = GL_TRUE; + + return vp; +} + +void evergreenSelectVertexShader(GLcontext *ctx) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + struct evergreen_vertex_program_cont *vpc; + struct evergreen_vertex_program *vp; + unsigned int i; + GLboolean match; + GLbitfield InputsRead; + + vpc = (struct evergreen_vertex_program_cont *)ctx->VertexProgram._Current; + + InputsRead = vpc->mesa_program.Base.InputsRead; + if (vpc->mesa_program.IsPositionInvariant) + { + InputsRead |= VERT_BIT_POS; + } + + for (vp = vpc->progs; vp; vp = vp->next) + { + match = GL_TRUE; + for(i=0; i<context->nNumActiveAos; i++) + { + if (vp->aos_desc[i].size != context->stream_desc[i].size || + vp->aos_desc[i].format != context->stream_desc[i].format) + { + match = GL_FALSE; + break; + } + } + if (match) + { + context->selected_vp = vp; + return; + } + } + + vp = evergreenTranslateVertexShader(ctx, &(vpc->mesa_program)); + if(!vp) + { + radeon_error("Failed to translate vertex shader. 
\n"); + return; + } + vp->next = vpc->progs; + vpc->progs = vp; + context->selected_vp = vp; + return; +} + +int evergreen_getTypeSize(GLenum type) +{ + switch (type) + { + case GL_DOUBLE: + return sizeof(GLdouble); + case GL_FLOAT: + return sizeof(GLfloat); + case GL_INT: + return sizeof(GLint); + case GL_UNSIGNED_INT: + return sizeof(GLuint); + case GL_SHORT: + return sizeof(GLshort); + case GL_UNSIGNED_SHORT: + return sizeof(GLushort); + case GL_BYTE: + return sizeof(GLbyte); + case GL_UNSIGNED_BYTE: + return sizeof(GLubyte); + default: + assert(0); + return 0; + } +} + +static void evergreenTranslateAttrib(GLcontext *ctx, GLuint unLoc, int count, const struct gl_client_array *input) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + + StreamDesc * pStreamDesc = &(context->stream_desc[context->nNumActiveAos]); + + GLuint stride; + + stride = (input->StrideB == 0) ? evergreen_getTypeSize(input->Type) * input->Size + : input->StrideB; + + if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT || +#if MESA_BIG_ENDIAN + evergreen_getTypeSize(input->Type) != 4 || +#endif + stride < 4) + { + pStreamDesc->type = GL_FLOAT; + + if (input->StrideB == 0) + { + pStreamDesc->stride = 0; + } + else + { + pStreamDesc->stride = sizeof(GLfloat) * input->Size; + } + pStreamDesc->dwords = input->Size; + pStreamDesc->is_named_bo = GL_FALSE; + } + else + { + pStreamDesc->type = input->Type; + pStreamDesc->dwords = (evergreen_getTypeSize(input->Type) * input->Size + 3)/ 4; + if (!input->BufferObj->Name) + { + if (input->StrideB == 0) + { + pStreamDesc->stride = 0; + } + else + { + pStreamDesc->stride = (evergreen_getTypeSize(pStreamDesc->type) * input->Size + 3) & ~3; + } + + pStreamDesc->is_named_bo = GL_FALSE; + } + } + + pStreamDesc->size = input->Size; + pStreamDesc->dst_loc = context->nNumActiveAos; + pStreamDesc->element = unLoc; + pStreamDesc->format = input->Format; + + switch (pStreamDesc->type) + { //GetSurfaceFormat + case GL_FLOAT: + 
pStreamDesc->_signed = 0; + pStreamDesc->normalize = GL_FALSE; + break; + case GL_SHORT: + pStreamDesc->_signed = 1; + pStreamDesc->normalize = input->Normalized; + break; + case GL_BYTE: + pStreamDesc->_signed = 1; + pStreamDesc->normalize = input->Normalized; + break; + case GL_UNSIGNED_SHORT: + pStreamDesc->_signed = 0; + pStreamDesc->normalize = input->Normalized; + break; + case GL_UNSIGNED_BYTE: + pStreamDesc->_signed = 0; + pStreamDesc->normalize = input->Normalized; + break; + default: + case GL_INT: + case GL_UNSIGNED_INT: + case GL_DOUBLE: + assert(0); + break; + } + context->nNumActiveAos++; +} + +void evergreenSetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + struct evergreen_vertex_program *vpc + = (struct evergreen_vertex_program *)ctx->VertexProgram._Current; + + struct gl_vertex_program * mesa_vp = (struct gl_vertex_program *)&(vpc->mesa_program); + unsigned int unLoc = 0; + unsigned int unBit = mesa_vp->Base.InputsRead; + context->nNumActiveAos = 0; + + if (mesa_vp->IsPositionInvariant) + { + unBit |= VERT_BIT_POS; + } + + while(unBit) + { + if(unBit & 1) + { + evergreenTranslateAttrib(ctx, unLoc, count, arrays[unLoc]); + } + + unBit >>= 1; + ++unLoc; + } + context->radeon.tcl.aos_count = context->nNumActiveAos; +} + +void * evergreenGetActiveVpShaderBo(GLcontext * ctx) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + struct evergreen_vertex_program *vp = context->selected_vp;; + + if (vp) + return vp->shaderbo; + else + return NULL; +} + +void * evergreenGetActiveVpShaderConstBo(GLcontext * ctx) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + struct evergreen_vertex_program *vp = context->selected_vp;; + + if (vp) + return vp->constbo0; + else + return NULL; +} + +GLboolean evergreenSetupVertexProgram(GLcontext * ctx) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + struct 
evergreen_vertex_program *vp = context->selected_vp; + + if(GL_FALSE == vp->loaded) + { + if(vp->r700Shader.bNeedsAssembly == GL_TRUE) + { + Assemble( &(vp->r700Shader) ); + } + + /* Load vp to gpu */ + r600EmitShader(ctx, + &(vp->shaderbo), + (GLvoid *)(vp->r700Shader.pProgram), + vp->r700Shader.uShaderBinaryDWORDSize, + "VS"); + + vp->loaded = GL_TRUE; + } + + EVERGREEN_STATECHANGE(context, vs); + + /* TODO : enable this after MemUse fixed *= + (context->chipobj.MemUse)(context, vp->shadercode.buf->id); + */ + + evergreen->SQ_PGM_RESOURCES_VS.u32All = 0; + SETbit(evergreen->SQ_PGM_RESOURCES_VS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit); + + evergreen->vs.SQ_ALU_CONST_CACHE_VS_0.u32All = 0; /* set from buffer object. */ + + evergreen->vs.SQ_PGM_START_VS.u32All = 0; + + SETfield(evergreen->SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.nRegs + 1, + NUM_GPRS_shift, NUM_GPRS_mask); + + if(vp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */ + { + SETfield(evergreen->SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.uStackSize, + STACK_SIZE_shift, STACK_SIZE_mask); + } + + EVERGREEN_STATECHANGE(context, spi); + + SETfield(evergreen->SPI_VS_OUT_CONFIG.u32All, + vp->r700Shader.nParamExports ? (vp->r700Shader.nParamExports - 1) : 0, + VS_EXPORT_COUNT_shift, VS_EXPORT_COUNT_mask); + SETfield(evergreen->SPI_PS_IN_CONTROL_0.u32All, vp->r700Shader.nParamExports, + NUM_INTERP_shift, NUM_INTERP_mask); + + /* + SETbit(evergreen->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit); + CLEARbit(evergreen->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit); + */ + + return GL_TRUE; +} + +GLboolean evergreenSetupVPconstants(GLcontext * ctx) +{ + context_t *context = EVERGREEN_CONTEXT(ctx); + EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); + struct evergreen_vertex_program *vp = context->selected_vp; + + struct gl_program_parameter_list *paramList; + unsigned int unNumParamData; + unsigned int ui; + + /* sent out shader constants. 
*/ + paramList = vp->mesa_program->Base.Parameters; + + if(NULL != paramList) { + /* vp->mesa_program was cloned, not updated by glsl shader api. */ + /* _mesa_reference_program has already checked glsl shProg is ok and set ctx->VertexProgem._Current */ + /* so, use ctx->VertexProgem._Current */ + struct gl_program_parameter_list *paramListOrginal = + ctx->VertexProgram._Current->Base.Parameters; + + _mesa_load_state_parameters(ctx, paramList); + + if (paramList->NumParameters > EVERGREEN_MAX_DX9_CONSTS) + return GL_FALSE; + + EVERGREEN_STATECHANGE(context, vs); + + evergreen->vs.num_consts = paramList->NumParameters; + + unNumParamData = paramList->NumParameters; + + for(ui=0; ui<unNumParamData; ui++) { + if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM) + { + evergreen->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0]; + evergreen->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1]; + evergreen->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2]; + evergreen->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3]; + } + else + { + evergreen->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; + evergreen->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; + evergreen->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; + evergreen->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + } + } + + radeonAllocDmaRegion(&context->radeon, + &context->vp_Constbo, + &context->vp_bo_offset, + 256, + 256); + r600EmitShaderConsts(ctx, + context->vp_Constbo, + context->vp_bo_offset, + (GLvoid *)&(evergreen->vs.consts[0][0]), + unNumParamData * 4 * 4); + } else + evergreen->vs.num_consts = 0; + + COMPILED_SUB * pCompiledSub; + GLuint uj; + GLuint unConstOffset = evergreen->vs.num_consts; + for(ui=0; ui<vp->r700AsmCode.unNumPresub; ui++) + { + pCompiledSub = vp->r700AsmCode.presubs[ui].pCompiledSub; + + evergreen->vs.num_consts += pCompiledSub->NumParameters; + + for(uj=0; 
uj<pCompiledSub->NumParameters; uj++) + { + evergreen->vs.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0]; + evergreen->vs.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1]; + evergreen->vs.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2]; + evergreen->vs.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3]; + } + unConstOffset += pCompiledSub->NumParameters; + } +}
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r600/evergreen_vertprog.h b/src/mesa/drivers/dri/r600/evergreen_vertprog.h new file mode 100644 index 00000000000..58539021152 --- /dev/null +++ b/src/mesa/drivers/dri/r600/evergreen_vertprog.h @@ -0,0 +1,109 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +/* + * Authors: + * Richard Li <[email protected]>, <[email protected]> + */ + + +#ifndef _EVERGREEN_VERTPROG_H_ +#define _EVERGREEN_VERTPROG_H_ + +#include "main/glheader.h" +#include "main/mtypes.h" + +#include "r700_shader.h" +#include "r700_assembler.h" + +typedef struct evergreenArrayDesc //TEMP +{ + GLint size; //number of data element + GLenum type; //data element type + GLsizei stride; + GLenum format; //GL_RGBA or GL_BGRA +} evergreenArrayDesc; + +struct evergreen_vertex_program +{ + struct gl_vertex_program *mesa_program; /* Must be first */ + + struct evergreen_vertex_program *next; + + r700_AssemblerBase r700AsmCode; + R700_Shader r700Shader; + + GLboolean translated; + GLboolean loaded; + + void * shaderbo; + + GLuint K0used; + void * constbo0; + + evergreenArrayDesc aos_desc[VERT_ATTRIB_MAX]; +}; + +struct evergreen_vertex_program_cont +{ + struct gl_vertex_program mesa_program; + + struct evergreen_vertex_program *progs; +}; + +//Internal +unsigned int evergreen_Map_Vertex_Output(r700_AssemblerBase *pAsm, + struct gl_vertex_program *mesa_vp, + unsigned int unStart); +unsigned int evergreen_Map_Vertex_Input(r700_AssemblerBase *pAsm, + struct gl_vertex_program *mesa_vp, + unsigned int unStart); +GLboolean evergreen_Process_Vertex_Program_Vfetch_Instructions( + struct evergreen_vertex_program *vp, + struct gl_vertex_program *mesa_vp); +GLboolean evergreen_Process_Vertex_Program_Vfetch_Instructions2( + GLcontext *ctx, + struct evergreen_vertex_program *vp, + struct gl_vertex_program *mesa_vp); +void evergreen_Map_Vertex_Program(GLcontext *ctx, + struct evergreen_vertex_program *vp, + struct gl_vertex_program *mesa_vp); +GLboolean evergreen_Find_Instruction_Dependencies_vp(struct evergreen_vertex_program *vp, + struct gl_vertex_program *mesa_vp); + +struct evergreen_vertex_program* evergreenTranslateVertexShader(GLcontext *ctx, + struct gl_vertex_program *mesa_vp); + +/* Interface */ +extern void evergreenSelectVertexShader(GLcontext *ctx); 
+extern void evergreenSetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count); + +extern GLboolean evergreenSetupVertexProgram(GLcontext * ctx); + +extern GLboolean evergreenSetupVPconstants(GLcontext * ctx); + +extern void * evergreenGetActiveVpShaderBo(GLcontext * ctx); + +extern void * evergreenGetActiveVpShaderConstBo(GLcontext * ctx); + +extern int evergreen_getTypeSize(GLenum type); + +#endif /* _EVERGREEN_VERTPROG_H_ */ diff --git a/src/mesa/drivers/dri/r600/r600_blit.c b/src/mesa/drivers/dri/r600/r600_blit.c index 27acff9c166..ef47ae1c056 100644 --- a/src/mesa/drivers/dri/r600/r600_blit.c +++ b/src/mesa/drivers/dri/r600/r600_blit.c @@ -1454,7 +1454,7 @@ set_default_state(context_t *context) SETbit(sq_dyn_gpr_cntl_ps_flush_req, VS_PC_LIMIT_ENABLE_bit); } - BEGIN_BATCH_NO_AUTOSTATE(117); + BEGIN_BATCH_NO_AUTOSTATE(120); R600_OUT_BATCH_REGSEQ(SQ_CONFIG, 6); R600_OUT_BATCH(sq_config); R600_OUT_BATCH(sq_gpr_resource_mgmt_1); @@ -1499,9 +1499,10 @@ set_default_state(context_t *context) R600_OUT_BATCH_REGVAL(PA_SU_VTX_CNTL, (PIX_CENTER_bit) | (X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) | (X_1_256TH << QUANT_MODE_shift)); + R600_OUT_BATCH_REGVAL(PA_SC_AA_CONFIG, 0); R600_OUT_BATCH_REGSEQ(VGT_MAX_VTX_INDX, 4); - R600_OUT_BATCH(2048); + R600_OUT_BATCH(0xffffff); R600_OUT_BATCH(0); R600_OUT_BATCH(0); R600_OUT_BATCH(0); @@ -1614,7 +1615,7 @@ unsigned r600_blit(GLcontext *ctx, /* Flush is needed to make sure that source buffer has correct data */ radeonFlush(ctx); - rcommonEnsureCmdBufSpace(&context->radeon, 308, __FUNCTION__); + rcommonEnsureCmdBufSpace(&context->radeon, 311, __FUNCTION__); /* load shaders */ load_shaders(context->radeon.glCtx); @@ -1623,7 +1624,7 @@ unsigned r600_blit(GLcontext *ctx, return GL_FALSE; /* set clear state */ - /* 117 */ + /* 120 */ set_default_state(context); /* shaders */ diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c index 8013553f679..b3331fc8b88 
100644 --- a/src/mesa/drivers/dri/r600/r600_cmdbuf.c +++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c @@ -473,7 +473,14 @@ void r600InitCmdBuf(context_t *r600) /* from rcommonInitCmdBuf */ radeonContextPtr rmesa = &r600->radeon; GLuint size; - r600InitAtoms(r600); + if(r600->radeon.radeonScreen->chip_family >= CHIP_FAMILY_CEDAR) + { + evergreenInitAtoms(r600); + } + else + { + r600InitAtoms(r600); + } /* Initialize command buffer */ size = 256 * driQueryOptioni(&rmesa->optionCache, diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.h b/src/mesa/drivers/dri/r600/r600_cmdbuf.h index 78fccd0b601..801bb013f6e 100644 --- a/src/mesa/drivers/dri/r600/r600_cmdbuf.h +++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.h @@ -190,6 +190,46 @@ do { \ #define R600_OUT_BATCH_REGSEQ(reg, count) \ R600_OUT_BATCH_REGS((reg), (count)) +/* evergreen */ +#define EVERGREEN_OUT_BATCH_REGS(reg, num) \ +do { \ + if ((reg) >= R600_SET_CONFIG_REG_OFFSET && (reg) < R600_SET_CONFIG_REG_END) { \ + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, (num))); \ + R600_OUT_BATCH(((reg) - R600_SET_CONFIG_REG_OFFSET) >> 2); \ + } else if ((reg) >= R600_SET_CONTEXT_REG_OFFSET && (reg) < R600_SET_CONTEXT_REG_END) { \ + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONTEXT_REG, (num))); \ + R600_OUT_BATCH(((reg) - R600_SET_CONTEXT_REG_OFFSET) >> 2); \ + } else if ((reg) >= EG_SET_RESOURCE_OFFSET && (reg) < EG_SET_RESOURCE_END) { \ + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, (num))); \ + R600_OUT_BATCH(((reg) - EG_SET_RESOURCE_OFFSET) >> 2); \ + } else if ((reg) >= EG_SET_LOOP_CONST_OFFSET && (reg) < EG_SET_LOOP_CONST_END) { \ + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_LOOP_CONST, (num))); \ + R600_OUT_BATCH(((reg) - EG_SET_LOOP_CONST_OFFSET) >> 2); \ + } else if ((reg) >= R600_SET_SAMPLER_OFFSET && (reg) < R600_SET_SAMPLER_END) { \ + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, (num))); \ + R600_OUT_BATCH(((reg) - R600_SET_SAMPLER_OFFSET) >> 2); \ + } else if ((reg) >= R600_SET_CTL_CONST_OFFSET && (reg) < 
R600_SET_CTL_CONST_END) { \ + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, (num))); \ + R600_OUT_BATCH(((reg) - R600_SET_CTL_CONST_OFFSET) >> 2); \ + } else if ((reg) >= EG_SET_BOOL_CONST_OFFSET && (reg) < EG_SET_BOOL_CONST_END) { \ + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_BOOL_CONST, (num))); \ + R600_OUT_BATCH(((reg) - EG_SET_BOOL_CONST_OFFSET) >> 2); \ + } else { \ + R600_OUT_BATCH(CP_PACKET0((reg), (num))); \ + } \ +} while (0) + +/** Single register write to command buffer; requires 3 dwords for most things. */ +#define EVERGREEN_OUT_BATCH_REGVAL(reg, val) \ + EVERGREEN_OUT_BATCH_REGS((reg), 1); \ + R600_OUT_BATCH((val)) + +/** Continuous register range write to command buffer; requires 1 dword, + * expects count dwords afterwards for register contents. */ +#define EVERGREEN_OUT_BATCH_REGSEQ(reg, count) \ + EVERGREEN_OUT_BATCH_REGS((reg), (count)) + + extern void r600InitCmdBuf(context_t *r600); #endif /* __R600_CMDBUF_H__ */ diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index 389b0412baa..bb959e7d2d9 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -66,6 +66,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "r700_state.h" #include "r700_ioctl.h" +#include "evergreen_context.h" +#include "evergreen_state.h" +#include "evergreen_tex.h" +#include "evergreen_ioctl.h" +#include "evergreen_oglprog.h" #include "utils.h" @@ -247,6 +252,19 @@ static void r600_init_vtbl(radeonContextPtr radeon) static void r600InitConstValues(GLcontext *ctx, radeonScreenPtr screen) { + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + + if( (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_CEDAR) + &&(context->radeon.radeonScreen->chip_family <= CHIP_FAMILY_HEMLOCK) ) + { + r700->bShaderUseMemConstant = GL_TRUE; + } + else + { + r700->bShaderUseMemConstant = GL_FALSE; + } + ctx->Const.MaxTextureImageUnits = 16; /* 8 per clause on r6xx, 16 on r7xx * but I think mesa only supports 8 at the moment @@ -381,18 +399,45 @@ GLboolean r600CreateContext(gl_api api, r600ParseOptions(r600, screen); r600->radeon.radeonScreen = screen; - r600_init_vtbl(&r600->radeon); + if(screen->chip_family >= CHIP_FAMILY_CEDAR) + { + evergreen_init_vtbl(&r600->radeon); + } + else + { + r600_init_vtbl(&r600->radeon); + } + /* Init default driver functions then plug in our R600-specific functions * (the texture functions are especially important) */ _mesa_init_driver_functions(&functions); - r700InitStateFuncs(&r600->radeon, &functions); - r600InitTextureFuncs(&r600->radeon, &functions); - r700InitShaderFuncs(&functions); + if(screen->chip_family >= CHIP_FAMILY_CEDAR) + { + evergreenCreateChip(r600); + evergreenInitStateFuncs(&r600->radeon, &functions); + evergreenInitTextureFuncs(&r600->radeon, &functions); + evergreenInitShaderFuncs(&functions); + } + else + { + r700InitStateFuncs(&r600->radeon, &functions); + r600InitTextureFuncs(&r600->radeon, &functions); + r700InitShaderFuncs(&functions); + } + radeonInitQueryObjFunctions(&functions); - r700InitIoctlFuncs(&functions); + + if(screen->chip_family >= CHIP_FAMILY_CEDAR) + { + 
evergreenInitIoctlFuncs(&functions); + } + else + { + r700InitIoctlFuncs(&functions); + } radeonInitBufferObjectFuncs(&functions); if (!radeonInitContext(&r600->radeon, &functions, @@ -435,16 +480,46 @@ GLboolean r600CreateContext(gl_api api, radeon_init_debug(); - r700InitDraw(ctx); + if(screen->chip_family >= CHIP_FAMILY_CEDAR) + { + evergreenInitDraw(ctx); + } + else + { + r700InitDraw(ctx); + } radeon_fbo_init(&r600->radeon); radeonInitSpanFuncs( ctx ); r600InitCmdBuf(r600); - r700InitState(r600->radeon.glCtx); + + if(screen->chip_family >= CHIP_FAMILY_CEDAR) + { + evergreenInitState(r600->radeon.glCtx); + } + else + { + r700InitState(r600->radeon.glCtx); + } r600InitGLExtensions(ctx); return GL_TRUE; } +void r600DestroyContext(__DRIcontext *driContextPriv ) +{ + void *pChip; + context_t *context = (context_t *) driContextPriv->driverPrivate; + + assert(context); + + pChip = context->pChip; + + /* destroy context first, free pChip, in case there are things flush to asic. */ + radeonDestroyContext(driContextPriv); + + FREE(pChip); +} + diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h index 063dd7c49a1..6a831966487 100644 --- a/src/mesa/drivers/dri/r600/r600_context.h +++ b/src/mesa/drivers/dri/r600/r600_context.h @@ -53,6 +53,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "r700_oglprog.h" #include "r700_vertprog.h" +#include "evergreen_chip.h" + struct r600_context; typedef struct r600_context context_t; @@ -63,6 +65,10 @@ typedef struct r600_context context_t; #include "tnl_dd/t_dd_vertex.h" #undef TAG +#define FORCE_CF_TEX_BARRIER 1 + +/* #define GENERATE_SHADER_FOR_2D 1 */ + #define R600_FALLBACK_NONE 0 #define R600_FALLBACK_TCL 1 #define R600_FALLBACK_RAST 2 @@ -103,6 +109,24 @@ struct r600_hw_state { struct radeon_state_atom tx_brdr_clr; }; +struct evergreen_hw_state { + struct radeon_state_atom one_time_init; + struct radeon_state_atom init; + struct radeon_state_atom pa; + struct radeon_state_atom vgt; + struct radeon_state_atom tp; + struct radeon_state_atom sq; + struct radeon_state_atom vs; + struct radeon_state_atom spi; + struct radeon_state_atom sx; + struct radeon_state_atom tx; + struct radeon_state_atom db; + struct radeon_state_atom cb; + struct radeon_state_atom vtx; + struct radeon_state_atom cp; + struct radeon_state_atom timestamp; +}; + typedef struct StreamDesc { GLint size; //number of data element @@ -141,6 +165,9 @@ struct r600_context { struct r600_hw_state atoms; + struct evergreen_hw_state evergreen_atoms; + void * pChip; + struct r700_vertex_program *selected_vp; /* Vertex buffers @@ -150,16 +177,29 @@ struct r600_context { struct r700_index_buffer ind_buf; struct radeon_bo *blit_bo; GLboolean blit_bo_loaded; + + /* Shader const buffer */ + struct radeon_bo * vp_Constbo; + int vp_bo_offset; + struct radeon_bo * fp_Constbo; + int fp_bo_offset; }; +#define EVERGREEN_CONTEXT(ctx) ((context_t *)(ctx->DriverCtx)) + #define R700_CONTEXT(ctx) ((context_t *)(ctx->DriverCtx)) #define GL_CONTEXT(context) ((GLcontext *)(context->radeon.glCtx)) +#define GET_EVERGREEN_CHIP(context) ((EVERGREEN_CHIP_CONTEXT*)(context->pChip)) + extern GLboolean r600CreateContext(gl_api api, const __GLcontextModes * glVisual, __DRIcontext * driContextPriv, void *sharedContextPrivate); +extern void 
r600DestroyContext(__DRIcontext *driContextPriv ); +extern void evergreenCreateChip(context_t *context); + #define R700_CONTEXT_STATES(context) ((R700_CHIP_CONTEXT *)(&context->hw)) #define R600_NEWPRIM( rmesa ) \ @@ -175,6 +215,13 @@ do { \ r600->radeon.hw.is_dirty = GL_TRUE; \ } while(0) +#define EVERGREEN_STATECHANGE(r600, ATOM) \ +do { \ + R600_NEWPRIM(r600); \ + r600->evergreen_atoms.ATOM.dirty = GL_TRUE; \ + r600->radeon.hw.is_dirty = GL_TRUE; \ +} while(0) + extern GLboolean r700SyncSurf(context_t *context, struct radeon_bo *pbo, uint32_t read_domain, @@ -187,6 +234,9 @@ extern void r700Start3D(context_t *context); extern void r600InitAtoms(context_t *context); extern void r700InitDraw(GLcontext *ctx); +extern void evergreenInitAtoms(context_t *context); +extern void evergreenInitDraw(GLcontext *ctx); + #define RADEON_D_CAPTURE 0 #define RADEON_D_PLAYBACK 1 #define RADEON_D_PLAYBACK_RAW 2 diff --git a/src/mesa/drivers/dri/r600/r600_emit.c b/src/mesa/drivers/dri/r600/r600_emit.c index 1eb89a53058..a840106c144 100644 --- a/src/mesa/drivers/dri/r600/r600_emit.c +++ b/src/mesa/drivers/dri/r600/r600_emit.c @@ -49,6 +49,71 @@ void r600EmitCacheFlush(context_t *rmesa) { } +GLboolean r600AllocShaderConsts(GLcontext * ctx, + void ** constbo, + int sizeinBYTE, + char * szShaderUsage) +{ + radeonContextPtr radeonctx = RADEON_CONTEXT(ctx); + struct radeon_bo * pbo; + + if(sizeinBYTE < 64) /* SQ_ALU_CONST_BUFFER_SIZE need 64 bytes at least to be non 0 */ + { + sizeinBYTE = 64; + } + +shader_again_alloc: + pbo = radeon_bo_open(radeonctx->radeonScreen->bom, + 0, + sizeinBYTE, + 256, + RADEON_GEM_DOMAIN_GTT, + 0); + + radeon_print(RADEON_SHADER, RADEON_NORMAL, "%s %p size %d: %s\n", __func__, pbo, sizeinBYTE, szShaderUsage); + + if (!pbo) { + radeon_print(RADEON_MEMORY | RADEON_CS, RADEON_IMPORTANT, "No memory for buffer object. 
Flushing command buffer.\n"); + rcommonFlushCmdBuf(radeonctx, __FUNCTION__); + goto shader_again_alloc; + } + + radeon_cs_space_add_persistent_bo(radeonctx->cmdbuf.cs, + pbo, + RADEON_GEM_DOMAIN_GTT, 0); + + if (radeon_cs_space_check_with_bo(radeonctx->cmdbuf.cs, + pbo, + RADEON_GEM_DOMAIN_GTT, 0)) { + radeon_error("failure to revalidate BOs - badness\n"); + return GL_FALSE; + } + + *constbo = (void*)pbo; + + return GL_TRUE; +} +GLboolean r600EmitShaderConsts(GLcontext * ctx, + void * constbo, + int bo_offset, + GLvoid * data, + int sizeinBYTE) +{ + struct radeon_bo * pbo = (struct radeon_bo *)constbo; + uint8_t *out; + + radeon_bo_map(pbo, 1); + + out = (uint8_t*)(pbo->ptr); + out = (uint8_t*)ADD_POINTERS(pbo->ptr, bo_offset); + + memcpy(out, data, sizeinBYTE); + + radeon_bo_unmap(pbo); + + return GL_TRUE; +} + GLboolean r600EmitShader(GLcontext * ctx, void ** shaderbo, GLvoid * data, diff --git a/src/mesa/drivers/dri/r600/r600_emit.h b/src/mesa/drivers/dri/r600/r600_emit.h index 661774d11ea..259561539fa 100644 --- a/src/mesa/drivers/dri/r600/r600_emit.h +++ b/src/mesa/drivers/dri/r600/r600_emit.h @@ -52,4 +52,14 @@ extern GLboolean r600EmitShader(GLcontext * ctx, extern GLboolean r600DeleteShader(GLcontext * ctx, void * shaderbo); +extern GLboolean r600AllocShaderConsts(GLcontext * ctx, + void ** constbo, + int sizeinBYTE, + char * szShaderUsage); +GLboolean r600EmitShaderConsts(GLcontext * ctx, + void * constbo, + int bo_offset, + GLvoid * data, + int sizeinBYTE); + #endif diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c index ba3690b70ed..fd928cfe5d2 100644 --- a/src/mesa/drivers/dri/r600/r600_texstate.c +++ b/src/mesa/drivers/dri/r600/r600_texstate.c @@ -50,6 +50,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "r700_fragprog.h" #include "r700_vertprog.h" +#include "evergreen_tex.h" + void r600UpdateTextureState(GLcontext * ctx); void r600UpdateTextureState(GLcontext * ctx) @@ -878,6 +880,18 @@ GLboolean r600ValidateBuffers(GLcontext * ctx) RADEON_GEM_DOMAIN_GTT, 0); } + pbo = (struct radeon_bo *)r700GetActiveFpShaderConstBo(ctx); + if (pbo) { + radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, pbo, + RADEON_GEM_DOMAIN_GTT, 0); + } + + pbo = (struct radeon_bo *)r700GetActiveVpShaderConstBo(ctx); + if (pbo) { + radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, pbo, + RADEON_GEM_DOMAIN_GTT, 0); + } + ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, first_elem(&rmesa->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0); if (ret) return GL_FALSE; @@ -897,6 +911,12 @@ void r600SetTexOffset(__DRIcontext * pDRICtx, GLint texname, if (!tObj) return; + if(rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_CEDAR) + { + evergreenSetTexOffset(pDRICtx, texname, offset, depth, pitch); + return; + } + t->image_override = GL_TRUE; if (!offset) @@ -989,6 +1009,12 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo radeon = pDRICtx->driverPrivate; rmesa = pDRICtx->driverPrivate; + if(rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_CEDAR) + { + evergreenSetTexBuffer(pDRICtx, target, glx_texture_format, dPriv); + return; + } + rfb = dPriv->driverPrivate; texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit]; texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target); diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 9c954cbf70c..45ff9c06249 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -38,6 +38,7 @@ #include "r600_context.h" #include "r700_assembler.h" +#include "evergreen_sq.h" #define USE_CF_FOR_CONTINUE_BREAK 1 #define USE_CF_FOR_POP_AFTER 1 @@ -258,6 +259,18 @@ 
GLboolean is_reduction_opcode(PVSDWORD* dest) return GL_FALSE; } +GLboolean EG_is_reduction_opcode(PVSDWORD* dest) +{ + if (dest->dst.op3 == 0) + { + if ( (dest->dst.opcode == EG_OP2_INST_DOT4 || dest->dst.opcode == EG_OP2_INST_DOT4_IEEE || dest->dst.opcode == EG_OP2_INST_CUBE) ) + { + return GL_TRUE; + } + } + return GL_FALSE; +} + GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size) { GLuint format = FMT_INVALID; @@ -423,6 +436,60 @@ unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3) return 3; } +unsigned int EG_GetNumOperands(GLuint opcode, GLuint nIsOp3) +{ + if(nIsOp3 > 0) + { + return 3; + } + + switch (opcode) + { + case EG_OP2_INST_ADD: + case EG_OP2_INST_KILLE: + case EG_OP2_INST_KILLGT: + case EG_OP2_INST_KILLGE: + case EG_OP2_INST_KILLNE: + case EG_OP2_INST_MUL: + case EG_OP2_INST_MAX: + case EG_OP2_INST_MIN: + //case EG_OP2_INST_MAX_DX10: + //case EG_OP2_INST_MIN_DX10: + case EG_OP2_INST_SETE: + case EG_OP2_INST_SETNE: + case EG_OP2_INST_SETGT: + case EG_OP2_INST_SETGE: + case EG_OP2_INST_PRED_SETE: + case EG_OP2_INST_PRED_SETGT: + case EG_OP2_INST_PRED_SETGE: + case EG_OP2_INST_PRED_SETNE: + case EG_OP2_INST_DOT4: + case EG_OP2_INST_DOT4_IEEE: + case EG_OP2_INST_CUBE: + return 2; + + case EG_OP2_INST_MOV: + //case SQ_OP2_INST_MOVA_FLOOR: + case EG_OP2_INST_FRACT: + case EG_OP2_INST_FLOOR: + case EG_OP2_INST_TRUNC: + case EG_OP2_INST_EXP_IEEE: + case EG_OP2_INST_LOG_CLAMPED: + case EG_OP2_INST_LOG_IEEE: + case EG_OP2_INST_RECIP_IEEE: + case EG_OP2_INST_RECIPSQRT_IEEE: + case EG_OP2_INST_FLT_TO_INT: + case EG_OP2_INST_SIN: + case EG_OP2_INST_COS: + return 1; + + default: radeon_error( + "Need instruction operand number for %x.\n", opcode); + }; + + return 3; +} + int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader) { GLuint i; @@ -718,21 +785,55 @@ GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm, return GL_FALSE; } - 
pAsm->cf_current_vtx_clause_ptr->m_Word1.f.pop_count = 0x0; - pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_const = 0x0; - pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; - pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count = 0x0; - pAsm->cf_current_vtx_clause_ptr->m_Word1.f.end_of_program = 0x0; - pAsm->cf_current_vtx_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; - pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_VTX; - pAsm->cf_current_vtx_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; - pAsm->cf_current_vtx_clause_ptr->m_Word1.f.barrier = 0x1; + if(8 == pAsm->unAsic) + { + SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, EG_CF_INST_VC, + EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask); + SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0, + EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask); + SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0, + EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask); + SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, SQ_CF_COND_ACTIVE, + EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask); + SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0, + EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask); + SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0, + EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit); + SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0, + EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); + SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0, + EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit); + SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 1, + EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit); + } + else + { + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.pop_count = 0x0; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count = 0x0; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.end_of_program = 
0x0; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_VTX; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.barrier = 0x1; + } LinkVertexInstruction(pAsm->cf_current_vtx_clause_ptr, vertex_instruction_ptr ); } else { - pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count++; + if(8 == pAsm->unAsic) + { + unsigned int count = GETbits(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, + EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask) + 1; + SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, count, + EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask); + } + else + { + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count++; + } } AddVTXInstruction(pAsm->pR700Shader, vertex_instruction_ptr); @@ -767,20 +868,59 @@ GLboolean add_tex_instruction(r700_AssemblerBase* pAsm, radeon_error("Could not allocate a new TEX CF instruction.\n"); return GL_FALSE; } - - pAsm->cf_current_tex_clause_ptr->m_Word1.f.pop_count = 0x0; - pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_const = 0x0; - pAsm->cf_current_tex_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; - pAsm->cf_current_tex_clause_ptr->m_Word1.f.end_of_program = 0x0; - pAsm->cf_current_tex_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; - pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_TEX; - pAsm->cf_current_tex_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; - pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x0; //0x1; + if(8 == pAsm->unAsic) + { + SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, EG_CF_INST_TC, + EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask); + SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0, + EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask); + SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0, + EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask); + SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 
SQ_CF_COND_ACTIVE, + EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask); + SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0, + EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask); + SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0, + EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit); + SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0, + EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); + SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0, + EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit); +#ifdef FORCE_CF_TEX_BARRIER + SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 1, + EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit); +#else + SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0, + EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit); +#endif + } + else + { + pAsm->cf_current_tex_clause_ptr->m_Word1.f.pop_count = 0x0; + pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_tex_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_tex_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_tex_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_TEX; + pAsm->cf_current_tex_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x0; //0x1; + } } else - { - pAsm->cf_current_tex_clause_ptr->m_Word1.f.count++; + { + if(8 == pAsm->unAsic) + { + unsigned int count = GETbits(pAsm->cf_current_tex_clause_ptr->m_Word1.val, + EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask) + 1; + SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, count, /* write back to the TEX clause the count was read from, not the VTX clause */ + EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask); + } + else + { + pAsm->cf_current_tex_clause_ptr->m_Word1.f.count++; + } } // If this clause constains any TEX instruction that is dependent on a previous instruction, @@ -891,6 +1031,188 @@ GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm, return GL_TRUE; } +GLboolean 
EG_assemble_vfetch_instruction(r700_AssemblerBase* pAsm, + GLuint destination_register, + GLenum type, + GLint size, + GLubyte element, + GLuint _signed, + GLboolean normalize, + GLenum format, + VTX_FETCH_METHOD * pFetchMethod) +{ + GLuint client_size_inbyte; + GLuint data_format; + GLuint mega_fetch_count; + GLuint is_mega_fetch_flag; + + GLuint dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w; + + R700VertexGenericFetch* vfetch_instruction_ptr; + R700VertexGenericFetch* assembled_vfetch_instruction_ptr + = pAsm->vfetch_instruction_ptr_array[element]; + + if (assembled_vfetch_instruction_ptr == NULL) + { + vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch); + if (vfetch_instruction_ptr == NULL) + { + return GL_FALSE; + } + Init_R700VertexGenericFetch(vfetch_instruction_ptr); + } + else + { + vfetch_instruction_ptr = assembled_vfetch_instruction_ptr; + } + + data_format = GetSurfaceFormat(type, size, &client_size_inbyte); + + if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here + { + //TODO : mini fetch + mega_fetch_count = 0; + is_mega_fetch_flag = 0; + } + else + { + mega_fetch_count = MEGA_FETCH_BYTES - 1; + is_mega_fetch_flag = 0x1; + pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte; + } + + SETfield(vfetch_instruction_ptr->m_Word0.val, EG_VC_INST_FETCH, + EG_VTX_WORD0__VC_INST_shift, + EG_VTX_WORD0__VC_INST_mask); + SETfield(vfetch_instruction_ptr->m_Word0.val, EG_VTX_FETCH_VERTEX_DATA, + EG_VTX_WORD0__FETCH_TYPE_shift, + EG_VTX_WORD0__FETCH_TYPE_mask); + CLEARbit(vfetch_instruction_ptr->m_Word0.val, + EG_VTX_WORD0__FWQ_bit); + SETfield(vfetch_instruction_ptr->m_Word0.val, element, + EG_VTX_WORD0__BUFFER_ID_shift, + EG_VTX_WORD0__BUFFER_ID_mask); + SETfield(vfetch_instruction_ptr->m_Word0.val, 0x0, + EG_VTX_WORD0__SRC_GPR_shift, + EG_VTX_WORD0__SRC_GPR_mask); + SETfield(vfetch_instruction_ptr->m_Word0.val, SQ_ABSOLUTE, + EG_VTX_WORD0__SRC_REL_shift, + EG_VTX_WORD0__SRC_REL_bit); + 
SETfield(vfetch_instruction_ptr->m_Word0.val, SQ_SEL_X, + EG_VTX_WORD0__SRC_SEL_X_shift, + EG_VTX_WORD0__SRC_SEL_X_mask); + SETfield(vfetch_instruction_ptr->m_Word0.val, mega_fetch_count, + EG_VTX_WORD0__MFC_shift, + EG_VTX_WORD0__MFC_mask); + + if(format == GL_BGRA) + { + dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_Z; + dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y; + dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_X; + dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W; + } + else + { + dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X; + dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y; + dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z; + dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W; + + } + SETfield(vfetch_instruction_ptr->m_Word1.val, dst_sel_x, + EG_VTX_WORD1__DST_SEL_X_shift, + EG_VTX_WORD1__DST_SEL_X_mask); + SETfield(vfetch_instruction_ptr->m_Word1.val, dst_sel_y, + EG_VTX_WORD1__DST_SEL_Y_shift, + EG_VTX_WORD1__DST_SEL_Y_mask); + SETfield(vfetch_instruction_ptr->m_Word1.val, dst_sel_z, + EG_VTX_WORD1__DST_SEL_Z_shift, + EG_VTX_WORD1__DST_SEL_Z_mask); + SETfield(vfetch_instruction_ptr->m_Word1.val, dst_sel_w, + EG_VTX_WORD1__DST_SEL_W_shift, + EG_VTX_WORD1__DST_SEL_W_mask); + + SETfield(vfetch_instruction_ptr->m_Word1.val, 0, /* use format here, in r6/r7, format used set in const, need to use same */ + EG_VTX_WORD1__UCF_shift, + EG_VTX_WORD1__UCF_bit); + SETfield(vfetch_instruction_ptr->m_Word1.val, data_format, + EG_VTX_WORD1__DATA_FORMAT_shift, + EG_VTX_WORD1__DATA_FORMAT_mask); +#ifdef TEST_VFETCH + SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_FORMAT_COMP_SIGNED, + EG_VTX_WORD1__FCA_shift, + EG_VTX_WORD1__FCA_bit); +#else + if(1 == _signed) + { + SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_FORMAT_COMP_SIGNED, + EG_VTX_WORD1__FCA_shift, + EG_VTX_WORD1__FCA_bit); + } + else + { + SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_FORMAT_COMP_UNSIGNED, + EG_VTX_WORD1__FCA_shift, + EG_VTX_WORD1__FCA_bit); + } +#endif /* TEST_VFETCH */ + + if(GL_TRUE == normalize) + { + 
SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_NUM_FORMAT_NORM, + EG_VTX_WORD1__NFA_shift, + EG_VTX_WORD1__NFA_mask); + } + else + { + SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_NUM_FORMAT_SCALED, + EG_VTX_WORD1__NFA_shift, + EG_VTX_WORD1__NFA_mask); + } + + /* Destination register */ + SETfield(vfetch_instruction_ptr->m_Word1.val, destination_register, + EG_VTX_WORD1_GPR__DST_GPR_shift, + EG_VTX_WORD1_GPR__DST_GPR_mask); + SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_ABSOLUTE, + EG_VTX_WORD1_GPR__DST_REL_shift, + EG_VTX_WORD1_GPR__DST_REL_bit); + + + SETfield(vfetch_instruction_ptr->m_Word2.val, 0, + EG_VTX_WORD2__OFFSET_shift, + EG_VTX_WORD2__OFFSET_mask); + SETfield(vfetch_instruction_ptr->m_Word2.val, SQ_ENDIAN_NONE, + EG_VTX_WORD2__ENDIAN_SWAP_shift, + EG_VTX_WORD2__ENDIAN_SWAP_mask); + SETfield(vfetch_instruction_ptr->m_Word2.val, 0, + EG_VTX_WORD2__CBNS_shift, + EG_VTX_WORD2__CBNS_bit); + SETfield(vfetch_instruction_ptr->m_Word2.val, is_mega_fetch_flag, + EG_VTX_WORD2__MEGA_FETCH_shift, + EG_VTX_WORD2__MEGA_FETCH_mask); + + if (assembled_vfetch_instruction_ptr == NULL) + { + if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) ) + { + return GL_FALSE; + } + + if (pAsm->vfetch_instruction_ptr_array[element] != NULL) + { + return GL_FALSE; + } + else + { + pAsm->vfetch_instruction_ptr_array[element] = vfetch_instruction_ptr; + } + } + + return GL_TRUE; +} + GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm, GLuint destination_register, GLenum type, @@ -1357,7 +1679,7 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm, break; case PROGRAM_INPUT: setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE); - pAsm->S[fld].src.rtype = SRC_REG_INPUT; + pAsm->S[fld].src.rtype = SRC_REG_GPR; switch (pAsm->currentShaderType) { case SPT_FP: @@ -1368,6 +1690,19 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm, break; } break; + case PROGRAM_OUTPUT: + setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE); 
+ pAsm->S[fld].src.rtype = SRC_REG_GPR; + switch (pAsm->currentShaderType) + { + case SPT_FP: + pAsm->S[fld].src.reg = pAsm->uiFP_OutputMap[pILInst->SrcReg[src].Index]; + break; + case SPT_VP: + pAsm->S[fld].src.reg = pAsm->ucVP_OutputMap[pILInst->SrcReg[src].Index]; + break; + } + break; default: radeon_error("Invalid source argument type : %d \n", pILInst->SrcReg[src].File); return GL_FALSE; @@ -1521,7 +1856,7 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) bValidTexCoord = GL_TRUE; pAsm->S[0].src.reg = pAsm->ucVP_AttributeMap[pILInst->SrcReg[0].Index]; - pAsm->S[0].src.rtype = SRC_REG_INPUT; + pAsm->S[0].src.rtype = SRC_REG_GPR; break; } } @@ -1544,7 +1879,7 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) bValidTexCoord = GL_TRUE; pAsm->S[0].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; - pAsm->S[0].src.rtype = SRC_REG_INPUT; + pAsm->S[0].src.rtype = SRC_REG_GPR; break; case FRAG_ATTRIB_FACE: fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n"); @@ -1560,7 +1895,7 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) bValidTexCoord = GL_TRUE; pAsm->S[0].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; - pAsm->S[0].src.rtype = SRC_REG_INPUT; + pAsm->S[0].src.rtype = SRC_REG_GPR; } } @@ -1606,19 +1941,68 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalize texture_coordinate_source = &(pAsm->S[0].src); texture_unit_source = &(pAsm->S[1].src); - tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode; - tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0; - tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0; - tex_instruction_ptr->m_Word0.f.alt_const = 0; - - if(SPT_VP == pAsm->currentShaderType) + if(8 == pAsm->unAsic) /* evergreen */ { - tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg + VERT_ATTRIB_MAX; - pAsm->unVetTexBits |= 1 << texture_unit_source->reg; + + SETfield(tex_instruction_ptr->m_Word0.val, pAsm->D.dst.opcode, + EG_TEX_WORD0__TEX_INST_shift, + EG_TEX_WORD0__TEX_INST_mask); + + 
if( (SQ_TEX_INST_GET_GRADIENTS_H == pAsm->D.dst.opcode) + ||(SQ_TEX_INST_GET_GRADIENTS_V == pAsm->D.dst.opcode) ) + { + /* Use fine texel derivative calculation rather than use quad derivative */ + SETfield(tex_instruction_ptr->m_Word0.val, 1, + EG_TEX_WORD0__INST_MOD_shift, + EG_TEX_WORD0__INST_MOD_mask); + } + else + { + SETfield(tex_instruction_ptr->m_Word0.val, 0, + EG_TEX_WORD0__INST_MOD_shift, + EG_TEX_WORD0__INST_MOD_mask); + } + + CLEARbit(tex_instruction_ptr->m_Word0.val, EG_TEX_WORD0__FWQ_bit); + + if(SPT_VP == pAsm->currentShaderType) + { + SETfield(tex_instruction_ptr->m_Word0.val, (texture_unit_source->reg + VERT_ATTRIB_MAX), + EG_TEX_WORD0__RESOURCE_ID_shift, + EG_TEX_WORD0__RESOURCE_ID_mask); + pAsm->unVetTexBits |= 1 << texture_unit_source->reg; + } + else + { + SETfield(tex_instruction_ptr->m_Word0.val, texture_unit_source->reg, + EG_TEX_WORD0__RESOURCE_ID_shift, + EG_TEX_WORD0__RESOURCE_ID_mask); + } + + CLEARbit(tex_instruction_ptr->m_Word0.val, EG_TEX_WORD0__ALT_CONST_bit); + SETfield(tex_instruction_ptr->m_Word0.val, 0, + EG_TEX_WORD0__RIM_shift, + EG_TEX_WORD0__RIM_mask); + SETfield(tex_instruction_ptr->m_Word0.val, 0, + EG_TEX_WORD0__SIM_shift, + EG_TEX_WORD0__SIM_mask); } else { - tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg; + tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode; + tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0; + tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0; + tex_instruction_ptr->m_Word0.f.alt_const = 0; + + if(SPT_VP == pAsm->currentShaderType) + { + tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg + VERT_ATTRIB_MAX; + pAsm->unVetTexBits |= 1 << texture_unit_source->reg; + } + else + { + tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg; + } } tex_instruction_ptr->m_Word1.f.lod_bias = 0x0; @@ -1644,8 +2028,20 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalize if ( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) 
|| (pAsm->D.dst.rtype == DST_REG_OUT) ) { - tex_instruction_ptr->m_Word0.f.src_gpr = texture_coordinate_source->reg; - tex_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE; + if(8 == pAsm->unAsic) /* evergreen */ + { + SETfield(tex_instruction_ptr->m_Word0.val, texture_coordinate_source->reg, + EG_TEX_WORD0__SRC_GPR_shift, + EG_TEX_WORD0__SRC_GPR_mask); + SETfield(tex_instruction_ptr->m_Word0.val, SQ_ABSOLUTE, + EG_TEX_WORD0__SRC_REL_shift, + EG_TEX_WORD0__SRC_REL_bit); + } + else + { + tex_instruction_ptr->m_Word0.f.src_gpr = texture_coordinate_source->reg; + tex_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE; + } tex_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg; tex_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; @@ -1696,7 +2092,8 @@ void initialize(r700_AssemblerBase *pAsm) GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr, int source_index, PVSSRC* pSource, - BITS scalar_channel_index) + BITS scalar_channel_index, + r700_AssemblerBase *pAsm) { BITS src_sel; BITS src_rel; @@ -1745,14 +2142,23 @@ GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr, else { if ( (pSource->rtype == SRC_REG_TEMPORARY) || - (pSource->rtype == SRC_REG_INPUT) + (pSource->rtype == SRC_REG_GPR) ) { src_sel = pSource->reg; } else if (pSource->rtype == SRC_REG_CONSTANT) { - src_sel = pSource->reg + CFILE_REGISTER_OFFSET; + /* TODO : 4 const buffers */ + if(GL_TRUE == pAsm->bUseMemConstant) + { + src_sel = pSource->reg + SQ_ALU_SRC_KCACHE0_BASE; + pAsm->kcacheUsed = SQ_ALU_SRC_KCACHE0_BASE; + } + else + { + src_sel = pSource->reg + CFILE_REGISTER_OFFSET; + } } else if (pSource->rtype == SRC_REC_LITERAL) { @@ -1902,6 +2308,17 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, pAsm->cf_current_alu_clause_ptr->m_Word1.f.count += (GetInstructionSize(alu_instruction_ptr->m_ShaderInstType) / 2); } + /* TODO : handle 4 bufs */ + if( (pAsm->kcacheUsed > 0) && (GL_TRUE == pAsm->bUseMemConstant) ) + { + 
pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank0 = 0x0; + pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank1 = 0x0; + pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_mode0 = SQ_CF_KCACHE_LOCK_2; + pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_mode1 = SQ_CF_KCACHE_NOP; + pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0; + pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0; + } + // If this clause constains any instruction that is forward dependent on a TEX instruction, // set the whole_quad_mode for this clause if ( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) ) @@ -1925,6 +2342,80 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, return GL_TRUE; } +GLboolean EG_add_ps_interp(r700_AssemblerBase* pAsm) +{ + R700ALUInstruction * alu_instruction_ptr = NULL; + + int ui; + unsigned int uj; + unsigned int unWord0Temp = 0x380C00; + unsigned int unWord1Temp = 0x146B10; //SQ_SEL_X + + if(pAsm->uIIns > 0) + { + for(ui=(pAsm->uIIns-1); ui>=0; ui--) + { + for(uj=0; uj<8; uj++) + { + alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); if(NULL == alu_instruction_ptr) { return GL_FALSE; } /* allocation failed: bail out before Init dereferences NULL */ + Init_R700ALUInstruction(alu_instruction_ptr); + alu_instruction_ptr->m_Word0.val = unWord0Temp; + alu_instruction_ptr->m_Word1.val = unWord1Temp; + + if(uj < 4) + { + SETfield(alu_instruction_ptr->m_Word1.val, EG_OP2_INST_INTERP_ZW, + EG_ALU_WORD1_OP2__ALU_INST_shift, EG_ALU_WORD1_OP2__ALU_INST_mask); + } + else + { + SETfield(alu_instruction_ptr->m_Word1.val, EG_OP2_INST_INTERP_XY, + EG_ALU_WORD1_OP2__ALU_INST_shift, EG_ALU_WORD1_OP2__ALU_INST_mask); + } + if( (uj > 1) && (uj < 6) ) + { + SETfield(alu_instruction_ptr->m_Word1.val, 1, + EG_ALU_WORD1_OP2__WRITE_MASK_shift, EG_ALU_WORD1_OP2__WRITE_MASK_bit); + } + else + { + SETfield(alu_instruction_ptr->m_Word1.val, 0, + EG_ALU_WORD1_OP2__WRITE_MASK_shift, EG_ALU_WORD1_OP2__WRITE_MASK_bit); + } + if( (uj > 1) && (uj < 6) ) + { + SETfield(alu_instruction_ptr->m_Word1.val, ui, + EG_ALU_WORD1__DST_GPR_shift, 
EG_ALU_WORD1__DST_GPR_mask); + } + else + { + SETfield(alu_instruction_ptr->m_Word1.val, 111, + EG_ALU_WORD1__DST_GPR_shift, EG_ALU_WORD1__DST_GPR_mask); + } + + SETfield(alu_instruction_ptr->m_Word1.val, (uj % 4), + EG_ALU_WORD1__DST_CHAN_shift, EG_ALU_WORD1__DST_CHAN_mask); + SETfield(alu_instruction_ptr->m_Word0.val, (1 - (uj % 2)), + EG_ALU_WORD0__SRC0_CHAN_shift, EG_ALU_WORD0__SRC0_CHAN_mask); + SETfield(alu_instruction_ptr->m_Word0.val, (EG_ALU_SRC_PARAM_BASE + ui), + EG_ALU_WORD0__SRC1_SEL_shift, EG_ALU_WORD0__SRC1_SEL_mask); + if(3 == (uj % 4)) + { + SETfield(alu_instruction_ptr->m_Word0.val, 1, + EG_ALU_WORD0__LAST_shift, EG_ALU_WORD0__LAST_bit); + } + + if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, 4) ) + { + return GL_FALSE; + } + } + } + } + + return GL_TRUE; +} + void get_src_properties(R700ALUInstruction* alu_instruction_ptr, int source_index, BITS* psrc_sel, @@ -2175,8 +2666,16 @@ GLboolean check_scalar(r700_AssemblerBase* pAsm, BITS src_neg [3] = {0,0,0}; GLuint swizzle_key; + GLuint number_of_operands; - GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3); + if(8 == pAsm->unAsic) + { + number_of_operands = EG_GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3); + } + else + { + number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3); + } for (src=0; src<number_of_operands; src++) { @@ -2264,8 +2763,16 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, BITS src_neg [3] = {0,0,0}; GLuint swizzle_key; + GLuint number_of_operands; - GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3); + if(8 == pAsm->unAsic) + { + number_of_operands = EG_GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3); + } + else + { + number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3); + } for (src=0; src<number_of_operands; src++) { @@ -2345,12 +2852,23 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) PVSSRC * pcurrent_source; int 
current_source_index; GLuint contiguous_slots_needed; + GLuint uNumSrc; + GLboolean bSplitInst; + + if(8 == pAsm->unAsic) + { + uNumSrc = EG_GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3); + } + else + { + uNumSrc = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3); + } - GLuint uNumSrc = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3); //GLuint channel_swizzle, j; //GLuint chan_counter[4] = {0, 0, 0, 0}; //PVSSRC * pSource[3]; - GLboolean bSplitInst = GL_FALSE; + bSplitInst = GL_FALSE; + pAsm->kcacheUsed = 0; if (1 == pAsm->D.dst.math) { @@ -2384,7 +2902,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) default: channel_swizzle = SQ_SEL_MASK; break; } if ( ((pSource[j]->rtype == SRC_REG_TEMPORARY) || - (pSource[j]->rtype == SRC_REG_INPUT)) + (pSource[j]->rtype == SRC_REG_GPR)) && (channel_swizzle <= SQ_SEL_W) ) { chan_counter[channel_swizzle]++; @@ -2449,7 +2967,8 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) if (GL_FALSE == assemble_alu_src(alu_instruction_ptr, current_source_index, pcurrent_source, - scalar_channel_index) ) + scalar_channel_index, + pAsm) ) { return GL_FALSE; } @@ -2463,7 +2982,8 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) if (GL_FALSE == assemble_alu_src(alu_instruction_ptr, current_source_index, pcurrent_source, - scalar_channel_index) ) + scalar_channel_index, + pAsm) ) { return GL_FALSE; } @@ -2546,7 +3066,8 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr, current_source_index, pcurrent_source, - scalar_channel_index) ) + scalar_channel_index, + pAsm) ) { return GL_FALSE; } @@ -2987,7 +3508,14 @@ GLboolean assemble_DOT(r700_AssemblerBase *pAsm) return GL_FALSE; } - pAsm->D.dst.opcode = SQ_OP2_INST_DOT4; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP2_INST_DOT4; + } + else + { + pAsm->D.dst.opcode = SQ_OP2_INST_DOT4; + } if( GL_FALSE == assemble_dst(pAsm) ) { @@ -3004,7 +3532,14 @@ GLboolean 
assemble_DOT(r700_AssemblerBase *pAsm) return GL_FALSE; } - if(OPCODE_DP3 == pAsm->pILInst[pAsm->uiCurInst].Opcode) + if(OPCODE_DP2 == pAsm->pILInst[pAsm->uiCurInst].Opcode) + { + zerocomp_PVSSRC(&(pAsm->S[0].src),2); + zerocomp_PVSSRC(&(pAsm->S[0].src),3); + zerocomp_PVSSRC(&(pAsm->S[1].src),2); + zerocomp_PVSSRC(&(pAsm->S[1].src),3); + } + else if(OPCODE_DP3 == pAsm->pILInst[pAsm->uiCurInst].Opcode) { zerocomp_PVSSRC(&(pAsm->S[0].src), 3); zerocomp_PVSSRC(&(pAsm->S[1].src), 3); @@ -3062,6 +3597,11 @@ GLboolean assemble_DST(r700_AssemblerBase *pAsm) GLboolean assemble_EX2(r700_AssemblerBase *pAsm) { + if(8 == pAsm->unAsic) + { + return assemble_math_function(pAsm, EG_OP2_INST_EXP_IEEE); + } + return assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE); } @@ -3094,7 +3634,14 @@ GLboolean assemble_EXP(r700_AssemblerBase *pAsm) return GL_FALSE; } - pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP2_INST_EXP_IEEE; + } + else + { + pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; + } pAsm->D.dst.math = 1; if( GL_FALSE == assemble_dst(pAsm) ) @@ -3143,7 +3690,14 @@ GLboolean assemble_EXP(r700_AssemblerBase *pAsm) // EX2 dst.z, a.x if ((pAsm->pILInst->DstReg.WriteMask >> 2) & 0x1) { - pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP2_INST_EXP_IEEE; + } + else + { + pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; + } pAsm->D.dst.math = 1; if( GL_FALSE == assemble_dst(pAsm) ) @@ -3218,6 +3772,11 @@ GLboolean assemble_FLR(r700_AssemblerBase *pAsm) GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm) { + if(8 == pAsm->unAsic) + { + return assemble_math_function(pAsm, EG_OP2_INST_FLT_TO_INT); + } + return assemble_math_function(pAsm, SQ_OP2_INST_FLT_TO_INT); } @@ -3300,6 +3859,11 @@ GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode) GLboolean assemble_LG2(r700_AssemblerBase *pAsm) { + if(8 == pAsm->unAsic) + { + return assemble_math_function(pAsm, EG_OP2_INST_LOG_IEEE); + } + 
return assemble_math_function(pAsm, SQ_OP2_INST_LOG_IEEE); } @@ -3339,7 +3903,14 @@ GLboolean assemble_LRP(r700_AssemblerBase *pAsm) return GL_FALSE; } - pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_MULADD; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + } pAsm->D.dst.op3 = 1; pAsm->D.dst.rtype = DST_REG_TEMPORARY; @@ -3437,7 +4008,14 @@ GLboolean assemble_LOG(r700_AssemblerBase *pAsm) // LG2 tmp2.x, tmp1.x // FLOOR tmp3.x, tmp2.x - pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP2_INST_LOG_IEEE; + } + else + { + pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE; + } pAsm->D.dst.math = 1; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); @@ -3528,7 +4106,14 @@ GLboolean assemble_LOG(r700_AssemblerBase *pAsm) return GL_FALSE; } - pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP2_INST_EXP_IEEE; + } + else + { + pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; + } pAsm->D.dst.math = 1; if( GL_FALSE == assemble_dst(pAsm) ) @@ -3610,7 +4195,14 @@ GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm) return GL_FALSE; } - pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_MULADD; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + } pAsm->D.dst.op3 = 1; tmp = (-1); @@ -3778,7 +4370,14 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y); /* dst.z = log(src.y) */ - pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP2_INST_LOG_CLAMPED; + } + else + { + pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED; + } pAsm->D.dst.math = 1; pAsm->D.dst.rtype = dstType; pAsm->D.dst.reg = dstReg; @@ -3809,7 +4408,14 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) swizzleagain_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X); /* 
tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ - pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_MUL_LIT; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT; + } pAsm->D.dst.math = 1; pAsm->D.dst.op3 = 1; pAsm->D.dst.rtype = DST_REG_TEMPORARY; @@ -3842,7 +4448,14 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) } /* dst.z = exp(tmp.x) */ - pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP2_INST_EXP_IEEE; + } + else + { + pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; + } pAsm->D.dst.math = 1; pAsm->D.dst.rtype = dstType; pAsm->D.dst.reg = dstReg; @@ -3997,7 +4610,14 @@ GLboolean assemble_POW(r700_AssemblerBase *pAsm) tmp = gethelpr(pAsm); // LG2 tmp.x, a.swizzle - pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP2_INST_LOG_IEEE; + } + else + { + pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE; + } pAsm->D.dst.math = 1; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); @@ -4041,7 +4661,14 @@ GLboolean assemble_POW(r700_AssemblerBase *pAsm) // EX2 dst.mask, tmp.x // EX2 tmp.x, tmp.x - pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP2_INST_EXP_IEEE; + } + else + { + pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; + } pAsm->D.dst.math = 1; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); @@ -4085,11 +4712,21 @@ GLboolean assemble_POW(r700_AssemblerBase *pAsm) GLboolean assemble_RCP(r700_AssemblerBase *pAsm) { + if(8 == pAsm->unAsic) + { + return assemble_math_function(pAsm, EG_OP2_INST_RECIP_IEEE); + } + return assemble_math_function(pAsm, SQ_OP2_INST_RECIP_IEEE); } GLboolean assemble_RSQ(r700_AssemblerBase *pAsm) { + if(8 == pAsm->unAsic) + { + return assemble_math_function(pAsm, EG_OP2_INST_RECIPSQRT_IEEE); + } + return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE); } @@ -4175,7 +4812,14 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm) } // COS dst.x, 
a.x - pAsm->D.dst.opcode = SQ_OP2_INST_COS; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP2_INST_COS; + } + else + { + pAsm->D.dst.opcode = SQ_OP2_INST_COS; + } pAsm->D.dst.math = 1; assemble_dst(pAsm); @@ -4194,7 +4838,14 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm) } // SIN dst.y, a.x - pAsm->D.dst.opcode = SQ_OP2_INST_SIN; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP2_INST_SIN; + } + else + { + pAsm->D.dst.opcode = SQ_OP2_INST_SIN; + } pAsm->D.dst.math = 1; assemble_dst(pAsm); @@ -4349,6 +5000,65 @@ GLboolean assemble_SLT(r700_AssemblerBase *pAsm) return GL_TRUE; } +GLboolean assemble_SSG(r700_AssemblerBase *pAsm) +{ + checkop1(pAsm); + + GLuint tmp = gethelpr(pAsm); + /* tmp = (src > 0 ? 1 : src) */ + pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT; + pAsm->D.dst.op3 = 1; + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1); + + if( GL_FALSE == assemble_src(pAsm, 0, 2) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + /* dst = (-tmp > 0 ? 
-1 : tmp) */ + pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT; + pAsm->D.dst.op3 = 1; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + neg_PVSSRC(&(pAsm->S[0].src)); + + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1); + neg_PVSSRC(&(pAsm->S[1].src)); + + setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE); + pAsm->S[2].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[2].src.reg = tmp; + noswizzle_PVSSRC(&(pAsm->S[2].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + GLboolean assemble_STP(r700_AssemblerBase *pAsm) { return GL_TRUE; @@ -4387,7 +5097,14 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP) { GLuint tmp = gethelpr(pAsm); - pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP2_INST_RECIP_IEEE; + } + else + { + pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE; + } pAsm->D.dst.math = 1; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); pAsm->D.dst.rtype = DST_REG_TEMPORARY; @@ -4437,7 +5154,14 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) GLuint tmp2 = gethelpr(pAsm); /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ - pAsm->D.dst.opcode = SQ_OP2_INST_CUBE; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP2_INST_CUBE; + } + else + { + pAsm->D.dst.opcode = SQ_OP2_INST_CUBE; + } setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); pAsm->D.dst.rtype = DST_REG_TEMPORARY; pAsm->D.dst.reg = tmp1; @@ -4462,7 +5186,14 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) } /* tmp1.z = RCP_e(|tmp1.z|) */ - pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP2_INST_RECIP_IEEE; + } + else + { + pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE; + } pAsm->D.dst.math = 1; setaddrmode_PVSDST(&(pAsm->D.dst), 
ADDR_ABSOLUTE); pAsm->D.dst.rtype = DST_REG_TEMPORARY; @@ -4481,7 +5212,14 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x * muladd has no writemask, have to use another temp */ - pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_MULADD; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + } pAsm->D.dst.op3 = 1; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); pAsm->D.dst.rtype = DST_REG_TEMPORARY; @@ -4668,7 +5406,14 @@ GLboolean assemble_XPD(r700_AssemblerBase *pAsm) return GL_FALSE; } - pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_MULADD; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + } pAsm->D.dst.op3 = 1; if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask) @@ -4825,16 +5570,49 @@ GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset) return GL_FALSE; } - pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + if(8 == pAsm->unAsic) + { + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + EG_CF_INST_JUMP, + EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + pops, + EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + SQ_CF_COND_ACTIVE, + EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + 
EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 1, + EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit); + } + else + { + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + } pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + offset; @@ -4848,17 +5626,50 @@ GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops) return GL_FALSE; } - pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + if(8 == pAsm->unAsic) + { + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + EG_CF_INST_POP, + EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + pops, + EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
+ SQ_CF_COND_ACTIVE, + EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 1, + EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask); + } + else + { + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + } pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1; return GL_TRUE; @@ -4876,23 +5687,66 @@ GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse) return GL_FALSE; } - if(GL_TRUE != bHasElse) - { - pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + if(8 == pAsm->unAsic) + { + if(GL_TRUE != bHasElse) + { + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 1, + EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask); 
+ } + else + { + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask); + } + + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + EG_CF_INST_JUMP, + EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + SQ_CF_COND_ACTIVE, + EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 1, + EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask); } else { - pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; - } - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + if(GL_TRUE != bHasElse) + { + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + } + else + { + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + } + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + 
pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + } pAsm->FCSP++; pAsm->fc_stack[pAsm->FCSP].type = FC_IF; @@ -4919,16 +5773,49 @@ GLboolean assemble_ELSE(r700_AssemblerBase *pAsm) return GL_FALSE; } - pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; /// - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + if(8 == pAsm->unAsic) + { + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 1, + EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + EG_CF_INST_ELSE, + EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + SQ_CF_COND_ACTIVE, + EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 1, + EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask); + } + else + { + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; /// + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; - 
pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ELSE; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ELSE; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + } pAsm->fc_stack[pAsm->FCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( (void *)pAsm->fc_stack[pAsm->FCSP].mid, 0, @@ -4988,17 +5875,49 @@ GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm) return GL_FALSE; } - - pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + if(8 == pAsm->unAsic) + { + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + EG_CF_INST_LOOP_START_NO_AL, + EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + SQ_CF_COND_ACTIVE, + EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 1, + EG_CF_WORD1__BARRIER_shift, 
EG_CF_WORD1__BARRIER_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask); + } + else + { + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_START_NO_AL; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_START_NO_AL; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + } pAsm->FCSP++; pAsm->fc_stack[pAsm->FCSP].type = FC_LOOP; @@ -5039,18 +5958,50 @@ GLboolean assemble_BRK(r700_AssemblerBase *pAsm) return GL_FALSE; } - - pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + if(8 == pAsm->unAsic) + { + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 1, + EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + EG_CF_INST_LOOP_BREAK, + EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + SQ_CF_COND_ACTIVE, + EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + 
EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 1, + EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask); + } + else + { + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + } pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( (void *)pAsm->fc_stack[unFCSP].mid, @@ -5064,18 +6015,52 @@ GLboolean assemble_BRK(r700_AssemblerBase *pAsm) return GL_FALSE; } - pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + if(8 == pAsm->unAsic) + { + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 1, + EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 
EG_CF_INST_POP, + EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + SQ_CF_COND_ACTIVE, + EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 1, + EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask); + } + else + { + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; - pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + } + + pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1; checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE); @@ 
-5109,18 +6094,50 @@ GLboolean assemble_CONT(r700_AssemblerBase *pAsm) return GL_FALSE; } - - pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + if(8 == pAsm->unAsic) + { + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 1, + EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + EG_CF_INST_LOOP_CONTINUE, + EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + SQ_CF_COND_ACTIVE, + EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 1, + EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask); + } + else + { + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_CONTINUE; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = 
SQ_CF_INST_LOOP_CONTINUE; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + } pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( (void *)pAsm->fc_stack[unFCSP].mid, @@ -5134,17 +6151,51 @@ GLboolean assemble_CONT(r700_AssemblerBase *pAsm) return GL_FALSE; } - pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + if(8 == pAsm->unAsic) + { + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 1, + EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + EG_CF_INST_POP, + EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + SQ_CF_COND_ACTIVE, + EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 1, + EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask); + } + else + { + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; - 
pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + } - pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1; checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE); @@ -5163,17 +6214,49 @@ GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm) return GL_FALSE; } - - pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + if(8 == pAsm->unAsic) + { + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + EG_CF_INST_LOOP_END, + EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + SQ_CF_COND_ACTIVE, + EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit); + 
SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 1, + EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask); + } + else + { + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_END; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_END; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + } pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->fc_stack[pAsm->FCSP].first->m_uIndex + 1; pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1; @@ -5235,17 +6318,51 @@ void add_return_inst(r700_AssemblerBase *pAsm) { return; } - //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_RETURN; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + if(8 == pAsm->unAsic) + { + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, 
+ EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + EG_CF_INST_RETURN, + EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + SQ_CF_COND_ACTIVE, + EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 1, + EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask); + } + else + { + //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_RETURN; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + } } GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift) @@ -5368,17 +6485,50 @@ GLboolean assemble_CAL(r700_AssemblerBase *pAsm, return GL_FALSE; } - pAsm->cf_current_cf_clause_ptr->m_Word1.f.call_count = 1; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; - 
pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + if(8 == pAsm->unAsic) + { + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + EG_CF_INST_CALL, + EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + SQ_CF_COND_ACTIVE, + EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 1, + EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 1, + EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask); + } + else + { + pAsm->cf_current_cf_clause_ptr->m_Word1.f.call_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_CALL; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_CALL; + 
pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + } /* Put in caller */ if( (pAsm->unCallerArrayPointer + 1) > pAsm->unCallerArraySize ) @@ -5579,16 +6729,49 @@ GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP) return GL_FALSE; } - pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + if(8 == pAsm->unAsic) + { + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 1, + EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + EG_CF_INST_LOOP_BREAK, + EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + SQ_CF_COND_ACTIVE, + EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 0, + EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 1, + EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit); + SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, + 1, + EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask); + } + else + { + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; - 
pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + } pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( (void *)pAsm->fc_stack[unFCSP].mid, @@ -5677,10 +6860,19 @@ GLboolean AssembleInstr(GLuint uiFirstInst, return GL_FALSE; break; case OPCODE_COS: - if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_COS) ) - return GL_FALSE; + if(8 == pR700AsmCode->unAsic) + { + if ( GL_FALSE == assemble_TRIG(pR700AsmCode, EG_OP2_INST_COS) ) + return GL_FALSE; + } + else + { + if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_COS) ) + return GL_FALSE; + } break; + case OPCODE_DP2: case OPCODE_DP3: case OPCODE_DP4: case OPCODE_DPH: @@ -5794,8 +6986,16 @@ GLboolean AssembleInstr(GLuint uiFirstInst, return GL_FALSE; break; case OPCODE_SIN: - if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_SIN) ) - return GL_FALSE; + if(8 == pR700AsmCode->unAsic) + { + if ( GL_FALSE == assemble_TRIG(pR700AsmCode, EG_OP2_INST_SIN) ) + return GL_FALSE; + } + else + { + if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_SIN) ) + return GL_FALSE; + } break; case OPCODE_SCS: if ( GL_FALSE == assemble_SCS(pR700AsmCode) ) @@ -5872,6 +7072,13 @@ GLboolean AssembleInstr(GLuint uiFirstInst, // return GL_FALSE; // break; + case OPCODE_SSG: + if ( GL_FALSE == assemble_SSG(pR700AsmCode) ) + { + return GL_FALSE; + } + break; + case OPCODE_SWZ: if ( GL_FALSE == assemble_MOV(pR700AsmCode) ) { @@ -6006,7 +7213,7 @@ 
GLboolean AssembleInstr(GLuint uiFirstInst, return GL_TRUE; default: - radeon_error("internal: unknown instruction\n"); + radeon_error("r600: unknown instruction %d\n", pILInst[i].Opcode); return GL_FALSE; } } @@ -6016,7 +7223,15 @@ GLboolean AssembleInstr(GLuint uiFirstInst, GLboolean InitShaderProgram(r700_AssemblerBase * pAsm) { +#ifndef GENERATE_SHADER_FOR_2D setRetInLoopFlag(pAsm, SQ_SEL_0); +#endif + + if((SPT_FP == pAsm->currentShaderType) && (8 == pAsm->unAsic)) + { + EG_add_ps_interp(pAsm); + } + pAsm->alu_x_opcode = SQ_CF_INST_ALU; return GL_TRUE; } @@ -6039,6 +7254,7 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg) plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local; +#ifndef GENERATE_SHADER_FOR_2D /* remove flags init if they are not used */ if((pAsm->unCFflags & HAS_LOOPRET) == 0) { @@ -6069,6 +7285,7 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg) pInst = pInst->pNextInst; }; } +#endif /* GENERATE_SHADER_FOR_2D */ if(pAsm->CALLSTACK[0].max > 0) { @@ -6175,13 +7392,20 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg) } else { - if(pAsm->bR6xx) + if(8 == pAsm->unAsic) { - uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f6.alu_inst, 0); + uNumSrc = EG_GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0); } else { - uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0); + if(pAsm->bR6xx) + { + uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f6.alu_inst, 0); + } + else + { + uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0); + } } if(2 == uNumSrc) { /* 2 srcs */ @@ -6472,12 +7696,42 @@ GLboolean Process_Export(r700_AssemblerBase* pAsm, pAsm->cf_current_export_clause_ptr->m_Word0.f.index_gpr = 0x0; pAsm->cf_current_export_clause_ptr->m_Word0.f.elem_size = 0x3; - pAsm->cf_current_export_clause_ptr->m_Word1.f.burst_count = (export_count - 1); - pAsm->cf_current_export_clause_ptr->m_Word1.f.end_of_program = 0x0; - 
pAsm->cf_current_export_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; - pAsm->cf_current_export_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT; // _DONE - pAsm->cf_current_export_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; - pAsm->cf_current_export_clause_ptr->m_Word1.f.barrier = 0x1; + if(8 == pAsm->unAsic) + { + SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val, + (export_count - 1), + EG_CF_ALLOC_EXPORT_WORD1__BURST_COUNT_shift, + EG_CF_ALLOC_EXPORT_WORD1__BURST_COUNT_mask); + SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val, + 0, + EG_CF_ALLOC_EXPORT_WORD1__EOP_shift, + EG_CF_ALLOC_EXPORT_WORD1__EOP_bit); + SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val, + 0, + EG_CF_ALLOC_EXPORT_WORD1__VPM_shift, + EG_CF_ALLOC_EXPORT_WORD1__VPM_bit); + SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val, + EG_CF_INST_EXPORT, + EG_CF_WORD1__CF_INST_shift, + EG_CF_WORD1__CF_INST_mask); + SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val, + 0, + EG_CF_ALLOC_EXPORT_WORD1__MARK_shift, + EG_CF_ALLOC_EXPORT_WORD1__MARK_bit); + SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val, + 1, + EG_CF_ALLOC_EXPORT_WORD1__BARRIER_shift, + EG_CF_ALLOC_EXPORT_WORD1__BARRIER_bit); + } + else + { + pAsm->cf_current_export_clause_ptr->m_Word1.f.burst_count = (export_count - 1); + pAsm->cf_current_export_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_export_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_export_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT; // _DONE + pAsm->cf_current_export_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + pAsm->cf_current_export_clause_ptr->m_Word1.f.barrier = 0x1; + } if (export_count == 1) { @@ -6605,8 +7859,22 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, if(pR700AsmCode->cf_last_export_ptr != NULL) { - pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE; - pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1; + if(8 == 
pR700AsmCode->unAsic) + { + SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val, + 1, + EG_CF_ALLOC_EXPORT_WORD1__EOP_shift, + EG_CF_ALLOC_EXPORT_WORD1__EOP_bit); + SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val, + EG_CF_INST_EXPORT_DONE, + EG_CF_WORD1__CF_INST_shift, + EG_CF_WORD1__CF_INST_mask); + } + else + { + pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE; + pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1; + } } return GL_TRUE; @@ -6652,7 +7920,17 @@ GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, export_count--; } - pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE; + if(8 == pR700AsmCode->unAsic) + { + SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val, + EG_CF_INST_EXPORT_DONE, + EG_CF_WORD1__CF_INST_shift, + EG_CF_WORD1__CF_INST_mask); + } + else + { + pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE; + } pR700AsmCode->number_of_exports = export_count; @@ -6747,7 +8025,17 @@ GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, // At least one param should be exported if (export_count) { - pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE; + if(8 == pR700AsmCode->unAsic) + { + SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val, + EG_CF_INST_EXPORT_DONE, + EG_CF_WORD1__CF_INST_shift, + EG_CF_WORD1__CF_INST_mask); + } + else + { + pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE; + } } else { @@ -6765,7 +8053,17 @@ GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_0; pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_0; pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_1; - pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE; + if(8 == pR700AsmCode->unAsic) + { + SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val, + 
EG_CF_INST_EXPORT_DONE, + EG_CF_WORD1__CF_INST_shift, + EG_CF_WORD1__CF_INST_mask); + } + else + { + pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE; + } } pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1; diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index dbc6cdb1903..d357b0e3ec0 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -108,7 +108,7 @@ typedef enum AddressMode typedef enum SrcRegisterType { SRC_REG_TEMPORARY = 0, - SRC_REG_INPUT = 1, + SRC_REG_GPR = 1, SRC_REG_CONSTANT = 2, SRC_REG_ALT_TEMPORARY = 3, SRC_REC_LITERAL = 4, @@ -464,6 +464,10 @@ typedef struct r700_AssemblerBase GLuint uiCurInst; GLubyte SamplerUnits[MAX_SAMPLERS]; GLboolean bR6xx; + + /* TODO : merge bR6xx */ + GLuint unAsic; + /* helper to decide which type of instruction to assemble */ GLboolean is_tex; /* we inserted helper intructions and need barrier on next TEX ins */ @@ -489,6 +493,9 @@ typedef struct r700_AssemblerBase GLuint shadow_regs[R700_MAX_TEXTURE_UNITS]; + GLboolean bUseMemConstant; + GLuint kcacheUsed; + } r700_AssemblerBase; //Internal use @@ -512,6 +519,8 @@ GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size); unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3); +unsigned int EG_GetNumOperands(GLuint opcode, GLuint nIsOp3); + GLboolean IsTex(gl_inst_opcode Opcode); GLboolean IsAlu(gl_inst_opcode Opcode); int check_current_clause(r700_AssemblerBase* pAsm, @@ -535,6 +544,18 @@ GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm, GLboolean normalize, GLenum format, VTX_FETCH_METHOD * pFetchMethod); + +GLboolean EG_assemble_vfetch_instruction(r700_AssemblerBase* pAsm, + GLuint destination_register, + GLenum type, + GLint size, + GLubyte element, + GLuint _signed, + GLboolean normalize, + GLenum format, + VTX_FETCH_METHOD * pFetchMethod); +//----------------------- + 
GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm); GLuint gethelpr(r700_AssemblerBase* pAsm); void resethelpr(r700_AssemblerBase* pAsm); @@ -553,8 +574,10 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalize void initialize(r700_AssemblerBase *pAsm); GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr, int source_index, - PVSSRC* pSource, - BITS scalar_channel_index); + PVSSRC* pSource, + BITS scalar_channel_index, + r700_AssemblerBase *pAsm); + GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, R700ALUInstruction* alu_instruction_ptr, GLuint contiguous_slots_needed); @@ -625,6 +648,7 @@ GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode); GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode); GLboolean assemble_SLT(r700_AssemblerBase *pAsm); +GLboolean assemble_SSG(r700_AssemblerBase *pAsm); GLboolean assemble_STP(r700_AssemblerBase *pAsm); GLboolean assemble_TEX(r700_AssemblerBase *pAsm); GLboolean assemble_XPD(r700_AssemblerBase *pAsm); @@ -663,6 +687,7 @@ GLboolean callPreSub(r700_AssemblerBase* pAsm, COMPILED_SUB * pCompiledSub, GLshort uOutReg, GLshort uNumValidSrc); +GLboolean EG_add_ps_interp(r700_AssemblerBase* pAsm); //Interface GLboolean AssembleInstr(GLuint uiFirstInst, diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index bf8063391a2..71f1af75626 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -173,7 +173,6 @@ static void r700SetupVTXConstants(GLcontext * ctx, { context_t *context = R700_CONTEXT(ctx); struct radeon_aos * paos = (struct radeon_aos *)pAos; - unsigned int nVBsize; BATCH_LOCALS(&context->radeon); unsigned int uSQ_VTX_CONSTANT_WORD0_0; @@ -194,18 +193,8 @@ static void r700SetupVTXConstants(GLcontext * ctx, else r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit); - if(0 == pStreamDesc->stride) - { - nVBsize = paos->count * 
pStreamDesc->size * getTypeSize(pStreamDesc->type); - } - else - { - nVBsize = (paos->count - 1) * pStreamDesc->stride - + pStreamDesc->size * getTypeSize(pStreamDesc->type); - } - uSQ_VTX_CONSTANT_WORD0_0 = paos->offset; - uSQ_VTX_CONSTANT_WORD1_0 = nVBsize - 1; + uSQ_VTX_CONSTANT_WORD1_0 = paos->bo->size - paos->offset - 1; SETfield(uSQ_VTX_CONSTANT_WORD2_0, 0, BASE_ADDRESS_HI_shift, BASE_ADDRESS_HI_mask); /* TODO */ SETfield(uSQ_VTX_CONSTANT_WORD2_0, pStreamDesc->stride, SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift, @@ -721,6 +710,7 @@ static void r700SendPSState(GLcontext *ctx, struct radeon_state_atom *atom) context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); struct radeon_bo * pbo; + struct radeon_bo * pbo_const; BATCH_LOCALS(&context->radeon); radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); @@ -750,6 +740,9 @@ static void r700SendPSState(GLcontext *ctx, struct radeon_state_atom *atom) R600_OUT_BATCH_REGVAL(SQ_LOOP_CONST_0, 0x01000FFF); END_BATCH(); + pbo_const = (struct radeon_bo *)r700GetActiveFpShaderConstBo(GL_CONTEXT(context)); + //TODO : set up shader const + COMMIT_BATCH(); } @@ -759,13 +752,14 @@ static void r700SendVSState(GLcontext *ctx, struct radeon_state_atom *atom) context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); struct radeon_bo * pbo; + struct radeon_bo * pbo_const; BATCH_LOCALS(&context->radeon); radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(GL_CONTEXT(context)); if (!pbo) - return; + return; r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); @@ -788,6 +782,29 @@ static void r700SendVSState(GLcontext *ctx, struct radeon_state_atom *atom) //R600_OUT_BATCH_REGVAL((SQ_LOOP_CONST_0 + (SQ_LOOP_CONST_vs<2)), 0x0100000F); END_BATCH(); + /* TODO : handle 4 bufs */ + if(GL_TRUE == r700->bShaderUseMemConstant) + { + pbo_const = (struct radeon_bo 
*)r700GetActiveVpShaderConstBo(GL_CONTEXT(context)); + if(NULL != pbo_const) + { + r700SyncSurf(context, pbo_const, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); /* TODO : Check kc bit. */ + + BEGIN_BATCH_NO_AUTOSTATE(3); + R600_OUT_BATCH_REGVAL(SQ_ALU_CONST_BUFFER_SIZE_VS_0, (r700->vs.num_consts * 4)/16 ); + END_BATCH(); + + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGSEQ(SQ_ALU_CONST_CACHE_VS_0, 1); + R600_OUT_BATCH(r700->vs.SQ_ALU_CONST_CACHE_VS_0.u32All); + R600_OUT_BATCH_RELOC(r700->vs.SQ_ALU_CONST_CACHE_VS_0.u32All, + pbo_const, + r700->vs.SQ_ALU_CONST_CACHE_VS_0.u32All, + RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); + } + } + COMMIT_BATCH(); } @@ -1558,45 +1575,55 @@ static void r600_init_query_stateobj(radeonContextPtr radeon, int SZ) void r600InitAtoms(context_t *context) { - radeon_print(RADEON_STATE, RADEON_NORMAL, "%s %p\n", __func__, context); - context->radeon.hw.max_state_size = 10 + 5 + 14; /* start 3d, idle, cb/db flush */ - - /* Setup the atom linked list */ - make_empty_list(&context->radeon.hw.atomlist); - context->radeon.hw.atomlist.name = "atom-list"; - - ALLOC_STATE(sq, always, 34, r700SendSQConfig); - ALLOC_STATE(db, always, 17, r700SendDBState); - ALLOC_STATE(stencil, always, 4, r700SendStencilState); - ALLOC_STATE(db_target, always, 16, r700SendDepthTargetState); - ALLOC_STATE(sc, always, 15, r700SendSCState); - ALLOC_STATE(scissor, always, 22, r700SendScissorState); - ALLOC_STATE(aa, always, 12, r700SendAAState); - ALLOC_STATE(cl, always, 12, r700SendCLState); - ALLOC_STATE(gb, always, 6, r700SendGBState); - ALLOC_STATE(ucp, ucp, (R700_MAX_UCP * 6), r700SendUCPState); - ALLOC_STATE(su, always, 9, r700SendSUState); - ALLOC_STATE(poly, always, 10, r700SendPolyState); - ALLOC_STATE(cb, cb, 18, r700SendCBState); - ALLOC_STATE(clrcmp, always, 6, r700SendCBCLRCMPState); - ALLOC_STATE(cb_target, always, 31, r700SendRenderTargetState); - ALLOC_STATE(blnd, blnd, (6 + (R700_MAX_RENDER_TARGETS * 3)), r700SendCBBlendState); - 
ALLOC_STATE(blnd_clr, always, 6, r700SendCBBlendColorState); - ALLOC_STATE(sx, always, 9, r700SendSXState); - ALLOC_STATE(vgt, always, 41, r700SendVGTState); - ALLOC_STATE(spi, always, (59 + R700_MAX_SHADER_EXPORTS), r700SendSPIState); - ALLOC_STATE(vpt, always, 16, r700SendViewportState); - ALLOC_STATE(fs, always, 18, r700SendFSState); - ALLOC_STATE(vs, always, 21, r700SendVSState); - ALLOC_STATE(ps, always, 24, r700SendPSState); - ALLOC_STATE(vs_consts, vs_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendVSConsts); - ALLOC_STATE(ps_consts, ps_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendPSConsts); - ALLOC_STATE(vtx, vtx, (VERT_ATTRIB_MAX * 18), r700SendVTXState); - ALLOC_STATE(tx, tx, (R700_TEXTURE_NUMBERUNITS * 20), r700SendTexState); - ALLOC_STATE(tx_smplr, tx, (R700_TEXTURE_NUMBERUNITS * 5), r700SendTexSamplerState); - ALLOC_STATE(tx_brdr_clr, tx, (R700_TEXTURE_NUMBERUNITS * 6), r700SendTexBorderColorState); - r600_init_query_stateobj(&context->radeon, 6 * 2); - - context->radeon.hw.is_dirty = GL_TRUE; - context->radeon.hw.all_dirty = GL_TRUE; + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + radeon_print(RADEON_STATE, RADEON_NORMAL, "%s %p\n", __func__, context); + context->radeon.hw.max_state_size = 10 + 5 + 14; /* start 3d, idle, cb/db flush */ + + /* Setup the atom linked list */ + make_empty_list(&context->radeon.hw.atomlist); + context->radeon.hw.atomlist.name = "atom-list"; + + ALLOC_STATE(sq, always, 34, r700SendSQConfig); + ALLOC_STATE(db, always, 17, r700SendDBState); + ALLOC_STATE(stencil, always, 4, r700SendStencilState); + ALLOC_STATE(db_target, always, 16, r700SendDepthTargetState); + ALLOC_STATE(sc, always, 15, r700SendSCState); + ALLOC_STATE(scissor, always, 22, r700SendScissorState); + ALLOC_STATE(aa, always, 12, r700SendAAState); + ALLOC_STATE(cl, always, 12, r700SendCLState); + ALLOC_STATE(gb, always, 6, r700SendGBState); + ALLOC_STATE(ucp, ucp, (R700_MAX_UCP * 6), r700SendUCPState); + ALLOC_STATE(su, always, 9, 
r700SendSUState); + ALLOC_STATE(poly, always, 10, r700SendPolyState); + ALLOC_STATE(cb, cb, 18, r700SendCBState); + ALLOC_STATE(clrcmp, always, 6, r700SendCBCLRCMPState); + ALLOC_STATE(cb_target, always, 31, r700SendRenderTargetState); + ALLOC_STATE(blnd, blnd, (6 + (R700_MAX_RENDER_TARGETS * 3)), r700SendCBBlendState); + ALLOC_STATE(blnd_clr, always, 6, r700SendCBBlendColorState); + ALLOC_STATE(sx, always, 9, r700SendSXState); + ALLOC_STATE(vgt, always, 41, r700SendVGTState); + ALLOC_STATE(spi, always, (59 + R700_MAX_SHADER_EXPORTS), r700SendSPIState); + ALLOC_STATE(vpt, always, 16, r700SendViewportState); + ALLOC_STATE(fs, always, 18, r700SendFSState); + if(GL_TRUE == r700->bShaderUseMemConstant) + { + ALLOC_STATE(vs, always, 36, r700SendVSState); + ALLOC_STATE(ps, always, 24, r700SendPSState); /* TODO : not imp yet, fix later. */ + } + else + { + ALLOC_STATE(vs, always, 21, r700SendVSState); + ALLOC_STATE(ps, always, 24, r700SendPSState); + ALLOC_STATE(vs_consts, vs_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendVSConsts); + ALLOC_STATE(ps_consts, ps_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendPSConsts); + } + + ALLOC_STATE(vtx, vtx, (VERT_ATTRIB_MAX * 18), r700SendVTXState); + ALLOC_STATE(tx, tx, (R700_TEXTURE_NUMBERUNITS * 20), r700SendTexState); + ALLOC_STATE(tx_smplr, tx, (R700_TEXTURE_NUMBERUNITS * 5), r700SendTexSamplerState); + ALLOC_STATE(tx_brdr_clr, tx, (R700_TEXTURE_NUMBERUNITS * 6), r700SendTexBorderColorState); + r600_init_query_stateobj(&context->radeon, 6 * 2); + + context->radeon.hw.is_dirty = GL_TRUE; + context->radeon.hw.all_dirty = GL_TRUE; } diff --git a/src/mesa/drivers/dri/r600/r700_chip.h b/src/mesa/drivers/dri/r600/r700_chip.h index 0b6b72f8501..ebf1840a795 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.h +++ b/src/mesa/drivers/dri/r600/r700_chip.h @@ -43,6 +43,8 @@ #define SETbit(x, bit) ( (x) |= (bit) ) #define CLEARbit(x, bit) ( (x) &= ~(bit) ) +#define GETbits(x, shift, mask) ( ((x) & (mask)) >> (shift) ) + #define 
R700_TEXTURE_NUMBERUNITS 16 #define R700_MAX_RENDER_TARGETS 8 #define R700_MAX_VIEWPORTS 16 @@ -238,6 +240,9 @@ typedef struct _VS_STATE_STRUCT union UINT_FLOAT SQ_PGM_CF_OFFSET_VS ; /* 0xA234 */ GLboolean dirty; int num_consts; + + union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_0; + union UINT_FLOAT consts[R700_MAX_DX9_CONSTS][4]; } VS_STATE_STRUCT; @@ -499,6 +504,8 @@ typedef struct _R700_CHIP_CONTEXT GLboolean bEnablePerspective; + GLboolean bShaderUseMemConstant; + } R700_CHIP_CONTEXT; #endif /* _R700_CHIP_H_ */ diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index f9d84b6ed68..6fdd93a3302 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -362,6 +362,9 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, struct gl_fragment_program *mesa_fp, GLcontext *ctx) { + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + GLuint number_of_colors_exported; GLboolean z_enabled = GL_FALSE; GLuint unBit, shadow_unit; @@ -373,6 +376,17 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, //Init_Program Init_r700_AssemblerBase( SPT_FP, &(fp->r700AsmCode), &(fp->r700Shader) ); + if(GL_TRUE == r700->bShaderUseMemConstant) + { + fp->r700AsmCode.bUseMemConstant = GL_TRUE; + } + else + { + fp->r700AsmCode.bUseMemConstant = GL_FALSE; + } + + fp->r700AsmCode.unAsic = 7; + if(mesa_fp->Base.InputsRead & FRAG_BIT_WPOS) { insert_wpos_code(ctx, mesa_fp); @@ -481,6 +495,14 @@ void * r700GetActiveFpShaderBo(GLcontext * ctx) return fp->shaderbo; } +void * r700GetActiveFpShaderConstBo(GLcontext * ctx) +{ + struct r700_fragment_program *fp = (struct r700_fragment_program *) + (ctx->FragmentProgram._Current); + + return fp->constbo0; +} + GLboolean r700SetupFragmentProgram(GLcontext * ctx) { context_t *context = R700_CONTEXT(ctx); @@ -768,6 +790,17 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) 
r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; } + + /* Load fp constants to gpu */ + if( (GL_TRUE == r700->bShaderUseMemConstant) && (unNumParamData > 0) ) + { + r600EmitShader(ctx, + &(fp->constbo0), + (GLvoid *)&(paramList->ParameterValues[0][0]), + unNumParamData * 4, + "FS Const"); + } + } else r700->ps.num_consts = 0; diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.h b/src/mesa/drivers/dri/r600/r700_fragprog.h index 39c59c9201d..aaa6043d5d8 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.h +++ b/src/mesa/drivers/dri/r600/r700_fragprog.h @@ -43,6 +43,9 @@ struct r700_fragment_program void * shaderbo; + GLuint k0used; + void * constbo0; + GLboolean WritesDepth; GLuint optimization; }; @@ -67,4 +70,6 @@ extern GLboolean r700SetupFragmentProgram(GLcontext * ctx); extern void * r700GetActiveFpShaderBo(GLcontext * ctx); +extern void * r700GetActiveFpShaderConstBo(GLcontext * ctx); + #endif /*_R700_FRAGPROG_H_*/ diff --git a/src/mesa/drivers/dri/r600/r700_oglprog.c b/src/mesa/drivers/dri/r600/r700_oglprog.c index 83517925115..e0c9179004d 100644 --- a/src/mesa/drivers/dri/r600/r700_oglprog.c +++ b/src/mesa/drivers/dri/r600/r700_oglprog.c @@ -48,6 +48,12 @@ static void freeVertProgCache(GLcontext *ctx, struct r700_vertex_program_cont *c tmp = vp->next; /* Release DMA region */ r600DeleteShader(ctx, vp->shaderbo); + + if(NULL != vp->constbo0) + { + r600DeleteShader(ctx, vp->constbo0); + } + /* Clean up */ Clean_Up_Assembler(&(vp->r700AsmCode)); Clean_Up_Shader(&(vp->r700Shader)); @@ -79,6 +85,7 @@ static struct gl_program *r700NewProgram(GLcontext * ctx, &vpc->mesa_program, target, id); + break; case GL_FRAGMENT_PROGRAM_NV: case GL_FRAGMENT_PROGRAM_ARB: @@ -92,6 +99,8 @@ static struct gl_program *r700NewProgram(GLcontext * ctx, fp->shaderbo = NULL; + fp->constbo0 = NULL; + break; default: _mesa_problem(ctx, "Bad target in r700NewProgram"); @@ -121,6 +130,11 @@ static void 
r700DeleteProgram(GLcontext * ctx, struct gl_program *prog) r600DeleteShader(ctx, fp->shaderbo); + if(NULL != fp->constbo0) + { + r600DeleteShader(ctx, fp->constbo0); + } + /* Clean up */ Clean_Up_Assembler(&(fp->r700AsmCode)); Clean_Up_Shader(&(fp->r700Shader)); @@ -145,6 +159,13 @@ r700ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog) break; case GL_FRAGMENT_PROGRAM_ARB: r600DeleteShader(ctx, fp->shaderbo); + + if(NULL != fp->constbo0) + { + r600DeleteShader(ctx, fp->constbo0); + fp->constbo0 = NULL; + } + Clean_Up_Assembler(&(fp->r700AsmCode)); Clean_Up_Shader(&(fp->r700Shader)); fp->translated = GL_FALSE; diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index c5771f9fd0b..f90c69c4166 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -644,6 +644,7 @@ static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input #endif ) { + assert(count); r700ConvertAttrib(ctx, count, input[i], &context->stream_desc[index]); } else diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c index 5ea8918611c..925b4ffe6dd 100644 --- a/src/mesa/drivers/dri/r600/r700_state.c +++ b/src/mesa/drivers/dri/r600/r700_state.c @@ -1580,7 +1580,16 @@ static void r700InitSQConfig(GLcontext * ctx) CLEARbit(r700->sq_config.SQ_CONFIG.u32All, VC_ENABLE_bit); else SETbit(r700->sq_config.SQ_CONFIG.u32All, VC_ENABLE_bit); - SETbit(r700->sq_config.SQ_CONFIG.u32All, DX9_CONSTS_bit); + + if(GL_TRUE == r700->bShaderUseMemConstant) + { + CLEARbit(r700->sq_config.SQ_CONFIG.u32All, DX9_CONSTS_bit); + } + else + { + SETbit(r700->sq_config.SQ_CONFIG.u32All, DX9_CONSTS_bit); + } + SETbit(r700->sq_config.SQ_CONFIG.u32All, ALU_INST_PREFER_VECTOR_bit); SETfield(r700->sq_config.SQ_CONFIG.u32All, ps_prio, PS_PRIO_shift, PS_PRIO_mask); SETfield(r700->sq_config.SQ_CONFIG.u32All, vs_prio, VS_PRIO_shift, VS_PRIO_mask); @@ -1689,8 +1698,9 @@ 
void r700InitState(GLcontext * ctx) //------------------- SETbit(r700->PA_SC_MODE_CNTL.u32All, FORCE_EOV_CNTDWN_ENABLE_bit); } - /* Do scale XY and Z by 1/W0. */ - r700->bEnablePerspective = GL_TRUE; + /* Do scale XY and Z by 1/W0. */ + r700->bEnablePerspective = GL_TRUE; + CLEARbit(r700->PA_CL_VTE_CNTL.u32All, VTX_XY_FMT_bit); CLEARbit(r700->PA_CL_VTE_CNTL.u32All, VTX_Z_FMT_bit); SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_W0_FMT_bit); diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index 6a2a09eaf1a..7ed4b7d2387 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -305,12 +305,17 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, struct gl_vertex_program *mesa_vp) { context_t *context = R700_CONTEXT(ctx); + + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + struct r700_vertex_program *vp; unsigned int i; vp = calloc(1, sizeof(*vp)); vp->mesa_program = _mesa_clone_vertex_program(ctx, mesa_vp); + vp->constbo0 = NULL; + if (mesa_vp->IsPositionInvariant) { _mesa_insert_mvp_code(ctx, vp->mesa_program); @@ -331,6 +336,18 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, //Init_Program Init_r700_AssemblerBase(SPT_VP, &(vp->r700AsmCode), &(vp->r700Shader) ); + + if(GL_TRUE == r700->bShaderUseMemConstant) + { + vp->r700AsmCode.bUseMemConstant = GL_TRUE; + } + else + { + vp->r700AsmCode.bUseMemConstant = GL_FALSE; + } + + vp->r700AsmCode.unAsic = 7; + Map_Vertex_Program(ctx, vp, vp->mesa_program ); if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, vp->mesa_program)) @@ -576,6 +593,17 @@ void * r700GetActiveVpShaderBo(GLcontext * ctx) return NULL; } +void * r700GetActiveVpShaderConstBo(GLcontext * ctx) +{ + context_t *context = R700_CONTEXT(ctx); + struct r700_vertex_program *vp = context->selected_vp;; + + if (vp) + return vp->constbo0; + else + return NULL; +} + GLboolean r700SetupVertexProgram(GLcontext * ctx) { 
context_t *context = R700_CONTEXT(ctx); @@ -600,6 +628,19 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx) vp->r700Shader.uShaderBinaryDWORDSize, "VS"); + if(GL_TRUE == r700->bShaderUseMemConstant) + { + paramList = vp->mesa_program->Base.Parameters; + if(NULL != paramList) + { + unNumParamData = paramList->NumParameters; + r600AllocShaderConsts(ctx, + &(vp->constbo0), + unNumParamData *4*4, + "VSCON"); + } + } + vp->loaded = GL_TRUE; } @@ -616,7 +657,9 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx) r700->vs.SQ_PGM_RESOURCES_VS.u32All = 0; SETbit(r700->vs.SQ_PGM_RESOURCES_VS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit); - r700->vs.SQ_PGM_START_VS.u32All = 0; /* set from buffer object. */ + r700->vs.SQ_ALU_CONST_CACHE_VS_0.u32All = 0; /* set from buffer object. */ + + r700->vs.SQ_PGM_START_VS.u32All = 0; SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.nRegs + 1, NUM_GPRS_shift, NUM_GPRS_mask); @@ -687,6 +730,16 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx) r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; } } + + /* Load vp constants to gpu */ + if(GL_TRUE == r700->bShaderUseMemConstant) + { + r600EmitShaderConsts(ctx, + vp->constbo0, + 0, + (GLvoid *)&(r700->vs.consts[0][0]), + unNumParamData * 4 * 4); + } } else r700->vs.num_consts = 0; diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.h b/src/mesa/drivers/dri/r600/r700_vertprog.h index 645c9ac84aa..9acdc8e3501 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.h +++ b/src/mesa/drivers/dri/r600/r700_vertprog.h @@ -56,6 +56,9 @@ struct r700_vertex_program void * shaderbo; + GLuint K0used; + void * constbo0; + ArrayDesc aos_desc[VERT_ATTRIB_MAX]; }; @@ -97,6 +100,8 @@ extern GLboolean r700SetupVertexProgram(GLcontext * ctx); extern void * r700GetActiveVpShaderBo(GLcontext * ctx); +extern void * r700GetActiveVpShaderConstBo(GLcontext * ctx); + extern int getTypeSize(GLenum type); #endif /* _R700_VERTPROG_H_ */ diff --git 
a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h index 7d54fabebbc..61106fbc43f 100644 --- a/src/mesa/drivers/dri/radeon/radeon_chipset.h +++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h @@ -400,6 +400,46 @@ #define PCI_CHIP_RV740_94B5 0x94B5 #define PCI_CHIP_RV740_94B9 0x94B9 +#define PCI_CHIP_CEDAR_68E0 0x68E0 +#define PCI_CHIP_CEDAR_68E1 0x68E1 +#define PCI_CHIP_CEDAR_68E4 0x68E4 +#define PCI_CHIP_CEDAR_68E5 0x68E5 +#define PCI_CHIP_CEDAR_68E8 0x68E8 +#define PCI_CHIP_CEDAR_68E9 0x68E9 +#define PCI_CHIP_CEDAR_68F1 0x68F1 +#define PCI_CHIP_CEDAR_68F8 0x68F8 +#define PCI_CHIP_CEDAR_68F9 0x68F9 +#define PCI_CHIP_CEDAR_68FE 0x68FE + +#define PCI_CHIP_REDWOOD_68C0 0x68C0 +#define PCI_CHIP_REDWOOD_68C1 0x68C1 +#define PCI_CHIP_REDWOOD_68C8 0x68C8 +#define PCI_CHIP_REDWOOD_68C9 0x68C9 +#define PCI_CHIP_REDWOOD_68D8 0x68D8 +#define PCI_CHIP_REDWOOD_68D9 0x68D9 +#define PCI_CHIP_REDWOOD_68DA 0x68DA +#define PCI_CHIP_REDWOOD_68DE 0x68DE + +#define PCI_CHIP_JUNIPER_68A0 0x68A0 +#define PCI_CHIP_JUNIPER_68A1 0x68A1 +#define PCI_CHIP_JUNIPER_68A8 0x68A8 +#define PCI_CHIP_JUNIPER_68A9 0x68A9 +#define PCI_CHIP_JUNIPER_68B0 0x68B0 +#define PCI_CHIP_JUNIPER_68B8 0x68B8 +#define PCI_CHIP_JUNIPER_68B9 0x68B9 +#define PCI_CHIP_JUNIPER_68BE 0x68BE + +#define PCI_CHIP_CYPRESS_6880 0x6880 +#define PCI_CHIP_CYPRESS_6888 0x6888 +#define PCI_CHIP_CYPRESS_6889 0x6889 +#define PCI_CHIP_CYPRESS_688A 0x688A +#define PCI_CHIP_CYPRESS_6898 0x6898 +#define PCI_CHIP_CYPRESS_6899 0x6899 +#define PCI_CHIP_CYPRESS_689E 0x689E + +#define PCI_CHIP_HEMLOCK_689C 0x689C +#define PCI_CHIP_HEMLOCK_689D 0x689D + enum { CHIP_FAMILY_R100, CHIP_FAMILY_RV100, @@ -438,6 +478,11 @@ enum { CHIP_FAMILY_RV730, CHIP_FAMILY_RV710, CHIP_FAMILY_RV740, + CHIP_FAMILY_CEDAR, + CHIP_FAMILY_REDWOOD, + CHIP_FAMILY_JUNIPER, + CHIP_FAMILY_CYPRESS, + CHIP_FAMILY_HEMLOCK, CHIP_FAMILY_LAST }; diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c 
b/src/mesa/drivers/dri/radeon/radeon_common_context.c index 92663bf66d7..07f7cba354e 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -93,6 +93,11 @@ static const char* get_chip_family_name(int chip_family) case CHIP_FAMILY_RV730: return "RV730"; case CHIP_FAMILY_RV710: return "RV710"; case CHIP_FAMILY_RV740: return "RV740"; + case CHIP_FAMILY_CEDAR: return "CEDAR"; + case CHIP_FAMILY_REDWOOD: return "REDWOOD"; + case CHIP_FAMILY_JUNIPER: return "JUNIPER"; + case CHIP_FAMILY_CYPRESS: return "CYPRESS"; + case CHIP_FAMILY_HEMLOCK: return "HEMLOCK"; default: return "unknown"; } } diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h index f06e5fdf244..024e31f8ec7 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h @@ -244,6 +244,8 @@ struct radeon_tex_obj { GLuint SQ_TEX_RESOURCE5; GLuint SQ_TEX_RESOURCE6; + GLuint SQ_TEX_RESOURCE7; + GLuint SQ_TEX_SAMPLER0; GLuint SQ_TEX_SAMPLER1; GLuint SQ_TEX_SAMPLER2; diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c index 517485091a2..0597d4250de 100644 --- a/src/mesa/drivers/dri/radeon/radeon_fbo.c +++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c @@ -609,6 +609,7 @@ radeon_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb) void radeon_fbo_init(struct radeon_context *radeon) { +#if FEATURE_EXT_framebuffer_object radeon->glCtx->Driver.NewFramebuffer = radeon_new_framebuffer; radeon->glCtx->Driver.NewRenderbuffer = radeon_new_renderbuffer; radeon->glCtx->Driver.BindFramebuffer = radeon_bind_framebuffer; @@ -617,7 +618,10 @@ void radeon_fbo_init(struct radeon_context *radeon) radeon->glCtx->Driver.FinishRenderTexture = radeon_finish_render_texture; radeon->glCtx->Driver.ResizeBuffers = radeon_resize_buffers; radeon->glCtx->Driver.ValidateFramebuffer = 
radeon_validate_framebuffer; +#endif +#if FEATURE_EXT_framebuffer_blit radeon->glCtx->Driver.BlitFramebuffer = _mesa_meta_BlitFramebuffer; +#endif } diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c index c6e5f110ea3..ddfde3edaf7 100644 --- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c +++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c @@ -199,10 +199,10 @@ static void calculate_miptree_layout_r300(radeonContextPtr rmesa, radeon_mipmap_ for(face = 0; face < mt->faces; face++) compute_tex_image_offset(rmesa, mt, face, level, &curOffset); - /* r600 cube levels seems to be aligned to 8 faces but - * we have separate register for 1'st level offset so add + /* from r700? cube levels seems to be aligned to 8 faces, + * as we have separate register for 1'st level offset add * 2 image alignment after 1'st mip level */ - if(rmesa->radeonScreen->chip_family >= CHIP_FAMILY_R600 && + if(rmesa->radeonScreen->chip_family >= CHIP_FAMILY_RV770 && mt->target == GL_TEXTURE_CUBE_MAP && level >= 1) curOffset += 2 * mt->levels[level].size; } diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index fa97a19302c..2ea77e56c7e 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -916,6 +916,61 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) screen->chip_flags = RADEON_CHIPSET_TCL; break; + case PCI_CHIP_CEDAR_68E0: + case PCI_CHIP_CEDAR_68E1: + case PCI_CHIP_CEDAR_68E4: + case PCI_CHIP_CEDAR_68E5: + case PCI_CHIP_CEDAR_68E8: + case PCI_CHIP_CEDAR_68E9: + case PCI_CHIP_CEDAR_68F1: + case PCI_CHIP_CEDAR_68F8: + case PCI_CHIP_CEDAR_68F9: + case PCI_CHIP_CEDAR_68FE: + screen->chip_family = CHIP_FAMILY_CEDAR; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_REDWOOD_68C0: + case PCI_CHIP_REDWOOD_68C1: + case PCI_CHIP_REDWOOD_68C8: + case PCI_CHIP_REDWOOD_68C9: + case 
PCI_CHIP_REDWOOD_68D8: + case PCI_CHIP_REDWOOD_68D9: + case PCI_CHIP_REDWOOD_68DA: + case PCI_CHIP_REDWOOD_68DE: + screen->chip_family = CHIP_FAMILY_REDWOOD; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_JUNIPER_68A0: + case PCI_CHIP_JUNIPER_68A1: + case PCI_CHIP_JUNIPER_68A8: + case PCI_CHIP_JUNIPER_68A9: + case PCI_CHIP_JUNIPER_68B0: + case PCI_CHIP_JUNIPER_68B8: + case PCI_CHIP_JUNIPER_68B9: + case PCI_CHIP_JUNIPER_68BE: + screen->chip_family = CHIP_FAMILY_JUNIPER; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_CYPRESS_6880: + case PCI_CHIP_CYPRESS_6888: + case PCI_CHIP_CYPRESS_6889: + case PCI_CHIP_CYPRESS_688A: + case PCI_CHIP_CYPRESS_6898: + case PCI_CHIP_CYPRESS_6899: + case PCI_CHIP_CYPRESS_689E: + screen->chip_family = CHIP_FAMILY_CYPRESS; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_HEMLOCK_689C: + case PCI_CHIP_HEMLOCK_689D: + screen->chip_family = CHIP_FAMILY_HEMLOCK; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + default: fprintf(stderr, "unknown chip id 0x%x, can't guess.\n", device_id); @@ -1116,7 +1171,7 @@ radeonCreateScreen( __DRIscreen *sPriv ) } } else - { + { screen->fbLocation = (temp & 0xffff) << 16; } } diff --git a/src/mesa/drivers/glslcompiler/Makefile b/src/mesa/drivers/glslcompiler/Makefile deleted file mode 100644 index 6da9f93f59a..00000000000 --- a/src/mesa/drivers/glslcompiler/Makefile +++ /dev/null @@ -1,43 +0,0 @@ -# Makefile for stand-alone GL-SL compiler - -TOP = ../../../.. 
- -include $(TOP)/configs/current - - -PROGRAM = glslcompiler - -OBJECTS = \ - glslcompiler.o \ - ../common/driverfuncs.o \ - ../../libmesa.a \ - $(TOP)/src/mapi/glapi/libglapi.a - -INCLUDES = \ - -I$(TOP)/include \ - -I$(TOP)/include/GL/internal \ - -I$(TOP)/src/mapi \ - -I$(TOP)/src/mesa \ - -I$(TOP)/src/mesa/main \ - -I$(TOP)/src/mesa/glapi \ - -I$(TOP)/src/mesa/math \ - -I$(TOP)/src/mesa/transform \ - -I$(TOP)/src/mesa/shader \ - -I$(TOP)/src/mesa/swrast \ - -I$(TOP)/src/mesa/swrast_setup \ - - -default: $(PROGRAM) - $(INSTALL) $(PROGRAM) $(TOP)/bin - - -glslcompiler: $(OBJECTS) - $(CC) $(OBJECTS) $(GL_LIB_DEPS) -o $@ - - -glslcompiler.o: glslcompiler.c - $(CC) -c $(INCLUDES) $(CFLAGS) glslcompiler.c -o $@ - - -clean: - -rm -f *.o *~ $(PROGRAM) diff --git a/src/mesa/drivers/glslcompiler/glslcompiler.c b/src/mesa/drivers/glslcompiler/glslcompiler.c deleted file mode 100644 index 7259bf4c560..00000000000 --- a/src/mesa/drivers/glslcompiler/glslcompiler.c +++ /dev/null @@ -1,436 +0,0 @@ -/* - * Mesa 3-D graphics library - * Version: 6.5.3 - * - * Copyright (C) 1999-2007 Brian Paul, Tungsten Graphics, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN - * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * \mainpage - * - * Stand-alone Shading Language compiler. - * Basically, a command-line program which accepts GLSL shaders and emits - * vertex/fragment programs (GPU instructions). - * - * This file is basically just a Mesa device driver but instead of building - * a shared library we build an executable. - * - * We can emit programs in three different formats: - * 1. ARB-style (GL_ARB_vertex/fragment_program) - * 2. NV-style (GL_NV_vertex/fragment_program) - * 3. debug-style (a slightly more sophisticated, internal format) - * - * Note that the ARB and NV program languages can't express all the - * features that might be used by a fragment program (examples being - * uniform and varying vars). So, the ARB/NV programs that are - * emitted aren't always legal programs in those languages. - */ - - -#include "main/imports.h" -#include "main/context.h" -#include "main/extensions.h" -#include "main/framebuffer.h" -#include "main/shaderapi.h" -#include "main/shaderobj.h" -#include "program/prog_print.h" -#include "drivers/common/driverfuncs.h" -#include "tnl/tnl.h" -#include "tnl/t_context.h" -#include "tnl/t_pipeline.h" -#include "swrast/swrast.h" -#include "swrast_setup/swrast_setup.h" -#include "vbo/vbo.h" - - -static const char *Prog = "glslcompiler"; - - -struct options { - GLboolean LineNumbers; - GLboolean Link; - gl_prog_print_mode Mode; - const char *VertFile; - const char *FragFile; - const char *GeoFile; - const char *OutputFile; - GLboolean Params; - struct gl_sl_pragmas Pragmas; -}; - -static struct options Options; - - -/** - * GLSL compiler driver context. 
(kind of an artificial thing for now) - */ -struct compiler_context -{ - GLcontext MesaContext; - int foo; -}; - -typedef struct compiler_context CompilerContext; - - - -static void -UpdateState(GLcontext *ctx, GLuint new_state) -{ - /* easy - just propogate */ - _swrast_InvalidateState( ctx, new_state ); - _swsetup_InvalidateState( ctx, new_state ); - _tnl_InvalidateState( ctx, new_state ); - _vbo_InvalidateState( ctx, new_state ); -} - - - -static GLboolean -CreateContext(void) -{ - struct dd_function_table ddFuncs; - GLvisual *vis; - GLframebuffer *buf; - GLcontext *ctx; - CompilerContext *cc; - - vis = _mesa_create_visual(GL_FALSE, GL_FALSE, /* RGB */ - 8, 8, 8, 8, /* color */ - 0, 0, /* z, stencil */ - 0, 0, 0, 0, 1); /* accum */ - buf = _mesa_create_framebuffer(vis); - - cc = calloc(1, sizeof(*cc)); - if (!vis || !buf || !cc) { - if (vis) - _mesa_destroy_visual(vis); - if (buf) - _mesa_destroy_framebuffer(buf); - free(cc); - return GL_FALSE; - } - - _mesa_init_driver_functions(&ddFuncs); - ddFuncs.GetString = NULL;/*get_string;*/ - ddFuncs.UpdateState = UpdateState; - ddFuncs.GetBufferSize = NULL; - - ctx = &cc->MesaContext; - _mesa_initialize_context(ctx, vis, NULL, &ddFuncs, cc); - _mesa_enable_sw_extensions(ctx); - - if (!_swrast_CreateContext( ctx ) || - !_vbo_CreateContext( ctx ) || - !_tnl_CreateContext( ctx ) || - !_swsetup_CreateContext( ctx )) { - _mesa_destroy_visual(vis); - _mesa_destroy_framebuffer(buf); - _mesa_free_context_data(ctx); - free(cc); - return GL_FALSE; - } - TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; - _swsetup_Wakeup( ctx ); - - /* Override the context's default pragma settings */ - ctx->Shader.DefaultPragmas = Options.Pragmas; - - _mesa_make_current(ctx, buf, buf); - - return GL_TRUE; -} - - -static void -LoadAndCompileShader(GLuint shader, const char *text) -{ - GLint stat; - _mesa_ShaderSourceARB(shader, 1, (const GLchar **) &text, NULL); - _mesa_CompileShaderARB(shader); - _mesa_GetShaderiv(shader, 
GL_COMPILE_STATUS, &stat); - if (!stat) { - GLchar log[1000]; - GLsizei len; - _mesa_GetShaderInfoLog(shader, 1000, &len, log); - fprintf(stderr, "%s: problem compiling shader: %s\n", Prog, log); - exit(1); - } - else { - printf("Shader compiled OK\n"); - } -} - - -/** - * Read a shader from a file. - */ -static void -ReadShader(GLuint shader, const char *filename) -{ - const int max = 100*1000; - int n; - char *buffer = (char*) malloc(max); - FILE *f = fopen(filename, "r"); - if (!f) { - fprintf(stderr, "%s: Unable to open shader file %s\n", Prog, filename); - exit(1); - } - - n = fread(buffer, 1, max, f); - /* - printf("%s: read %d bytes from shader file %s\n", Prog, n, filename); - */ - if (n > 0) { - buffer[n] = 0; - LoadAndCompileShader(shader, buffer); - } - - fclose(f); - free(buffer); -} - - -static void -CheckLink(GLuint v_shader, GLuint f_shader) -{ - GLuint prog; - GLint stat; - - prog = _mesa_CreateProgram(); - - _mesa_AttachShader(prog, v_shader); - _mesa_AttachShader(prog, f_shader); - - _mesa_LinkProgramARB(prog); - _mesa_GetProgramiv(prog, GL_LINK_STATUS, &stat); - if (!stat) { - GLchar log[1000]; - GLsizei len; - _mesa_GetProgramInfoLog(prog, 1000, &len, log); - fprintf(stderr, "Linker error:\n%s\n", log); - } - else { - fprintf(stderr, "Link success!\n"); - } -} - - -static void -PrintShaderInstructions(GLuint shader, FILE *f) -{ - GET_CURRENT_CONTEXT(ctx); - struct gl_shader *sh = _mesa_lookup_shader(ctx, shader); - struct gl_program *prog = sh->Program; - _mesa_fprint_program_opt(stdout, prog, Options.Mode, Options.LineNumbers); - if (Options.Params) - _mesa_print_program_parameters(ctx, prog); -} - - -static GLuint -CompileShader(const char *filename, GLenum type) -{ - GLuint shader; - - assert(type == GL_FRAGMENT_SHADER || - type == GL_VERTEX_SHADER || - type == GL_GEOMETRY_SHADER_ARB); - - shader = _mesa_CreateShader(type); - ReadShader(shader, filename); - - return shader; -} - - -static void -Usage(void) -{ - printf("Mesa GLSL stand-alone 
compiler\n"); - printf("Usage:\n"); - printf(" --vs FILE vertex shader input filename\n"); - printf(" --fs FILE fragment shader input filename\n"); - printf(" --gs FILE geometry shader input filename\n"); - printf(" --arb emit ARB-style instructions\n"); - printf(" --nv emit NV-style instructions\n"); - printf(" --link run linker\n"); - printf(" --debug force #pragma debug(on)\n"); - printf(" --nodebug force #pragma debug(off)\n"); - printf(" --opt force #pragma optimize(on)\n"); - printf(" --noopt force #pragma optimize(off)\n"); - printf(" --number, -n emit line numbers (if --arb or --nv)\n"); - printf(" --output, -o FILE output filename\n"); - printf(" --params also emit program parameter info\n"); - printf(" --help display this information\n"); -} - - -static void -ParseOptions(int argc, char *argv[]) -{ - int i; - - Options.LineNumbers = GL_FALSE; - Options.Mode = PROG_PRINT_DEBUG; - Options.VertFile = NULL; - Options.FragFile = NULL; - Options.GeoFile = NULL; - Options.OutputFile = NULL; - Options.Params = GL_FALSE; - Options.Pragmas.IgnoreOptimize = GL_FALSE; - Options.Pragmas.IgnoreDebug = GL_FALSE; - Options.Pragmas.Debug = GL_FALSE; - Options.Pragmas.Optimize = GL_TRUE; - - if (argc == 1) { - Usage(); - exit(0); - } - - for (i = 1; i < argc; i++) { - if (strcmp(argv[i], "--vs") == 0) { - Options.VertFile = argv[i + 1]; - i++; - } - else if (strcmp(argv[i], "--fs") == 0) { - Options.FragFile = argv[i + 1]; - i++; - } - else if (strcmp(argv[i], "--gs") == 0) { - Options.GeoFile = argv[i + 1]; - i++; - } - else if (strcmp(argv[i], "--arb") == 0) { - Options.Mode = PROG_PRINT_ARB; - } - else if (strcmp(argv[i], "--nv") == 0) { - Options.Mode = PROG_PRINT_NV; - } - else if (strcmp(argv[i], "--link") == 0) { - Options.Link = GL_TRUE; - } - else if (strcmp(argv[i], "--debug") == 0) { - Options.Pragmas.IgnoreDebug = GL_TRUE; - Options.Pragmas.Debug = GL_TRUE; - } - else if (strcmp(argv[i], "--nodebug") == 0) { - Options.Pragmas.IgnoreDebug = GL_TRUE; - 
Options.Pragmas.Debug = GL_FALSE; - } - else if (strcmp(argv[i], "--opt") == 0) { - Options.Pragmas.IgnoreOptimize = GL_TRUE; - Options.Pragmas.Optimize = GL_TRUE; - } - else if (strcmp(argv[i], "--noopt") == 0) { - Options.Pragmas.IgnoreOptimize = GL_TRUE; - Options.Pragmas.Optimize = GL_FALSE; - } - else if (strcmp(argv[i], "--number") == 0 || - strcmp(argv[i], "-n") == 0) { - Options.LineNumbers = GL_TRUE; - } - else if (strcmp(argv[i], "--output") == 0 || - strcmp(argv[i], "-o") == 0) { - Options.OutputFile = argv[i + 1]; - i++; - } - else if (strcmp(argv[i], "--params") == 0) { - Options.Params = GL_TRUE; - } - else if (strcmp(argv[i], "--help") == 0) { - Usage(); - exit(0); - } - else { - printf("Unknown option: %s\n", argv[i]); - Usage(); - exit(1); - } - } - - if (Options.Mode == PROG_PRINT_DEBUG) { - /* always print line numbers when emitting debug-style output */ - Options.LineNumbers = GL_TRUE; - } -} - - -int -main(int argc, char *argv[]) -{ - GLuint v_shader = 0, f_shader = 0, g_shader = 0; - - ParseOptions(argc, argv); - - if (!CreateContext()) { - fprintf(stderr, "%s: Failed to create compiler context\n", Prog); - exit(1); - } - - if (Options.VertFile) { - v_shader = CompileShader(Options.VertFile, GL_VERTEX_SHADER); - } - - if (Options.FragFile) { - f_shader = CompileShader(Options.FragFile, GL_FRAGMENT_SHADER); - } - - if (Options.GeoFile) { - g_shader = CompileShader(Options.GeoFile, GL_GEOMETRY_SHADER_ARB); - } - - - if (v_shader || f_shader || g_shader) { - if (Options.OutputFile) { - FILE *f; - fclose(stdout); - /*stdout =*/ f = freopen(Options.OutputFile, "w", stdout); - if (!f) { - fprintf(stderr, "freopen error\n"); - } - } - if (stdout && v_shader) { - PrintShaderInstructions(v_shader, stdout); - } - if (stdout && f_shader) { - PrintShaderInstructions(f_shader, stdout); - } - if (stdout && g_shader) { - PrintShaderInstructions(g_shader, stdout); - } - if (Options.OutputFile) { - fclose(stdout); - } - } - - if (Options.Link) { - if 
(!v_shader || !f_shader) { - fprintf(stderr, - "--link option requires both a vertex and fragment shader.\n"); - exit(1); - } - - CheckLink(v_shader, f_shader); - } - - return 0; -} diff --git a/src/mesa/drivers/osmesa/Makefile b/src/mesa/drivers/osmesa/Makefile index c6b4a040851..39ab09af805 100644 --- a/src/mesa/drivers/osmesa/Makefile +++ b/src/mesa/drivers/osmesa/Makefile @@ -23,8 +23,7 @@ INCLUDE_DIRS = \ CORE_MESA = \ $(TOP)/src/mesa/libmesa.a \ $(TOP)/src/mapi/glapi/libglapi.a \ - $(TOP)/src/glsl/cl/libglslcl.a \ - $(TOP)/src/glsl/pp/libglslpp.a + $(TOP)/src/glsl/libglsl.a .c.o: $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ @@ -37,9 +36,9 @@ default: $(TOP)/$(LIB_DIR)/$(OSMESA_LIB_NAME) # sources. We can also build libOSMesa16/libOSMesa32 by setting # -DCHAN_BITS=16/32. $(TOP)/$(LIB_DIR)/$(OSMESA_LIB_NAME): $(OBJECTS) $(CORE_MESA) - $(MKLIB) -o $(OSMESA_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \ + $(MKLIB) -o $(OSMESA_LIB) -linker '$(CXX)' -ldflags '$(LDFLAGS)' \ -major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \ - -install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \ + -install $(TOP)/$(LIB_DIR) -cplusplus $(MKLIB_OPTIONS) \ -id $(INSTALL_LIB_DIR)/lib$(OSMESA_LIB).$(MESA_MAJOR).dylib \ $(OSMESA_LIB_DEPS) $(OBJECTS) $(CORE_MESA) diff --git a/src/mesa/drivers/x11/Makefile b/src/mesa/drivers/x11/Makefile index b5b0c1f11a8..f759da0a979 100644 --- a/src/mesa/drivers/x11/Makefile +++ b/src/mesa/drivers/x11/Makefile @@ -57,9 +57,10 @@ default: $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(OBJECTS) $(CORE_MESA) - @ $(MKLIB) -o $(GL_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \ + @ $(MKLIB) -o $(GL_LIB) -linker '$(CXX)' -ldflags '$(LDFLAGS)' \ -major $(GL_MAJOR) -minor $(GL_MINOR) -patch $(GL_TINY) \ - -install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \ + -install $(TOP)/$(LIB_DIR) \ + -cplusplus $(MKLIB_OPTIONS) \ -id $(INSTALL_LIB_DIR)/lib$(GL_LIB).$(GL_MAJOR).dylib \ $(GL_LIB_DEPS) $(OBJECTS) $(CORE_MESA) |