diff options
author | Nicolai Hähnle <[email protected]> | 2009-10-03 19:30:48 +0200 |
---|---|---|
committer | Nicolai Hähnle <[email protected]> | 2009-10-03 19:30:48 +0200 |
commit | 6d25b9125ec1e66e0e255b0ee20fe18dfe1076fa (patch) | |
tree | c376951940eac2875567979a81e6a03a019942b7 /src/mesa/drivers/dri | |
parent | 81c7561d9d3faf70ac22c6a5f3fbea18f53eed92 (diff) | |
parent | 7d2699aedc084d9cb9c2bd2f8bdb5f038271ac1e (diff) |
Merge branch 'master' into r300-compiler
Diffstat (limited to 'src/mesa/drivers/dri')
62 files changed, 2073 insertions, 731 deletions
diff --git a/src/mesa/drivers/dri/glcore/Makefile b/src/mesa/drivers/dri/glcore/Makefile deleted file mode 100644 index ac7e1de9285..00000000000 --- a/src/mesa/drivers/dri/glcore/Makefile +++ /dev/null @@ -1,84 +0,0 @@ -# src/mesa/drivers/dri/glcore/Makefile - -TOP = ../../../../.. -include $(TOP)/configs/current - -LIBNAME = glcore_dri.so - -DRIVER_SOURCES = glcore_driver.c \ - $(TOP)/src/mesa/drivers/common/driverfuncs.c \ - ../common/dri_util.c - -C_SOURCES = \ - $(DRIVER_SOURCES) \ - $(DRI_SOURCES) - - -# Include directories -INCLUDE_DIRS = \ - -I. \ - -I../common \ - -I../dri_client \ - -I../dri_client/imports \ - -Iserver \ - -I$(TOP)/include \ - -I$(DRM_SOURCE_PATH)/shared-core \ - -I$(TOP)/src/mesa \ - -I$(TOP)/src/mesa/main \ - -I$(TOP)/src/mesa/glapi \ - -I$(TOP)/src/mesa/math \ - -I$(TOP)/src/mesa/transform \ - -I$(TOP)/src/mesa/shader \ - -I$(TOP)/src/mesa/swrast \ - -I$(TOP)/src/mesa/swrast_setup - -# Core Mesa objects -MESA_MODULES = $(TOP)/src/mesa/libmesa.a - -# Libraries that the driver shared lib depends on -LIB_DEPS = -lm -lpthread -lc -# LIB_DEPS = -lGL -lm -lpthread -lc - - -ASM_SOURCES = - -OBJECTS = $(C_SOURCES:.c=.o) \ - $(ASM_SOURCES:.S=.o) - - -##### RULES ##### - -.c.o: - $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $(DEFINES) $< -o $@ - -.S.o: - $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $(DEFINES) $< -o $@ - - -##### TARGETS ##### - -default: depend $(TOP)/$(LIB_DIR)/$(LIBNAME) - - -$(TOP)/$(LIB_DIR)/$(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(WINOBJ) Makefile - CC="$(CC)" CXX="$(CXX)" $(TOP)/bin/mklib -o $(LIBNAME) -noprefix -install $(TOP)/$(LIB_DIR) \ - $(OBJECTS) $(WINLIB) $(LIB_DEPS) $(WINOBJ) $(MESA_MODULES) - - -depend: $(C_SOURCES) $(ASM_SOURCES) - rm -f depend - touch depend - $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(C_SOURCES) $(ASM_SOURCES) \ - > /dev/null - - -# Emacs tags -tags: - etags `find . -name \*.[ch]` `find ../include` - - -clean: - -rm -f *.o server/*.o - - -include depend diff --git a/src/mesa/drivers/dri/i915/Makefile b/src/mesa/drivers/dri/i915/Makefile index 9d049dea8fe..393312e7328 100644 --- a/src/mesa/drivers/dri/i915/Makefile +++ b/src/mesa/drivers/dri/i915/Makefile @@ -19,7 +19,6 @@ DRIVER_SOURCES = \ intel_batchbuffer.c \ intel_clear.c \ intel_extensions.c \ - intel_generatemipmap.c \ intel_mipmap_tree.c \ intel_tex_layout.c \ intel_tex_image.c \ diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index 3ab7d682ee7..7d4c7cfbabb 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -40,6 +40,7 @@ #include "utils.h" #include "i915_reg.h" +#include "i915_program.h" #include "intel_regions.h" #include "intel_batchbuffer.h" @@ -80,6 +81,8 @@ i915InvalidateState(GLcontext * ctx, GLuint new_state) i915_update_stencil(ctx); if (new_state & (_NEW_LIGHT)) i915_update_provoking_vertex(ctx); + if (new_state & (_NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)) + i915_update_program(ctx); } @@ -139,7 +142,7 @@ i915CreateContext(const __GLcontextModes * mesaVis, ctx->Const.MaxTextureUnits = I915_TEX_UNITS; ctx->Const.MaxTextureImageUnits = I915_TEX_UNITS; ctx->Const.MaxTextureCoordUnits = I915_TEX_UNITS; - + ctx->Const.MaxVarying = I915_TEX_UNITS; /* Advertise the full hardware capabilities. The new memory * manager should cope much better with overload situations: diff --git a/src/mesa/drivers/dri/i915/i915_context.h b/src/mesa/drivers/dri/i915/i915_context.h index 8de4a9d0d36..082d6144425 100644 --- a/src/mesa/drivers/dri/i915/i915_context.h +++ b/src/mesa/drivers/dri/i915/i915_context.h @@ -121,10 +121,14 @@ enum { #define I915_MAX_CONSTANT 32 #define I915_CONSTANT_SIZE (2+(4*I915_MAX_CONSTANT)) +#define I915_MAX_INSN (I915_MAX_DECL_INSN + \ + I915_MAX_TEX_INSN + \ + I915_MAX_ALU_INSN) -#define I915_PROGRAM_SIZE 192 - -#define I915_MAX_INSN (I915_MAX_TEX_INSN+I915_MAX_ALU_INSN) +/* Maximum size of the program packet, which matches the limits on + * decl, tex, and ALU instructions. + */ +#define I915_PROGRAM_SIZE (I915_MAX_INSN * 3 + 1) /* Hardware version of a parsed fragment program. "Derived" from the * mesa fragment_program struct. @@ -154,8 +158,9 @@ struct i915_fragment_program */ GLcontext *ctx; - GLuint declarations[I915_PROGRAM_SIZE]; - GLuint program[I915_PROGRAM_SIZE]; + /* declarations contains the packet header. */ + GLuint declarations[I915_MAX_DECL_INSN * 3 + 1]; + GLuint program[(I915_MAX_TEX_INSN + I915_MAX_ALU_INSN) * 3]; GLfloat constant[I915_MAX_CONSTANT][4]; GLuint constant_flags[I915_MAX_CONSTANT]; diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 2db10c60e99..d9c61446f52 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -89,7 +89,8 @@ src_vector(struct i915_fragment_program *p, */ case PROGRAM_TEMPORARY: if (source->Index >= I915_MAX_TEMPORARY) { - i915_program_error(p, "Exceeded max temporary reg"); + i915_program_error(p, "Exceeded max temporary reg: %d/%d", + source->Index, I915_MAX_TEMPORARY); return 0; } src = UREG(REG_TYPE_R, source->Index); @@ -121,10 +122,23 @@ src_vector(struct i915_fragment_program *p, src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + (source->Index - FRAG_ATTRIB_TEX0), D0_CHANNEL_ALL); + break; + + case FRAG_ATTRIB_VAR0: + case FRAG_ATTRIB_VAR0 + 1: + case FRAG_ATTRIB_VAR0 + 2: + case FRAG_ATTRIB_VAR0 + 3: + case FRAG_ATTRIB_VAR0 + 4: + case FRAG_ATTRIB_VAR0 + 5: + case FRAG_ATTRIB_VAR0 + 6: + case FRAG_ATTRIB_VAR0 + 7: + src = i915_emit_decl(p, REG_TYPE_T, + T_TEX0 + (source->Index - FRAG_ATTRIB_VAR0), + D0_CHANNEL_ALL); break; default: - i915_program_error(p, "Bad source->Index"); + i915_program_error(p, "Bad source->Index: %d", source->Index); return 0; } break; @@ -146,6 +160,7 @@ src_vector(struct i915_fragment_program *p, case PROGRAM_CONSTANT: case PROGRAM_STATE_VAR: case PROGRAM_NAMED_PARAM: + case PROGRAM_UNIFORM: src = i915_emit_param4fv(p, program->Base.Parameters->ParameterValues[source-> @@ -153,7 +168,7 @@ src_vector(struct i915_fragment_program *p, break; default: - i915_program_error(p, "Bad source->File"); + i915_program_error(p, "Bad source->File: %d", source->File); return 0; } @@ -186,13 +201,14 @@ get_result_vector(struct i915_fragment_program *p, p->depth_written = 1; return UREG(REG_TYPE_OD, 0); default: - i915_program_error(p, "Bad inst->DstReg.Index"); + i915_program_error(p, "Bad inst->DstReg.Index: %d", + inst->DstReg.Index); return 0; } case PROGRAM_TEMPORARY: return UREG(REG_TYPE_R, inst->DstReg.Index); default: - i915_program_error(p, "Bad inst->DstReg.File"); + i915_program_error(p, "Bad inst->DstReg.File: %d", inst->DstReg.File); return 0; } } @@ -231,7 +247,7 @@ translate_tex_src_target(struct i915_fragment_program *p, GLubyte bit) case TEXTURE_CUBE_INDEX: return D0_SAMPLE_TYPE_CUBE; default: - i915_program_error(p, "TexSrcBit"); + i915_program_error(p, "TexSrcBit: %d", bit); return 0; } } @@ -351,7 +367,7 @@ upload_program(struct i915_fragment_program *p) while (1) { GLuint src0, src1, src2, flags; - GLuint tmp = 0, consts0 = 0, consts1 = 0; + GLuint tmp = 0, dst, consts0 = 0, consts1 = 0; switch (inst->Opcode) { case OPCODE_ABS: @@ -503,6 +519,10 @@ upload_program(struct i915_fragment_program *p) EMIT_1ARG_ARITH(A0_FLR); break; + case OPCODE_TRUNC: + EMIT_1ARG_ARITH(A0_TRC); + break; + case OPCODE_FRC: EMIT_1ARG_ARITH(A0_FRC); break; @@ -516,6 +536,22 @@ upload_program(struct i915_fragment_program *p) 0, src0, T0_TEXKILL); break; + case OPCODE_KIL_NV: + if (inst->DstReg.CondMask == COND_TR) { + tmp = i915_get_utemp(p); + + i915_emit_texld(p, get_live_regs(p, inst), + tmp, A0_DEST_CHANNEL_ALL, + 0, /* use a dummy dest reg */ + swizzle(tmp, ONE, ONE, ONE, ONE), /* always */ + T0_TEXKILL); + } else { + p->error = 1; + i915_program_error(p, "Unsupported KIL_NV condition code: %d", + inst->DstReg.CondMask); + } + break; + case OPCODE_LG2: src0 = src_vector(p, &inst->SrcReg[0], program); @@ -615,6 +651,20 @@ upload_program(struct i915_fragment_program *p) EMIT_2ARG_ARITH(A0_MUL); break; + case OPCODE_NOISE1: + case OPCODE_NOISE2: + case OPCODE_NOISE3: + case OPCODE_NOISE4: + /* Don't implement noise because we just don't have the instructions + * to spare. We aren't the first vendor to do so. + */ + i915_program_error(p, "Stubbed-out noise functions"); + i915_emit_arith(p, + A0_MOV, + get_result_vector(p, inst), + get_result_flags(inst), 0, + swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0); + case OPCODE_POW: src0 = src_vector(p, &inst->SrcReg[0], program); src1 = src_vector(p, &inst->SrcReg[1], program); @@ -721,9 +771,38 @@ upload_program(struct i915_fragment_program *p) } break; - case OPCODE_SGE: - EMIT_2ARG_ARITH(A0_SGE); - break; + case OPCODE_SEQ: + tmp = i915_get_utemp(p); + flags = get_result_flags(inst); + dst = get_result_vector(p, inst); + + /* dst = src1 >= src2 */ + i915_emit_arith(p, + A0_SGE, + dst, + flags, 0, + src_vector(p, &inst->SrcReg[0], program), + src_vector(p, &inst->SrcReg[1], program), + 0); + /* tmp = src1 <= src2 */ + i915_emit_arith(p, + A0_SGE, + tmp, + flags, 0, + negate(src_vector(p, &inst->SrcReg[0], program), + 1, 1, 1, 1), + negate(src_vector(p, &inst->SrcReg[1], program), + 1, 1, 1, 1), + 0); + /* dst = tmp && dst */ + i915_emit_arith(p, + A0_MUL, + dst, + flags, 0, + dst, + tmp, + 0); + break; case OPCODE_SIN: src0 = src_vector(p, &inst->SrcReg[0], program); @@ -809,10 +888,71 @@ upload_program(struct i915_fragment_program *p) break; + case OPCODE_SGE: + EMIT_2ARG_ARITH(A0_SGE); + break; + + case OPCODE_SGT: + i915_emit_arith(p, + A0_SLT, + get_result_vector( p, inst ), + get_result_flags( inst ), 0, + negate(src_vector( p, &inst->SrcReg[0], program), + 1, 1, 1, 1), + negate(src_vector( p, &inst->SrcReg[1], program), + 1, 1, 1, 1), + 0); + break; + + case OPCODE_SLE: + i915_emit_arith(p, + A0_SGE, + get_result_vector( p, inst ), + get_result_flags( inst ), 0, + negate(src_vector( p, &inst->SrcReg[0], program), + 1, 1, 1, 1), + negate(src_vector( p, &inst->SrcReg[1], program), + 1, 1, 1, 1), + 0); + break; + case OPCODE_SLT: EMIT_2ARG_ARITH(A0_SLT); break; + case OPCODE_SNE: + tmp = i915_get_utemp(p); + flags = get_result_flags(inst); + dst = get_result_vector(p, inst); + + /* dst = src1 < src2 */ + i915_emit_arith(p, + A0_SLT, + dst, + flags, 0, + src_vector(p, &inst->SrcReg[0], program), + src_vector(p, &inst->SrcReg[1], program), + 0); + /* tmp = src1 > src2 */ + i915_emit_arith(p, + A0_SLT, + tmp, + flags, 0, + negate(src_vector(p, &inst->SrcReg[0], program), + 1, 1, 1, 1), + negate(src_vector(p, &inst->SrcReg[1], program), + 1, 1, 1, 1), + 0); + /* dst = tmp || dst */ + i915_emit_arith(p, + A0_ADD, + dst, + flags | A0_DEST_SATURATE, 0, + dst, + tmp, + 0); + break; + case OPCODE_SUB: src0 = src_vector(p, &inst->SrcReg[0], program); src1 = src_vector(p, &inst->SrcReg[1], program); @@ -869,8 +1009,39 @@ upload_program(struct i915_fragment_program *p) case OPCODE_END: return; + case OPCODE_BGNLOOP: + case OPCODE_BGNSUB: + case OPCODE_BRA: + case OPCODE_BRK: + case OPCODE_CAL: + case OPCODE_CONT: + case OPCODE_DDX: + case OPCODE_DDY: + case OPCODE_ELSE: + case OPCODE_ENDIF: + case OPCODE_ENDLOOP: + case OPCODE_ENDSUB: + case OPCODE_IF: + case OPCODE_RET: + p->error = 1; + i915_program_error(p, "Unsupported opcode: %s", + _mesa_opcode_string(inst->Opcode)); + return; + + case OPCODE_EXP: + case OPCODE_LOG: + /* These opcodes are claimed as GLSL, NV_vp, and ARB_vp in + * prog_instruction.h, but apparently GLSL doesn't ever emit them. + * Instead, it translates to EX2 or LG2. + */ + case OPCODE_TXD: + case OPCODE_TXL: + /* These opcodes are claimed by GLSL in prog_instruction.h, but + * only NV_vp/fp appears to emit them. + */ default: - i915_program_error(p, "bad opcode"); + i915_program_error(p, "bad opcode: %s", + _mesa_opcode_string(inst->Opcode)); return; } @@ -906,7 +1077,7 @@ check_wpos(struct i915_fragment_program *p) p->wpos_tex = -1; for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) { - if (inputs & FRAG_BIT_TEX(i)) + if (inputs & (FRAG_BIT_TEX(i) | FRAG_BIT_VAR(i))) continue; else if (inputs & FRAG_BIT_WPOS) { p->wpos_tex = i; @@ -1055,6 +1226,28 @@ i915ProgramStringNotify(GLcontext * ctx, _tnl_program_string(ctx, target, prog); } +void +i915_update_program(GLcontext *ctx) +{ + struct intel_context *intel = intel_context(ctx); + struct i915_context *i915 = i915_context(&intel->ctx); + struct i915_fragment_program *fp = + (struct i915_fragment_program *) ctx->FragmentProgram._Current; + + if (i915->current_program != fp) { + if (i915->current_program) { + i915->current_program->on_hardware = 0; + i915->current_program->params_uptodate = 0; + } + + i915->current_program = fp; + } + + if (!fp->translated) + translate_program(fp); + + FALLBACK(&i915->intel, I915_FALLBACK_PROGRAM, fp->error); +} void i915ValidateFragmentProgram(struct i915_context *i915) @@ -1072,16 +1265,6 @@ i915ValidateFragmentProgram(struct i915_context *i915) GLuint s2 = S2_TEXCOORD_NONE; int i, offset = 0; - if (i915->current_program != p) { - if (i915->current_program) { - i915->current_program->on_hardware = 0; - i915->current_program->params_uptodate = 0; - } - - i915->current_program = p; - } - - /* Important: */ VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr; @@ -1125,6 +1308,14 @@ i915ValidateFragmentProgram(struct i915_context *i915) EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, EMIT_SZ(sz), 0, sz * 4); } + else if (inputsRead & FRAG_BIT_VAR(i)) { + int sz = VB->AttribPtr[_TNL_ATTRIB_GENERIC0 + i]->size; + + s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK); + s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz)); + + EMIT_ATTR(_TNL_ATTRIB_GENERIC0 + i, EMIT_SZ(sz), 0, sz * 4); + } else if (i == p->wpos_tex) { /* If WPOS is required, duplicate the XYZ position data in an diff --git a/src/mesa/drivers/dri/i915/i915_program.c b/src/mesa/drivers/dri/i915/i915_program.c index e87700f8e0a..e7908bd48fc 100644 --- a/src/mesa/drivers/dri/i915/i915_program.c +++ b/src/mesa/drivers/dri/i915/i915_program.c @@ -130,6 +130,7 @@ i915_emit_decl(struct i915_fragment_program *p, *(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags); *(p->decl++) = D1_MBZ; *(p->decl++) = D2_MBZ; + assert(p->decl <= p->declarations + ARRAY_SIZE(p->declarations)); p->nr_decl_insn++; return reg; @@ -186,6 +187,11 @@ i915_emit_arith(struct i915_fragment_program * p, p->utemp_flag = old_utemp_flag; /* restore */ } + if (p->csr >= p->program + ARRAY_SIZE(p->program)) { + i915_program_error(p, "Program contains too many instructions"); + return UREG_BAD; + } + *(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0)); *(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1)); *(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2)); @@ -270,6 +276,11 @@ GLuint i915_emit_texld( struct i915_fragment_program *p, p->register_phases[GET_UREG_NR(coord)] == p->nr_tex_indirect) p->nr_tex_indirect++; + if (p->csr >= p->program + ARRAY_SIZE(p->program)) { + i915_program_error(p, "Program contains too many instructions"); + return UREG_BAD; + } + *(p->csr++) = (op | T0_DEST( dest ) | T0_SAMPLER( sampler )); @@ -424,12 +435,21 @@ i915_emit_param4fv(struct i915_fragment_program * p, const GLfloat * values) return 0; } - - +/* Warning the user about program errors seems to be quite valuable, from + * our bug reports. It unfortunately means piglit reporting errors + * when we fall back to software due to an unsupportable program, though. + */ void -i915_program_error(struct i915_fragment_program *p, const char *msg) +i915_program_error(struct i915_fragment_program *p, const char *fmt, ...) { - _mesa_problem(NULL, "i915_program_error: %s", msg); + va_list args; + + fprintf(stderr, "i915_program_error: "); + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + + fprintf(stderr, "\n"); p->error = 1; } @@ -511,7 +531,8 @@ i915_upload_program(struct i915_context *i915, GLuint program_size = p->csr - p->program; GLuint decl_size = p->decl - p->declarations; - FALLBACK(&i915->intel, I915_FALLBACK_PROGRAM, p->error); + if (p->error) + return; /* Could just go straight to the batchbuffer from here: */ diff --git a/src/mesa/drivers/dri/i915/i915_program.h b/src/mesa/drivers/dri/i915/i915_program.h index 14a3f08801f..0d17d048653 100644 --- a/src/mesa/drivers/dri/i915/i915_program.h +++ b/src/mesa/drivers/dri/i915/i915_program.h @@ -145,7 +145,7 @@ extern GLuint i915_emit_param4fv(struct i915_fragment_program *p, const GLfloat * values); extern void i915_program_error(struct i915_fragment_program *p, - const char *msg); + const char *fmt, ...); extern void i915_init_program(struct i915_context *i915, struct i915_fragment_program *p); @@ -155,7 +155,6 @@ extern void i915_upload_program(struct i915_context *i915, extern void i915_fini_program(struct i915_fragment_program *p); - - +extern void i915_update_program(GLcontext *ctx); #endif diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 9a723d3cd73..9e2523932f1 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -54,8 +54,7 @@ i915_render_prevalidate(struct intel_context *intel) { struct i915_context *i915 = i915_context(&intel->ctx); - if (!intel->Fallback) - i915ValidateFragmentProgram(i915); + i915ValidateFragmentProgram(i915); } static void diff --git a/src/mesa/drivers/dri/i915/intel_generatemipmap.c b/src/mesa/drivers/dri/i915/intel_generatemipmap.c deleted file mode 120000 index 4c6b37ada01..00000000000 --- a/src/mesa/drivers/dri/i915/intel_generatemipmap.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_generatemipmap.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c index a905455342d..0641e6df9d7 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.c +++ b/src/mesa/drivers/dri/i915/intel_tris.c @@ -1076,7 +1076,9 @@ intelRunPipeline(GLcontext * ctx) intel->NewGLState = 0; } + intel_map_vertex_shader_textures(ctx); _tnl_run_pipeline(ctx); + intel_unmap_vertex_shader_textures(ctx); _mesa_unlock_context_textures(ctx); } diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 6e9a9a29a39..57dcc915869 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -14,7 +14,6 @@ DRIVER_SOURCES = \ intel_decode.c \ intel_extensions.c \ intel_fbo.c \ - intel_generatemipmap.c \ intel_mipmap_tree.c \ intel_regions.c \ intel_screen.c \ diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 3c5b8483197..c300c33adce 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -125,6 +125,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, /* We want the GLSL compiler to emit code that uses condition codes */ ctx->Shader.EmitCondCodes = GL_TRUE; + ctx->Shader.EmitNVTempInitialization = GL_TRUE; ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024); ctx->Const.VertexProgram.MaxAluInstructions = 0; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index a5209ac41be..fa3e32c7ff1 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -705,10 +705,6 @@ void brw_debug_batch(struct intel_context *intel); /*====================================================================== * brw_tex.c */ -void brwUpdateTextureState( struct intel_context *intel ); -void brw_FrameBufferTexInit( struct brw_context *brw, - struct intel_region *region ); -void brw_FrameBufferTexDestroy( struct brw_context *brw ); void brw_validate_textures( struct brw_context *brw ); diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 0b0e6931a06..4be6c77aa1e 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -248,6 +248,9 @@ static void prepare_constant_buffer(struct brw_context *brw) GLuint offset = brw->curbe.vs_start * 16; GLuint nr = brw->vs.prog_data->nr_params / 4; + if (brw->vertex_program->IsNVProgram) + _mesa_load_tracked_matrices(ctx); + /* Updates the ParamaterValues[i] pointers for all parameters of the * basic type of PROGRAM_STATE_VAR. */ diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 78572356a3d..d639656b9d4 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -86,9 +86,6 @@ const struct brw_tracked_state brw_psp_urb_cbs; const struct brw_tracked_state brw_pipe_control; -const struct brw_tracked_state brw_clear_surface_cache; -const struct brw_tracked_state brw_clear_batch_cache; - const struct brw_tracked_state brw_drawing_rect; const struct brw_tracked_state brw_indices; const struct brw_tracked_state brw_vertices; @@ -154,6 +151,7 @@ void brw_state_cache_check_size( struct brw_context *brw ); void brw_init_caches( struct brw_context *brw ); void brw_destroy_caches( struct brw_context *brw ); +void brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo); /*********************************************************************** * brw_state_batch.c @@ -165,7 +163,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, const void *data, GLuint sz ); void brw_destroy_batch_cache( struct brw_context *brw ); -void brw_clear_batch_cache_flush( struct brw_context *brw ); +void brw_clear_batch_cache( struct brw_context *brw ); /* brw_wm_surface_state.c */ dri_bo * diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c index 811940edc05..7821898cf9b 100644 --- a/src/mesa/drivers/dri/i965/brw_state_batch.c +++ b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -79,7 +79,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, return GL_TRUE; } -static void clear_batch_cache( struct brw_context *brw ) +void brw_clear_batch_cache( struct brw_context *brw ) { struct brw_cached_batch_item *item = brw->cached_batch_items; @@ -93,18 +93,7 @@ static void clear_batch_cache( struct brw_context *brw ) brw->cached_batch_items = NULL; } -void brw_clear_batch_cache_flush( struct brw_context *brw ) -{ - clear_batch_cache(brw); - - brw->state.dirty.mesa |= ~0; - brw->state.dirty.brw |= ~0; - brw->state.dirty.cache |= ~0; -} - - - void brw_destroy_batch_cache( struct brw_context *brw ) { - clear_batch_cache(brw); + brw_clear_batch_cache(brw); } diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index e40d7a04164..f8e46aacf71 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -517,6 +517,55 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) brw->state.dirty.cache |= ~0; } +/* Clear all entries from the cache that point to the given bo. + * + * This lets us release memory for reuse earlier for known-dead buffers, + * at the cost of walking the entire hash table. + */ +void +brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo) +{ + struct brw_cache_item **prev; + GLuint i; + + if (INTEL_DEBUG & DEBUG_STATE) + _mesa_printf("%s\n", __FUNCTION__); + + for (i = 0; i < cache->size; i++) { + for (prev = &cache->items[i]; *prev;) { + struct brw_cache_item *c = *prev; + int j; + + for (j = 0; j < c->nr_reloc_bufs; j++) { + if (c->reloc_bufs[j] == bo) + break; + } + + if (j != c->nr_reloc_bufs) { + + *prev = c->next; + + for (j = 0; j < c->nr_reloc_bufs; j++) + dri_bo_unreference(c->reloc_bufs[j]); + dri_bo_unreference(c->bo); + free((void *)c->key); + free(c); + cache->n_items--; + + /* Delete up the tree. Notably we're trying to get from + * a request to delete the surface, to deleting the surface state + * object, to deleting the binding table. We're slack and restart + * the deletion process when we do this because the other delete + * may kill our *prev. + */ + brw_state_cache_bo_delete(cache, c->bo); + prev = &cache->items[i]; + } else { + prev = &(*prev)->next; + } + } + } +} void brw_state_cache_check_size(struct brw_context *brw) diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 414620d0b39..b817b741e77 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -287,6 +287,7 @@ void brw_validate_state( struct brw_context *brw ) if (brw->emit_state_always) { state->mesa |= ~0; state->brw |= ~0; + state->cache |= ~0; } if (brw->fragment_program != ctx->FragmentProgram._Current) { @@ -305,7 +306,7 @@ void brw_validate_state( struct brw_context *brw ) return; if (brw->state.dirty.brw & BRW_NEW_CONTEXT) - brw_clear_batch_cache_flush(brw); + brw_clear_batch_cache(brw); brw->intel.Fallback = 0; diff --git a/src/mesa/drivers/dri/i965/brw_tex.c b/src/mesa/drivers/dri/i965/brw_tex.c index 71bff166dda..e911b105b23 100644 --- a/src/mesa/drivers/dri/i965/brw_tex.c +++ b/src/mesa/drivers/dri/i965/brw_tex.c @@ -39,38 +39,6 @@ #include "intel_tex.h" #include "brw_context.h" - -void brw_FrameBufferTexInit( struct brw_context *brw, - struct intel_region *region ) -{ - struct intel_context *intel = &brw->intel; - GLcontext *ctx = &intel->ctx; - struct gl_texture_object *obj; - struct gl_texture_image *img; - - intel->frame_buffer_texobj = obj = - ctx->Driver.NewTextureObject( ctx, (GLuint) -1, GL_TEXTURE_2D ); - - obj->MinFilter = GL_NEAREST; - obj->MagFilter = GL_NEAREST; - - img = ctx->Driver.NewTextureImage( ctx ); - - _mesa_init_teximage_fields( ctx, GL_TEXTURE_2D, img, - region->pitch, region->height, 1, 0, - region->cpp == 4 ? GL_RGBA : GL_RGB ); - - _mesa_set_tex_image( obj, GL_TEXTURE_2D, 0, img ); -} - -void brw_FrameBufferTexDestroy( struct brw_context *brw ) -{ - if (brw->intel.frame_buffer_texobj != NULL) - brw->intel.ctx.Driver.DeleteTexture( &brw->intel.ctx, - brw->intel.frame_buffer_texobj ); - brw->intel.frame_buffer_texobj = NULL; -} - /** * Finalizes all textures, completing any rendering that needs to be done * to prepare them. diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index e3111c66800..f0c79efbd96 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -90,8 +90,6 @@ static void brw_upload_vs_prog(struct brw_context *brw) struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; - assert (vp && !vp->program.IsNVProgram); - memset(&key, 0, sizeof(key)); /* Just upload the program verbatim for now. Always send it all diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index ac117901515..124fde25fe0 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -69,8 +69,6 @@ static void brw_destroy_context( struct intel_context *intel ) _mesa_free(brw->wm.compile_data); - brw_FrameBufferTexDestroy( brw ); - for (i = 0; i < brw->state.nr_color_regions; i++) intel_region_release(&brw->state.color_regions[i]); brw->state.nr_color_regions = 0; diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 51539ac1e73..9c28a22a298 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -660,7 +660,7 @@ brw_wm_get_binding_table(struct brw_context *brw) if (bind_bo == NULL) { GLuint data_size = brw->wm.nr_surfaces * sizeof(GLuint); - uint32_t *data = malloc(data_size); + uint32_t data[BRW_WM_MAX_SURF]; int i; for (i = 0; i < brw->wm.nr_surfaces; i++) @@ -685,8 +685,6 @@ brw_wm_get_binding_table(struct brw_context *brw) brw->wm.surf_bo[i]); } } - - free(data); } return bind_bo; @@ -724,17 +722,8 @@ static void prepare_wm_surfaces(struct brw_context *brw ) /* _NEW_TEXTURE, BRW_NEW_TEXDATA */ if (texUnit->_ReallyEnabled) { - if (texUnit->_Current == intel->frame_buffer_texobj) { - /* render to texture */ - dri_bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = brw->wm.surf_bo[0]; - dri_bo_reference(brw->wm.surf_bo[surf]); - brw->wm.nr_surfaces = surf + 1; - } else { - /* regular texture */ - brw_update_texture_surface(ctx, i); - brw->wm.nr_surfaces = surf + 1; - } + brw_update_texture_surface(ctx, i); + brw->wm.nr_surfaces = surf + 1; } else { dri_bo_unreference(brw->wm.surf_bo[surf]); brw->wm.surf_bo[surf] = NULL; diff --git a/src/mesa/drivers/dri/i965/intel_generatemipmap.c b/src/mesa/drivers/dri/i965/intel_generatemipmap.c deleted file mode 120000 index 4c6b37ada01..00000000000 --- a/src/mesa/drivers/dri/i965/intel_generatemipmap.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_generatemipmap.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index 43141c509c7..9e114db6c78 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -371,8 +371,6 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) skipBuffers = BUFFER_BIT_STENCIL; } - /* XXX Move this flush/lock into the following conditional? */ - intelFlush(&intel->ctx); LOCK_HARDWARE(intel); intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off); diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index c55c5c426e0..a0225936c81 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -207,8 +207,12 @@ intel_bufferobj_subdata(GLcontext * ctx, if (intel_obj->sys_buffer) memcpy((char *)intel_obj->sys_buffer + offset, data, size); - else + else { + /* Flush any existing batchbuffer that might reference this data. */ + intelFlush(ctx); + dri_bo_subdata(intel_obj->buffer, offset, size, data); + } } @@ -225,7 +229,10 @@ intel_bufferobj_get_subdata(GLcontext * ctx, struct intel_buffer_object *intel_obj = intel_buffer_object(obj); assert(intel_obj); - dri_bo_get_subdata(intel_obj->buffer, offset, size, data); + if (intel_obj->sys_buffer) + memcpy(data, (char *)intel_obj->sys_buffer + offset, size); + else + dri_bo_get_subdata(intel_obj->buffer, offset, size, data); } diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c index fef977f4655..fb62f0f430a 100644 --- a/src/mesa/drivers/dri/intel/intel_clear.c +++ b/src/mesa/drivers/dri/intel/intel_clear.c @@ -173,7 +173,6 @@ intelClear(GLcontext *ctx, GLbitfield mask) } _mesa_meta_Clear(&intel->ctx, tri_mask); - intel_batchbuffer_flush(intel->batch); } if (swrast_mask) { diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 03e7cf39d68..b104096912c 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -254,9 +254,6 @@ struct intel_context intel_line_func draw_line; intel_tri_func draw_tri; - /* These refer to the current drawing buffer: - */ - struct gl_texture_object *frame_buffer_texobj; /** * Set to true if a single constant cliprect should be used in the * batchbuffer. Otherwise, cliprects must be calculated at batchbuffer @@ -296,7 +293,6 @@ struct intel_context GLboolean use_texture_tiling; GLboolean use_early_z; - drm_clip_rect_t fboRect; /**< cliprect for FBO rendering */ int perf_boxes; diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c index 5431cf90a1b..2eb08a8f057 100644 --- a/src/mesa/drivers/dri/intel/intel_extensions.c +++ b/src/mesa/drivers/dri/intel/intel_extensions.c @@ -80,6 +80,9 @@ static const struct dri_extension card_extensions[] = { { "GL_ARB_multitexture", NULL }, { "GL_ARB_point_parameters", GL_ARB_point_parameters_functions }, { "GL_ARB_point_sprite", NULL }, + { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions }, + { "GL_ARB_shading_language_100", GL_VERSION_2_0_functions }, + { "GL_ARB_shading_language_120", GL_VERSION_2_1_functions }, { "GL_ARB_sync", GL_ARB_sync_functions }, { "GL_ARB_texture_border_clamp", NULL }, { "GL_ARB_texture_cube_map", NULL }, @@ -91,6 +94,7 @@ static const struct dri_extension card_extensions[] = { { "GL_ARB_texture_rectangle", NULL }, { "GL_ARB_vertex_array_object", GL_ARB_vertex_array_object_functions}, { "GL_ARB_vertex_program", GL_ARB_vertex_program_functions }, + { "GL_ARB_vertex_shader", GL_ARB_vertex_shader_functions }, { "GL_ARB_window_pos", GL_ARB_window_pos_functions }, { "GL_EXT_blend_color", GL_EXT_blend_color_functions }, { "GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions }, @@ -150,13 +154,9 @@ static const struct dri_extension brw_extensions[] = { { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions }, { "GL_ARB_point_sprite", NULL }, { "GL_ARB_seamless_cube_map", NULL }, - { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions }, - { "GL_ARB_shading_language_100", GL_VERSION_2_0_functions }, - { "GL_ARB_shading_language_120", GL_VERSION_2_1_functions }, { "GL_ARB_shadow", NULL }, { "GL_MESA_texture_signed_rgba", NULL }, { "GL_ARB_texture_non_power_of_two", NULL }, - { "GL_ARB_vertex_shader", GL_ARB_vertex_shader_functions }, { "GL_EXT_shadow_funcs", NULL }, { "GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions }, { "GL_EXT_texture_sRGB", NULL }, @@ -171,6 +171,7 @@ static const struct dri_extension brw_extensions[] = { static const struct dri_extension arb_oq_extensions[] = { + { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions }, { NULL, NULL } }; @@ -182,6 +183,10 @@ static const struct dri_extension ttm_extensions[] = { { NULL, NULL } }; +static const struct dri_extension fragment_shader_extensions[] = { + { "GL_ARB_fragment_shader", NULL }, + { NULL, NULL } +}; /** * Initializes potential list of extensions if ctx == NULL, or actually enables @@ -205,6 +210,14 @@ intelInitExtensions(GLcontext *ctx, GLboolean enable_imaging) driInitExtensions(ctx, brw_extensions, GL_FALSE); if (intel == NULL || IS_915(intel->intelScreen->deviceID) - || IS_945(intel->intelScreen->deviceID)) + || IS_945(intel->intelScreen->deviceID)) { driInitExtensions(ctx, i915_extensions, GL_FALSE); + + if (intel == NULL || driQueryOptionb(&intel->optionCache, "fragment_shader")) + driInitExtensions(ctx, fragment_shader_extensions, GL_FALSE); + + if (intel == NULL || driQueryOptionb(&intel->optionCache, + "stub_occlusion_query")) + driInitExtensions(ctx, arb_oq_extensions, GL_FALSE); + } } diff --git a/src/mesa/drivers/dri/intel/intel_generatemipmap.c b/src/mesa/drivers/dri/intel/intel_generatemipmap.c deleted file mode 100644 index 237754d4695..00000000000 --- a/src/mesa/drivers/dri/intel/intel_generatemipmap.c +++ /dev/null @@ -1,279 +0,0 @@ -/* - * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. - * Copyright © 2009 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Eric Anholt <[email protected]> - * - */ - -#include "main/glheader.h" -#include "main/enums.h" -#include "main/image.h" -#include "main/mtypes.h" -#include "main/macros.h" -#include "main/bufferobj.h" -#include "main/teximage.h" -#include "main/texenv.h" -#include "main/texobj.h" -#include "main/texstate.h" -#include "main/texparam.h" -#include "main/varray.h" -#include "main/attrib.h" -#include "main/enable.h" -#include "main/buffers.h" -#include "main/fbobject.h" -#include "main/framebuffer.h" -#include "main/renderbuffer.h" -#include "main/depth.h" -#include "main/hash.h" -#include "main/mipmap.h" -#include "main/blend.h" -#include "glapi/dispatch.h" -#include "swrast/swrast.h" - -#include "intel_screen.h" -#include "intel_context.h" -#include "intel_batchbuffer.h" -#include "intel_pixel.h" -#include "intel_tex.h" -#include "intel_mipmap_tree.h" - -static const char *intel_fp_tex2d = - "!!ARBfp1.0\n" - "TEX result.color, fragment.texcoord[0], texture[0], 2D;\n" - "END\n"; - -static GLboolean -intel_generate_mipmap_level(GLcontext *ctx, GLuint tex_name, - int level, int width, int height) -{ - struct intel_context *intel = intel_context(ctx); - GLfloat vertices[4][2]; - GLint status; - - /* Set to source from the previous level */ - _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, level - 1); - _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, level - 1); - - /* Set to draw into the current level */ - _mesa_FramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, - GL_COLOR_ATTACHMENT0_EXT, - GL_TEXTURE_2D, - tex_name, - level); - /* Choose to render to the color attachment. */ - _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0_EXT); - - status = _mesa_CheckFramebufferStatusEXT (GL_FRAMEBUFFER_EXT); - if (status != GL_FRAMEBUFFER_COMPLETE_EXT) - return GL_FALSE; - - meta_set_passthrough_transform(&intel->meta); - - /* XXX: Doing it right would involve setting up the transformation to do - * 0-1 mapping or something, and not changing the vertex data. - */ - vertices[0][0] = 0; - vertices[0][1] = 0; - vertices[1][0] = width; - vertices[1][1] = 0; - vertices[2][0] = width; - vertices[2][1] = height; - vertices[3][0] = 0; - vertices[3][1] = height; - - _mesa_VertexPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &vertices); - _mesa_Enable(GL_VERTEX_ARRAY); - meta_set_default_texrect(&intel->meta); - - _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); - - meta_restore_texcoords(&intel->meta); - meta_restore_transform(&intel->meta); - - return GL_TRUE; -} - -static GLboolean -intel_generate_mipmap_2d(GLcontext *ctx, - GLenum target, - struct gl_texture_object *texObj) -{ - struct intel_context *intel = intel_context(ctx); - GLint old_active_texture; - int level, max_levels, start_level, end_level; - GLuint fb_name; - GLboolean success = GL_FALSE; - struct gl_framebuffer *saved_fbo = NULL; - - _mesa_PushAttrib(GL_ENABLE_BIT | GL_TEXTURE_BIT | - GL_CURRENT_BIT | GL_COLOR_BUFFER_BIT | - GL_DEPTH_BUFFER_BIT); - _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT); - old_active_texture = ctx->Texture.CurrentUnit; - _mesa_reference_framebuffer(&saved_fbo, ctx->DrawBuffer); - - _mesa_Disable(GL_POLYGON_STIPPLE); - _mesa_Disable(GL_DEPTH_TEST); - _mesa_Disable(GL_STENCIL_TEST); - _mesa_ColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); - _mesa_DepthMask(GL_FALSE); - - /* Bind the given texture to GL_TEXTURE_2D with linear filtering for our - * minification. - */ - _mesa_ActiveTextureARB(GL_TEXTURE0_ARB); - _mesa_Enable(GL_TEXTURE_2D); - _mesa_BindTexture(GL_TEXTURE_2D, texObj->Name); - _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, - GL_LINEAR_MIPMAP_NEAREST); - _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - - /* Bind the new renderbuffer to the color attachment point. */ - _mesa_GenFramebuffersEXT(1, &fb_name); - _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, fb_name); - - meta_set_fragment_program(&intel->meta, &intel->meta.tex2d_fp, - intel_fp_tex2d); - meta_set_passthrough_vertex_program(&intel->meta); - - max_levels = _mesa_max_texture_levels(ctx, texObj->Target); - start_level = texObj->BaseLevel; - end_level = texObj->MaxLevel; - - /* Loop generating level+1 from level. */ - for (level = start_level; level < end_level && level < max_levels - 1; level++) { - const struct gl_texture_image *srcImage; - int width, height; - - srcImage = _mesa_select_tex_image(ctx, texObj, target, level); - if (srcImage->Border != 0) - goto fail; - - width = srcImage->Width / 2; - if (width < 1) - width = 1; - height = srcImage->Height / 2; - if (height < 1) - height = 1; - - if (width == srcImage->Width && - height == srcImage->Height) { - /* Neither _mesa_max_texture_levels nor texObj->MaxLevel are the - * maximum texture level for the object, so break out when we've gone - * over the edge. - */ - break; - } - - /* Make sure that there's space allocated for the target level. - * We could skip this if there's already space allocated and save some - * time. - */ - _mesa_TexImage2D(GL_TEXTURE_2D, level + 1, srcImage->InternalFormat, - width, height, 0, - GL_RGBA, GL_UNSIGNED_INT, NULL); - - if (!intel_generate_mipmap_level(ctx, texObj->Name, level + 1, - width, height)) - goto fail; - } - - success = GL_TRUE; - -fail: - meta_restore_fragment_program(&intel->meta); - meta_restore_vertex_program(&intel->meta); - - _mesa_DeleteFramebuffersEXT(1, &fb_name); - _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture); - if (saved_fbo) - _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, saved_fbo->Name); - _mesa_reference_framebuffer(&saved_fbo, NULL); - _mesa_PopClientAttrib(); - _mesa_PopAttrib(); - - return success; -} - - -/** - * Generate new mipmap data from BASE+1 to BASE+p (the minimally-sized mipmap - * level). - * - * The texture object's miptree must be mapped. - * - * This function should also include an accelerated path. - */ -void -intel_generate_mipmap(GLcontext *ctx, GLenum target, - struct gl_texture_object *texObj) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_texture_object *intelObj = intel_texture_object(texObj); - GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; - int face, i; - - /* HW path */ - if (target == GL_TEXTURE_2D && - ctx->Extensions.EXT_framebuffer_object && - ctx->Extensions.ARB_fragment_program && - ctx->Extensions.ARB_vertex_program) { - GLboolean success; - - /* We'll be accessing this texture using GL entrypoints, which should - * be resilient against other access to this texture. - */ - _mesa_unlock_texture(ctx, texObj); - success = intel_generate_mipmap_2d(ctx, target, texObj); - _mesa_lock_texture(ctx, texObj); - - if (success) - return; - } - - /* SW path */ - intel_tex_map_level_images(intel, intelObj, texObj->BaseLevel); - _mesa_generate_mipmap(ctx, target, texObj); - intel_tex_unmap_level_images(intel, intelObj, texObj->BaseLevel); - - /* Update the level information in our private data in the new images, since - * it didn't get set as part of a normal TexImage path. - */ - for (face = 0; face < nr_faces; face++) { - for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) { - struct intel_texture_image *intelImage; - - intelImage = intel_texture_image(texObj->Image[face][i]); - if (intelImage == NULL) - break; - - intelImage->level = i; - intelImage->face = face; - /* Unreference the miptree to signal that the new Data is a bare - * pointer from mesa. - */ - intel_miptree_release(intel, &intelImage->mt); - } - } -} diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index c985da5aa25..4f5101a3128 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -29,6 +29,9 @@ #include "intel_mipmap_tree.h" #include "intel_regions.h" #include "intel_chipset.h" +#ifndef I915 +#include "brw_state.h" +#endif #include "main/enums.h" #define FILE_DEBUG_FLAG DEBUG_MIPTREE @@ -269,6 +272,19 @@ intel_miptree_release(struct intel_context *intel, DBG("%s deleting %p\n", __FUNCTION__, *mt); +#ifndef I915 + /* Free up cached binding tables holding a reference on our buffer, to + * avoid excessive memory consumption. + * + * This isn't as aggressive as we could be, as we'd like to do + * it from any time we free the last ref on a region. But intel_region.c + * is context-agnostic. Perhaps our constant state cache should be, as + * well. + */ + brw_state_cache_bo_delete(&brw_context(&intel->ctx)->surface_cache, + (*mt)->region->buffer); +#endif + intel_region_release(&((*mt)->region)); for (i = 0; i < MAX_TEXTURE_LEVELS; i++) diff --git a/src/mesa/drivers/dri/intel/intel_pixel.c b/src/mesa/drivers/dri/intel/intel_pixel.c index a3001416559..993e427a992 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel.c +++ b/src/mesa/drivers/dri/intel/intel_pixel.c @@ -129,20 +129,6 @@ intel_check_blit_fragment_ops(GLcontext * ctx, GLboolean src_alpha_is_one) return GL_TRUE; } - -GLboolean -intel_check_meta_tex_fragment_ops(GLcontext * ctx) -{ - if (ctx->NewState) - _mesa_update_state(ctx); - - /* Some of _ImageTransferState (scale, bias) could be done with - * fragment programs on i915. - */ - return !(ctx->_ImageTransferState || ctx->Fog.Enabled || /* not done yet */ - ctx->Texture._EnabledUnits || ctx->FragmentProgram._Enabled); -} - /* The intel_region struct doesn't really do enough to capture the * format of the pixels in the region. For now this code assumes that * the region is a display surface and hence is either ARGB8888 or diff --git a/src/mesa/drivers/dri/intel/intel_pixel.h b/src/mesa/drivers/dri/intel/intel_pixel.h index 96a6dd17b25..743b6497c52 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel.h +++ b/src/mesa/drivers/dri/intel/intel_pixel.h @@ -34,8 +34,6 @@ void intelInitPixelFuncs(struct dd_function_table *functions); GLboolean intel_check_blit_fragment_ops(GLcontext * ctx, GLboolean src_alpha_is_one); -GLboolean intel_check_meta_tex_fragment_ops(GLcontext * ctx); - GLboolean intel_check_blit_format(struct intel_region *region, GLenum format, GLenum type); diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 1b8c56e68d6..24f7fbc9922 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -79,6 +79,10 @@ PUBLIC const char __driConfigOptions[] = DRI_CONF_DESC(en, "Enable early Z in classic mode (unstable, 945-only).") DRI_CONF_OPT_END + DRI_CONF_OPT_BEGIN(fragment_shader, bool, false) + DRI_CONF_DESC(en, "Enable limited ARB_fragment_shader support on 915/945.") + DRI_CONF_OPT_END + DRI_CONF_SECTION_END DRI_CONF_SECTION_QUALITY DRI_CONF_FORCE_S3TC_ENABLE(false) @@ -88,10 +92,14 @@ PUBLIC const char __driConfigOptions[] = DRI_CONF_NO_RAST(false) DRI_CONF_ALWAYS_FLUSH_BATCH(false) DRI_CONF_ALWAYS_FLUSH_CACHE(false) + + DRI_CONF_OPT_BEGIN(stub_occlusion_query, bool, false) + DRI_CONF_DESC(en, "Enable stub ARB_occlusion_query support on 915/945.") + DRI_CONF_OPT_END DRI_CONF_SECTION_END DRI_CONF_END; -const GLuint __driNConfigOptions = 10; +const GLuint __driNConfigOptions = 12; #ifdef USE_NEW_INTERFACE static PFNGLXCREATECONTEXTMODES create_context_modes = NULL; diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index 28eabbc0054..dcfcad1d952 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -558,6 +558,43 @@ intelInitSpanFuncs(GLcontext * ctx) swdd->SpanRenderFinish = intelSpanRenderFinish; } +void +intel_map_vertex_shader_textures(GLcontext *ctx) +{ + struct intel_context *intel = intel_context(ctx); + int i; + + if (ctx->VertexProgram._Current == NULL) + return; + + for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled && + ctx->VertexProgram._Current->Base.TexturesUsed[i] != 0) { + struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current; + + intel_tex_map_images(intel, intel_texture_object(texObj)); + } + } +} + +void +intel_unmap_vertex_shader_textures(GLcontext *ctx) +{ + struct intel_context *intel = intel_context(ctx); + int i; + + if (ctx->VertexProgram._Current == NULL) + return; + + for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled && + ctx->VertexProgram._Current->Base.TexturesUsed[i] != 0) { + struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current; + + intel_tex_unmap_images(intel, intel_texture_object(texObj)); + } + } +} /** * Plug in appropriate span read/write functions for the given renderbuffer. diff --git a/src/mesa/drivers/dri/intel/intel_span.h b/src/mesa/drivers/dri/intel/intel_span.h index acbeb4abe1c..bffe109aa5b 100644 --- a/src/mesa/drivers/dri/intel/intel_span.h +++ b/src/mesa/drivers/dri/intel/intel_span.h @@ -36,5 +36,7 @@ void intel_renderbuffer_map(struct intel_context *intel, struct gl_renderbuffer *rb); void intel_renderbuffer_unmap(struct intel_context *intel, struct gl_renderbuffer *rb); +void intel_map_vertex_shader_textures(GLcontext *ctx); +void intel_unmap_vertex_shader_textures(GLcontext *ctx); #endif diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c index df63f29a42c..3cbc379dbd3 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.c +++ b/src/mesa/drivers/dri/intel/intel_tex.c @@ -2,6 +2,7 @@ #include "main/texobj.h" #include "main/teximage.h" #include "main/mipmap.h" +#include "drivers/common/meta.h" #include "intel_context.h" #include "intel_mipmap_tree.h" #include "intel_tex.h" @@ -158,11 +159,36 @@ timed_memcpy(void *dest, const void *src, size_t n) } #endif /* DO_DEBUG */ + +/** + * Called via ctx->Driver.GenerateMipmap() + * This is basically a wrapper for _mesa_meta_GenerateMipmap() which checks + * if we'll be using software mipmap generation. In that case, we need to + * map/unmap the base level texture image. + */ +static void +intelGenerateMipmap(GLcontext *ctx, GLenum target, + struct gl_texture_object *texObj) +{ + if (_mesa_meta_check_generate_mipmap_fallback(ctx, target, texObj)) { + /* sw path: need to map texture images */ + struct intel_context *intel = intel_context(ctx); + struct intel_texture_object *intelObj = intel_texture_object(texObj); + intel_tex_map_level_images(intel, intelObj, texObj->BaseLevel); + _mesa_generate_mipmap(ctx, target, texObj); + intel_tex_unmap_level_images(intel, intelObj, texObj->BaseLevel); + } + else { + _mesa_meta_GenerateMipmap(ctx, target, texObj); + } +} + + void intelInitTextureFuncs(struct dd_function_table *functions) { functions->ChooseTextureFormat = intelChooseTextureFormat; - functions->GenerateMipmap = intel_generate_mipmap; + functions->GenerateMipmap = intelGenerateMipmap; functions->NewTextureObject = intelNewTextureObject; functions->NewTextureImage = intelNewTextureImage; diff --git a/src/mesa/drivers/dri/intel/intel_tex.h b/src/mesa/drivers/dri/intel/intel_tex.h index 471aa2a240b..57ed0b1aabd 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.h +++ b/src/mesa/drivers/dri/intel/intel_tex.h @@ -71,7 +71,4 @@ void intel_tex_unmap_images(struct intel_context *intel, int intel_compressed_num_bytes(GLuint mesaFormat); -void intel_generate_mipmap(GLcontext *ctx, GLenum target, - struct gl_texture_object *texObj); - #endif diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index ac557a92005..8f467e65144 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -75,6 +75,7 @@ get_teximage_source(struct intel_context *intel, GLenum internalFormat) case GL_RGBA: case GL_RGBA8: case GL_RGB: + case GL_RGB8: return intel_readbuf_region(intel); default: return NULL; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c index 7f8a0174595..39618b78dc6 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c @@ -100,8 +100,8 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler, { int i; + fp->wpos_attr = FRAG_ATTRIB_MAX; if (!(compiler->Base.Program.InputsRead & FRAG_BIT_WPOS)) { - fp->wpos_attr = FRAG_ATTRIB_MAX; return; } @@ -113,6 +113,13 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler, } } + /* No free texcoord found, fall-back to software rendering */ + if (fp->wpos_attr == FRAG_ATTRIB_MAX) + { + compiler->Base.Error = 1; + return; + } + rc_transform_fragment_wpos(&compiler->Base, FRAG_ATTRIB_WPOS, fp->wpos_attr); } @@ -128,8 +135,8 @@ static void rewriteFog(struct r300_fragment_program_compiler *compiler, struct r struct rc_src_register src; int i; + fp->fog_attr = FRAG_ATTRIB_MAX; if (!(compiler->Base.Program.InputsRead & FRAG_BIT_FOGC)) { - fp->fog_attr = FRAG_ATTRIB_MAX; return; } @@ -141,6 +148,13 @@ static void rewriteFog(struct r300_fragment_program_compiler *compiler, struct r } } + /* No free texcoord found, fall-back to software rendering */ + if (fp->fog_attr == FRAG_ATTRIB_MAX) + { + compiler->Base.Error = 1; + return; + } + memset(&src, 0, sizeof(src)); src.File = RC_FILE_INPUT; src.Index = fp->fog_attr; diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index b5ddfdc9f82..3cd38753b8a 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -475,7 +475,7 @@ void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode) /* update only if we have disabled all tcl fallbacks */ if (rmesa->options.hw_tcl_enabled) { - if ((old_fallback & R300_RASTER_FALLBACK_MASK) == bit) { + if ((old_fallback & R300_TCL_FALLBACK_MASK) == bit) { R300_STATECHANGE(rmesa, vap_cntl_status); rmesa->hw.vap_cntl_status.cmd[1] &= ~R300_VAP_TCL_BYPASS; } diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index 8ad75fb5c06..fb8d6bceda8 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -43,6 +43,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "compiler/radeon_compiler.h" #include "radeon_mesa_to_rc.h" #include "r300_context.h" +#include "r300_fragprog_common.h" #include "r300_state.h" /** @@ -298,6 +299,20 @@ struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx) struct r300_vertex_program *vp; vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; + + if (!r300->selected_fp) { + /* This can happen when GetProgramiv is called to check + * whether the program runs natively. + * + * To be honest, this is not a very good solution, + * but solving the problem of reporting good values + * for those queries is tough anyway considering that + * we recompile vertex programs based on the precise + * fragment program that is in use. + */ + r300SelectAndTranslateFragmentShader(ctx); + } + wanted_key.FpReads = r300->selected_fp->InputsRead; wanted_key.FogAttr = r300->selected_fp->fog_attr; wanted_key.WPosAttr = r300->selected_fp->wpos_attr; diff --git a/src/mesa/drivers/dri/r600/Makefile b/src/mesa/drivers/dri/r600/Makefile index 36bf773c054..7d5a7b1ab6f 100644 --- a/src/mesa/drivers/dri/r600/Makefile +++ b/src/mesa/drivers/dri/r600/Makefile @@ -29,6 +29,7 @@ COMMON_SOURCES = \ RADEON_COMMON_SOURCES = \ radeon_bo_legacy.c \ radeon_common_context.c \ + radeon_buffer_objects.c \ radeon_common.c \ radeon_cs_legacy.c \ radeon_dma.c \ diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index f8fd9c13d77..969144ba123 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -59,6 +59,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_debug.h" #include "r600_context.h" #include "radeon_common_context.h" +#include "radeon_buffer_objects.h" #include "radeon_span.h" #include "r600_cmdbuf.h" #include "r600_emit.h" @@ -257,6 +258,7 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, r600InitTextureFuncs(&functions); r700InitShaderFuncs(&functions); r700InitIoctlFuncs(&functions); + radeonInitBufferObjectFuncs(&functions); if (!radeonInitContext(&r600->radeon, &functions, glVisual, driContextPriv, @@ -284,8 +286,8 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, ctx->Const.MaxTextureMaxAnisotropy = 16.0; ctx->Const.MaxTextureLodBias = 16.0; - ctx->Const.MaxTextureLevels = 13; - ctx->Const.MaxTextureRectSize = 4096; + ctx->Const.MaxTextureLevels = 13; /* hw support 14 */ + ctx->Const.MaxTextureRectSize = 4096; /* hw support 8192 */ ctx->Const.MinPointSize = 0x0001 / 8.0; ctx->Const.MinPointSizeAA = 0x0001 / 8.0; @@ -330,26 +332,27 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, _tnl_allow_pixel_fog(ctx, GL_FALSE); _tnl_allow_vertex_fog(ctx, GL_TRUE); - /* currently bogus data */ - ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; - ctx->Const.VertexProgram.MaxNativeInstructions = - VSF_MAX_FRAGMENT_LENGTH / 4; - ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */ - ctx->Const.VertexProgram.MaxTemps = 32; - ctx->Const.VertexProgram.MaxNativeTemps = - /*VSF_MAX_FRAGMENT_TEMPS */ 32; - ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */ - ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; - - ctx->Const.FragmentProgram.MaxNativeTemps = PFS_NUM_TEMP_REGS; - ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */ - ctx->Const.FragmentProgram.MaxNativeParameters = PFS_NUM_CONST_REGS; - ctx->Const.FragmentProgram.MaxNativeAluInstructions = PFS_MAX_ALU_INST; - ctx->Const.FragmentProgram.MaxNativeTexInstructions = PFS_MAX_TEX_INST; - ctx->Const.FragmentProgram.MaxNativeInstructions = - PFS_MAX_ALU_INST + PFS_MAX_TEX_INST; - ctx->Const.FragmentProgram.MaxNativeTexIndirections = - PFS_MAX_TEX_INDIRECT; + /* 256 for reg-based consts, inline consts also supported */ + ctx->Const.VertexProgram.MaxInstructions = 8192; /* in theory no limit */ + ctx->Const.VertexProgram.MaxNativeInstructions = 8192; + ctx->Const.VertexProgram.MaxNativeAttribs = 160; + ctx->Const.VertexProgram.MaxTemps = 128; + ctx->Const.VertexProgram.MaxNativeTemps = 128; + ctx->Const.VertexProgram.MaxNativeParameters = 256; + ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; /* ??? */ + + ctx->Const.FragmentProgram.MaxNativeTemps = 128; + ctx->Const.FragmentProgram.MaxNativeAttribs = 32; + ctx->Const.FragmentProgram.MaxNativeParameters = 256; + ctx->Const.FragmentProgram.MaxNativeAluInstructions = 8192; + /* 8 per clause on r6xx, 16 on rv670/r7xx */ + if ((screen->chip_family == CHIP_FAMILY_RV670) || + (screen->chip_family >= CHIP_FAMILY_RV770)) + ctx->Const.FragmentProgram.MaxNativeTexInstructions = 16; + else + ctx->Const.FragmentProgram.MaxNativeTexInstructions = 8; + ctx->Const.FragmentProgram.MaxNativeInstructions = 8192; + ctx->Const.FragmentProgram.MaxNativeTexIndirections = 8; /* ??? */ ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* and these are?? */ ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; @@ -374,6 +377,8 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); } + r700InitDraw(ctx); + radeon_fbo_init(&r600->radeon); radeonInitSpanFuncs( ctx ); diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h index c59df7505af..7f68820fda1 100644 --- a/src/mesa/drivers/dri/r600/r600_context.h +++ b/src/mesa/drivers/dri/r600/r600_context.h @@ -86,29 +86,10 @@ extern int hw_tcl_on; #include "tnl_dd/t_dd_vertex.h" #undef TAG -#define PFS_MAX_ALU_INST 64 -#define PFS_MAX_TEX_INST 64 -#define PFS_MAX_TEX_INDIRECT 4 -#define PFS_NUM_TEMP_REGS 32 -#define PFS_NUM_CONST_REGS 16 - -#define R600_MAX_AOS_ARRAYS 16 - -#define REG_COORDS 0 -#define REG_COLOR0 1 -#define REG_TEX0 2 - #define R600_FALLBACK_NONE 0 #define R600_FALLBACK_TCL 1 #define R600_FALLBACK_RAST 2 -enum -{ - NO_SHIFT = 0, - LEFT_SHIFT = 1, - RIGHT_SHIFT = 2, -}; - struct r600_hw_state { struct radeon_state_atom sq; struct radeon_state_atom db; @@ -145,6 +126,32 @@ struct r600_hw_state { struct radeon_state_atom tx_brdr_clr; }; +typedef struct StreamDesc +{ + GLint size; //number of data element + GLenum type; //data element type + GLsizei stride; + + struct radeon_bo *bo; + GLint bo_offset; + + GLuint dwords; + GLuint dst_loc; + GLuint _signed; + GLboolean normalize; + GLboolean is_named_bo; + GLubyte element; +} StreamDesc; + +typedef struct r700_index_buffer +{ + struct radeon_bo *bo; + int bo_offset; + + GLboolean is_32bit; + GLuint count; +} r700_index_buffer; + /** * \brief R600 context structure. */ @@ -163,6 +170,9 @@ struct r600_context { GLvector4f dummy_attrib[_TNL_ATTRIB_MAX]; GLvector4f *temp_attrib[_TNL_ATTRIB_MAX]; + GLint nNumActiveAos; + StreamDesc stream_desc[VERT_ATTRIB_MAX]; + struct r700_index_buffer ind_buf; }; #define R700_CONTEXT(ctx) ((context_t *)(ctx->DriverCtx)) @@ -196,6 +206,7 @@ extern GLboolean r700SyncSurf(context_t *context, extern void r700SetupStreams(GLcontext * ctx); extern void r700Start3D(context_t *context); extern void r600InitAtoms(context_t *context); +extern void r700InitDraw(GLcontext *ctx); #define RADEON_D_CAPTURE 0 #define RADEON_D_PLAYBACK 1 diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index efeccb25f1e..903b6968be1 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -213,7 +213,7 @@ GLboolean is_reduction_opcode(PVSDWORD* dest) { if (dest->dst.op3 == 0) { - if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE) ) + if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE || dest->dst.opcode == SQ_OP2_INST_CUBE) ) { return GL_TRUE; } @@ -350,6 +350,7 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) case SQ_OP2_INST_PRED_SETNE: case SQ_OP2_INST_DOT4: case SQ_OP2_INST_DOT4_IEEE: + case SQ_OP2_INST_CUBE: return 2; case SQ_OP2_INST_MOV: @@ -469,6 +470,9 @@ int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700 pAsm->number_of_inputs = 0; + pAsm->is_tex = GL_FALSE; + pAsm->need_tex_barrier = GL_FALSE; + return 0; } @@ -682,7 +686,7 @@ GLboolean add_tex_instruction(r700_AssemblerBase* pAsm, // If this clause constains any TEX instruction that is dependent on a previous instruction, // set the barrier bit - if( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) ) + if( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) || pAsm->need_tex_barrier == GL_TRUE ) { pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x1; } @@ -786,6 +790,133 @@ GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm, return GL_TRUE; } +GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm, + GLuint destination_register, + GLenum type, + GLint size, + GLubyte element, + GLuint _signed, + GLboolean normalize, + VTX_FETCH_METHOD * pFetchMethod) +{ + GLuint client_size_inbyte; + GLuint data_format; + GLuint mega_fetch_count; + GLuint is_mega_fetch_flag; + + R700VertexGenericFetch* vfetch_instruction_ptr; + R700VertexGenericFetch* assembled_vfetch_instruction_ptr + = pAsm->vfetch_instruction_ptr_array[element]; + + if (assembled_vfetch_instruction_ptr == NULL) + { + vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch); + if (vfetch_instruction_ptr == NULL) + { + return GL_FALSE; + } + Init_R700VertexGenericFetch(vfetch_instruction_ptr); + } + else + { + vfetch_instruction_ptr = assembled_vfetch_instruction_ptr; + } + + data_format = GetSurfaceFormat(type, size, &client_size_inbyte); + + if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here + { + //TODO : mini fetch + } + else + { + mega_fetch_count = MEGA_FETCH_BYTES - 1; + is_mega_fetch_flag = 0x1; + pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte; + } + + vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH; + vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA; + vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0; + + vfetch_instruction_ptr->m_Word0.f.buffer_id = element; + vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0; + vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE; + vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X; + vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count; + + vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X; + vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y; + vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z; + vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W; + + vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1; + vfetch_instruction_ptr->m_Word1.f.data_format = data_format; + vfetch_instruction_ptr->m_Word2.f.endian_swap = SQ_ENDIAN_NONE; + + if(1 == _signed) + { + vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_SIGNED; + } + else + { + vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_UNSIGNED; + } + + if(GL_TRUE == normalize) + { + vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_NORM; + } + else + { + vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_INT; + } + + // Destination register + vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register; + vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE; + + vfetch_instruction_ptr->m_Word2.f.offset = 0; + vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0; + + vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag; + + if (assembled_vfetch_instruction_ptr == NULL) + { + if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) ) + { + return GL_FALSE; + } + + if (pAsm->vfetch_instruction_ptr_array[element] != NULL) + { + return GL_FALSE; + } + else + { + pAsm->vfetch_instruction_ptr_array[element] = vfetch_instruction_ptr; + } + } + + return GL_TRUE; +} + +GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm) +{ + GLint i; + pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE; + pAsm->cf_current_vtx_clause_ptr = NULL; + + for (i=0; i<VERT_ATTRIB_MAX; i++) + { + pAsm->vfetch_instruction_ptr_array[ i ] = NULL; + } + + cleanup_vfetch_shaderinst(pAsm->pR700Shader); + + return GL_TRUE; +} + GLuint gethelpr(r700_AssemblerBase* pAsm) { GLuint r = pAsm->uHelpReg; @@ -1152,42 +1283,48 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) GLboolean bValidTexCoord = GL_FALSE; + if(pAsm->aArgSubst[1] >= 0) + { + bValidTexCoord = GL_TRUE; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = pAsm->aArgSubst[1]; + } + else + { switch (pILInst->SrcReg[0].File) { - case PROGRAM_CONSTANT: - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_STATE_VAR: - bValidTexCoord = GL_TRUE; - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[0].src.reg = pAsm->aArgSubst[1]; - break; - case PROGRAM_TEMPORARY: - bValidTexCoord = GL_TRUE; - pAsm->S[0].src.reg = pILInst->SrcReg[0].Index + - pAsm->starting_temp_register_number; - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - break; - case PROGRAM_INPUT: - switch (pILInst->SrcReg[0].Index) - { - case FRAG_ATTRIB_COL0: - case FRAG_ATTRIB_COL1: - case FRAG_ATTRIB_TEX0: - case FRAG_ATTRIB_TEX1: - case FRAG_ATTRIB_TEX2: - case FRAG_ATTRIB_TEX3: - case FRAG_ATTRIB_TEX4: - case FRAG_ATTRIB_TEX5: - case FRAG_ATTRIB_TEX6: - case FRAG_ATTRIB_TEX7: - bValidTexCoord = GL_TRUE; - pAsm->S[0].src.reg = - pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; - pAsm->S[0].src.rtype = SRC_REG_INPUT; - break; - } - break; + case PROGRAM_CONSTANT: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_STATE_VAR: + break; + case PROGRAM_TEMPORARY: + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = pILInst->SrcReg[0].Index + + pAsm->starting_temp_register_number; + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + break; + case PROGRAM_INPUT: + switch (pILInst->SrcReg[0].Index) + { + case FRAG_ATTRIB_COL0: + case FRAG_ATTRIB_COL1: + case FRAG_ATTRIB_TEX0: + case FRAG_ATTRIB_TEX1: + case FRAG_ATTRIB_TEX2: + case FRAG_ATTRIB_TEX3: + case FRAG_ATTRIB_TEX4: + case FRAG_ATTRIB_TEX5: + case FRAG_ATTRIB_TEX6: + case FRAG_ATTRIB_TEX7: + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = + pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; + pAsm->S[0].src.rtype = SRC_REG_INPUT; + break; + } + break; + } } if(GL_TRUE == bValidTexCoord) @@ -1941,9 +2078,9 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) GLuint contiguous_slots_needed; GLuint uNumSrc = r700GetNumOperands(pAsm); - GLuint channel_swizzle, j; - GLuint chan_counter[4] = {0, 0, 0, 0}; - PVSSRC * pSource[3]; + //GLuint channel_swizzle, j; + //GLuint chan_counter[4] = {0, 0, 0, 0}; + //PVSSRC * pSource[3]; GLboolean bSplitInst = GL_FALSE; if (1 == pAsm->D.dst.math) @@ -1955,7 +2092,9 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) { is_single_scalar_operation = GL_FALSE; number_of_scalar_operations = 4; - + +/* current assembler doesn't do more than 1 register per source */ +#if 0 /* check read port, only very preliminary algorithm, not count in src0/1 same comp case and prev slot repeat case; also not count relative addressing. TODO: improve performance. */ @@ -1990,6 +2129,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) { bSplitInst = GL_TRUE; } +#endif } contiguous_slots_needed = 0; @@ -2024,7 +2164,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) return GL_FALSE; } - if (pAsm->D.dst.math == 0) + if (uNumSrc > 1) { // Process source 1 current_source_index = 1; @@ -2210,9 +2350,7 @@ GLboolean next_ins(r700_AssemblerBase *pAsm) { struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); - if( GL_TRUE == IsTex(pILInst->Opcode) && - /* handle const moves to temp register */ - !(pAsm->D.dst.opcode == SQ_OP2_INST_MOV) ) + if( GL_TRUE == pAsm->is_tex ) { if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX) { if( GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE) ) @@ -2256,7 +2394,8 @@ GLboolean next_ins(r700_AssemblerBase *pAsm) pAsm->S[0].bits = 0; pAsm->S[1].bits = 0; pAsm->S[2].bits = 0; - + pAsm->is_tex = GL_FALSE; + pAsm->need_tex_barrier = GL_FALSE; return GL_TRUE; } @@ -2880,6 +3019,11 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) return GL_FALSE; } + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + /* dst.y = max(src.x, 0.0) */ pAsm->D.dst.opcode = SQ_OP2_INST_MAX; pAsm->D.dst.rtype = dstType; @@ -2891,11 +3035,6 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) pAsm->S[0].src.rtype = srcType; pAsm->S[0].src.reg = srcReg; setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - noneg_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[0].src.swizzlex = SQ_SEL_X; - pAsm->S[0].src.swizzley = SQ_SEL_X; - pAsm->S[0].src.swizzlez = SQ_SEL_X; - pAsm->S[0].src.swizzlew = SQ_SEL_X; pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; pAsm->S[1].src.reg = tmp; setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); @@ -2909,34 +3048,47 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) return GL_FALSE; } - /* before: dst.w = log(src.y) - * after : dst.x = log(src.y) - * why change dest register is that dst.w has been initialized as 1 before - */ + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y); + + /* dst.z = log(src.y) */ pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED; pAsm->D.dst.math = 1; pAsm->D.dst.rtype = dstType; pAsm->D.dst.reg = dstReg; - pAsm->D.dst.writex = 1; + pAsm->D.dst.writex = 0; pAsm->D.dst.writey = 0; - pAsm->D.dst.writez = 0; + pAsm->D.dst.writez = 1; pAsm->D.dst.writew = 0; pAsm->S[0].src.rtype = srcType; pAsm->S[0].src.reg = srcReg; setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - noneg_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[0].src.swizzlex = SQ_SEL_Y; - pAsm->S[0].src.swizzley = SQ_SEL_Y; - pAsm->S[0].src.swizzlez = SQ_SEL_Y; - pAsm->S[0].src.swizzlew = SQ_SEL_Y; if( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } - /* before: tmp.x = amd MUL_LIT(src.w, dst.w, src.x ) */ - /* after : tmp.x = amd MUL_LIT(src.w, dst.x, src.x ) */ + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, 2) ) + { + return GL_FALSE; + } + + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W); + + swizzleagain_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X); + + /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT; + pAsm->D.dst.math = 1; pAsm->D.dst.op3 = 1; pAsm->D.dst.rtype = DST_REG_TEMPORARY; pAsm->D.dst.reg = tmp; @@ -2948,29 +3100,19 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) pAsm->S[0].src.rtype = srcType; pAsm->S[0].src.reg = srcReg; setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - noneg_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[0].src.swizzlex = SQ_SEL_W; - pAsm->S[0].src.swizzley = SQ_SEL_W; - pAsm->S[0].src.swizzlez = SQ_SEL_W; - pAsm->S[0].src.swizzlew = SQ_SEL_W; pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; pAsm->S[1].src.reg = dstReg; setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); noneg_PVSSRC(&(pAsm->S[1].src)); - pAsm->S[1].src.swizzlex = SQ_SEL_X; - pAsm->S[1].src.swizzley = SQ_SEL_X; - pAsm->S[1].src.swizzlez = SQ_SEL_X; - pAsm->S[1].src.swizzlew = SQ_SEL_X; + pAsm->S[1].src.swizzlex = SQ_SEL_Z; + pAsm->S[1].src.swizzley = SQ_SEL_Z; + pAsm->S[1].src.swizzlez = SQ_SEL_Z; + pAsm->S[1].src.swizzlew = SQ_SEL_Z; pAsm->S[2].src.rtype = srcType; pAsm->S[2].src.reg = srcReg; setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE); - noneg_PVSSRC(&(pAsm->S[2].src)); - pAsm->S[2].src.swizzlex = SQ_SEL_X; - pAsm->S[2].src.swizzley = SQ_SEL_X; - pAsm->S[2].src.swizzlez = SQ_SEL_X; - pAsm->S[2].src.swizzlew = SQ_SEL_X; if( GL_FALSE == next_ins(pAsm) ) { @@ -3376,7 +3518,10 @@ GLboolean assemble_STP(r700_AssemblerBase *pAsm) GLboolean assemble_TEX(r700_AssemblerBase *pAsm) { GLboolean src_const; + GLboolean need_barrier = GL_FALSE; + checkop1(pAsm); + switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File) { case PROGRAM_CONSTANT: @@ -3396,20 +3541,18 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) { if ( GL_FALSE == mov_temp(pAsm, 0) ) return GL_FALSE; + need_barrier = GL_TRUE; } switch (pAsm->pILInst[pAsm->uiCurInst].Opcode) { case OPCODE_TEX: - pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; break; case OPCODE_TXB: radeon_error("do not support TXB yet\n"); return GL_FALSE; break; case OPCODE_TXP: - /* TODO : tex proj version : divid first 3 components by 4th */ - pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; break; default: radeon_error("Internal error: bad texture op (not TEX)\n"); @@ -3417,6 +3560,190 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) break; } + if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP) + { + GLuint tmp = gethelpr(pAsm); + pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE; + pAsm->D.dst.math = 1; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writew = 1; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W); + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 1; + pAsm->D.dst.writez = 1; + pAsm->D.dst.writew = 0; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_W); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->aArgSubst[1] = tmp; + need_barrier = GL_TRUE; + } + + if (pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX ) + { + GLuint tmp1 = gethelpr(pAsm); + GLuint tmp2 = gethelpr(pAsm); + + /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ + pAsm->D.dst.opcode = SQ_OP2_INST_CUBE; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + nomask_PVSDST(&(pAsm->D.dst)); + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, 1) ) + { + return GL_FALSE; + } + + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y); + swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_X, SQ_SEL_Z, SQ_SEL_Z); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + /* tmp1.z = ABS(tmp1.z) dont have abs support in assembler currently + * have to do explicit instruction + */ + pAsm->D.dst.opcode = SQ_OP2_INST_MAX; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + pAsm->D.dst.writez = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[1].bits = pAsm->S[0].bits; + flipneg_PVSSRC(&(pAsm->S[1].src)); + + next_ins(pAsm); + + /* tmp1.z = RCP_e(|tmp1.z|) */ + pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE; + pAsm->D.dst.math = 1; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + pAsm->D.dst.writez = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + pAsm->S[0].src.swizzlex = SQ_SEL_Z; + + next_ins(pAsm); + + /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x + * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x + * muladd has no writemask, have to use another temp + * also no support for imm constants, so add 1 here + */ + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp2; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = tmp1; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z); + setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE); + pAsm->S[2].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[2].src.reg = tmp1; + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_1); + + next_ins(pAsm); + + /* ADD the remaining .5 */ + pAsm->D.dst.opcode = SQ_OP2_INST_ADD; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp2; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 1; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp2; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = 252; // SQ_ALU_SRC_0_5 + noswizzle_PVSSRC(&(pAsm->S[1].src)); + + next_ins(pAsm); + + /* tmp1.xy = temp2.xy */ + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 1; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp2; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + + next_ins(pAsm); + pAsm->aArgSubst[1] = tmp1; + need_barrier = GL_TRUE; + + } + + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + pAsm->is_tex = GL_TRUE; + if ( GL_TRUE == need_barrier ) + { + pAsm->need_tex_barrier = GL_TRUE; + } // Set src1 to tex unit id pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit; pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; @@ -3437,10 +3764,25 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) return GL_FALSE; } - if ( GL_FALSE == next_ins(pAsm) ) + if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP) { - return GL_FALSE; + /* hopefully did swizzles before */ + noswizzle_PVSSRC(&(pAsm->S[0].src)); } + + if(pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX) + { + /* SAMPLE dst, tmp.yxwy, CUBE */ + pAsm->S[0].src.swizzlex = SQ_SEL_Y; + pAsm->S[0].src.swizzley = SQ_SEL_X; + pAsm->S[0].src.swizzlez = SQ_SEL_W; + pAsm->S[0].src.swizzlew = SQ_SEL_Y; + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index f9c4d849c65..0d4283e4bad 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -374,6 +374,10 @@ typedef struct r700_AssemblerBase struct prog_instruction * pILInst; GLuint uiCurInst; GLboolean bR6xx; + /* helper to decide which type of instruction to assemble */ + GLboolean is_tex; + /* we inserted helper intructions and need barrier on next TEX ins */ + GLboolean need_tex_barrier; } r700_AssemblerBase; //Internal use @@ -411,6 +415,15 @@ GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm, GLuint number_of_elements, GLenum dataElementType, VTX_FETCH_METHOD* pFetchMethod); +GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm, + GLuint destination_register, + GLenum type, + GLint size, + GLubyte element, + GLuint _signed, + GLboolean normalize, + VTX_FETCH_METHOD * pFetchMethod); +GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm); GLuint gethelpr(r700_AssemblerBase* pAsm); void resethelpr(r700_AssemblerBase* pAsm); void checkop_init(r700_AssemblerBase* pAsm); diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index 06d7e9c9ab1..3b7f6fffe03 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -208,6 +208,93 @@ static void r700SetupVTXConstants(GLcontext * ctx, } +extern int getTypeSize(GLenum type); +static void r700SetupVTXConstants2(GLcontext * ctx, + void * pAos, + StreamDesc * pStreamDesc) +{ + context_t *context = R700_CONTEXT(ctx); + struct radeon_aos * paos = (struct radeon_aos *)pAos; + unsigned int nVBsize; + BATCH_LOCALS(&context->radeon); + + unsigned int uSQ_VTX_CONSTANT_WORD0_0; + unsigned int uSQ_VTX_CONSTANT_WORD1_0; + unsigned int uSQ_VTX_CONSTANT_WORD2_0 = 0; + unsigned int uSQ_VTX_CONSTANT_WORD3_0 = 0; + unsigned int uSQ_VTX_CONSTANT_WORD6_0 = 0; + + if (!paos->bo) + return; + + if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV610) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV620) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS780) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS880) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV710)) + r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, TC_ACTION_ENA_bit); + else + r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit); + + if(0 == pStreamDesc->stride) + { + nVBsize = paos->count * pStreamDesc->size * getTypeSize(pStreamDesc->type); + } + else + { + nVBsize = paos->count * pStreamDesc->stride; + } + + uSQ_VTX_CONSTANT_WORD0_0 = paos->offset; + uSQ_VTX_CONSTANT_WORD1_0 = nVBsize - 1; + + SETfield(uSQ_VTX_CONSTANT_WORD2_0, 0, BASE_ADDRESS_HI_shift, BASE_ADDRESS_HI_mask); /* TODO */ + SETfield(uSQ_VTX_CONSTANT_WORD2_0, pStreamDesc->stride, SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift, + SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask); + SETfield(uSQ_VTX_CONSTANT_WORD2_0, GetSurfaceFormat(pStreamDesc->type, pStreamDesc->size, NULL), + SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift, + SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask); /* TODO : trace back api for initial data type, not only GL_FLOAT */ + + if(GL_TRUE == pStreamDesc->normalize) + { + SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_NORM, + SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask); + } + //else + //{ + // SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_INT, + // SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask); + //} + + if(1 == pStreamDesc->_signed) + { + SETbit(uSQ_VTX_CONSTANT_WORD2_0, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit); + } + + SETfield(uSQ_VTX_CONSTANT_WORD3_0, 1, MEM_REQUEST_SIZE_shift, MEM_REQUEST_SIZE_mask); + SETfield(uSQ_VTX_CONSTANT_WORD6_0, SQ_TEX_VTX_VALID_BUFFER, + SQ_TEX_RESOURCE_WORD6_0__TYPE_shift, SQ_TEX_RESOURCE_WORD6_0__TYPE_mask); + + BEGIN_BATCH_NO_AUTOSTATE(9 + 2); + + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); + R600_OUT_BATCH((pStreamDesc->element + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE); + R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD0_0); + R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD1_0); + R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD2_0); + R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD3_0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD6_0); + R600_OUT_BATCH_RELOC(uSQ_VTX_CONSTANT_WORD0_0, + paos->bo, + uSQ_VTX_CONSTANT_WORD0_0, + RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); + COMMIT_BATCH(); + +} + void r700SetupStreams(GLcontext *ctx) { context_t *context = R700_CONTEXT(ctx); @@ -256,14 +343,24 @@ static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom) COMMIT_BATCH(); for(i=0; i<VERT_ATTRIB_MAX; i++) { - if(vp->mesa_program->Base.InputsRead & (1 << i)) { - /* currently aos are packed */ - r700SetupVTXConstants(ctx, - i, - (void*)(&context->radeon.tcl.aos[j]), - (unsigned int)context->radeon.tcl.aos[j].components, - (unsigned int)context->radeon.tcl.aos[j].stride * 4, - (unsigned int)context->radeon.tcl.aos[j].count); + if(vp->mesa_program->Base.InputsRead & (1 << i)) + { + if(1 == context->selected_vp->uiVersion) + { + /* currently aos are packed */ + r700SetupVTXConstants(ctx, + i, + (void*)(&context->radeon.tcl.aos[j]), + (unsigned int)context->radeon.tcl.aos[j].components, + (unsigned int)context->radeon.tcl.aos[j].stride * 4, + (unsigned int)context->radeon.tcl.aos[j].count); + } + else + { /* context->selected_vp->uiVersion == 2 : aos not always packed */ + r700SetupVTXConstants2(ctx, + (void*)(&context->radeon.tcl.aos[j]), + &(context->stream_desc[j])); + } j++; } } diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index 78ce3ae4366..62a1ea1a22a 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -341,6 +341,11 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit); SETbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit); } + else + { + CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit); + CLEARbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit); + } ui = (unNumOfReg < ui) ? ui : unNumOfReg; diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index b1c3648ca56..0aef0b7ea1f 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -43,6 +43,7 @@ #include "tnl/t_context.h" #include "tnl/t_vertex.h" #include "tnl/t_pipeline.h" +#include "vbo/vbo_context.h" #include "r600_context.h" #include "r600_cmdbuf.h" @@ -53,6 +54,7 @@ #include "r700_fragprog.h" #include "r700_state.h" +#include "radeon_buffer_objects.h" #include "radeon_common_context.h" void r700WaitForIdle(context_t *context); @@ -249,78 +251,134 @@ static int r700NumVerts(int num_verts, int prim) static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) { - context_t *context = R700_CONTEXT(ctx); - BATCH_LOCALS(&context->radeon); - int type, i, total_emit; - int num_indices; - uint32_t vgt_draw_initiator = 0; - uint32_t vgt_index_type = 0; - uint32_t vgt_primitive_type = 0; - uint32_t vgt_num_indices = 0; - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; - - type = r700PrimitiveType(prim); - num_indices = r700NumVerts(end - start, prim); - - radeon_print(RADEON_RENDER, RADEON_TRACE, - "%s type %x num_indices %d\n", - __func__, type, num_indices); - - if (type < 0 || num_indices <= 0) - return; + context_t *context = R700_CONTEXT(ctx); + BATCH_LOCALS(&context->radeon); + int type, i, total_emit; + int num_indices; + uint32_t vgt_draw_initiator = 0; + uint32_t vgt_index_type = 0; + uint32_t vgt_primitive_type = 0; + uint32_t vgt_num_indices = 0; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; + GLboolean bUseDrawIndex; + + if(NULL != context->ind_buf.bo) + { + bUseDrawIndex = GL_TRUE; + } + else + { + bUseDrawIndex = GL_FALSE; + } + + type = r700PrimitiveType(prim); + num_indices = r700NumVerts(end - start, prim); + + radeon_print(RADEON_RENDER, RADEON_TRACE, + "%s type %x num_indices %d\n", + __func__, type, num_indices); + + if (type < 0 || num_indices <= 0) + return; + if(GL_TRUE == bUseDrawIndex) + { + total_emit = 3 /* VGT_PRIMITIVE_TYPE */ + + 2 /* VGT_INDEX_TYPE */ + + 2 /* NUM_INSTANCES */ + + 5 + 2; /* DRAW_INDEX */ + } + else + { total_emit = 3 /* VGT_PRIMITIVE_TYPE */ + 2 /* VGT_INDEX_TYPE */ - + 2 /* NUM_INSTANCES */ + + 2 /* NUM_INSTANCES */ + num_indices + 3; /* DRAW_INDEX_IMMD */ + } - BEGIN_BATCH_NO_AUTOSTATE(total_emit); - // prim - SETfield(vgt_primitive_type, type, - VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask); - R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); - R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX); - R600_OUT_BATCH(vgt_primitive_type); + BEGIN_BATCH_NO_AUTOSTATE(total_emit); + // prim + SETfield(vgt_primitive_type, type, + VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask); + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); + R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX); + R600_OUT_BATCH(vgt_primitive_type); // index type - SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); - R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); - R600_OUT_BATCH(vgt_index_type); + SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); - // num instances - R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); - R600_OUT_BATCH(1); + if(GL_TRUE == bUseDrawIndex) + { + if(GL_TRUE != context->ind_buf.is_32bit) + { + SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); + } + } + + R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); + R600_OUT_BATCH(vgt_index_type); + + // num instances + R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); + R600_OUT_BATCH(1); + + // draw packet + vgt_num_indices = num_indices; - // draw packet - vgt_num_indices = num_indices; + if(GL_TRUE == bUseDrawIndex) + { + SETfield(vgt_draw_initiator, DI_SRC_SEL_DMA, SOURCE_SELECT_shift, SOURCE_SELECT_mask); + } + else + { SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask); + } + SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask); + if(GL_TRUE == bUseDrawIndex) + { + R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX, 3)); + R600_OUT_BATCH(context->ind_buf.bo_offset); + R600_OUT_BATCH(0); + R600_OUT_BATCH(vgt_num_indices); + R600_OUT_BATCH(vgt_draw_initiator); + R600_OUT_BATCH_RELOC(context->ind_buf.bo_offset, + context->ind_buf.bo, + context->ind_buf.bo_offset, + RADEON_GEM_DOMAIN_GTT, 0, 0); + } + else + { R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1))); R600_OUT_BATCH(vgt_num_indices); R600_OUT_BATCH(vgt_draw_initiator); - for (i = start; i < (start + num_indices); i++) { - if(vb->Elts) - R600_OUT_BATCH(vb->Elts[i]); - else - R600_OUT_BATCH(i); + for (i = start; i < (start + num_indices); i++) + { + if(vb->Elts) + { + R600_OUT_BATCH(vb->Elts[i]); + } + else + { + R600_OUT_BATCH(i); + } } - END_BATCH(); - COMMIT_BATCH(); + } + END_BATCH(); + COMMIT_BATCH(); } /* start 3d, idle, cb/db flush */ #define PRE_EMIT_STATE_BUFSZ 10 + 5 + 14 -static GLuint r700PredictRenderSize(GLcontext* ctx) +static GLuint r700PredictRenderSize(GLcontext* ctx, GLuint nr_prims) { context_t *context = R700_CONTEXT(ctx); - TNLcontext *tnl = TNL_CONTEXT(ctx); struct r700_vertex_program *vp = context->selected_vp; - struct vertex_buffer *vb = &tnl->vb; GLboolean flushed; GLuint dwords, i; GLuint state_size; @@ -328,8 +386,15 @@ static GLuint r700PredictRenderSize(GLcontext* ctx) context->radeon.tcl.aos_count = _mesa_bitcount(vp->mesa_program->Base.InputsRead); dwords = PRE_EMIT_STATE_BUFSZ; - for (i = 0; i < vb->PrimitiveCount; i++) - dwords += vb->Primitive[i].count + 10; + if (nr_prims) + dwords += nr_prims * 14; + else { + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; + + for (i = 0; i < vb->PrimitiveCount; i++) + dwords += vb->Primitive[i].count + 10; + } state_size = radeonCountStateEmitSize(&context->radeon); flushed = rcommonEnsureCmdBufSpace(&context->radeon, dwords + state_size, __FUNCTION__); @@ -369,7 +434,7 @@ static GLboolean r700RunRender(GLcontext * ctx, r700SetupFragmentProgram(ctx); r600UpdateTextureState(ctx); - GLuint emit_end = r700PredictRenderSize(ctx) + GLuint emit_end = r700PredictRenderSize(ctx, 0) + context->radeon.cmdbuf.cs->cdw; r700SetupStreams(ctx); @@ -477,4 +542,544 @@ const struct tnl_pipeline_stage *r700_pipeline[] = 0, }; +#define CONVERT( TYPE, MACRO ) do { \ + GLuint i, j, sz; \ + sz = input->Size; \ + if (input->Normalized) { \ + for (i = 0; i < count; i++) { \ + const TYPE *in = (TYPE *)src_ptr; \ + for (j = 0; j < sz; j++) { \ + *dst_ptr++ = MACRO(*in); \ + in++; \ + } \ + src_ptr += stride; \ + } \ + } else { \ + for (i = 0; i < count; i++) { \ + const TYPE *in = (TYPE *)src_ptr; \ + for (j = 0; j < sz; j++) { \ + *dst_ptr++ = (GLfloat)(*in); \ + in++; \ + } \ + src_ptr += stride; \ + } \ + } \ +} while (0) + +/** + * Convert attribute data type to float + * If the attribute uses named buffer object replace the bo with newly allocated bo + */ +static void r700ConvertAttrib(GLcontext *ctx, int count, + const struct gl_client_array *input, + struct StreamDesc *attr) +{ + context_t *context = R700_CONTEXT(ctx); + const GLvoid *src_ptr; + GLboolean mapped_named_bo = GL_FALSE; + GLfloat *dst_ptr; + GLuint stride; + + stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB; + + /* Convert value for first element only */ + if (input->StrideB == 0) + { + count = 1; + } + + if (input->BufferObj->Name) + { + if (!input->BufferObj->Pointer) + { + ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + mapped_named_bo = GL_TRUE; + } + + src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr); + } + else + { + src_ptr = input->Ptr; + } + + radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, + sizeof(GLfloat) * input->Size * count, 32); + dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset); + + assert(src_ptr != NULL); + + switch (input->Type) + { + case GL_DOUBLE: + CONVERT(GLdouble, (GLfloat)); + break; + case GL_UNSIGNED_INT: + CONVERT(GLuint, UINT_TO_FLOAT); + break; + case GL_INT: + CONVERT(GLint, INT_TO_FLOAT); + break; + case GL_UNSIGNED_SHORT: + CONVERT(GLushort, USHORT_TO_FLOAT); + break; + case GL_SHORT: + CONVERT(GLshort, SHORT_TO_FLOAT); + break; + case GL_UNSIGNED_BYTE: + assert(input->Format != GL_BGRA); + CONVERT(GLubyte, UBYTE_TO_FLOAT); + break; + case GL_BYTE: + CONVERT(GLbyte, BYTE_TO_FLOAT); + break; + default: + assert(0); + break; + } + + if (mapped_named_bo) + { + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + } +} + +static void r700AlignDataToDword(GLcontext *ctx, + const struct gl_client_array *input, + int count, + struct StreamDesc *attr) +{ + context_t *context = R700_CONTEXT(ctx); + const int dst_stride = (input->StrideB + 3) & ~3; + const int size = getTypeSize(input->Type) * input->Size * count; + GLboolean mapped_named_bo = GL_FALSE; + + radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, size, 32); + + if (!input->BufferObj->Pointer) + { + ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + mapped_named_bo = GL_TRUE; + } + + { + GLvoid *src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr); + GLvoid *dst_ptr = ADD_POINTERS(attr->bo->ptr, attr->bo_offset); + int i; + + for (i = 0; i < count; ++i) + { + _mesa_memcpy(dst_ptr, src_ptr, input->StrideB); + src_ptr += input->StrideB; + dst_ptr += dst_stride; + } + } + + if (mapped_named_bo) + { + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + } + + attr->stride = dst_stride; +} + +static void r700SetupStreams2(GLcontext *ctx, const struct gl_client_array *input[], int count) +{ + context_t *context = R700_CONTEXT(ctx); + GLuint stride; + int ret; + int i, index; + + R600_STATECHANGE(context, vtx); + + for(index = 0; index < context->nNumActiveAos; index++) + { + struct radeon_aos *aos = &context->radeon.tcl.aos[index]; + i = context->stream_desc[index].element; + + stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB; + + if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT || +#if MESA_BIG_ENDIAN + getTypeSize(input[i]->Type) != 4 || +#endif + stride < 4) + { + r700ConvertAttrib(ctx, count, input[i], &context->stream_desc[index]); + } + else + { + if (input[i]->BufferObj->Name) + { + if (stride % 4 != 0) + { + assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0); + r700AlignDataToDword(ctx, input[i], count, &context->stream_desc[index]); + context->stream_desc[index].is_named_bo = GL_FALSE; + } + else + { + context->stream_desc[index].stride = input[i]->StrideB; + context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr; + context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo; + context->stream_desc[index].is_named_bo = GL_TRUE; + } + } + else + { + int size; + int local_count = count; + uint32_t *dst; + + if (input[i]->StrideB == 0) + { + size = getTypeSize(input[i]->Type) * input[i]->Size; + local_count = 1; + } + else + { + size = getTypeSize(input[i]->Type) * input[i]->Size * local_count; + } + + radeonAllocDmaRegion(&context->radeon, &context->stream_desc[index].bo, + &context->stream_desc[index].bo_offset, size, 32); + assert(context->stream_desc[index].bo->ptr != NULL); + dst = (uint32_t *)ADD_POINTERS(context->stream_desc[index].bo->ptr, + context->stream_desc[index].bo_offset); + + switch (context->stream_desc[index].dwords) + { + case 1: + radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count); + context->stream_desc[index].stride = 4; + break; + case 2: + radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count); + context->stream_desc[index].stride = 8; + break; + case 3: + radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count); + context->stream_desc[index].stride = 12; + break; + case 4: + radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count); + context->stream_desc[index].stride = 16; + break; + default: + assert(0); + break; + } + } + } + + aos->count = context->stream_desc[index].stride == 0 ? 1 : count; + aos->stride = context->stream_desc[index].stride / sizeof(float); + aos->components = context->stream_desc[index].dwords; + aos->bo = context->stream_desc[index].bo; + aos->offset = context->stream_desc[index].bo_offset; + + if(context->stream_desc[index].is_named_bo) + { + radeon_cs_space_add_persistent_bo(context->radeon.cmdbuf.cs, + context->stream_desc[index].bo, + RADEON_GEM_DOMAIN_GTT, 0); + } + } + + context->radeon.tcl.aos_count = context->nNumActiveAos; + ret = radeon_cs_space_check_with_bo(context->radeon.cmdbuf.cs, + first_elem(&context->radeon.dma.reserved)->bo, + RADEON_GEM_DOMAIN_GTT, 0); +} + +static void r700FreeData(GLcontext *ctx) +{ + /* Need to zero tcl.aos[n].bo and tcl.elt_dma_bo + * to prevent double unref in radeonReleaseArrays + * called during context destroy + */ + context_t *context = R700_CONTEXT(ctx); + + int i; + + for (i = 0; i < context->nNumActiveAos; i++) + { + if (!context->stream_desc[i].is_named_bo) + { + radeon_bo_unref(context->stream_desc[i].bo); + } + context->radeon.tcl.aos[i].bo = NULL; + } + + if (context->ind_buf.bo != NULL) + { + radeon_bo_unref(context->ind_buf.bo); + } +} + +static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) +{ + context_t *context = R700_CONTEXT(ctx); + GLvoid *src_ptr; + GLuint *out; + int i; + GLboolean mapped_named_bo = GL_FALSE; + + if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) + { + ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + mapped_named_bo = GL_TRUE; + assert(mesa_ind_buf->obj->Pointer != NULL); + } + src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); + + if (mesa_ind_buf->type == GL_UNSIGNED_BYTE) + { + GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); + GLubyte *in = (GLubyte *)src_ptr; + + radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, + &context->ind_buf.bo_offset, size, 4); + + assert(context->ind_buf.bo->ptr != NULL); + out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); + + for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) + { + *out++ = in[i] | in[i + 1] << 16; + } + + if (i < mesa_ind_buf->count) + { + *out++ = in[i]; + } + +#if MESA_BIG_ENDIAN + } + else + { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */ + GLushort *in = (GLushort *)src_ptr; + GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); + + radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, + &context->ind_buf.bo_offset, size, 4); + + assert(context->ind_buf.bo->ptr != NULL); + out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); + + for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) + { + *out++ = in[i] | in[i + 1] << 16; + } + + if (i < mesa_ind_buf->count) + { + *out++ = in[i]; + } +#endif + } + + context->ind_buf.is_32bit = GL_FALSE; + context->ind_buf.count = mesa_ind_buf->count; + + if (mapped_named_bo) + { + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + } +} + +static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) +{ + context_t *context = R700_CONTEXT(ctx); + + if (!mesa_ind_buf) { + context->ind_buf.bo = NULL; + return; + } + +#if MESA_BIG_ENDIAN + if (mesa_ind_buf->type == GL_UNSIGNED_INT) + { +#else + if (mesa_ind_buf->type != GL_UNSIGNED_BYTE) + { +#endif + const GLvoid *src_ptr; + GLvoid *dst_ptr; + GLboolean mapped_named_bo = GL_FALSE; + + if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) + { + ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + assert(mesa_ind_buf->obj->Pointer != NULL); + mapped_named_bo = GL_TRUE; + } + + src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); + + const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type); + + radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, + &context->ind_buf.bo_offset, size, 4); + assert(context->ind_buf.bo->ptr != NULL); + dst_ptr = ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); + + _mesa_memcpy(dst_ptr, src_ptr, size); + + context->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT); + context->ind_buf.count = mesa_ind_buf->count; + + if (mapped_named_bo) + { + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + } + } + else + { + r700FixupIndexBuffer(ctx, mesa_ind_buf); + } +} + +static GLboolean r700TryDrawPrims(GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLuint min_index, + GLuint max_index ) +{ + context_t *context = R700_CONTEXT(ctx); + radeonContextPtr radeon = &context->radeon; + GLuint i, id = 0; + struct radeon_renderbuffer *rrb; + + if (ctx->NewState) + { + _mesa_update_state( ctx ); + } + + _tnl_UpdateFixedFunctionProgram(ctx); + r700SetVertexFormat(ctx, arrays, max_index + 1); + r700SetupIndexBuffer(ctx, ib); + /* shaders need to be updated before buffers are validated */ + r700UpdateShaders2(ctx); + if (!r600ValidateBuffers(ctx)) + return GL_FALSE; + + /* always emit CB base to prevent + * lock ups on some chips. + */ + R600_STATECHANGE(context, cb_target); + /* mark vtx as dirty since it changes per-draw */ + R600_STATECHANGE(context, vtx); + + r700SetScissor(context); + r700SetupVertexProgram(ctx); + r700SetupFragmentProgram(ctx); + r600UpdateTextureState(ctx); + + GLuint emit_end = r700PredictRenderSize(ctx, nr_prims) + + context->radeon.cmdbuf.cs->cdw; + + r700SetupStreams2(ctx, arrays, max_index + 1); + + radeonEmitState(radeon); + + radeon_debug_add_indent(); + for (i = 0; i < nr_prims; ++i) + { + r700RunRenderPrimitive(ctx, + prim[i].start, + prim[i].start + prim[i].count, + prim[i].mode); + } + radeon_debug_remove_indent(); + + /* Flush render op cached for last several quads. */ + r700WaitForIdleClean(context); + + rrb = radeon_get_colorbuffer(&context->radeon); + if (rrb && rrb->bo) + r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM, + CB_ACTION_ENA_bit | (1 << (id + 6))); + + rrb = radeon_get_depthbuffer(&context->radeon); + if (rrb && rrb->bo) + r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM, + DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit); + + r700FreeData(ctx); + + if (emit_end < context->radeon.cmdbuf.cs->cdw) + { + WARN_ONCE("Rendering was %d commands larger than predicted size." + " We might overflow command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end); + } + + return GL_TRUE; +} + +static void r700DrawPrimsRe(GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index) +{ + GLboolean retval = GL_FALSE; + + /* This check should get folded into just the places that + * min/max index are really needed. + */ + if (!index_bounds_valid) { + vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + } + + if (min_index) { + vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r700DrawPrimsRe ); + return; + } + + /* Make an attempt at drawing */ + retval = r700TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); + + /* If failed run tnl pipeline - it should take care of fallbacks */ + if (!retval) + _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); +} + +static void r700DrawPrims(GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index) +{ + context_t *context = R700_CONTEXT(ctx); + + /* For non indexed drawing, using tnl pipe. */ + if(!ib) + { + context->ind_buf.bo = NULL; + + _tnl_vbo_draw_prims(ctx, arrays, prim, nr_prims, ib, + index_bounds_valid, min_index, max_index); + return; + } + + r700DrawPrimsRe(ctx, arrays, prim, nr_prims, ib, index_bounds_valid, min_index, max_index); +} + +void r700InitDraw(GLcontext *ctx) +{ + struct vbo_context *vbo = vbo_context(ctx); + + /* to be enabled */ + vbo->draw_prims = r700DrawPrims; +} + diff --git a/src/mesa/drivers/dri/r600/r700_shader.c b/src/mesa/drivers/dri/r600/r700_shader.c index b4fd51c1370..955ea4e4e1d 100644 --- a/src/mesa/drivers/dri/r600/r700_shader.c +++ b/src/mesa/drivers/dri/r600/r700_shader.c @@ -60,6 +60,55 @@ void AddInstToList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * plstCFInstructions->uNumOfNode++; } +void TakeInstOutFromList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * pInst) +{ + GLuint ulIndex = 0; + GLboolean bFound = GL_FALSE; + R700ShaderInstruction * pPrevInst = NULL; + R700ShaderInstruction * pCurInst = plstCFInstructions->pHead; + + /* Need go thro list to make sure pInst is there. */ + while(NULL != pCurInst) + { + if(pCurInst == pInst) + { + bFound = GL_TRUE; + break; + } + + pPrevInst = pCurInst; + pCurInst = pCurInst->pNextInst; + } + if(GL_TRUE == bFound) + { + plstCFInstructions->uNumOfNode--; + + pCurInst = pInst->pNextInst; + ulIndex = pInst->m_uIndex; + while(NULL != pCurInst) + { + pCurInst->m_uIndex = ulIndex; + ulIndex++; + pCurInst = pCurInst->pNextInst; + } + + if(plstCFInstructions->pHead == pInst) + { + plstCFInstructions->pHead = pInst->pNextInst; + } + if(plstCFInstructions->pTail == pInst) + { + plstCFInstructions->pTail = pPrevInst; + } + if(NULL != pPrevInst) + { + pPrevInst->pNextInst = pInst->pNextInst; + } + + FREE(pInst); + } +} + void Init_R700_Shader(R700_Shader * pShader) { pShader->Type = R700_SHADER_INVALID; @@ -488,6 +537,47 @@ void DebugPrint(void) { } +void cleanup_vfetch_shaderinst(R700_Shader *pShader) +{ + R700ShaderInstruction *pInst; + R700ShaderInstruction *pInstToFree; + R700VertexInstruction *pVTXInst; + R700ControlFlowInstruction *pCFInst; + + pInst = pShader->lstVTXInstructions.pHead; + while(NULL != pInst) + { + pVTXInst = (R700VertexInstruction *)pInst; + pShader->uShaderBinaryDWORDSize -= GetInstructionSize(pVTXInst->m_ShaderInstType); + + if(NULL != pVTXInst->m_pLinkedGenericClause) + { + pCFInst = (R700ControlFlowInstruction*)(pVTXInst->m_pLinkedGenericClause); + + TakeInstOutFromList(&(pShader->lstCFInstructions), + (R700ShaderInstruction*)pCFInst); + + pShader->uShaderBinaryDWORDSize -= GetInstructionSize(pCFInst->m_ShaderInstType); + } + + pInst = pInst->pNextInst; + }; + + //destroy each item in pShader->lstVTXInstructions; + pInst = pShader->lstVTXInstructions.pHead; + while(NULL != pInst) + { + pInstToFree = pInst; + pInst = pInst->pNextInst; + FREE(pInstToFree); + }; + + //set NULL pShader->lstVTXInstructions + pShader->lstVTXInstructions.pHead=NULL; + pShader->lstVTXInstructions.pTail=NULL; + pShader->lstVTXInstructions.uNumOfNode=0; +} + void Clean_Up_Shader(R700_Shader *pShader) { FREE(pShader->pProgram); diff --git a/src/mesa/drivers/dri/r600/r700_shader.h b/src/mesa/drivers/dri/r600/r700_shader.h index bfd01e1a93a..997cb05aaf8 100644 --- a/src/mesa/drivers/dri/r600/r700_shader.h +++ b/src/mesa/drivers/dri/r600/r700_shader.h @@ -143,6 +143,7 @@ void LoadProgram(R700_Shader *pShader); void UpdateShaderRegisters(R700_Shader *pShader); void DeleteInstructions(R700_Shader *pShader); void DebugPrint(void); +void cleanup_vfetch_shaderinst(R700_Shader *pShader); void Clean_Up_Shader(R700_Shader *pShader); diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c index fc0b5116843..7e8b48f91eb 100644 --- a/src/mesa/drivers/dri/r600/r700_state.c +++ b/src/mesa/drivers/dri/r600/r700_state.c @@ -92,7 +92,25 @@ void r700UpdateShaders (GLcontext * ctx) //---------------------------------- } } - r700SelectVertexShader(ctx); + r700SelectVertexShader(ctx, 1); + r700UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + context->radeon.NewGLState = 0; +} + +void r700UpdateShaders2(GLcontext * ctx) +{ + context_t *context = R700_CONTEXT(ctx); + + /* should only happenen once, just after context is created */ + /* TODO: shouldn't we fallback to sw here? */ + if (!ctx->FragmentProgram._Current) { + _mesa_fprintf(stderr, "No ctx->FragmentProgram._Current!!\n"); + return; + } + + r700SelectFragmentShader(ctx); + + r700SelectVertexShader(ctx, 2); r700UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); context->radeon.NewGLState = 0; } @@ -475,10 +493,10 @@ static void r700SetBlendState(GLcontext * ctx) eqn, COLOR_COMB_FCN_shift, COLOR_COMB_FCN_mask); SETfield(blend_reg, - blend_factor(ctx->Color.BlendSrcRGB, GL_TRUE), + blend_factor(ctx->Color.BlendSrcA, GL_TRUE), ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask); SETfield(blend_reg, - blend_factor(ctx->Color.BlendDstRGB, GL_FALSE), + blend_factor(ctx->Color.BlendDstA, GL_FALSE), ALPHA_DESTBLEND_shift, ALPHA_DESTBLEND_mask); switch (ctx->Color.BlendEquationA) { @@ -753,9 +771,9 @@ static void r700ColorMask(GLcontext * ctx, (b ? 4 : 0) | (a ? 8 : 0)); - if (mask != r700->CB_SHADER_MASK.u32All) { + if (mask != r700->CB_TARGET_MASK.u32All) { R600_STATECHANGE(context, cb); - SETfield(r700->CB_SHADER_MASK.u32All, mask, OUTPUT0_ENABLE_shift, OUTPUT0_ENABLE_mask); + SETfield(r700->CB_TARGET_MASK.u32All, mask, TARGET0_ENABLE_shift, TARGET0_ENABLE_mask); } } @@ -1277,10 +1295,11 @@ void r700SetScissor(context_t *context) //--------------- return; } if (context->radeon.state.scissor.enabled) { + /* r600 has exclusive scissors */ x1 = context->radeon.state.scissor.rect.x1; y1 = context->radeon.state.scissor.rect.y1; - x2 = context->radeon.state.scissor.rect.x2; - y2 = context->radeon.state.scissor.rect.y2; + x2 = context->radeon.state.scissor.rect.x2 + 1; + y2 = context->radeon.state.scissor.rect.y2 + 1; } else { if (context->radeon.radeonScreen->driScreen->dri2.enabled) { x1 = 0; @@ -1761,7 +1780,7 @@ void r700InitState(GLcontext * ctx) //------------------- r700->CB_CLRCMP_MSK.u32All = 0xFFFFFFFF; /* screen/window/view */ - SETfield(r700->CB_TARGET_MASK.u32All, 0xF, (4 * id), TARGET0_ENABLE_mask); + SETfield(r700->CB_SHADER_MASK.u32All, 0xF, (4 * id), OUTPUT0_ENABLE_mask); context->radeon.hw.all_dirty = GL_TRUE; diff --git a/src/mesa/drivers/dri/r600/r700_state.h b/src/mesa/drivers/dri/r600/r700_state.h index 0f53d5b4c59..209189d8d72 100644 --- a/src/mesa/drivers/dri/r600/r700_state.h +++ b/src/mesa/drivers/dri/r600/r700_state.h @@ -35,6 +35,7 @@ extern void r700UpdateStateParameters(GLcontext * ctx, GLuint new_state); extern void r700UpdateShaders (GLcontext * ctx); +extern void r700UpdateShaders2(GLcontext * ctx); extern void r700UpdateViewportOffset(GLcontext * ctx); diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index 9ee26286d9b..e7a209be9d8 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -159,7 +159,35 @@ GLboolean Process_Vertex_Program_Vfetch_Instructions( return GL_TRUE; } -void Map_Vertex_Program(struct r700_vertex_program *vp, +GLboolean Process_Vertex_Program_Vfetch_Instructions2( + GLcontext *ctx, + struct r700_vertex_program *vp, + struct gl_vertex_program *mesa_vp) +{ + int i; + context_t *context = R700_CONTEXT(ctx); + + VTX_FETCH_METHOD vtxFetchMethod; + vtxFetchMethod.bEnableMini = GL_FALSE; + vtxFetchMethod.mega_fetch_remainder = 0; + + for(i=0; i<context->nNumActiveAos; i++) + { + assemble_vfetch_instruction2(&vp->r700AsmCode, + vp->r700AsmCode.ucVP_AttributeMap[context->stream_desc[i].element], + context->stream_desc[i].type, + context->stream_desc[i].size, + context->stream_desc[i].element, + context->stream_desc[i]._signed, + context->stream_desc[i].normalize, + &vtxFetchMethod); + } + + return GL_TRUE; +} + +void Map_Vertex_Program(GLcontext *ctx, + struct r700_vertex_program *vp, struct gl_vertex_program *mesa_vp) { GLuint ui; @@ -175,11 +203,22 @@ void Map_Vertex_Program(struct r700_vertex_program *vp, pAsm->number_used_registers += num_inputs; // Create VFETCH instructions for inputs - if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions(vp, mesa_vp) ) - { - radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions return error. \n"); - return; //error - } + if(1 == vp->uiVersion) + { + if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions(vp, mesa_vp) ) + { + radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions return error. \n"); + return; + } + } + else + { + if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions2(ctx, vp, mesa_vp) ) + { + radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions2 return error. \n"); + return; + } + } // Map Outputs pAsm->number_of_exports = Map_Vertex_Output(pAsm, mesa_vp, pAsm->number_used_registers); @@ -261,7 +300,8 @@ GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp, } struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, - struct gl_vertex_program *mesa_vp) + struct gl_vertex_program *mesa_vp, + GLint nVer) { context_t *context = R700_CONTEXT(ctx); struct r700_vertex_program *vp; @@ -271,6 +311,7 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, unsigned int i; vp = _mesa_calloc(sizeof(*vp)); + vp->uiVersion = nVer; vp->mesa_program = (struct gl_vertex_program *)_mesa_clone_program(ctx, &mesa_vp->Base); if (mesa_vp->IsPositionInvariant) @@ -296,7 +337,7 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, //Init_Program Init_r700_AssemblerBase(SPT_VP, &(vp->r700AsmCode), &(vp->r700Shader) ); - Map_Vertex_Program( vp, vp->mesa_program ); + Map_Vertex_Program(ctx, vp, vp->mesa_program ); if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, vp->mesa_program)) { @@ -325,7 +366,7 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, return vp; } -void r700SelectVertexShader(GLcontext *ctx) +void r700SelectVertexShader(GLcontext *ctx, GLint nVersion) { context_t *context = R700_CONTEXT(ctx); struct r700_vertex_program_cont *vpc; @@ -365,7 +406,7 @@ void r700SelectVertexShader(GLcontext *ctx) } } - vp = r700TranslateVertexShader(ctx, &(vpc->mesa_program) ); + vp = r700TranslateVertexShader(ctx, &(vpc->mesa_program), nVersion); if(!vp) { radeon_error("Failed to translate vertex shader. \n"); @@ -377,6 +418,140 @@ void r700SelectVertexShader(GLcontext *ctx) return; } +int getTypeSize(GLenum type) +{ + switch (type) + { + case GL_DOUBLE: + return sizeof(GLdouble); + case GL_FLOAT: + return sizeof(GLfloat); + case GL_INT: + return sizeof(GLint); + case GL_UNSIGNED_INT: + return sizeof(GLuint); + case GL_SHORT: + return sizeof(GLshort); + case GL_UNSIGNED_SHORT: + return sizeof(GLushort); + case GL_BYTE: + return sizeof(GLbyte); + case GL_UNSIGNED_BYTE: + return sizeof(GLubyte); + default: + assert(0); + return 0; + } +} + +static void r700TranslateAttrib(GLcontext *ctx, GLuint unLoc, int count, const struct gl_client_array *input) +{ + context_t *context = R700_CONTEXT(ctx); + + StreamDesc * pStreamDesc = &(context->stream_desc[context->nNumActiveAos]); + + GLuint stride; + + stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size + : input->StrideB; + + if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT || +#if MESA_BIG_ENDIAN + getTypeSize(input->Type) != 4 || +#endif + stride < 4) + { + pStreamDesc->type = GL_FLOAT; + + if (input->StrideB == 0) + { + pStreamDesc->stride = 0; + } + else + { + pStreamDesc->stride = sizeof(GLfloat) * input->Size; + } + pStreamDesc->dwords = input->Size; + pStreamDesc->is_named_bo = GL_FALSE; + } + else + { + pStreamDesc->type = input->Type; + pStreamDesc->dwords = (getTypeSize(input->Type) * input->Size + 3)/ 4; + if (!input->BufferObj->Name) + { + if (input->StrideB == 0) + { + pStreamDesc->stride = 0; + } + else + { + pStreamDesc->stride = (getTypeSize(pStreamDesc->type) * input->Size + 3) & ~3; + } + + pStreamDesc->is_named_bo = GL_FALSE; + } + } + + pStreamDesc->size = input->Size; + pStreamDesc->dst_loc = context->nNumActiveAos; + pStreamDesc->element = unLoc; + + switch (pStreamDesc->type) + { //GetSurfaceFormat + case GL_FLOAT: + pStreamDesc->_signed = 0; + pStreamDesc->normalize = GL_FALSE; + break; + case GL_SHORT: + pStreamDesc->_signed = 1; + pStreamDesc->normalize = input->Normalized; + break; + case GL_BYTE: + pStreamDesc->_signed = 1; + pStreamDesc->normalize = input->Normalized; + break; + case GL_UNSIGNED_SHORT: + pStreamDesc->_signed = 0; + pStreamDesc->normalize = input->Normalized; + break; + case GL_UNSIGNED_BYTE: + pStreamDesc->_signed = 0; + pStreamDesc->normalize = input->Normalized; + break; + default: + case GL_INT: + case GL_UNSIGNED_INT: + case GL_DOUBLE: + assert(0); + break; + } + context->nNumActiveAos++; +} + +void r700SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count) +{ + context_t *context = R700_CONTEXT(ctx); + struct r700_vertex_program *vpc + = (struct r700_vertex_program *)ctx->VertexProgram._Current; + + struct gl_vertex_program * mesa_vp = (struct gl_vertex_program *)&(vpc->mesa_program); + unsigned int unLoc = 0; + unsigned int unBit = mesa_vp->Base.InputsRead; + context->nNumActiveAos = 0; + + while(unBit) + { + if(unBit & 1) + { + r700TranslateAttrib(ctx, unLoc, count, arrays[unLoc]); + } + + unBit >>= 1; + ++unLoc; + } +} + void * r700GetActiveVpShaderBo(GLcontext * ctx) { context_t *context = R700_CONTEXT(ctx); diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.h b/src/mesa/drivers/dri/r600/r700_vertprog.h index c48764c43ba..f9a3e395ee9 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.h +++ b/src/mesa/drivers/dri/r600/r700_vertprog.h @@ -52,7 +52,7 @@ struct r700_vertex_program GLboolean translated; GLboolean loaded; - GLboolean needUpdateVF; + GLint uiVersion; void * shaderbo; @@ -76,19 +76,28 @@ unsigned int Map_Vertex_Input(r700_AssemblerBase *pAsm, GLboolean Process_Vertex_Program_Vfetch_Instructions( struct r700_vertex_program *vp, struct gl_vertex_program *mesa_vp); -void Map_Vertex_Program(struct r700_vertex_program *vp, +GLboolean Process_Vertex_Program_Vfetch_Instructions2( + GLcontext *ctx, + struct r700_vertex_program *vp, + struct gl_vertex_program *mesa_vp); +void Map_Vertex_Program(GLcontext *ctx, + struct r700_vertex_program *vp, struct gl_vertex_program *mesa_vp); GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp, struct gl_vertex_program *mesa_vp); struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, - struct gl_vertex_program *mesa_vp); + struct gl_vertex_program *mesa_vp, + GLint nVer); /* Interface */ -extern void r700SelectVertexShader(GLcontext *ctx); +extern void r700SelectVertexShader(GLcontext *ctx, GLint nVersion); +extern void r700SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count); extern GLboolean r700SetupVertexProgram(GLcontext * ctx); extern void * r700GetActiveVpShaderBo(GLcontext * ctx); +extern int getTypeSize(GLenum type); + #endif /* _R700_VERTPROG_H_ */ diff --git a/src/mesa/drivers/dri/r600/radeon_buffer_objects.c b/src/mesa/drivers/dri/r600/radeon_buffer_objects.c new file mode 120000 index 00000000000..f6a5f664701 --- /dev/null +++ b/src/mesa/drivers/dri/r600/radeon_buffer_objects.c @@ -0,0 +1 @@ +../radeon/radeon_buffer_objects.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r600/radeon_buffer_objects.h b/src/mesa/drivers/dri/r600/radeon_buffer_objects.h new file mode 120000 index 00000000000..2f134fd17b8 --- /dev/null +++ b/src/mesa/drivers/dri/r600/radeon_buffer_objects.h @@ -0,0 +1 @@ +../radeon/radeon_buffer_objects.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c index a24b6dac265..8fac5c6c512 100644 --- a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c +++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c @@ -187,7 +187,11 @@ radeonMapBuffer(GLcontext * ctx, radeon_bo_map(radeon_obj->bo, access == GL_WRITE_ONLY_ARB); - return obj->Pointer = radeon_obj->bo->ptr; + obj->Pointer = radeon_obj->bo->ptr; + obj->Length = obj->Size; + obj->Offset = 0; + + return obj->Pointer; } @@ -203,9 +207,12 @@ radeonUnmapBuffer(GLcontext * ctx, if (radeon_obj->bo != NULL) { radeon_bo_unmap(radeon_obj->bo); - obj->Pointer = NULL; } + obj->Pointer = NULL; + obj->Offset = 0; + obj->Length = 0; + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index 264392b3270..f8a4cdb4954 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -232,13 +232,13 @@ void radeonUpdateScissor( GLcontext *ctx ) __DRIdrawablePrivate *dPriv = radeon_get_drawable(rmesa); x1 += dPriv->x; - x2 += dPriv->x + 1; + x2 += dPriv->x; min_x += dPriv->x; - max_x += dPriv->x + 1; + max_x += dPriv->x; y1 += dPriv->y; - y2 += dPriv->y + 1; + y2 += dPriv->y; min_y += dPriv->y; - max_y += dPriv->y + 1; + max_y += dPriv->y; } rmesa->state.scissor.rect.x1 = CLAMP(x1, min_x, max_x); diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index 1c53c04da77..6b9b1e3c5e4 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -227,11 +227,8 @@ GLboolean radeonInitContext(radeonContextPtr radeon, fthrottle_mode = driQueryOptioni(&radeon->optionCache, "fthrottle_mode"); radeon->iw.irq_seq = -1; radeon->irqsEmitted = 0; - if (IS_R600_CLASS(radeon->radeonScreen)) - radeon->do_irqs = 0; - else - radeon->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS && - radeon->radeonScreen->irq); + radeon->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS && + radeon->radeonScreen->irq); radeon->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS); diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c index 049284ef8c5..7b7392b2179 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.c +++ b/src/mesa/drivers/dri/radeon/radeon_texture.c @@ -101,7 +101,12 @@ void radeonFreeTexImageData(GLcontext *ctx, struct gl_texture_image *timage) /* Set Data pointer and additional data for mapped texture image */ static void teximage_set_map_data(radeon_texture_image *image) { - radeon_mipmap_level *lvl = &image->mt->levels[image->mtlevel]; + radeon_mipmap_level *lvl; + + if (!image->mt) + return; + + lvl = &image->mt->levels[image->mtlevel]; image->base.Data = image->mt->bo->ptr + lvl->faces[image->mtface].offset; image->base.RowStride = lvl->rowstride / image->mt->bpp; @@ -969,7 +974,7 @@ int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *t radeon_texture_image *image = get_radeon_texture_image(texObj->Image[face][level]); if (RADEON_DEBUG & RADEON_TEXTURE) fprintf(stderr, " face %i, level %i... %p vs %p ", face, level, t->mt, image->mt); - if (t->mt == image->mt) { + if (t->mt == image->mt || (!image->mt && !image->base.Data)) { if (RADEON_DEBUG & RADEON_TEXTURE) fprintf(stderr, "OK\n"); diff --git a/src/mesa/drivers/dri/swrast/swrast.c b/src/mesa/drivers/dri/swrast/swrast.c index cd499cd5d2e..f4947daa063 100644 --- a/src/mesa/drivers/dri/swrast/swrast.c +++ b/src/mesa/drivers/dri/swrast/swrast.c @@ -116,6 +116,7 @@ const struct dri_extension card_extensions[] = { "GL_NV_depth_clamp", NULL }, { "GL_NV_vertex_program", GL_NV_vertex_program_functions }, { "GL_NV_fragment_program", GL_NV_fragment_program_functions }, + { "GL_NV_fragment_program_option", NULL }, { NULL, NULL } }; |