From 5db8ebb8f534907614247afaf1dd8621b2d0462e Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Thu, 3 Sep 2009 14:06:42 -0700 Subject: Enable GL_NV_fragment_program_option for software rendering At this point the extension is not fully implemented. --- src/mesa/drivers/dri/swrast/swrast.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/swrast/swrast.c b/src/mesa/drivers/dri/swrast/swrast.c index 3aa7843b1bc..d8de5cca808 100644 --- a/src/mesa/drivers/dri/swrast/swrast.c +++ b/src/mesa/drivers/dri/swrast/swrast.c @@ -109,6 +109,7 @@ const struct dri_extension card_extensions[] = { "GL_MESA_resize_buffers", GL_MESA_resize_buffers_functions }, { "GL_NV_vertex_program", GL_NV_vertex_program_functions }, { "GL_NV_fragment_program", GL_NV_fragment_program_functions }, + { "GL_NV_fragment_program_option", NULL }, { NULL, NULL } }; -- cgit v1.2.3 From 2d729e6e3bcb0af84790cafb9824a3937954e078 Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Mon, 21 Sep 2009 10:14:25 -0400 Subject: r600: fix some issues with LIT instruction - MUL_LIT is ALU.Trans instruction - some Trans instructions can take 3 arguments - don't clobber dst.x, use dst.z as temp, it'll get written correct value in last insn - respect source swizzles --- src/mesa/drivers/dri/r600/r700_assembler.c | 69 ++++++++++++++++-------------- 1 file changed, 36 insertions(+), 33 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index efeccb25f1e..f46bc322011 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -2024,7 +2024,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) return GL_FALSE; } - if (pAsm->D.dst.math == 0) + if (uNumSrc > 1) { // Process source 1 current_source_index = 1; @@ -2880,6 +2880,11 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) return GL_FALSE; } + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + /* dst.y = max(src.x, 0.0) */ pAsm->D.dst.opcode = SQ_OP2_INST_MAX; pAsm->D.dst.rtype = dstType; @@ -2891,11 +2896,6 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) pAsm->S[0].src.rtype = srcType; pAsm->S[0].src.reg = srcReg; setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - noneg_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[0].src.swizzlex = SQ_SEL_X; - pAsm->S[0].src.swizzley = SQ_SEL_X; - pAsm->S[0].src.swizzlez = SQ_SEL_X; - pAsm->S[0].src.swizzlew = SQ_SEL_X; pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; pAsm->S[1].src.reg = tmp; setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); @@ -2909,34 +2909,47 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) return GL_FALSE; } - /* before: dst.w = log(src.y) - * after : dst.x = log(src.y) - * why change dest register is that dst.w has been initialized as 1 before - */ + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y); + + /* dst.z = log(src.y) */ pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED; pAsm->D.dst.math = 1; pAsm->D.dst.rtype = dstType; pAsm->D.dst.reg = dstReg; - pAsm->D.dst.writex = 1; + pAsm->D.dst.writex = 0; pAsm->D.dst.writey = 0; - pAsm->D.dst.writez = 0; + pAsm->D.dst.writez = 1; pAsm->D.dst.writew = 0; pAsm->S[0].src.rtype = srcType; pAsm->S[0].src.reg = srcReg; setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - noneg_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[0].src.swizzlex = SQ_SEL_Y; - pAsm->S[0].src.swizzley = SQ_SEL_Y; - pAsm->S[0].src.swizzlez = SQ_SEL_Y; - pAsm->S[0].src.swizzlew = SQ_SEL_Y; if( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } - /* before: tmp.x = amd MUL_LIT(src.w, dst.w, src.x ) */ - /* after : tmp.x = amd MUL_LIT(src.w, dst.x, src.x ) */ + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, 2) ) + { + return GL_FALSE; + } + + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W); + + swizzleagain_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X); + + /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT; + pAsm->D.dst.math = 1; pAsm->D.dst.op3 = 1; pAsm->D.dst.rtype = DST_REG_TEMPORARY; pAsm->D.dst.reg = tmp; @@ -2948,29 +2961,19 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) pAsm->S[0].src.rtype = srcType; pAsm->S[0].src.reg = srcReg; setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - noneg_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[0].src.swizzlex = SQ_SEL_W; - pAsm->S[0].src.swizzley = SQ_SEL_W; - pAsm->S[0].src.swizzlez = SQ_SEL_W; - pAsm->S[0].src.swizzlew = SQ_SEL_W; pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; pAsm->S[1].src.reg = dstReg; setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); noneg_PVSSRC(&(pAsm->S[1].src)); - pAsm->S[1].src.swizzlex = SQ_SEL_X; - pAsm->S[1].src.swizzley = SQ_SEL_X; - pAsm->S[1].src.swizzlez = SQ_SEL_X; - pAsm->S[1].src.swizzlew = SQ_SEL_X; + pAsm->S[1].src.swizzlex = SQ_SEL_Z; + pAsm->S[1].src.swizzley = SQ_SEL_Z; + pAsm->S[1].src.swizzlez = SQ_SEL_Z; + pAsm->S[1].src.swizzlew = SQ_SEL_Z; pAsm->S[2].src.rtype = srcType; pAsm->S[2].src.reg = srcReg; setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE); - noneg_PVSSRC(&(pAsm->S[2].src)); - pAsm->S[2].src.swizzlex = SQ_SEL_X; - pAsm->S[2].src.swizzley = SQ_SEL_X; - pAsm->S[2].src.swizzlez = SQ_SEL_X; - pAsm->S[2].src.swizzlew = SQ_SEL_X; if( GL_FALSE == next_ins(pAsm) ) { -- cgit v1.2.3 From 2b83483fb43386bd4b8d199d371a3e513828695f Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 20 May 2009 14:05:03 -0700 Subject: intel: Mark the FBO as incomplete if there's no intel_renderbuffer for it. This happens to rendering with textures with a border, which had resulted in a segfault on dereferencing the irb. (cherry-picked from commit 8bba183b9eeb162661a287bf2e118c6dd419dd24) --- src/mesa/drivers/dri/intel/intel_fbo.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 30f58b1f44a..ed7c78e06c4 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -680,6 +680,11 @@ intel_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb) if (rb == NULL) continue; + if (irb == NULL) { + fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; + continue; + } + switch (irb->texformat->MesaFormat) { case MESA_FORMAT_ARGB8888: case MESA_FORMAT_RGB565: -- cgit v1.2.3 From ab4ec85f6c04fdb5fabb0c74593643c31f630ac3 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 19 Sep 2009 18:46:51 +0200 Subject: r300: fix a typo --- src/mesa/drivers/dri/r300/r300_render.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index b5ddfdc9f82..3cd38753b8a 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -475,7 +475,7 @@ void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode) /* update only if we have disabled all tcl fallbacks */ if (rmesa->options.hw_tcl_enabled) { - if ((old_fallback & R300_RASTER_FALLBACK_MASK) == bit) { + if ((old_fallback & R300_TCL_FALLBACK_MASK) == bit) { R300_STATECHANGE(rmesa, vap_cntl_status); rmesa->hw.vap_cntl_status.cmd[1] &= ~R300_VAP_TCL_BYPASS; } -- cgit v1.2.3 From ff5535c5219047fc56f89c55ee44899d218dc234 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sun, 20 Sep 2009 13:54:59 +0200 Subject: radeon: update buffer map/unmap code for changes introduced in 92033a9516942d7272ce4bf36ecd422009bbaf60 and 822c7964819ca1fcc270880d4ca8b3de8a4276d0 --- src/mesa/drivers/dri/radeon/radeon_buffer_objects.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c index a24b6dac265..8fac5c6c512 100644 --- a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c +++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c @@ -187,7 +187,11 @@ radeonMapBuffer(GLcontext * ctx, radeon_bo_map(radeon_obj->bo, access == GL_WRITE_ONLY_ARB); - return obj->Pointer = radeon_obj->bo->ptr; + obj->Pointer = radeon_obj->bo->ptr; + obj->Length = obj->Size; + obj->Offset = 0; + + return obj->Pointer; } @@ -203,9 +207,12 @@ radeonUnmapBuffer(GLcontext * ctx, if (radeon_obj->bo != NULL) { radeon_bo_unmap(radeon_obj->bo); - obj->Pointer = NULL; } + obj->Pointer = NULL; + obj->Offset = 0; + obj->Length = 0; + return GL_TRUE; } -- cgit v1.2.3 From 1869bdabbac0926c7da8bfd9e22616cab9457126 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 21 Sep 2009 16:30:14 -0400 Subject: r600: various cleanups - max texture size is 8k, but mesa doesn't support that at the moment. - attempt to set shader limits to what the hw actually supports - clean up some old r300 cruft - no need to explicitly disable irqs. This is fixed in the drm now. Signed-off-by: Alex Deucher --- src/mesa/drivers/dri/r600/r600_context.c | 43 +++++++++++----------- src/mesa/drivers/dri/r600/r600_context.h | 19 ---------- .../drivers/dri/radeon/radeon_common_context.c | 7 +--- 3 files changed, 24 insertions(+), 45 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index f8fd9c13d77..6a90a5bcd19 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -284,8 +284,8 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, ctx->Const.MaxTextureMaxAnisotropy = 16.0; ctx->Const.MaxTextureLodBias = 16.0; - ctx->Const.MaxTextureLevels = 13; - ctx->Const.MaxTextureRectSize = 4096; + ctx->Const.MaxTextureLevels = 13; /* hw support 14 */ + ctx->Const.MaxTextureRectSize = 4096; /* hw support 8192 */ ctx->Const.MinPointSize = 0x0001 / 8.0; ctx->Const.MinPointSizeAA = 0x0001 / 8.0; @@ -331,25 +331,26 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, _tnl_allow_vertex_fog(ctx, GL_TRUE); /* currently bogus data */ - ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; - ctx->Const.VertexProgram.MaxNativeInstructions = - VSF_MAX_FRAGMENT_LENGTH / 4; - ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */ - ctx->Const.VertexProgram.MaxTemps = 32; - ctx->Const.VertexProgram.MaxNativeTemps = - /*VSF_MAX_FRAGMENT_TEMPS */ 32; - ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */ - ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; - - ctx->Const.FragmentProgram.MaxNativeTemps = PFS_NUM_TEMP_REGS; - ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */ - ctx->Const.FragmentProgram.MaxNativeParameters = PFS_NUM_CONST_REGS; - ctx->Const.FragmentProgram.MaxNativeAluInstructions = PFS_MAX_ALU_INST; - ctx->Const.FragmentProgram.MaxNativeTexInstructions = PFS_MAX_TEX_INST; - ctx->Const.FragmentProgram.MaxNativeInstructions = - PFS_MAX_ALU_INST + PFS_MAX_TEX_INST; - ctx->Const.FragmentProgram.MaxNativeTexIndirections = - PFS_MAX_TEX_INDIRECT; + ctx->Const.VertexProgram.MaxInstructions = 8192; /* in theory no limit */ + ctx->Const.VertexProgram.MaxNativeInstructions = 8192; + ctx->Const.VertexProgram.MaxNativeAttribs = 160; + ctx->Const.VertexProgram.MaxTemps = 256; /* 256 for reg-based constants, inline consts also supported */ + ctx->Const.VertexProgram.MaxNativeTemps = 256; + ctx->Const.VertexProgram.MaxNativeParameters = 256; /* ??? */ + ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; /* ??? */ + + ctx->Const.FragmentProgram.MaxNativeTemps = 256; + ctx->Const.FragmentProgram.MaxNativeAttribs = 32; + ctx->Const.FragmentProgram.MaxNativeParameters = 256; + ctx->Const.FragmentProgram.MaxNativeAluInstructions = 8192; + /* 8 per clause on r6xx, 16 on rv670/r7xx */ + if ((screen->chip_family == CHIP_FAMILY_RV670) || + (screen->chip_family >= CHIP_FAMILY_RV770)) + ctx->Const.FragmentProgram.MaxNativeTexInstructions = 16; + else + ctx->Const.FragmentProgram.MaxNativeTexInstructions = 8; + ctx->Const.FragmentProgram.MaxNativeInstructions = 8192; + ctx->Const.FragmentProgram.MaxNativeTexIndirections = 8; /* ??? */ ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* and these are?? */ ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h index c59df7505af..9397ecde81b 100644 --- a/src/mesa/drivers/dri/r600/r600_context.h +++ b/src/mesa/drivers/dri/r600/r600_context.h @@ -86,29 +86,10 @@ extern int hw_tcl_on; #include "tnl_dd/t_dd_vertex.h" #undef TAG -#define PFS_MAX_ALU_INST 64 -#define PFS_MAX_TEX_INST 64 -#define PFS_MAX_TEX_INDIRECT 4 -#define PFS_NUM_TEMP_REGS 32 -#define PFS_NUM_CONST_REGS 16 - -#define R600_MAX_AOS_ARRAYS 16 - -#define REG_COORDS 0 -#define REG_COLOR0 1 -#define REG_TEX0 2 - #define R600_FALLBACK_NONE 0 #define R600_FALLBACK_TCL 1 #define R600_FALLBACK_RAST 2 -enum -{ - NO_SHIFT = 0, - LEFT_SHIFT = 1, - RIGHT_SHIFT = 2, -}; - struct r600_hw_state { struct radeon_state_atom sq; struct radeon_state_atom db; diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index 1c53c04da77..6b9b1e3c5e4 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -227,11 +227,8 @@ GLboolean radeonInitContext(radeonContextPtr radeon, fthrottle_mode = driQueryOptioni(&radeon->optionCache, "fthrottle_mode"); radeon->iw.irq_seq = -1; radeon->irqsEmitted = 0; - if (IS_R600_CLASS(radeon->radeonScreen)) - radeon->do_irqs = 0; - else - radeon->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS && - radeon->radeonScreen->irq); + radeon->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS && + radeon->radeonScreen->irq); radeon->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS); -- cgit v1.2.3 From c63e78b3e583e39ef296f1c2c9a34c90eb221503 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 21 Sep 2009 16:48:55 -0400 Subject: r600: fix typo in the last commit 128 gprs, 256 reg-based consts --- src/mesa/drivers/dri/r600/r600_context.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index 6a90a5bcd19..354b263f5cb 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -330,16 +330,16 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, _tnl_allow_pixel_fog(ctx, GL_FALSE); _tnl_allow_vertex_fog(ctx, GL_TRUE); - /* currently bogus data */ + /* 256 for reg-based consts, inline consts also supported */ ctx->Const.VertexProgram.MaxInstructions = 8192; /* in theory no limit */ ctx->Const.VertexProgram.MaxNativeInstructions = 8192; ctx->Const.VertexProgram.MaxNativeAttribs = 160; - ctx->Const.VertexProgram.MaxTemps = 256; /* 256 for reg-based constants, inline consts also supported */ - ctx->Const.VertexProgram.MaxNativeTemps = 256; - ctx->Const.VertexProgram.MaxNativeParameters = 256; /* ??? */ + ctx->Const.VertexProgram.MaxTemps = 128; + ctx->Const.VertexProgram.MaxNativeTemps = 128; + ctx->Const.VertexProgram.MaxNativeParameters = 256; ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; /* ??? */ - ctx->Const.FragmentProgram.MaxNativeTemps = 256; + ctx->Const.FragmentProgram.MaxNativeTemps = 128; ctx->Const.FragmentProgram.MaxNativeAttribs = 32; ctx->Const.FragmentProgram.MaxNativeParameters = 256; ctx->Const.FragmentProgram.MaxNativeAluInstructions = 8192; -- cgit v1.2.3 From b1c9c5a800a485e3e066312f5736c93ef2774c9b Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 19 Sep 2009 18:46:51 +0200 Subject: r300: fix a typo --- src/mesa/drivers/dri/r300/r300_render.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index b5ddfdc9f82..3cd38753b8a 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -475,7 +475,7 @@ void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode) /* update only if we have disabled all tcl fallbacks */ if (rmesa->options.hw_tcl_enabled) { - if ((old_fallback & R300_RASTER_FALLBACK_MASK) == bit) { + if ((old_fallback & R300_TCL_FALLBACK_MASK) == bit) { R300_STATECHANGE(rmesa, vap_cntl_status); rmesa->hw.vap_cntl_status.cmd[1] &= ~R300_VAP_TCL_BYPASS; } -- cgit v1.2.3 From 4916a5a2e72b05c176809dd0db5066a966a45b80 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sun, 20 Sep 2009 13:54:59 +0200 Subject: radeon: update buffer map/unmap code for changes introduced in 92033a9516942d7272ce4bf36ecd422009bbaf60 and 822c7964819ca1fcc270880d4ca8b3de8a4276d0 --- src/mesa/drivers/dri/radeon/radeon_buffer_objects.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c index a24b6dac265..8fac5c6c512 100644 --- a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c +++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c @@ -187,7 +187,11 @@ radeonMapBuffer(GLcontext * ctx, radeon_bo_map(radeon_obj->bo, access == GL_WRITE_ONLY_ARB); - return obj->Pointer = radeon_obj->bo->ptr; + obj->Pointer = radeon_obj->bo->ptr; + obj->Length = obj->Size; + obj->Offset = 0; + + return obj->Pointer; } @@ -203,9 +207,12 @@ radeonUnmapBuffer(GLcontext * ctx, if (radeon_obj->bo != NULL) { radeon_bo_unmap(radeon_obj->bo); - obj->Pointer = NULL; } + obj->Pointer = NULL; + obj->Offset = 0; + obj->Length = 0; + return GL_TRUE; } -- cgit v1.2.3 From 8cc12ffb34769a84050be034d19921af811a3406 Mon Sep 17 00:00:00 2001 From: Nicolai Hähnle Date: Tue, 22 Sep 2009 20:57:05 +0200 Subject: r300: Fix crash reported in bug #24066 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nicolai Hähnle --- src/mesa/drivers/dri/r300/r300_vertprog.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index 0cb7dde985b..2f7b67c1431 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -43,6 +43,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "compiler/radeon_compiler.h" #include "compiler/radeon_nqssadce.h" #include "r300_context.h" +#include "r300_fragprog_common.h" #include "r300_state.h" /** @@ -298,6 +299,20 @@ struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx) struct r300_vertex_program *vp; vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; + + if (!r300->selected_fp) { + /* This can happen when GetProgramiv is called to check + * whether the program runs natively. + * + * To be honest, this is not a very good solution, + * but solving the problem of reporting good values + * for those queries is tough anyway considering that + * we recompile vertex programs based on the precise + * fragment program that is in use. + */ + r300SelectAndTranslateFragmentShader(ctx); + } + wanted_key.FpReads = r300->selected_fp->InputsRead; wanted_key.FogAttr = r300->selected_fp->fog_attr; wanted_key.WPosAttr = r300->selected_fp->wpos_attr; -- cgit v1.2.3 From 81283b0bf0a8f7b31517adc224c20531e27fab42 Mon Sep 17 00:00:00 2001 From: Richard Li Date: Tue, 22 Sep 2009 16:39:11 -0400 Subject: r600 : add draw_prim support. --- src/mesa/drivers/dri/r600/Makefile | 1 + src/mesa/drivers/dri/r600/r600_context.c | 3 + src/mesa/drivers/dri/r600/r600_context.h | 32 ++ src/mesa/drivers/dri/r600/r700_assembler.c | 127 ++++ src/mesa/drivers/dri/r600/r700_assembler.h | 8 + src/mesa/drivers/dri/r600/r700_chip.c | 100 +++- src/mesa/drivers/dri/r600/r700_render.c | 667 +++++++++++++++++++++- src/mesa/drivers/dri/r600/r700_shader.c | 90 +++ src/mesa/drivers/dri/r600/r700_shader.h | 1 + src/mesa/drivers/dri/r600/r700_state.c | 20 +- src/mesa/drivers/dri/r600/r700_state.h | 1 + src/mesa/drivers/dri/r600/r700_vertprog.c | 195 ++++++- src/mesa/drivers/dri/r600/r700_vertprog.h | 17 +- src/mesa/drivers/dri/r600/radeon_buffer_objects.c | 1 + src/mesa/drivers/dri/r600/radeon_buffer_objects.h | 1 + 15 files changed, 1217 insertions(+), 47 deletions(-) create mode 120000 src/mesa/drivers/dri/r600/radeon_buffer_objects.c create mode 120000 src/mesa/drivers/dri/r600/radeon_buffer_objects.h (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/Makefile b/src/mesa/drivers/dri/r600/Makefile index 36bf773c054..7d5a7b1ab6f 100644 --- a/src/mesa/drivers/dri/r600/Makefile +++ b/src/mesa/drivers/dri/r600/Makefile @@ -29,6 +29,7 @@ COMMON_SOURCES = \ RADEON_COMMON_SOURCES = \ radeon_bo_legacy.c \ radeon_common_context.c \ + radeon_buffer_objects.c \ radeon_common.c \ radeon_cs_legacy.c \ radeon_dma.c \ diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index 354b263f5cb..6fc6d9d7bfa 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -257,6 +257,7 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, r600InitTextureFuncs(&functions); r700InitShaderFuncs(&functions); r700InitIoctlFuncs(&functions); + radeonInitBufferObjectFuncs(&functions); if (!radeonInitContext(&r600->radeon, &functions, glVisual, driContextPriv, @@ -375,6 +376,8 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); } + r700InitDraw(ctx); + radeon_fbo_init(&r600->radeon); radeonInitSpanFuncs( ctx ); diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h index 9397ecde81b..a296ea23fa4 100644 --- a/src/mesa/drivers/dri/r600/r600_context.h +++ b/src/mesa/drivers/dri/r600/r600_context.h @@ -126,6 +126,34 @@ struct r600_hw_state { struct radeon_state_atom tx_brdr_clr; }; +typedef struct StreamDesc +{ + GLint size; //number of data element + GLenum type; //data element type + GLsizei stride; + + struct radeon_bo *bo; + GLint bo_offset; + + GLuint dwords; + GLuint dst_loc; + GLuint _signed; + GLboolean normalize; + GLboolean is_named_bo; + GLubyte element; +} StreamDesc; + +typedef struct r700_index_buffer +{ + struct radeon_bo *bo; + int bo_offset; + + GLboolean is_32bit; + GLuint count; + + GLboolean bHostIb; +} r700_index_buffer; + /** * \brief R600 context structure. */ @@ -144,6 +172,9 @@ struct r600_context { GLvector4f dummy_attrib[_TNL_ATTRIB_MAX]; GLvector4f *temp_attrib[_TNL_ATTRIB_MAX]; + GLint nNumActiveAos; + StreamDesc stream_desc[VERT_ATTRIB_MAX]; + struct r700_index_buffer ind_buf; }; #define R700_CONTEXT(ctx) ((context_t *)(ctx->DriverCtx)) @@ -177,6 +208,7 @@ extern GLboolean r700SyncSurf(context_t *context, extern void r700SetupStreams(GLcontext * ctx); extern void r700Start3D(context_t *context); extern void r600InitAtoms(context_t *context); +extern void r700InitDraw(GLcontext *ctx); #define RADEON_D_CAPTURE 0 #define RADEON_D_PLAYBACK 1 diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index f46bc322011..81269350e43 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -786,6 +786,133 @@ GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm, return GL_TRUE; } +GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm, + GLuint destination_register, + GLenum type, + GLint size, + GLubyte element, + GLuint _signed, + GLboolean normalize, + VTX_FETCH_METHOD * pFetchMethod) +{ + GLuint client_size_inbyte; + GLuint data_format; + GLuint mega_fetch_count; + GLuint is_mega_fetch_flag; + + R700VertexGenericFetch* vfetch_instruction_ptr; + R700VertexGenericFetch* assembled_vfetch_instruction_ptr + = pAsm->vfetch_instruction_ptr_array[element]; + + if (assembled_vfetch_instruction_ptr == NULL) + { + vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch); + if (vfetch_instruction_ptr == NULL) + { + return GL_FALSE; + } + Init_R700VertexGenericFetch(vfetch_instruction_ptr); + } + else + { + vfetch_instruction_ptr = assembled_vfetch_instruction_ptr; + } + + data_format = GetSurfaceFormat(type, size, &client_size_inbyte); + + if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here + { + //TODO : mini fetch + } + else + { + mega_fetch_count = MEGA_FETCH_BYTES - 1; + is_mega_fetch_flag = 0x1; + pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte; + } + + vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH; + vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA; + vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0; + + vfetch_instruction_ptr->m_Word0.f.buffer_id = element; + vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0; + vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE; + vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X; + vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count; + + vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X; + vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y; + vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z; + vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W; + + vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1; + vfetch_instruction_ptr->m_Word1.f.data_format = data_format; + vfetch_instruction_ptr->m_Word2.f.endian_swap = SQ_ENDIAN_NONE; + + if(1 == _signed) + { + vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_SIGNED; + } + else + { + vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_UNSIGNED; + } + + if(GL_TRUE == normalize) + { + vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_NORM; + } + else + { + vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_INT; + } + + // Destination register + vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register; + vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE; + + vfetch_instruction_ptr->m_Word2.f.offset = 0; + vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0; + + vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag; + + if (assembled_vfetch_instruction_ptr == NULL) + { + if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) ) + { + return GL_FALSE; + } + + if (pAsm->vfetch_instruction_ptr_array[element] != NULL) + { + return GL_FALSE; + } + else + { + pAsm->vfetch_instruction_ptr_array[element] = vfetch_instruction_ptr; + } + } + + return GL_TRUE; +} + +GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm) +{ + GLint i; + pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE; + pAsm->cf_current_vtx_clause_ptr = NULL; + + for (i=0; ivfetch_instruction_ptr_array[ i ] = NULL; + } + + cleanup_vfetch_shaderinst(pAsm->pR700Shader); + + return GL_TRUE; +} + GLuint gethelpr(r700_AssemblerBase* pAsm) { GLuint r = pAsm->uHelpReg; diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index f9c4d849c65..4e6e20011ad 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -411,6 +411,14 @@ GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm, GLuint number_of_elements, GLenum dataElementType, VTX_FETCH_METHOD* pFetchMethod); +GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm, + GLuint destination_register, + GLenum type, + GLint size, + GLubyte element, + GLuint _signed, + GLboolean normalize, + VTX_FETCH_METHOD * pFetchMethod); GLuint gethelpr(r700_AssemblerBase* pAsm); void resethelpr(r700_AssemblerBase* pAsm); void checkop_init(r700_AssemblerBase* pAsm); diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index 06d7e9c9ab1..783427a94c9 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -208,6 +208,80 @@ static void r700SetupVTXConstants(GLcontext * ctx, } +extern int getTypeSize(GLenum type); +static void r700SetupVTXConstants2(GLcontext * ctx, + void * pAos, + StreamDesc * pStreamDesc) +{ + context_t *context = R700_CONTEXT(ctx); + struct radeon_aos * paos = (struct radeon_aos *)pAos; + unsigned int nVBsize; + BATCH_LOCALS(&context->radeon); + + unsigned int uSQ_VTX_CONSTANT_WORD0_0; + unsigned int uSQ_VTX_CONSTANT_WORD1_0; + unsigned int uSQ_VTX_CONSTANT_WORD2_0 = 0; + unsigned int uSQ_VTX_CONSTANT_WORD3_0 = 0; + unsigned int uSQ_VTX_CONSTANT_WORD6_0 = 0; + + if (!paos->bo) + return; + + if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV610) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV620) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS780) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS880) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV710)) + r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, TC_ACTION_ENA_bit); + else + r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit); + + if(0 == pStreamDesc->stride) + { + nVBsize = paos->count * pStreamDesc->size * getTypeSize(pStreamDesc->type); + } + else + { + nVBsize = paos->count * pStreamDesc->stride; + } + + uSQ_VTX_CONSTANT_WORD0_0 = paos->offset; + uSQ_VTX_CONSTANT_WORD1_0 = nVBsize - 1; + + SETfield(uSQ_VTX_CONSTANT_WORD2_0, 0, BASE_ADDRESS_HI_shift, BASE_ADDRESS_HI_mask); /* TODO */ + SETfield(uSQ_VTX_CONSTANT_WORD2_0, pStreamDesc->stride, SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift, + SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask); + SETfield(uSQ_VTX_CONSTANT_WORD2_0, GetSurfaceFormat(pStreamDesc->type, pStreamDesc->size, NULL), + SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift, + SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask); /* TODO : trace back api for initial data type, not only GL_FLOAT */ + SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_SCALED, + SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask); + SETbit(uSQ_VTX_CONSTANT_WORD2_0, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit); + + SETfield(uSQ_VTX_CONSTANT_WORD3_0, 1, MEM_REQUEST_SIZE_shift, MEM_REQUEST_SIZE_mask); + SETfield(uSQ_VTX_CONSTANT_WORD6_0, SQ_TEX_VTX_VALID_BUFFER, + SQ_TEX_RESOURCE_WORD6_0__TYPE_shift, SQ_TEX_RESOURCE_WORD6_0__TYPE_mask); + + BEGIN_BATCH_NO_AUTOSTATE(9 + 2); + + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); + R600_OUT_BATCH((pStreamDesc->element + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE); + R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD0_0); + R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD1_0); + R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD2_0); + R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD3_0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD6_0); + R600_OUT_BATCH_RELOC(uSQ_VTX_CONSTANT_WORD0_0, + paos->bo, + uSQ_VTX_CONSTANT_WORD0_0, + RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); + COMMIT_BATCH(); + +} + void r700SetupStreams(GLcontext *ctx) { context_t *context = R700_CONTEXT(ctx); @@ -256,14 +330,24 @@ static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom) COMMIT_BATCH(); for(i=0; imesa_program->Base.InputsRead & (1 << i)) { - /* currently aos are packed */ - r700SetupVTXConstants(ctx, - i, - (void*)(&context->radeon.tcl.aos[j]), - (unsigned int)context->radeon.tcl.aos[j].components, - (unsigned int)context->radeon.tcl.aos[j].stride * 4, - (unsigned int)context->radeon.tcl.aos[j].count); + if(vp->mesa_program->Base.InputsRead & (1 << i)) + { + if(1 == context->selected_vp->uiVersion) + { + /* currently aos are packed */ + r700SetupVTXConstants(ctx, + i, + (void*)(&context->radeon.tcl.aos[j]), + (unsigned int)context->radeon.tcl.aos[j].components, + (unsigned int)context->radeon.tcl.aos[j].stride * 4, + (unsigned int)context->radeon.tcl.aos[j].count); + } + else + { /* context->selected_vp->uiVersion == 2 : aos not always packed */ + r700SetupVTXConstants2(ctx, + (void*)(&context->radeon.tcl.aos[j]), + &(context->stream_desc[j])); + } j++; } } diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index b1c3648ca56..b58859b6ba3 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -43,6 +43,7 @@ #include "tnl/t_context.h" #include "tnl/t_vertex.h" #include "tnl/t_pipeline.h" +#include "vbo/vbo_context.h" #include "r600_context.h" #include "r600_cmdbuf.h" @@ -53,6 +54,7 @@ #include "r700_fragprog.h" #include "r700_state.h" +#include "radeon_buffer_objects.h" #include "radeon_common_context.h" void r700WaitForIdle(context_t *context); @@ -270,46 +272,82 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim if (type < 0 || num_indices <= 0) return; - total_emit = 3 /* VGT_PRIMITIVE_TYPE */ - + 2 /* VGT_INDEX_TYPE */ - + 2 /* NUM_INSTANCES */ - + num_indices + 3; /* DRAW_INDEX_IMMD */ + total_emit = 3 /* VGT_PRIMITIVE_TYPE */ + + 2 /* VGT_INDEX_TYPE */ + + 2 /* NUM_INSTANCES */ + + num_indices + 3; /* DRAW_INDEX_IMMD */ - BEGIN_BATCH_NO_AUTOSTATE(total_emit); + BEGIN_BATCH_NO_AUTOSTATE(total_emit); // prim - SETfield(vgt_primitive_type, type, - VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask); - R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); - R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX); - R600_OUT_BATCH(vgt_primitive_type); + SETfield(vgt_primitive_type, type, + VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask); + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); + R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX); + R600_OUT_BATCH(vgt_primitive_type); // index type - SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); - R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); - R600_OUT_BATCH(vgt_index_type); + SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); + R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); + R600_OUT_BATCH(vgt_index_type); // num instances R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); R600_OUT_BATCH(1); // draw packet - vgt_num_indices = num_indices; - SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask); + vgt_num_indices = num_indices; + SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask); SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask); - R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1))); - R600_OUT_BATCH(vgt_num_indices); - R600_OUT_BATCH(vgt_draw_initiator); + R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1))); + R600_OUT_BATCH(vgt_num_indices); + R600_OUT_BATCH(vgt_draw_initiator); + if(NULL == context->ind_buf.bo) + { for (i = start; i < (start + num_indices); i++) { - if(vb->Elts) - R600_OUT_BATCH(vb->Elts[i]); - else - R600_OUT_BATCH(i); + if(vb->Elts) + { + R600_OUT_BATCH(vb->Elts[i]); + } + else + R600_OUT_BATCH(i); } - END_BATCH(); - COMMIT_BATCH(); + } + else + { + if(GL_TRUE == context->ind_buf.bHostIb) + { + if(GL_TRUE != context->ind_buf.is_32bit) + { + GLushort * pIndex = (GLushort*)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); + pIndex += start; + for (i = 0; i < num_indices; i++) + { + R600_OUT_BATCH(*pIndex); + pIndex++; + } + } + else + { + GLuint * pIndex = (GLuint*)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); + pIndex += start; + + for (i = 0; i < num_indices; i++) + { + R600_OUT_BATCH(*pIndex); + pIndex++; + } + } + } + else + { + /* TODO : hw ib draw */ + } + } + END_BATCH(); + COMMIT_BATCH(); } /* start 3d, idle, cb/db flush */ @@ -477,4 +515,585 @@ const struct tnl_pipeline_stage *r700_pipeline[] = 0, }; +#define CONVERT( TYPE, MACRO ) do { \ + GLuint i, j, sz; \ + sz = input->Size; \ + if (input->Normalized) { \ + for (i = 0; i < count; i++) { \ + const TYPE *in = (TYPE *)src_ptr; \ + for (j = 0; j < sz; j++) { \ + *dst_ptr++ = MACRO(*in); \ + in++; \ + } \ + src_ptr += stride; \ + } \ + } else { \ + for (i = 0; i < count; i++) { \ + const TYPE *in = (TYPE *)src_ptr; \ + for (j = 0; j < sz; j++) { \ + *dst_ptr++ = (GLfloat)(*in); \ + in++; \ + } \ + src_ptr += stride; \ + } \ + } \ +} while (0) + +/** + * Convert attribute data type to float + * If the attribute uses named buffer object replace the bo with newly allocated bo + */ +static void r700ConvertAttrib(GLcontext *ctx, int count, + const struct gl_client_array *input, + struct StreamDesc *attr) +{ + context_t *context = R700_CONTEXT(ctx); + const GLvoid *src_ptr; + GLboolean mapped_named_bo = GL_FALSE; + GLfloat *dst_ptr; + GLuint stride; + + stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB; + + /* Convert value for first element only */ + if (input->StrideB == 0) + { + count = 1; + } + + if (input->BufferObj->Name) + { + if (!input->BufferObj->Pointer) + { + ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + mapped_named_bo = GL_TRUE; + } + + src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr); + } + else + { + src_ptr = input->Ptr; + } + + radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, + sizeof(GLfloat) * input->Size * count, 32); + dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset); + + assert(src_ptr != NULL); + + switch (input->Type) + { + case GL_DOUBLE: + CONVERT(GLdouble, (GLfloat)); + break; + case GL_UNSIGNED_INT: + CONVERT(GLuint, UINT_TO_FLOAT); + break; + case GL_INT: + CONVERT(GLint, INT_TO_FLOAT); + break; + case GL_UNSIGNED_SHORT: + CONVERT(GLushort, USHORT_TO_FLOAT); + break; + case GL_SHORT: + CONVERT(GLshort, SHORT_TO_FLOAT); + break; + case GL_UNSIGNED_BYTE: + assert(input->Format != GL_BGRA); + CONVERT(GLubyte, UBYTE_TO_FLOAT); + break; + case GL_BYTE: + CONVERT(GLbyte, BYTE_TO_FLOAT); + break; + default: + assert(0); + break; + } + + if (mapped_named_bo) + { + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + } +} + +static void r700AlignDataToDword(GLcontext *ctx, + const struct gl_client_array *input, + int count, + struct StreamDesc *attr) +{ + context_t *context = R700_CONTEXT(ctx); + const int dst_stride = (input->StrideB + 3) & ~3; + const int size = getTypeSize(input->Type) * input->Size * count; + GLboolean mapped_named_bo = GL_FALSE; + + radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, size, 32); + + if (!input->BufferObj->Pointer) + { + ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + mapped_named_bo = GL_TRUE; + } + + { + GLvoid *src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr); + GLvoid *dst_ptr = ADD_POINTERS(attr->bo->ptr, attr->bo_offset); + int i; + + for (i = 0; i < count; ++i) + { + _mesa_memcpy(dst_ptr, src_ptr, input->StrideB); + src_ptr += input->StrideB; + dst_ptr += dst_stride; + } + } + + if (mapped_named_bo) + { + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + } + + attr->stride = dst_stride; +} + +static void r700SetupStreams2(GLcontext *ctx, const struct gl_client_array *input[], int count) +{ + context_t *context = R700_CONTEXT(ctx); + GLuint stride; + int ret; + int i, index; + + R600_STATECHANGE(context, vtx); + + for(index = 0; index < context->nNumActiveAos; index++) + { + struct radeon_aos *aos = &context->radeon.tcl.aos[index]; + i = context->stream_desc[index].element; + + stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB; + + if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT || +#if MESA_BIG_ENDIAN + getTypeSize(input[i]->Type) != 4 || +#endif + stride < 4) + { + r700ConvertAttrib(ctx, count, input[i], &context->stream_desc[index]); + } + else + { + if (input[i]->BufferObj->Name) + { + if (stride % 4 != 0) + { + assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0); + r700AlignDataToDword(ctx, input[i], count, &context->stream_desc[index]); + context->stream_desc[index].is_named_bo = GL_FALSE; + } + else + { + context->stream_desc[index].stride = input[i]->StrideB; + context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr; + context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo; + context->stream_desc[index].is_named_bo = GL_TRUE; + } + } + else + { + int size; + int local_count = count; + uint32_t *dst; + + if (input[i]->StrideB == 0) + { + size = getTypeSize(input[i]->Type) * input[i]->Size; + local_count = 1; + } + else + { + size = getTypeSize(input[i]->Type) * input[i]->Size * local_count; + } + + radeonAllocDmaRegion(&context->radeon, &context->stream_desc[index].bo, + &context->stream_desc[index].bo_offset, size, 32); + assert(context->stream_desc[index].bo->ptr != NULL); + dst = (uint32_t *)ADD_POINTERS(context->stream_desc[index].bo->ptr, + context->stream_desc[index].bo_offset); + + switch (context->stream_desc[index].dwords) + { + case 1: + radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count); + context->stream_desc[index].stride = 4; + break; + case 2: + radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count); + context->stream_desc[index].stride = 8; + break; + case 3: + radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count); + context->stream_desc[index].stride = 12; + break; + case 4: + radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count); + context->stream_desc[index].stride = 16; + break; + default: + assert(0); + break; + } + } + } + + aos->count = context->stream_desc[index].stride == 0 ? 1 : count; + aos->stride = context->stream_desc[index].stride / sizeof(float); + aos->components = context->stream_desc[index].dwords; + aos->bo = context->stream_desc[index].bo; + aos->offset = context->stream_desc[index].bo_offset; + + if(context->stream_desc[index].is_named_bo) + { + radeon_cs_space_add_persistent_bo(context->radeon.cmdbuf.cs, + context->stream_desc[index].bo, + RADEON_GEM_DOMAIN_GTT, 0); + } + } + + context->radeon.tcl.aos_count = context->nNumActiveAos; + ret = radeon_cs_space_check_with_bo(context->radeon.cmdbuf.cs, + first_elem(&context->radeon.dma.reserved)->bo, + RADEON_GEM_DOMAIN_GTT, 0); +} + +static void r700FreeData(GLcontext *ctx) +{ + /* Need to zero tcl.aos[n].bo and tcl.elt_dma_bo + * to prevent double unref in radeonReleaseArrays + * called during context destroy + */ + context_t *context = R700_CONTEXT(ctx); + + int i; + + for (i = 0; i < context->nNumActiveAos; i++) + { + if (!context->stream_desc[i].is_named_bo) + { + radeon_bo_unref(context->stream_desc[i].bo); + } + context->radeon.tcl.aos[i].bo = NULL; + } + + if (context->ind_buf.bo != NULL) + { + if(context->ind_buf.bHostIb != GL_TRUE) + { + radeon_bo_unref(context->ind_buf.bo); + } + else + { + FREE(context->ind_buf.bo->ptr); + FREE(context->ind_buf.bo); + context->ind_buf.bo = NULL; + } + } +} + +static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) +{ + context_t *context = R700_CONTEXT(ctx); + GLvoid *src_ptr; + GLuint *out; + int i; + GLboolean mapped_named_bo = GL_FALSE; + + if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) + { + ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + mapped_named_bo = GL_TRUE; + assert(mesa_ind_buf->obj->Pointer != NULL); + } + src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); + + if (mesa_ind_buf->type == GL_UNSIGNED_BYTE) + { + GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); + GLubyte *in = (GLubyte *)src_ptr; + + if(context->ind_buf.bHostIb != GL_TRUE) + { + radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, + &context->ind_buf.bo_offset, size, 4); + + assert(context->ind_buf.bo->ptr != NULL); + out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); + } + else + { + context->ind_buf.bo = MALLOC_STRUCT(radeon_bo); + context->ind_buf.bo->ptr = ALIGN_MALLOC(size, 4); + context->ind_buf.bo_offset = 0; + out = (GLuint *)context->ind_buf.bo->ptr; + } + + for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) + { + *out++ = in[i] | in[i + 1] << 16; + } + + if (i < mesa_ind_buf->count) + { + *out++ = in[i]; + } + +#if MESA_BIG_ENDIAN + } + else + { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */ + GLushort *in = (GLushort *)src_ptr; + GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); + + if(context->ind_buf.bHostIb != GL_TRUE) + { + radeonAllocDmaRegion(&context->radeon, &r300->ind_buf.bo, + &context->ind_buf.bo_offset, size, 4); + + assert(context->ind_buf.bo->ptr != NULL); + out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); + } + else + { + context->ind_buf.bo = MALLOC_STRUCT(radeon_bo); + context->ind_buf.bo->ptr = ALIGN_MALLOC(size, 4); + context->ind_buf.bo_offset = 0; + out = (GLuint *)context->ind_buf.bo->ptr; + } + + for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) + { + *out++ = in[i] | in[i + 1] << 16; + } + + if (i < mesa_ind_buf->count) + { + *out++ = in[i]; + } +#endif + } + + context->ind_buf.is_32bit = GL_FALSE; + context->ind_buf.count = mesa_ind_buf->count; + + if (mapped_named_bo) + { + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + } +} + +static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) +{ + context_t *context = R700_CONTEXT(ctx); + + if (!mesa_ind_buf) { + context->ind_buf.bo = NULL; + return; + } + + context->ind_buf.bHostIb = GL_TRUE; + +#if MESA_BIG_ENDIAN + if (mesa_ind_buf->type == GL_UNSIGNED_INT) + { +#else + if (mesa_ind_buf->type != GL_UNSIGNED_BYTE) + { +#endif + const GLvoid *src_ptr; + GLvoid *dst_ptr; + GLboolean mapped_named_bo = GL_FALSE; + + if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) + { + ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + assert(mesa_ind_buf->obj->Pointer != NULL); + mapped_named_bo = GL_TRUE; + } + + src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); + + const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type); + + if(context->ind_buf.bHostIb != GL_TRUE) + { + radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, + &context->ind_buf.bo_offset, size, 4); + assert(context->ind_buf.bo->ptr != NULL); + dst_ptr = ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); + } + else + { + context->ind_buf.bo = MALLOC_STRUCT(radeon_bo); + context->ind_buf.bo->ptr = ALIGN_MALLOC(size, 4); + context->ind_buf.bo_offset = 0; + dst_ptr = context->ind_buf.bo->ptr; + } + + _mesa_memcpy(dst_ptr, src_ptr, size); + + context->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT); + context->ind_buf.count = mesa_ind_buf->count; + + if (mapped_named_bo) + { + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + } + } + else + { + r700FixupIndexBuffer(ctx, mesa_ind_buf); + } +} + +static GLboolean r700TryDrawPrims(GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLuint min_index, + GLuint max_index ) +{ + context_t *context = R700_CONTEXT(ctx); + radeonContextPtr radeon = &context->radeon; + GLuint i, id = 0; + GLboolean bValidedbuffer; + struct radeon_renderbuffer *rrb; + + if (ctx->NewState) + { + _mesa_update_state( ctx ); + } + + bValidedbuffer = r600ValidateBuffers(ctx); + + /* always emit CB base to prevent + * lock ups on some chips. + */ + R600_STATECHANGE(context, cb_target); + /* mark vtx as dirty since it changes per-draw */ + R600_STATECHANGE(context, vtx); + + _tnl_UpdateFixedFunctionProgram(ctx); + r700SetVertexFormat(ctx, arrays, max_index + 1); + r700SetupStreams2(ctx, arrays, max_index + 1); + r700UpdateShaders2(ctx); + + r700SetScissor(context); + + r700SetupVertexProgram(ctx); + + r700SetupFragmentProgram(ctx); + + r600UpdateTextureState(ctx); + + GLuint emit_end = r700PredictRenderSize(ctx) + + context->radeon.cmdbuf.cs->cdw; + + r700SetupIndexBuffer(ctx, ib); + + radeonEmitState(radeon); + + for (i = 0; i < nr_prims; ++i) + { + r700RunRenderPrimitive(ctx, + prim[i].start, + prim[i].start + prim[i].count, + prim[i].mode); + } + + /* Flush render op cached for last several quads. */ + r700WaitForIdleClean(context); + + rrb = radeon_get_colorbuffer(&context->radeon); + if (rrb && rrb->bo) + r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM, + CB_ACTION_ENA_bit | (1 << (id + 6))); + + rrb = radeon_get_depthbuffer(&context->radeon); + if (rrb && rrb->bo) + r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM, + DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit); + + r700FreeData(ctx); + + if (emit_end < context->radeon.cmdbuf.cs->cdw) + { + WARN_ONCE("Rendering was %d commands larger than predicted size." + " We might overflow command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end); + } + + return GL_TRUE; +} + +static void r700DrawPrimsRe(GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index) +{ + GLboolean retval = GL_FALSE; + + /* This check should get folded into just the places that + * min/max index are really needed. + */ + if (!index_bounds_valid) { + vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + } + + if (min_index) { + vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r700DrawPrimsRe ); + return; + } + + /* Make an attempt at drawing */ + retval = r700TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); + + /* If failed run tnl pipeline - it should take care of fallbacks */ + if (!retval) + _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); +} + +static void r700DrawPrims(GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index) +{ + context_t *context = R700_CONTEXT(ctx); + + /* For non indexed drawing, using tnl pipe. */ + if(!ib) + { + context->ind_buf.bo = NULL; + + _tnl_vbo_draw_prims(ctx, arrays, prim, nr_prims, ib, + index_bounds_valid, min_index, max_index); + return; + } + + r700DrawPrimsRe(ctx, arrays, prim, nr_prims, ib, index_bounds_valid, min_index, max_index); +} + +void r700InitDraw(GLcontext *ctx) +{ + struct vbo_context *vbo = vbo_context(ctx); + + vbo->draw_prims = r700DrawPrims; +} + diff --git a/src/mesa/drivers/dri/r600/r700_shader.c b/src/mesa/drivers/dri/r600/r700_shader.c index b4fd51c1370..955ea4e4e1d 100644 --- a/src/mesa/drivers/dri/r600/r700_shader.c +++ b/src/mesa/drivers/dri/r600/r700_shader.c @@ -60,6 +60,55 @@ void AddInstToList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * plstCFInstructions->uNumOfNode++; } +void TakeInstOutFromList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * pInst) +{ + GLuint ulIndex = 0; + GLboolean bFound = GL_FALSE; + R700ShaderInstruction * pPrevInst = NULL; + R700ShaderInstruction * pCurInst = plstCFInstructions->pHead; + + /* Need go thro list to make sure pInst is there. */ + while(NULL != pCurInst) + { + if(pCurInst == pInst) + { + bFound = GL_TRUE; + break; + } + + pPrevInst = pCurInst; + pCurInst = pCurInst->pNextInst; + } + if(GL_TRUE == bFound) + { + plstCFInstructions->uNumOfNode--; + + pCurInst = pInst->pNextInst; + ulIndex = pInst->m_uIndex; + while(NULL != pCurInst) + { + pCurInst->m_uIndex = ulIndex; + ulIndex++; + pCurInst = pCurInst->pNextInst; + } + + if(plstCFInstructions->pHead == pInst) + { + plstCFInstructions->pHead = pInst->pNextInst; + } + if(plstCFInstructions->pTail == pInst) + { + plstCFInstructions->pTail = pPrevInst; + } + if(NULL != pPrevInst) + { + pPrevInst->pNextInst = pInst->pNextInst; + } + + FREE(pInst); + } +} + void Init_R700_Shader(R700_Shader * pShader) { pShader->Type = R700_SHADER_INVALID; @@ -488,6 +537,47 @@ void DebugPrint(void) { } +void cleanup_vfetch_shaderinst(R700_Shader *pShader) +{ + R700ShaderInstruction *pInst; + R700ShaderInstruction *pInstToFree; + R700VertexInstruction *pVTXInst; + R700ControlFlowInstruction *pCFInst; + + pInst = pShader->lstVTXInstructions.pHead; + while(NULL != pInst) + { + pVTXInst = (R700VertexInstruction *)pInst; + pShader->uShaderBinaryDWORDSize -= GetInstructionSize(pVTXInst->m_ShaderInstType); + + if(NULL != pVTXInst->m_pLinkedGenericClause) + { + pCFInst = (R700ControlFlowInstruction*)(pVTXInst->m_pLinkedGenericClause); + + TakeInstOutFromList(&(pShader->lstCFInstructions), + (R700ShaderInstruction*)pCFInst); + + pShader->uShaderBinaryDWORDSize -= GetInstructionSize(pCFInst->m_ShaderInstType); + } + + pInst = pInst->pNextInst; + }; + + //destroy each item in pShader->lstVTXInstructions; + pInst = pShader->lstVTXInstructions.pHead; + while(NULL != pInst) + { + pInstToFree = pInst; + pInst = pInst->pNextInst; + FREE(pInstToFree); + }; + + //set NULL pShader->lstVTXInstructions + pShader->lstVTXInstructions.pHead=NULL; + pShader->lstVTXInstructions.pTail=NULL; + pShader->lstVTXInstructions.uNumOfNode=0; +} + void Clean_Up_Shader(R700_Shader *pShader) { FREE(pShader->pProgram); diff --git a/src/mesa/drivers/dri/r600/r700_shader.h b/src/mesa/drivers/dri/r600/r700_shader.h index bfd01e1a93a..997cb05aaf8 100644 --- a/src/mesa/drivers/dri/r600/r700_shader.h +++ b/src/mesa/drivers/dri/r600/r700_shader.h @@ -143,6 +143,7 @@ void LoadProgram(R700_Shader *pShader); void UpdateShaderRegisters(R700_Shader *pShader); void DeleteInstructions(R700_Shader *pShader); void DebugPrint(void); +void cleanup_vfetch_shaderinst(R700_Shader *pShader); void Clean_Up_Shader(R700_Shader *pShader); diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c index fc0b5116843..1043eabb149 100644 --- a/src/mesa/drivers/dri/r600/r700_state.c +++ b/src/mesa/drivers/dri/r600/r700_state.c @@ -92,7 +92,25 @@ void r700UpdateShaders (GLcontext * ctx) //---------------------------------- } } - r700SelectVertexShader(ctx); + r700SelectVertexShader(ctx, 1); + r700UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + context->radeon.NewGLState = 0; +} + +void r700UpdateShaders2(GLcontext * ctx) +{ + context_t *context = R700_CONTEXT(ctx); + + /* should only happenen once, just after context is created */ + /* TODO: shouldn't we fallback to sw here? */ + if (!ctx->FragmentProgram._Current) { + _mesa_fprintf(stderr, "No ctx->FragmentProgram._Current!!\n"); + return; + } + + r700SelectFragmentShader(ctx); + + r700SelectVertexShader(ctx, 2); r700UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); context->radeon.NewGLState = 0; } diff --git a/src/mesa/drivers/dri/r600/r700_state.h b/src/mesa/drivers/dri/r600/r700_state.h index 0f53d5b4c59..209189d8d72 100644 --- a/src/mesa/drivers/dri/r600/r700_state.h +++ b/src/mesa/drivers/dri/r600/r700_state.h @@ -35,6 +35,7 @@ extern void r700UpdateStateParameters(GLcontext * ctx, GLuint new_state); extern void r700UpdateShaders (GLcontext * ctx); +extern void r700UpdateShaders2(GLcontext * ctx); extern void r700UpdateViewportOffset(GLcontext * ctx); diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index 9ee26286d9b..e7a209be9d8 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -159,7 +159,35 @@ GLboolean Process_Vertex_Program_Vfetch_Instructions( return GL_TRUE; } -void Map_Vertex_Program(struct r700_vertex_program *vp, +GLboolean Process_Vertex_Program_Vfetch_Instructions2( + GLcontext *ctx, + struct r700_vertex_program *vp, + struct gl_vertex_program *mesa_vp) +{ + int i; + context_t *context = R700_CONTEXT(ctx); + + VTX_FETCH_METHOD vtxFetchMethod; + vtxFetchMethod.bEnableMini = GL_FALSE; + vtxFetchMethod.mega_fetch_remainder = 0; + + for(i=0; inNumActiveAos; i++) + { + assemble_vfetch_instruction2(&vp->r700AsmCode, + vp->r700AsmCode.ucVP_AttributeMap[context->stream_desc[i].element], + context->stream_desc[i].type, + context->stream_desc[i].size, + context->stream_desc[i].element, + context->stream_desc[i]._signed, + context->stream_desc[i].normalize, + &vtxFetchMethod); + } + + return GL_TRUE; +} + +void Map_Vertex_Program(GLcontext *ctx, + struct r700_vertex_program *vp, struct gl_vertex_program *mesa_vp) { GLuint ui; @@ -175,11 +203,22 @@ void Map_Vertex_Program(struct r700_vertex_program *vp, pAsm->number_used_registers += num_inputs; // Create VFETCH instructions for inputs - if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions(vp, mesa_vp) ) - { - radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions return error. \n"); - return; //error - } + if(1 == vp->uiVersion) + { + if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions(vp, mesa_vp) ) + { + radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions return error. \n"); + return; + } + } + else + { + if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions2(ctx, vp, mesa_vp) ) + { + radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions2 return error. \n"); + return; + } + } // Map Outputs pAsm->number_of_exports = Map_Vertex_Output(pAsm, mesa_vp, pAsm->number_used_registers); @@ -261,7 +300,8 @@ GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp, } struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, - struct gl_vertex_program *mesa_vp) + struct gl_vertex_program *mesa_vp, + GLint nVer) { context_t *context = R700_CONTEXT(ctx); struct r700_vertex_program *vp; @@ -271,6 +311,7 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, unsigned int i; vp = _mesa_calloc(sizeof(*vp)); + vp->uiVersion = nVer; vp->mesa_program = (struct gl_vertex_program *)_mesa_clone_program(ctx, &mesa_vp->Base); if (mesa_vp->IsPositionInvariant) @@ -296,7 +337,7 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, //Init_Program Init_r700_AssemblerBase(SPT_VP, &(vp->r700AsmCode), &(vp->r700Shader) ); - Map_Vertex_Program( vp, vp->mesa_program ); + Map_Vertex_Program(ctx, vp, vp->mesa_program ); if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, vp->mesa_program)) { @@ -325,7 +366,7 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, return vp; } -void r700SelectVertexShader(GLcontext *ctx) +void r700SelectVertexShader(GLcontext *ctx, GLint nVersion) { context_t *context = R700_CONTEXT(ctx); struct r700_vertex_program_cont *vpc; @@ -365,7 +406,7 @@ void r700SelectVertexShader(GLcontext *ctx) } } - vp = r700TranslateVertexShader(ctx, &(vpc->mesa_program) ); + vp = r700TranslateVertexShader(ctx, &(vpc->mesa_program), nVersion); if(!vp) { radeon_error("Failed to translate vertex shader. \n"); @@ -377,6 +418,140 @@ void r700SelectVertexShader(GLcontext *ctx) return; } +int getTypeSize(GLenum type) +{ + switch (type) + { + case GL_DOUBLE: + return sizeof(GLdouble); + case GL_FLOAT: + return sizeof(GLfloat); + case GL_INT: + return sizeof(GLint); + case GL_UNSIGNED_INT: + return sizeof(GLuint); + case GL_SHORT: + return sizeof(GLshort); + case GL_UNSIGNED_SHORT: + return sizeof(GLushort); + case GL_BYTE: + return sizeof(GLbyte); + case GL_UNSIGNED_BYTE: + return sizeof(GLubyte); + default: + assert(0); + return 0; + } +} + +static void r700TranslateAttrib(GLcontext *ctx, GLuint unLoc, int count, const struct gl_client_array *input) +{ + context_t *context = R700_CONTEXT(ctx); + + StreamDesc * pStreamDesc = &(context->stream_desc[context->nNumActiveAos]); + + GLuint stride; + + stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size + : input->StrideB; + + if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT || +#if MESA_BIG_ENDIAN + getTypeSize(input->Type) != 4 || +#endif + stride < 4) + { + pStreamDesc->type = GL_FLOAT; + + if (input->StrideB == 0) + { + pStreamDesc->stride = 0; + } + else + { + pStreamDesc->stride = sizeof(GLfloat) * input->Size; + } + pStreamDesc->dwords = input->Size; + pStreamDesc->is_named_bo = GL_FALSE; + } + else + { + pStreamDesc->type = input->Type; + pStreamDesc->dwords = (getTypeSize(input->Type) * input->Size + 3)/ 4; + if (!input->BufferObj->Name) + { + if (input->StrideB == 0) + { + pStreamDesc->stride = 0; + } + else + { + pStreamDesc->stride = (getTypeSize(pStreamDesc->type) * input->Size + 3) & ~3; + } + + pStreamDesc->is_named_bo = GL_FALSE; + } + } + + pStreamDesc->size = input->Size; + pStreamDesc->dst_loc = context->nNumActiveAos; + pStreamDesc->element = unLoc; + + switch (pStreamDesc->type) + { //GetSurfaceFormat + case GL_FLOAT: + pStreamDesc->_signed = 0; + pStreamDesc->normalize = GL_FALSE; + break; + case GL_SHORT: + pStreamDesc->_signed = 1; + pStreamDesc->normalize = input->Normalized; + break; + case GL_BYTE: + pStreamDesc->_signed = 1; + pStreamDesc->normalize = input->Normalized; + break; + case GL_UNSIGNED_SHORT: + pStreamDesc->_signed = 0; + pStreamDesc->normalize = input->Normalized; + break; + case GL_UNSIGNED_BYTE: + pStreamDesc->_signed = 0; + pStreamDesc->normalize = input->Normalized; + break; + default: + case GL_INT: + case GL_UNSIGNED_INT: + case GL_DOUBLE: + assert(0); + break; + } + context->nNumActiveAos++; +} + +void r700SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count) +{ + context_t *context = R700_CONTEXT(ctx); + struct r700_vertex_program *vpc + = (struct r700_vertex_program *)ctx->VertexProgram._Current; + + struct gl_vertex_program * mesa_vp = (struct gl_vertex_program *)&(vpc->mesa_program); + unsigned int unLoc = 0; + unsigned int unBit = mesa_vp->Base.InputsRead; + context->nNumActiveAos = 0; + + while(unBit) + { + if(unBit & 1) + { + r700TranslateAttrib(ctx, unLoc, count, arrays[unLoc]); + } + + unBit >>= 1; + ++unLoc; + } +} + void * r700GetActiveVpShaderBo(GLcontext * ctx) { context_t *context = R700_CONTEXT(ctx); diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.h b/src/mesa/drivers/dri/r600/r700_vertprog.h index c48764c43ba..f9a3e395ee9 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.h +++ b/src/mesa/drivers/dri/r600/r700_vertprog.h @@ -52,7 +52,7 @@ struct r700_vertex_program GLboolean translated; GLboolean loaded; - GLboolean needUpdateVF; + GLint uiVersion; void * shaderbo; @@ -76,19 +76,28 @@ unsigned int Map_Vertex_Input(r700_AssemblerBase *pAsm, GLboolean Process_Vertex_Program_Vfetch_Instructions( struct r700_vertex_program *vp, struct gl_vertex_program *mesa_vp); -void Map_Vertex_Program(struct r700_vertex_program *vp, +GLboolean Process_Vertex_Program_Vfetch_Instructions2( + GLcontext *ctx, + struct r700_vertex_program *vp, + struct gl_vertex_program *mesa_vp); +void Map_Vertex_Program(GLcontext *ctx, + struct r700_vertex_program *vp, struct gl_vertex_program *mesa_vp); GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp, struct gl_vertex_program *mesa_vp); struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, - struct gl_vertex_program *mesa_vp); + struct gl_vertex_program *mesa_vp, + GLint nVer); /* Interface */ -extern void r700SelectVertexShader(GLcontext *ctx); +extern void r700SelectVertexShader(GLcontext *ctx, GLint nVersion); +extern void r700SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count); extern GLboolean r700SetupVertexProgram(GLcontext * ctx); extern void * r700GetActiveVpShaderBo(GLcontext * ctx); +extern int getTypeSize(GLenum type); + #endif /* _R700_VERTPROG_H_ */ diff --git a/src/mesa/drivers/dri/r600/radeon_buffer_objects.c b/src/mesa/drivers/dri/r600/radeon_buffer_objects.c new file mode 120000 index 00000000000..f6a5f664701 --- /dev/null +++ b/src/mesa/drivers/dri/r600/radeon_buffer_objects.c @@ -0,0 +1 @@ +../radeon/radeon_buffer_objects.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/r600/radeon_buffer_objects.h b/src/mesa/drivers/dri/r600/radeon_buffer_objects.h new file mode 120000 index 00000000000..2f134fd17b8 --- /dev/null +++ b/src/mesa/drivers/dri/r600/radeon_buffer_objects.h @@ -0,0 +1 @@ +../radeon/radeon_buffer_objects.h \ No newline at end of file -- cgit v1.2.3 From 98d5ec10d0918f6619e7b2285278b83e9de6d86f Mon Sep 17 00:00:00 2001 From: Richard Li Date: Tue, 22 Sep 2009 17:26:23 -0400 Subject: r600 : add draw_prim support, make up one lost change. --- src/mesa/drivers/dri/r600/r600_context.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index 6fc6d9d7bfa..969144ba123 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -59,6 +59,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_debug.h" #include "r600_context.h" #include "radeon_common_context.h" +#include "radeon_buffer_objects.h" #include "radeon_span.h" #include "r600_cmdbuf.h" #include "r600_emit.h" -- cgit v1.2.3 From 2d2f49c91952e18f3362346e19b45c72b1f6db32 Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Wed, 23 Sep 2009 14:20:59 +0300 Subject: r600: add support for CUBE textures, also TXP seems to work here ... --- src/mesa/drivers/dri/r600/r700_assembler.c | 306 ++++++++++++++++++++++++----- src/mesa/drivers/dri/r600/r700_assembler.h | 4 + 2 files changed, 263 insertions(+), 47 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 81269350e43..dc25f3b418b 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -213,7 +213,7 @@ GLboolean is_reduction_opcode(PVSDWORD* dest) { if (dest->dst.op3 == 0) { - if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE) ) + if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE || dest->dst.opcode == SQ_OP2_INST_CUBE) ) { return GL_TRUE; } @@ -350,6 +350,7 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) case SQ_OP2_INST_PRED_SETNE: case SQ_OP2_INST_DOT4: case SQ_OP2_INST_DOT4_IEEE: + case SQ_OP2_INST_CUBE: return 2; case SQ_OP2_INST_MOV: @@ -469,6 +470,9 @@ int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700 pAsm->number_of_inputs = 0; + pAsm->is_tex = GL_FALSE; + pAsm->need_tex_barrier = GL_FALSE; + return 0; } @@ -682,7 +686,7 @@ GLboolean add_tex_instruction(r700_AssemblerBase* pAsm, // If this clause constains any TEX instruction that is dependent on a previous instruction, // set the barrier bit - if( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) ) + if( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) || pAsm->need_tex_barrier == GL_TRUE ) { pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x1; } @@ -1279,42 +1283,48 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) GLboolean bValidTexCoord = GL_FALSE; + if(pAsm->aArgSubst[1] >= 0) + { + bValidTexCoord = GL_TRUE; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = pAsm->aArgSubst[1]; + } + else + { switch (pILInst->SrcReg[0].File) { - case PROGRAM_CONSTANT: - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_STATE_VAR: - bValidTexCoord = GL_TRUE; - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[0].src.reg = pAsm->aArgSubst[1]; - break; - case PROGRAM_TEMPORARY: - bValidTexCoord = GL_TRUE; - pAsm->S[0].src.reg = pILInst->SrcReg[0].Index + - pAsm->starting_temp_register_number; - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - break; - case PROGRAM_INPUT: - switch (pILInst->SrcReg[0].Index) - { - case FRAG_ATTRIB_COL0: - case FRAG_ATTRIB_COL1: - case FRAG_ATTRIB_TEX0: - case FRAG_ATTRIB_TEX1: - case FRAG_ATTRIB_TEX2: - case FRAG_ATTRIB_TEX3: - case FRAG_ATTRIB_TEX4: - case FRAG_ATTRIB_TEX5: - case FRAG_ATTRIB_TEX6: - case FRAG_ATTRIB_TEX7: - bValidTexCoord = GL_TRUE; - pAsm->S[0].src.reg = - pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; - pAsm->S[0].src.rtype = SRC_REG_INPUT; - break; - } - break; + case PROGRAM_CONSTANT: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_STATE_VAR: + break; + case PROGRAM_TEMPORARY: + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = pILInst->SrcReg[0].Index + + pAsm->starting_temp_register_number; + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + break; + case PROGRAM_INPUT: + switch (pILInst->SrcReg[0].Index) + { + case FRAG_ATTRIB_COL0: + case FRAG_ATTRIB_COL1: + case FRAG_ATTRIB_TEX0: + case FRAG_ATTRIB_TEX1: + case FRAG_ATTRIB_TEX2: + case FRAG_ATTRIB_TEX3: + case FRAG_ATTRIB_TEX4: + case FRAG_ATTRIB_TEX5: + case FRAG_ATTRIB_TEX6: + case FRAG_ATTRIB_TEX7: + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = + pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; + pAsm->S[0].src.rtype = SRC_REG_INPUT; + break; + } + break; + } } if(GL_TRUE == bValidTexCoord) @@ -2082,7 +2092,9 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) { is_single_scalar_operation = GL_FALSE; number_of_scalar_operations = 4; - + +/* current assembler doesn't do more than 1 register per source */ +#if 0 /* check read port, only very preliminary algorithm, not count in src0/1 same comp case and prev slot repeat case; also not count relative addressing. TODO: improve performance. */ @@ -2117,6 +2129,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) { bSplitInst = GL_TRUE; } +#endif } contiguous_slots_needed = 0; @@ -2337,9 +2350,7 @@ GLboolean next_ins(r700_AssemblerBase *pAsm) { struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); - if( GL_TRUE == IsTex(pILInst->Opcode) && - /* handle const moves to temp register */ - !(pAsm->D.dst.opcode == SQ_OP2_INST_MOV) ) + if( GL_TRUE == pAsm->is_tex ) { if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX) { if( GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE) ) @@ -2383,7 +2394,8 @@ GLboolean next_ins(r700_AssemblerBase *pAsm) pAsm->S[0].bits = 0; pAsm->S[1].bits = 0; pAsm->S[2].bits = 0; - + pAsm->is_tex = GL_FALSE; + pAsm->need_tex_barrier = GL_FALSE; return GL_TRUE; } @@ -3506,7 +3518,10 @@ GLboolean assemble_STP(r700_AssemblerBase *pAsm) GLboolean assemble_TEX(r700_AssemblerBase *pAsm) { GLboolean src_const; + GLboolean need_barrier = GL_FALSE; + checkop1(pAsm); + switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File) { case PROGRAM_CONSTANT: @@ -3526,20 +3541,18 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) { if ( GL_FALSE == mov_temp(pAsm, 0) ) return GL_FALSE; + need_barrier = GL_TRUE; } switch (pAsm->pILInst[pAsm->uiCurInst].Opcode) { case OPCODE_TEX: - pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; break; case OPCODE_TXB: radeon_error("do not support TXB yet\n"); return GL_FALSE; break; case OPCODE_TXP: - /* TODO : tex proj version : divid first 3 components by 4th */ - pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; break; default: radeon_error("Internal error: bad texture op (not TEX)\n"); @@ -3547,6 +3560,190 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) break; } + if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP) + { + GLuint tmp = gethelpr(pAsm); + pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE; + pAsm->D.dst.math = 1; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writew = 1; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W); + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 1; + pAsm->D.dst.writez = 1; + pAsm->D.dst.writew = 0; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_W); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->aArgSubst[1] = tmp; + need_barrier = GL_TRUE; + } + + if (pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX ) + { + GLuint tmp1 = gethelpr(pAsm); + GLuint tmp2 = gethelpr(pAsm); + + /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ + pAsm->D.dst.opcode = SQ_OP2_INST_CUBE; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + nomask_PVSDST(&(pAsm->D.dst)); + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, 1) ) + { + return GL_FALSE; + } + + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y); + swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_X, SQ_SEL_Z, SQ_SEL_Z); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + /* tmp1.z = ABS(tmp1.z) dont have abs support in assembler currently + * have to do explicit instruction + */ + pAsm->D.dst.opcode = SQ_OP2_INST_MAX; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + pAsm->D.dst.writez = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[1].bits = pAsm->S[0].bits; + flipneg_PVSSRC(&(pAsm->S[1].src)); + + next_ins(pAsm); + + /* tmp1.z = RCP_e(|tmp1.z|) */ + pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE; + pAsm->D.dst.math = 1; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + pAsm->D.dst.writez = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + pAsm->S[0].src.swizzlex = SQ_SEL_Z; + + next_ins(pAsm); + + /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x + * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x + * muladd has no writemask, have to use another temp + * also no support for imm constants, so add 1 here + */ + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp2; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = tmp1; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z); + setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE); + pAsm->S[2].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[2].src.reg = tmp1; + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_1); + + next_ins(pAsm); + + /* ADD the remaining .5 */ + pAsm->D.dst.opcode = SQ_OP2_INST_ADD; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp2; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 1; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp2; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = 252; // SQ_ALU_SRC_0_5 + noswizzle_PVSSRC(&(pAsm->S[1].src)); + + next_ins(pAsm); + + /* tmp1.xy = temp2.xy */ + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 1; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp2; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + + next_ins(pAsm); + pAsm->aArgSubst[1] = tmp1; + need_barrier = GL_TRUE; + + } + + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + pAsm->is_tex = GL_TRUE; + if ( GL_TRUE == need_barrier ) + { + pAsm->need_tex_barrier = GL_TRUE; + } // Set src1 to tex unit id pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit; pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; @@ -3567,10 +3764,25 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) return GL_FALSE; } - if ( GL_FALSE == next_ins(pAsm) ) + if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP) { - return GL_FALSE; + /* hopefully did swizzles before */ + noswizzle_PVSSRC(&(pAsm->S[0].src)); } + + if(pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX) + { + /* SAMPLE dst, tmp.yxwy, CUBE */ + pAsm->S[0].src.swizzlex = SQ_SEL_Y; + pAsm->S[0].src.swizzley = SQ_SEL_X; + pAsm->S[0].src.swizzlez = SQ_SEL_W; + pAsm->S[0].src.swizzlew = SQ_SEL_Y; + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index 4e6e20011ad..bd6df94ff95 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -374,6 +374,10 @@ typedef struct r700_AssemblerBase struct prog_instruction * pILInst; GLuint uiCurInst; GLboolean bR6xx; + /* helper to decide which type of instruction to assemble */ + GLboolean is_tex; + /* we inserted helper intructions and need barrier on next TEX ins */ + GLboolean need_tex_barrier; } r700_AssemblerBase; //Internal use -- cgit v1.2.3 From ec205bbd577a2619e4b1910527e5e5d1d7426ddb Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 23 Sep 2009 14:56:56 -0400 Subject: r600: fix some warnings --- src/mesa/drivers/dri/r600/r700_assembler.c | 6 +++--- src/mesa/drivers/dri/r600/r700_assembler.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index dc25f3b418b..903b6968be1 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -2078,9 +2078,9 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) GLuint contiguous_slots_needed; GLuint uNumSrc = r700GetNumOperands(pAsm); - GLuint channel_swizzle, j; - GLuint chan_counter[4] = {0, 0, 0, 0}; - PVSSRC * pSource[3]; + //GLuint channel_swizzle, j; + //GLuint chan_counter[4] = {0, 0, 0, 0}; + //PVSSRC * pSource[3]; GLboolean bSplitInst = GL_FALSE; if (1 == pAsm->D.dst.math) diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index bd6df94ff95..0d4283e4bad 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -423,6 +423,7 @@ GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm, GLuint _signed, GLboolean normalize, VTX_FETCH_METHOD * pFetchMethod); +GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm); GLuint gethelpr(r700_AssemblerBase* pAsm); void resethelpr(r700_AssemblerBase* pAsm); void checkop_init(r700_AssemblerBase* pAsm); -- cgit v1.2.3 From 53051b8cb5b4804e3eab21262c91ea59f1ea24b8 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 23 Sep 2009 15:02:19 -0400 Subject: r600: fix copy/paste typo --- src/mesa/drivers/dri/r600/r700_render.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index b58859b6ba3..daa05f653dc 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -855,7 +855,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer if(context->ind_buf.bHostIb != GL_TRUE) { - radeonAllocDmaRegion(&context->radeon, &r300->ind_buf.bo, + radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, &context->ind_buf.bo_offset, size, 4); assert(context->ind_buf.bo->ptr != NULL); -- cgit v1.2.3 From 8abe77a75a681637cb00017711f5009601bcd348 Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Wed, 23 Sep 2009 15:22:19 -0400 Subject: Finish removing glcore --- src/mesa/drivers/dri/glcore/Makefile | 84 ------------------------------------ 1 file changed, 84 deletions(-) delete mode 100644 src/mesa/drivers/dri/glcore/Makefile (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/glcore/Makefile b/src/mesa/drivers/dri/glcore/Makefile deleted file mode 100644 index ac7e1de9285..00000000000 --- a/src/mesa/drivers/dri/glcore/Makefile +++ /dev/null @@ -1,84 +0,0 @@ -# src/mesa/drivers/dri/glcore/Makefile - -TOP = ../../../../.. -include $(TOP)/configs/current - -LIBNAME = glcore_dri.so - -DRIVER_SOURCES = glcore_driver.c \ - $(TOP)/src/mesa/drivers/common/driverfuncs.c \ - ../common/dri_util.c - -C_SOURCES = \ - $(DRIVER_SOURCES) \ - $(DRI_SOURCES) - - -# Include directories -INCLUDE_DIRS = \ - -I. \ - -I../common \ - -I../dri_client \ - -I../dri_client/imports \ - -Iserver \ - -I$(TOP)/include \ - -I$(DRM_SOURCE_PATH)/shared-core \ - -I$(TOP)/src/mesa \ - -I$(TOP)/src/mesa/main \ - -I$(TOP)/src/mesa/glapi \ - -I$(TOP)/src/mesa/math \ - -I$(TOP)/src/mesa/transform \ - -I$(TOP)/src/mesa/shader \ - -I$(TOP)/src/mesa/swrast \ - -I$(TOP)/src/mesa/swrast_setup - -# Core Mesa objects -MESA_MODULES = $(TOP)/src/mesa/libmesa.a - -# Libraries that the driver shared lib depends on -LIB_DEPS = -lm -lpthread -lc -# LIB_DEPS = -lGL -lm -lpthread -lc - - -ASM_SOURCES = - -OBJECTS = $(C_SOURCES:.c=.o) \ - $(ASM_SOURCES:.S=.o) - - -##### RULES ##### - -.c.o: - $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $(DEFINES) $< -o $@ - -.S.o: - $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $(DEFINES) $< -o $@ - - -##### TARGETS ##### - -default: depend $(TOP)/$(LIB_DIR)/$(LIBNAME) - - -$(TOP)/$(LIB_DIR)/$(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(WINOBJ) Makefile - CC="$(CC)" CXX="$(CXX)" $(TOP)/bin/mklib -o $(LIBNAME) -noprefix -install $(TOP)/$(LIB_DIR) \ - $(OBJECTS) $(WINLIB) $(LIB_DEPS) $(WINOBJ) $(MESA_MODULES) - - -depend: $(C_SOURCES) $(ASM_SOURCES) - rm -f depend - touch depend - $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(C_SOURCES) $(ASM_SOURCES) \ - > /dev/null - - -# Emacs tags -tags: - etags `find . -name \*.[ch]` `find ../include` - - -clean: - -rm -f *.o server/*.o - - -include depend -- cgit v1.2.3 From 8a2b0f6415654c03cd399e59b0946ab90dc44331 Mon Sep 17 00:00:00 2001 From: Richard Li Date: Wed, 23 Sep 2009 16:10:20 -0400 Subject: r600 : add hw index buffer draw support. --- src/mesa/drivers/dri/r600/r700_render.c | 73 +++++++++++++++++++++++++++------ 1 file changed, 61 insertions(+), 12 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index daa05f653dc..1f7a76e538c 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -262,6 +262,16 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *vb = &tnl->vb; + GLboolean bUseDrawIndex; + if( (NULL != context->ind_buf.bo) && (GL_TRUE != context->ind_buf.bHostIb) ) + { + bUseDrawIndex = GL_TRUE; + } + else + { + bUseDrawIndex = GL_FALSE; + } + type = r700PrimitiveType(prim); num_indices = r700NumVerts(end - start, prim); @@ -272,10 +282,20 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim if (type < 0 || num_indices <= 0) return; - total_emit = 3 /* VGT_PRIMITIVE_TYPE */ - + 2 /* VGT_INDEX_TYPE */ - + 2 /* NUM_INSTANCES */ + if(GL_TRUE == bUseDrawIndex) + { + total_emit = 3 /* VGT_PRIMITIVE_TYPE */ + + 2 /* VGT_INDEX_TYPE */ + + 2 /* NUM_INSTANCES */ + + 5+2; /* DRAW_INDEX */ + } + else + { + total_emit = 3 /* VGT_PRIMITIVE_TYPE */ + + 2 /* VGT_INDEX_TYPE */ + + 2 /* NUM_INSTANCES */ + num_indices + 3; /* DRAW_INDEX_IMMD */ + } BEGIN_BATCH_NO_AUTOSTATE(total_emit); // prim @@ -287,6 +307,15 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim // index type SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); + + if(GL_TRUE == bUseDrawIndex) + { + if(GL_TRUE != context->ind_buf.is_32bit) + { + SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); + } + } + R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); R600_OUT_BATCH(vgt_index_type); @@ -296,12 +325,36 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim // draw packet vgt_num_indices = num_indices; - SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask); + + if(GL_TRUE == bUseDrawIndex) + { + SETfield(vgt_draw_initiator, DI_SRC_SEL_DMA, SOURCE_SELECT_shift, SOURCE_SELECT_mask); + } + else + { + SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask); + } + SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask); - R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1))); - R600_OUT_BATCH(vgt_num_indices); - R600_OUT_BATCH(vgt_draw_initiator); + if(GL_TRUE == bUseDrawIndex) + { + R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX, 3)); + R600_OUT_BATCH(context->ind_buf.bo_offset); + R600_OUT_BATCH(0); + R600_OUT_BATCH(vgt_num_indices); + R600_OUT_BATCH(vgt_draw_initiator); + R600_OUT_BATCH_RELOC(context->ind_buf.bo_offset, + context->ind_buf.bo, + context->ind_buf.bo_offset, + RADEON_GEM_DOMAIN_GTT, 0, 0); + } + else + { + R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1))); + R600_OUT_BATCH(vgt_num_indices); + R600_OUT_BATCH(vgt_draw_initiator); + } if(NULL == context->ind_buf.bo) { @@ -340,10 +393,6 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim } } } - else - { - /* TODO : hw ib draw */ - } } END_BATCH(); @@ -899,7 +948,7 @@ static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer return; } - context->ind_buf.bHostIb = GL_TRUE; + context->ind_buf.bHostIb = GL_FALSE; #if MESA_BIG_ENDIAN if (mesa_ind_buf->type == GL_UNSIGNED_INT) -- cgit v1.2.3 From 20e77382935b24e9e2be89cd2b686fa2f1f67635 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 23 Sep 2009 16:54:12 -0400 Subject: r600: fix r700PredictRenderSize for draw prims path --- src/mesa/drivers/dri/r600/r700_render.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index 1f7a76e538c..bdf0bfc0e43 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -402,12 +402,10 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim /* start 3d, idle, cb/db flush */ #define PRE_EMIT_STATE_BUFSZ 10 + 5 + 14 -static GLuint r700PredictRenderSize(GLcontext* ctx) +static GLuint r700PredictRenderSize(GLcontext* ctx, GLuint nr_prims) { context_t *context = R700_CONTEXT(ctx); - TNLcontext *tnl = TNL_CONTEXT(ctx); struct r700_vertex_program *vp = context->selected_vp; - struct vertex_buffer *vb = &tnl->vb; GLboolean flushed; GLuint dwords, i; GLuint state_size; @@ -415,8 +413,15 @@ static GLuint r700PredictRenderSize(GLcontext* ctx) context->radeon.tcl.aos_count = _mesa_bitcount(vp->mesa_program->Base.InputsRead); dwords = PRE_EMIT_STATE_BUFSZ; - for (i = 0; i < vb->PrimitiveCount; i++) - dwords += vb->Primitive[i].count + 10; + if (nr_prims) + dwords += nr_prims * 14; + else { + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; + + for (i = 0; i < vb->PrimitiveCount; i++) + dwords += vb->Primitive[i].count + 10; + } state_size = radeonCountStateEmitSize(&context->radeon); flushed = rcommonEnsureCmdBufSpace(&context->radeon, dwords + state_size, __FUNCTION__); @@ -456,7 +461,7 @@ static GLboolean r700RunRender(GLcontext * ctx, r700SetupFragmentProgram(ctx); r600UpdateTextureState(ctx); - GLuint emit_end = r700PredictRenderSize(ctx) + GLuint emit_end = r700PredictRenderSize(ctx, 0) + context->radeon.cmdbuf.cs->cdw; r700SetupStreams(ctx); @@ -1044,7 +1049,7 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx, r600UpdateTextureState(ctx); - GLuint emit_end = r700PredictRenderSize(ctx) + GLuint emit_end = r700PredictRenderSize(ctx, nr_prims) + context->radeon.cmdbuf.cs->cdw; r700SetupIndexBuffer(ctx, ib); -- cgit v1.2.3 From 84c7afd9e0f2df72d90dd82d38384c4f2f45173e Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 19 Sep 2009 18:45:59 +0200 Subject: r300: fallback to software rendering if we are out of free texcoords Fixes #22741 --- src/mesa/drivers/dri/r300/r300_fragprog_common.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c index 469c278b510..0bdc90b4a84 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c @@ -99,8 +99,8 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler, { int i; + fp->wpos_attr = FRAG_ATTRIB_MAX; if (!(compiler->Base.Program.InputsRead & FRAG_BIT_WPOS)) { - fp->wpos_attr = FRAG_ATTRIB_MAX; return; } @@ -112,6 +112,13 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler, } } + /* No free texcoord found, fall-back to software rendering */ + if (fp->wpos_attr == FRAG_ATTRIB_MAX) + { + compiler->Base.Error = 1; + return; + } + rc_transform_fragment_wpos(&compiler->Base, FRAG_ATTRIB_WPOS, fp->wpos_attr); } @@ -127,8 +134,8 @@ static void rewriteFog(struct r300_fragment_program_compiler *compiler, struct r struct prog_src_register src; int i; + fp->fog_attr = FRAG_ATTRIB_MAX; if (!(compiler->Base.Program.InputsRead & FRAG_BIT_FOGC)) { - fp->fog_attr = FRAG_ATTRIB_MAX; return; } @@ -140,6 +147,13 @@ static void rewriteFog(struct r300_fragment_program_compiler *compiler, struct r } } + /* No free texcoord found, fall-back to software rendering */ + if (fp->fog_attr == FRAG_ATTRIB_MAX) + { + compiler->Base.Error = 1; + return; + } + memset(&src, 0, sizeof(src)); src.File = PROGRAM_INPUT; src.Index = fp->fog_attr; -- cgit v1.2.3 From 1bf0651d9b58a5c150fcf37016ae1bda425bb05a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 23 Sep 2009 19:42:07 -0400 Subject: r600: fix up ordering of functions in draw prims path Shaders and IB need to be updated and allocated before calling validatebuffers. --- src/mesa/drivers/dri/r600/r700_render.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index bdf0bfc0e43..bbe364bc6aa 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -1019,7 +1019,6 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx, context_t *context = R700_CONTEXT(ctx); radeonContextPtr radeon = &context->radeon; GLuint i, id = 0; - GLboolean bValidedbuffer; struct radeon_renderbuffer *rrb; if (ctx->NewState) @@ -1027,7 +1026,13 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx, _mesa_update_state( ctx ); } - bValidedbuffer = r600ValidateBuffers(ctx); + _tnl_UpdateFixedFunctionProgram(ctx); + r700SetVertexFormat(ctx, arrays, max_index + 1); + r700SetupIndexBuffer(ctx, ib); + /* shaders need to be updated before buffers are validated */ + r700UpdateShaders2(ctx); + if (!r600ValidateBuffers(ctx)) + return GL_FALSE; /* always emit CB base to prevent * lock ups on some chips. @@ -1036,34 +1041,28 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx, /* mark vtx as dirty since it changes per-draw */ R600_STATECHANGE(context, vtx); - _tnl_UpdateFixedFunctionProgram(ctx); - r700SetVertexFormat(ctx, arrays, max_index + 1); - r700SetupStreams2(ctx, arrays, max_index + 1); - r700UpdateShaders2(ctx); - r700SetScissor(context); - r700SetupVertexProgram(ctx); - r700SetupFragmentProgram(ctx); - r600UpdateTextureState(ctx); GLuint emit_end = r700PredictRenderSize(ctx, nr_prims) + context->radeon.cmdbuf.cs->cdw; - r700SetupIndexBuffer(ctx, ib); + r700SetupStreams2(ctx, arrays, max_index + 1); radeonEmitState(radeon); - for (i = 0; i < nr_prims; ++i) + radeon_debug_add_indent(); + for (i = 0; i < nr_prims; ++i) { - r700RunRenderPrimitive(ctx, - prim[i].start, - prim[i].start + prim[i].count, + r700RunRenderPrimitive(ctx, + prim[i].start, + prim[i].start + prim[i].count, prim[i].mode); } - + radeon_debug_remove_indent(); + /* Flush render op cached for last several quads. */ r700WaitForIdleClean(context); -- cgit v1.2.3 From cbab3d7f2a77f187fb688593c17396d4967c75b5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 8 Sep 2009 16:03:25 -0400 Subject: r600: fix dri2 clipping --- src/mesa/drivers/dri/r600/r700_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c index 1f4724e8384..2a0b4192562 100644 --- a/src/mesa/drivers/dri/r600/r700_state.c +++ b/src/mesa/drivers/dri/r600/r700_state.c @@ -1272,8 +1272,8 @@ void r700SetScissor(context_t *context) //--------------- if (context->radeon.radeonScreen->driScreen->dri2.enabled) { x1 = 0; y1 = 0; - x2 = rrb->base.Width - 1; - y2 = rrb->base.Height - 1; + x2 = rrb->base.Width; + y2 = rrb->base.Height; } else { x1 = rrb->dPriv->x; y1 = rrb->dPriv->y; -- cgit v1.2.3 From b1e417413f2da8aad1872fa009949da101156431 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 9 Sep 2009 15:02:16 +1000 Subject: r600: don't setup hardware state if TFP if we have a BO here it means TFP and we should have set it up already. tested by b0le on #radeon --- src/mesa/drivers/dri/r600/r600_texstate.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c index 49b603b65e1..6436a5d7e9b 100644 --- a/src/mesa/drivers/dri/r600/r600_texstate.c +++ b/src/mesa/drivers/dri/r600/r600_texstate.c @@ -565,6 +565,10 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex int firstlevel = t->mt ? t->mt->firstLevel : 0; GLuint uTexelPitch, row_align; + if ( t->bo ) { + return GL_TRUE; + } + firstImage = t->base.Image[0][firstlevel]; if (!t->image_override) { -- cgit v1.2.3 From 65b01d449cc594e1c7e1a44c5d87fdc698300e9a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 9 Sep 2009 01:41:46 -0400 Subject: r600: fix ftp for dri1 We use t->bo for dri1 since r600 uses CS for dri1. --- src/mesa/drivers/dri/r600/r600_texstate.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c index 6436a5d7e9b..f30dd112301 100644 --- a/src/mesa/drivers/dri/r600/r600_texstate.c +++ b/src/mesa/drivers/dri/r600/r600_texstate.c @@ -565,9 +565,10 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex int firstlevel = t->mt ? t->mt->firstLevel : 0; GLuint uTexelPitch, row_align; - if ( t->bo ) { - return GL_TRUE; - } + if (rmesa->radeon.radeonScreen->driScreen->dri2.enabled && + t->image_override && + t->bo) + return; firstImage = t->base.Image[0][firstlevel]; -- cgit v1.2.3 From 6552a103f903a2b767464cd2d267f706a6baf7d5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 9 Sep 2009 11:14:17 -0400 Subject: r600: check if textures are actually enabled before submission noticed by taiu on IRC. --- src/mesa/drivers/dri/r600/r600_texstate.c | 2 +- src/mesa/drivers/dri/r600/r700_chip.c | 118 ++++++++++++++++-------------- 2 files changed, 64 insertions(+), 56 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c index f30dd112301..bcb8d7c73d7 100644 --- a/src/mesa/drivers/dri/r600/r600_texstate.c +++ b/src/mesa/drivers/dri/r600/r600_texstate.c @@ -69,7 +69,7 @@ void r600UpdateTextureState(GLcontext * ctx) for (unit = 0; unit < R700_MAX_TEXTURE_UNITS; unit++) { texUnit = &ctx->Texture.Unit[unit]; t = radeon_tex_obj(ctx->Texture.Unit[unit]._Current); - + r700->textures[unit] = NULL; if (texUnit->_ReallyEnabled) { if (!t) continue; diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index 37bff56f5a9..312cacffda3 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -52,38 +52,40 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom) radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) { - radeonTexObj *t = r700->textures[i]; - if (t) { - if (!t->image_override) - bo = t->mt->bo; - else - bo = t->bo; - if (bo) { - - r700SyncSurf(context, bo, - RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, - 0, TC_ACTION_ENA_bit); - - BEGIN_BATCH_NO_AUTOSTATE(9 + 4); - R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); - R600_OUT_BATCH(i * 7); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE0); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE1); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE2); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE3); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE4); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE5); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE6); - R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE2, - bo, - 0, - RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); - R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE3, - bo, - r700->textures[i]->SQ_TEX_RESOURCE3, - RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); - END_BATCH(); - COMMIT_BATCH(); + if (ctx->Texture.Unit[i]._ReallyEnabled) { + radeonTexObj *t = r700->textures[i]; + if (t) { + if (!t->image_override) + bo = t->mt->bo; + else + bo = t->bo; + if (bo) { + + r700SyncSurf(context, bo, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, + 0, TC_ACTION_ENA_bit); + + BEGIN_BATCH_NO_AUTOSTATE(9 + 4); + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); + R600_OUT_BATCH(i * 7); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE0); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE1); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE2); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE3); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE4); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE5); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE6); + R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE2, + bo, + 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE3, + bo, + r700->textures[i]->SQ_TEX_RESOURCE3, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + END_BATCH(); + COMMIT_BATCH(); + } } } } @@ -98,16 +100,18 @@ static void r700SendTexSamplerState(GLcontext *ctx, struct radeon_state_atom *at radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) { - radeonTexObj *t = r700->textures[i]; - if (t) { - BEGIN_BATCH_NO_AUTOSTATE(5); - R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3)); - R600_OUT_BATCH(i * 3); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER0); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER1); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER2); - END_BATCH(); - COMMIT_BATCH(); + if (ctx->Texture.Unit[i]._ReallyEnabled) { + radeonTexObj *t = r700->textures[i]; + if (t) { + BEGIN_BATCH_NO_AUTOSTATE(5); + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3)); + R600_OUT_BATCH(i * 3); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER0); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER1); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER2); + END_BATCH(); + COMMIT_BATCH(); + } } } } @@ -121,16 +125,18 @@ static void r700SendTexBorderColorState(GLcontext *ctx, struct radeon_state_atom radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) { - radeonTexObj *t = r700->textures[i]; - if (t) { - BEGIN_BATCH_NO_AUTOSTATE(2 + 4); - R600_OUT_BATCH_REGSEQ((TD_PS_SAMPLER0_BORDER_RED + (i * 16)), 4); - R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_RED); - R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_GREEN); - R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_BLUE); - R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_ALPHA); - END_BATCH(); - COMMIT_BATCH(); + if (ctx->Texture.Unit[i]._ReallyEnabled) { + radeonTexObj *t = r700->textures[i]; + if (t) { + BEGIN_BATCH_NO_AUTOSTATE(2 + 4); + R600_OUT_BATCH_REGSEQ((TD_PS_SAMPLER0_BORDER_RED + (i * 16)), 4); + R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_RED); + R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_GREEN); + R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_BLUE); + R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_ALPHA); + END_BATCH(); + COMMIT_BATCH(); + } } } } @@ -1176,9 +1182,11 @@ static int check_tx(GLcontext *ctx, struct radeon_state_atom *atom) R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) { - radeonTexObj *t = r700->textures[i]; - if (t) - count++; + if (ctx->Texture.Unit[i]._ReallyEnabled) { + radeonTexObj *t = r700->textures[i]; + if (t) + count++; + } } radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count); return count * 31; -- cgit v1.2.3 From 9edd1a441c3c0c3f018ae561cd5711398ca56f95 Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Fri, 11 Sep 2009 10:59:05 -0400 Subject: r600: enable caching of vertex programs --- src/mesa/drivers/dri/r600/r600_context.h | 3 + src/mesa/drivers/dri/r600/r700_chip.c | 10 ++- src/mesa/drivers/dri/r600/r700_oglprog.c | 36 ++++++----- src/mesa/drivers/dri/r600/r700_render.c | 9 +-- src/mesa/drivers/dri/r600/r700_vertprog.c | 103 ++++++++++++++++++++---------- src/mesa/drivers/dri/r600/r700_vertprog.h | 11 +++- 6 files changed, 110 insertions(+), 62 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h index 8ae05a301c7..c59df7505af 100644 --- a/src/mesa/drivers/dri/r600/r600_context.h +++ b/src/mesa/drivers/dri/r600/r600_context.h @@ -51,6 +51,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r700_chip.h" #include "r600_tex.h" #include "r700_oglprog.h" +#include "r700_vertprog.h" struct r600_context; typedef struct r600_context context_t; @@ -155,6 +156,8 @@ struct r600_context { struct r600_hw_state atoms; + struct r700_vertex_program *selected_vp; + /* Vertex buffers */ GLvector4f dummy_attrib[_TNL_ATTRIB_MAX]; diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index 312cacffda3..1b560591974 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -211,8 +211,7 @@ static void r700SetupVTXConstants(GLcontext * ctx, void r700SetupStreams(GLcontext *ctx) { context_t *context = R700_CONTEXT(ctx); - struct r700_vertex_program *vpc - = (struct r700_vertex_program *)ctx->VertexProgram._Current; + struct r700_vertex_program *vp = context->selected_vp; TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *vb = &tnl->vb; unsigned int i, j = 0; @@ -221,7 +220,7 @@ void r700SetupStreams(GLcontext *ctx) R600_STATECHANGE(context, vtx); for(i=0; imesa_program.Base.InputsRead & (1 << i)) { + if(vp->mesa_program->Base.InputsRead & (1 << i)) { rcommon_emit_vector(ctx, &context->radeon.tcl.aos[j], vb->AttribPtr[i]->data, @@ -237,8 +236,7 @@ void r700SetupStreams(GLcontext *ctx) static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom) { context_t *context = R700_CONTEXT(ctx); - struct r700_vertex_program *vpc - = (struct r700_vertex_program *)ctx->VertexProgram._Current; + struct r700_vertex_program *vp = context->selected_vp; unsigned int i, j = 0; BATCH_LOCALS(&context->radeon); radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); @@ -258,7 +256,7 @@ static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom) COMMIT_BATCH(); for(i=0; imesa_program.Base.InputsRead & (1 << i)) { + if(vp->mesa_program->Base.InputsRead & (1 << i)) { /* currently aos are packed */ r700SetupVTXConstants(ctx, i, diff --git a/src/mesa/drivers/dri/r600/r700_oglprog.c b/src/mesa/drivers/dri/r600/r700_oglprog.c index 3c8c1fd7a34..5290ef31be3 100644 --- a/src/mesa/drivers/dri/r600/r700_oglprog.c +++ b/src/mesa/drivers/dri/r600/r700_oglprog.c @@ -46,7 +46,7 @@ static struct gl_program *r700NewProgram(GLcontext * ctx, { struct gl_program *pProgram = NULL; - struct r700_vertex_program *vp; + struct r700_vertex_program_cont *vpc; struct r700_fragment_program *fp; radeon_print(RADEON_SHADER, RADEON_VERBOSE, @@ -56,16 +56,11 @@ static struct gl_program *r700NewProgram(GLcontext * ctx, { case GL_VERTEX_STATE_PROGRAM_NV: case GL_VERTEX_PROGRAM_ARB: - vp = CALLOC_STRUCT(r700_vertex_program); + vpc = CALLOC_STRUCT(r700_vertex_program_cont); pProgram = _mesa_init_vertex_program(ctx, - &vp->mesa_program, + &vpc->mesa_program, target, id); - vp->translated = GL_FALSE; - vp->loaded = GL_FALSE; - - vp->shaderbo = NULL; - break; case GL_FRAGMENT_PROGRAM_NV: case GL_FRAGMENT_PROGRAM_ARB: @@ -89,7 +84,8 @@ static struct gl_program *r700NewProgram(GLcontext * ctx, static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog) { - struct r700_vertex_program * vp; + struct r700_vertex_program_cont * vpc; + struct r700_vertex_program *vp, *tmp; struct r700_fragment_program * fp; radeon_print(RADEON_SHADER, RADEON_VERBOSE, @@ -99,14 +95,20 @@ static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog) { case GL_VERTEX_STATE_PROGRAM_NV: case GL_VERTEX_PROGRAM_ARB: - vp = (struct r700_vertex_program*)prog; - /* Release DMA region */ - - r600DeleteShader(ctx, vp->shaderbo); - - /* Clean up */ - Clean_Up_Assembler(&(vp->r700AsmCode)); - Clean_Up_Shader(&(vp->r700Shader)); + vpc = (struct r700_vertex_program_cont*)prog; + vp = vpc->progs; + while (vp) { + tmp = vp->next; + /* Release DMA region */ + + r600DeleteShader(ctx, vp->shaderbo); + + /* Clean up */ + Clean_Up_Assembler(&(vp->r700AsmCode)); + Clean_Up_Shader(&(vp->r700Shader)); + _mesa_free(vp); + vp = tmp; + } break; case GL_FRAGMENT_PROGRAM_NV: case GL_FRAGMENT_PROGRAM_ARB: diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index 3566bf3ca78..b1c3648ca56 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -319,14 +319,13 @@ static GLuint r700PredictRenderSize(GLcontext* ctx) { context_t *context = R700_CONTEXT(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx); - struct r700_vertex_program *vpc - = (struct r700_vertex_program *)ctx->VertexProgram._Current; + struct r700_vertex_program *vp = context->selected_vp; struct vertex_buffer *vb = &tnl->vb; GLboolean flushed; GLuint dwords, i; GLuint state_size; /* pre calculate aos count so state prediction works */ - context->radeon.tcl.aos_count = _mesa_bitcount(vpc->mesa_program.Base.InputsRead); + context->radeon.tcl.aos_count = _mesa_bitcount(vp->mesa_program->Base.InputsRead); dwords = PRE_EMIT_STATE_BUFSZ; for (i = 0; i < vb->PrimitiveCount; i++) @@ -365,7 +364,6 @@ static GLboolean r700RunRender(GLcontext * ctx, /* mark vtx as dirty since it changes per-draw */ R600_STATECHANGE(context, vtx); - r700UpdateShaders(ctx); r700SetScissor(context); r700SetupVertexProgram(ctx); r700SetupFragmentProgram(ctx); @@ -427,7 +425,10 @@ static GLboolean r700RunTCLRender(GLcontext * ctx, /*----------------------*/ /* TODO : sw fallback */ + /* Need shader bo's setup before bo check */ + r700UpdateShaders(ctx); /** + * Ensure all enabled and complete textures are uploaded along with any buffers being used. */ if(!r600ValidateBuffers(ctx)) diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index d107f99e7ba..8c2b0071df9 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -35,6 +35,7 @@ #include "main/mtypes.h" #include "tnl/t_context.h" +#include "shader/program.h" #include "shader/prog_parameter.h" #include "shader/prog_statevars.h" @@ -258,28 +259,54 @@ GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp, return GL_TRUE; } -GLboolean r700TranslateVertexShader(struct r700_vertex_program *vp, - struct gl_vertex_program *mesa_vp) +struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, + struct gl_vertex_program *mesa_vp) { + context_t *context = R700_CONTEXT(ctx); + struct r700_vertex_program *vp; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; + unsigned int unBit; + unsigned int i; + + vp = _mesa_calloc(sizeof(*vp)); + vp->mesa_program = (struct gl_vertex_program *)_mesa_clone_program(ctx, &mesa_vp->Base); + + for(i=0; imesa_program->Base.InputsRead & unBit) /* ctx->Array.ArrayObj->xxxxxxx */ + { + vp->aos_desc[i].size = vb->AttribPtr[i]->size; + vp->aos_desc[i].stride = vb->AttribPtr[i]->size * sizeof(GL_FLOAT);/* when emit array, data is packed. vb->AttribPtr[i]->stride;*/ + vp->aos_desc[i].type = GL_FLOAT; + } + } + + if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) + { + vp->r700AsmCode.bR6xx = 1; + } + //Init_Program Init_r700_AssemblerBase(SPT_VP, &(vp->r700AsmCode), &(vp->r700Shader) ); Map_Vertex_Program( vp, mesa_vp ); if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, mesa_vp)) { - return GL_FALSE; + return NULL; } if(GL_FALSE == AssembleInstr(mesa_vp->Base.NumInstructions, &(mesa_vp->Base.Instructions[0]), &(vp->r700AsmCode)) ) { - return GL_FALSE; + return NULL; } if(GL_FALSE == Process_Vertex_Exports(&(vp->r700AsmCode), mesa_vp->Base.OutputsWritten) ) { - return GL_FALSE; + return NULL; } vp->r700Shader.nRegs = (vp->r700AsmCode.number_used_registers == 0) ? 0 @@ -289,72 +316,82 @@ GLboolean r700TranslateVertexShader(struct r700_vertex_program *vp, vp->translated = GL_TRUE; - return GL_TRUE; + return vp; } void r700SelectVertexShader(GLcontext *ctx) { context_t *context = R700_CONTEXT(ctx); - struct r700_vertex_program *vpc - = (struct r700_vertex_program *)ctx->VertexProgram._Current; + struct r700_vertex_program_cont *vpc; + struct r700_vertex_program *vp; TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *vb = &tnl->vb; unsigned int unBit; unsigned int i; + GLboolean match; + vpc = (struct r700_vertex_program_cont *)ctx->VertexProgram._Current; + +#if 0 if (context->radeon.NewGLState & (_NEW_PROGRAM_CONSTANTS|_NEW_PROGRAM)) { vpc->needUpdateVF = 1; } +#endif - if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) + for (vp = vpc->progs; vp; vp = vp->next) { - vpc->r700AsmCode.bR6xx = 1; - } - + match = GL_TRUE; for(i=0; imesa_program.Base.InputsRead & unBit) /* ctx->Array.ArrayObj->xxxxxxx */ + if(vpc->mesa_program.Base.InputsRead & unBit) { - vpc->aos_desc[i].size = vb->AttribPtr[i]->size; - vpc->aos_desc[i].stride = vb->AttribPtr[i]->size * sizeof(GL_FLOAT);/* when emit array, data is packed. vb->AttribPtr[i]->stride;*/ - vpc->aos_desc[i].type = GL_FLOAT; + if (vp->aos_desc[i].size != vb->AttribPtr[i]->size) + match = GL_FALSE; + break; } } - - if(GL_FALSE == vpc->translated) { - r700TranslateVertexShader(vpc, &(vpc->mesa_program) ); + if (match) + { + context->selected_vp = vp; + return; } + } + + vp = r700TranslateVertexShader(ctx, &(vpc->mesa_program) ); + if(!vp) + { + radeon_error("Failed to translate vertex shader. \n"); + return; + } + vp->next = vpc->progs; + vpc->progs = vp; + context->selected_vp = vp; + return; } void * r700GetActiveVpShaderBo(GLcontext * ctx) { - struct r700_vertex_program *vp - = (struct r700_vertex_program *)ctx->VertexProgram._Current; + context_t *context = R700_CONTEXT(ctx); + struct r700_vertex_program *vp = context->selected_vp;; - return vp->shaderbo; + if (vp) + return vp->shaderbo; + else + return NULL; } GLboolean r700SetupVertexProgram(GLcontext * ctx) { context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); - struct r700_vertex_program *vp - = (struct r700_vertex_program *)ctx->VertexProgram._Current; + struct r700_vertex_program *vp = context->selected_vp; struct gl_program_parameter_list *paramList; unsigned int unNumParamData; unsigned int ui; - if (vp->needUpdateVF) - { - vp->loaded = GL_FALSE; - vp->r700Shader.bNeedsAssembly = GL_TRUE; - Process_Vertex_Program_Vfetch_Instructions(vp, &(vp->mesa_program)); - r600DeleteShader(ctx, vp->shaderbo); - } - if(GL_FALSE == vp->loaded) { if(vp->r700Shader.bNeedsAssembly == GL_TRUE) @@ -410,7 +447,7 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx) */ /* sent out shader constants. */ - paramList = vp->mesa_program.Base.Parameters; + paramList = vp->mesa_program->Base.Parameters; if(NULL != paramList) { _mesa_load_state_parameters(ctx, paramList); diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.h b/src/mesa/drivers/dri/r600/r700_vertprog.h index e2e65021fd3..c48764c43ba 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.h +++ b/src/mesa/drivers/dri/r600/r700_vertprog.h @@ -43,7 +43,7 @@ typedef struct ArrayDesc //TEMP struct r700_vertex_program { - struct gl_vertex_program mesa_program; /* Must be first */ + struct gl_vertex_program *mesa_program; /* Must be first */ struct r700_vertex_program *next; @@ -59,6 +59,13 @@ struct r700_vertex_program ArrayDesc aos_desc[VERT_ATTRIB_MAX]; }; +struct r700_vertex_program_cont +{ + struct gl_vertex_program mesa_program; + + struct r700_vertex_program *progs; +}; + //Internal unsigned int Map_Vertex_Output(r700_AssemblerBase *pAsm, struct gl_vertex_program *mesa_vp, @@ -74,7 +81,7 @@ void Map_Vertex_Program(struct r700_vertex_program *vp, GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp, struct gl_vertex_program *mesa_vp); -GLboolean r700TranslateVertexShader(struct r700_vertex_program *vp, +struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, struct gl_vertex_program *mesa_vp); /* Interface */ -- cgit v1.2.3 From 7f5a958c80f0fcd7681d515fd1c1b8bc00524a7a Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Fri, 11 Sep 2009 15:59:55 -0400 Subject: r600: fix texcoords from constants with some minor updates from Richard. --- src/mesa/drivers/dri/r600/r700_assembler.c | 92 +++++++++++++++++------------- 1 file changed, 52 insertions(+), 40 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 2d8480daaf7..fda66927253 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -1149,41 +1149,49 @@ GLboolean tex_dst(r700_AssemblerBase *pAsm) GLboolean tex_src(r700_AssemblerBase *pAsm) { struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); - + GLboolean bValidTexCoord = GL_FALSE; - switch (pILInst->SrcReg[0].File) - { + switch (pILInst->SrcReg[0].File) { + case PROGRAM_CONSTANT: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_STATE_VAR: + bValidTexCoord = GL_TRUE; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = pAsm->aArgSubst[1]; + break; case PROGRAM_TEMPORARY: - bValidTexCoord = GL_TRUE; - - pAsm->S[0].src.reg = pILInst->SrcReg[0].Index + pAsm->starting_temp_register_number; - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - - break; + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = pILInst->SrcReg[0].Index + + pAsm->starting_temp_register_number; + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + break; case PROGRAM_INPUT: - switch (pILInst->SrcReg[0].Index) - { - case FRAG_ATTRIB_COL0: - case FRAG_ATTRIB_COL1: - case FRAG_ATTRIB_TEX0: - case FRAG_ATTRIB_TEX1: - case FRAG_ATTRIB_TEX2: - case FRAG_ATTRIB_TEX3: - case FRAG_ATTRIB_TEX4: - case FRAG_ATTRIB_TEX5: - case FRAG_ATTRIB_TEX6: - case FRAG_ATTRIB_TEX7: - bValidTexCoord = GL_TRUE; - - pAsm->S[0].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; - pAsm->S[0].src.rtype = SRC_REG_INPUT; - } - break; + switch (pILInst->SrcReg[0].Index) + { + case FRAG_ATTRIB_COL0: + case FRAG_ATTRIB_COL1: + case FRAG_ATTRIB_TEX0: + case FRAG_ATTRIB_TEX1: + case FRAG_ATTRIB_TEX2: + case FRAG_ATTRIB_TEX3: + case FRAG_ATTRIB_TEX4: + case FRAG_ATTRIB_TEX5: + case FRAG_ATTRIB_TEX6: + case FRAG_ATTRIB_TEX7: + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = + pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; + pAsm->S[0].src.rtype = SRC_REG_INPUT; + break; + } + break; } if(GL_TRUE == bValidTexCoord) - { + { setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); } else @@ -1201,7 +1209,7 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) pAsm->S[0].src.negy = (pILInst->SrcReg[0].Negate >> 1) & 0x1; pAsm->S[0].src.negz = (pILInst->SrcReg[0].Negate >> 2) & 0x1; pAsm->S[0].src.negw = (pILInst->SrcReg[0].Negate >> 3) & 0x1; - + return GL_TRUE; } @@ -2202,7 +2210,9 @@ GLboolean next_ins(r700_AssemblerBase *pAsm) { struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); - if( GL_TRUE == IsTex(pILInst->Opcode) ) + if( GL_TRUE == IsTex(pILInst->Opcode) && + /* handle const moves to temp register */ + !(pAsm->D.dst.opcode == SQ_OP2_INST_MOV) ) { if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX) { if( GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE) ) @@ -3374,28 +3384,30 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) case PROGRAM_ENV_PARAM: case PROGRAM_STATE_VAR: src_const = GL_TRUE; + break; case PROGRAM_TEMPORARY: case PROGRAM_INPUT: src_const = GL_FALSE; + break; } - if (GL_TRUE == src_const) + if (GL_TRUE == src_const) { - radeon_error("TODO: Texture coordinates from a constant register not supported.\n"); - return GL_FALSE; + if ( GL_FALSE == mov_temp(pAsm, 0) ) + return GL_FALSE; } - switch (pAsm->pILInst[pAsm->uiCurInst].Opcode) + switch (pAsm->pILInst[pAsm->uiCurInst].Opcode) { case OPCODE_TEX: - pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; break; - case OPCODE_TXB: + case OPCODE_TXB: radeon_error("do not support TXB yet\n"); return GL_FALSE; break; - case OPCODE_TXP: - /* TODO : tex proj version : divid first 3 components by 4th */ + case OPCODE_TXP: + /* TODO : tex proj version : divid first 3 components by 4th */ pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; break; default: @@ -3418,13 +3430,13 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) { return GL_FALSE; } - + if( GL_FALSE == tex_src(pAsm) ) { return GL_FALSE; } - if ( GL_FALSE == next_ins(pAsm) ) + if ( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } -- cgit v1.2.3 From 93a7ea6ba0d5700e18b28c23da226e055f7c2fa1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 14 Sep 2009 17:08:26 -0400 Subject: r600: fix warning Noticed by rnoland on IRC. --- src/mesa/drivers/dri/r600/r700_assembler.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index fda66927253..efeccb25f1e 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -3387,6 +3387,7 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) break; case PROGRAM_TEMPORARY: case PROGRAM_INPUT: + default: src_const = GL_FALSE; break; } -- cgit v1.2.3 From 9437ac9bccd294bd5a8b838e7ca7597e5dc6d5b0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 14 Sep 2009 18:05:15 -0400 Subject: r600: add span support for 1D tiles 1D tile span support for depth/stencil/color/textures Z and stencil buffers are always tiled, so this fixes sw access to Z and stencil buffers. color and textures are currently linear, but this adds span support when we implement 1D tiling. This fixes the text in progs/demos/engine and progs/tests/z* --- src/mesa/drivers/dri/r600/r600_reg_auto_r6xx.h | 2 + src/mesa/drivers/dri/r600/r700_chip.c | 2 +- src/mesa/drivers/dri/radeon/radeon_span.c | 220 +++++++++++++++++++++++++ 3 files changed, 223 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r600_reg_auto_r6xx.h b/src/mesa/drivers/dri/r600/r600_reg_auto_r6xx.h index 9d5aa3c7e49..edd85b0facc 100644 --- a/src/mesa/drivers/dri/r600/r600_reg_auto_r6xx.h +++ b/src/mesa/drivers/dri/r600/r600_reg_auto_r6xx.h @@ -1366,6 +1366,7 @@ enum { DB_DEPTH_INFO__READ_SIZE_bit = 1 << 3, DB_DEPTH_INFO__ARRAY_MODE_mask = 0x0f << 15, DB_DEPTH_INFO__ARRAY_MODE_shift = 15, + ARRAY_1D_TILED_THIN1 = 0x02, ARRAY_2D_TILED_THIN1 = 0x04, TILE_SURFACE_ENABLE_bit = 1 << 25, TILE_COMPACT_bit = 1 << 26, @@ -1449,6 +1450,7 @@ enum { CB_COLOR0_INFO__ARRAY_MODE_shift = 8, ARRAY_LINEAR_GENERAL = 0x00, ARRAY_LINEAR_ALIGNED = 0x01, +/* ARRAY_1D_TILED_THIN1 = 0x02, */ /* ARRAY_2D_TILED_THIN1 = 0x04, */ NUMBER_TYPE_mask = 0x07 << 12, NUMBER_TYPE_shift = 12, diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index 1b560591974..06d7e9c9ab1 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -351,7 +351,7 @@ static void r700SetDepthTarget(context_t *context) SETfield(r700->DB_DEPTH_INFO.u32All, DEPTH_16, DB_DEPTH_INFO__FORMAT_shift, DB_DEPTH_INFO__FORMAT_mask); } - SETfield(r700->DB_DEPTH_INFO.u32All, ARRAY_2D_TILED_THIN1, + SETfield(r700->DB_DEPTH_INFO.u32All, ARRAY_1D_TILED_THIN1, DB_DEPTH_INFO__ARRAY_MODE_shift, DB_DEPTH_INFO__ARRAY_MODE_mask); /* r700->DB_PREFETCH_LIMIT.bits.DEPTH_HEIGHT_TILE_MAX = (context->currentDraw->h >> 3) - 1; */ /* z buffer sie may much bigger than what need, so use actual used h. */ } diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c index 4e100d854ed..aa2035338c2 100644 --- a/src/mesa/drivers/dri/radeon/radeon_span.c +++ b/src/mesa/drivers/dri/radeon/radeon_span.c @@ -106,6 +106,142 @@ static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb, } #endif +/* r600 tiling + * two main types: + * - 1D (akin to macro-linear/micro-tiled on older asics) + * - 2D (akin to macro-tiled/micro-tiled on older asics) + * only 1D tiling is implemented below + */ +#if defined(RADEON_COMMON_FOR_R600) +static GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb, + GLint x, GLint y, GLint is_depth, GLint is_stencil) +{ + GLint element_bytes = rrb->cpp; + GLint num_samples = 1; + GLint tile_width = 8; + GLint tile_height = 8; + GLint tile_thickness = 1; + GLint pitch_elements = rrb->pitch / element_bytes; + GLint height = rrb->base.Height; + GLint z = 0; + GLint sample_number = 0; + /* */ + GLint tile_bytes; + GLint tiles_per_row; + GLint tiles_per_slice; + GLint slice_offset; + GLint tile_row_index; + GLint tile_column_index; + GLint tile_offset; + GLint pixel_number = 0; + GLint element_offset; + GLint offset = 0; + + tile_bytes = tile_width * tile_height * tile_thickness * element_bytes * num_samples; + tiles_per_row = pitch_elements /tile_width; + tiles_per_slice = tiles_per_row * (height / tile_height); + slice_offset = (z / tile_thickness) * tiles_per_slice * tile_bytes; + tile_row_index = y / tile_height; + tile_column_index = x / tile_width; + tile_offset = ((tile_row_index * tiles_per_row) + tile_column_index) * tile_bytes; + + if (is_depth) { + GLint pixel_offset = 0; + + pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0] + pixel_number |= ((y >> 0) & 1) << 1; // pn[1] = y[0] + pixel_number |= ((x >> 1) & 1) << 2; // pn[2] = x[1] + pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1] + pixel_number |= ((x >> 2) & 1) << 4; // pn[4] = x[2] + pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2] + switch (element_bytes) { + case 2: + pixel_offset = pixel_number * element_bytes * num_samples; + element_offset = pixel_offset + (sample_number * element_bytes); + break; + case 4: + /* stencil and depth data are stored separately within a tile. + * stencil is stored in a contiguous tile before the depth tile. + * stencil element is 1 byte, depth element is 3 bytes. + * stencil tile is 64 bytes. + */ + if (is_stencil) + pixel_offset = pixel_number * 1 * num_samples; + else + pixel_offset = (pixel_number * 3 * num_samples) + 64; + break; + } + element_offset = pixel_offset + (sample_number * element_bytes); + } else { + GLint sample_offset; + + switch (element_bytes) { + case 1: + pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0] + pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1] + pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2] + pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1] + pixel_number |= ((y >> 0) & 1) << 4; // pn[4] = y[0] + pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2] + break; + case 2: + pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0] + pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1] + pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2] + pixel_number |= ((y >> 0) & 1) << 3; // pn[3] = y[0] + pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1] + pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2] + break; + case 4: + pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0] + pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1] + pixel_number |= ((y >> 0) & 1) << 2; // pn[2] = y[0] + pixel_number |= ((x >> 2) & 1) << 3; // pn[3] = x[2] + pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1] + pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2] + break; + } + sample_offset = sample_number * (tile_bytes / num_samples); + element_offset = sample_offset + (pixel_number * element_bytes); + } + offset = slice_offset + tile_offset + element_offset; + return offset; +} + +/* depth buffers */ +static GLubyte *r600_ptr_depth(const struct radeon_renderbuffer * rrb, + GLint x, GLint y) +{ + GLubyte *ptr = rrb->bo->ptr; + GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 0); + return &ptr[offset]; +} + +static GLubyte *r600_ptr_stencil(const struct radeon_renderbuffer * rrb, + GLint x, GLint y) +{ + GLubyte *ptr = rrb->bo->ptr; + GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 1); + return &ptr[offset]; +} + +static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb, + GLint x, GLint y) +{ + GLubyte *ptr = rrb->bo->ptr; + uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE; + GLint offset; + + if (rrb->has_surface || !(rrb->bo->flags & mask)) { + offset = x * rrb->cpp + y * rrb->pitch; + } else { + offset = r600_1d_tile_helper(rrb, x, y, 0, 0); + } + return &ptr[offset]; +} + +#endif + /* radeon tiling on r300-r500 has 4 states, macro-linear/micro-linear macro-linear/micro-tiled @@ -270,7 +406,11 @@ s8z24_to_z24s8(uint32_t val) #define TAG(x) radeon##x##_RGB565 #define TAG2(x,y) radeon##x##_RGB565##y +#if defined(RADEON_COMMON_FOR_R600) +#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off) +#else #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off) +#endif #include "spantmp2.h" /* 16 bit, ARGB1555 color spanline and pixel functions @@ -280,7 +420,11 @@ s8z24_to_z24s8(uint32_t val) #define TAG(x) radeon##x##_ARGB1555 #define TAG2(x,y) radeon##x##_ARGB1555##y +#if defined(RADEON_COMMON_FOR_R600) +#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off) +#else #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off) +#endif #include "spantmp2.h" /* 16 bit, RGBA4 color spanline and pixel functions @@ -290,7 +434,11 @@ s8z24_to_z24s8(uint32_t val) #define TAG(x) radeon##x##_ARGB4444 #define TAG2(x,y) radeon##x##_ARGB4444##y +#if defined(RADEON_COMMON_FOR_R600) +#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off) +#else #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off) +#endif #include "spantmp2.h" /* 32 bit, xRGB8888 color spanline and pixel functions @@ -300,11 +448,19 @@ s8z24_to_z24s8(uint32_t val) #define TAG(x) radeon##x##_xRGB8888 #define TAG2(x,y) radeon##x##_xRGB8888##y +#if defined(RADEON_COMMON_FOR_R600) +#define GET_VALUE(_x, _y) ((*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)) | 0xff000000)) +#define PUT_VALUE(_x, _y, d) { \ + GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \ + *_ptr = d; \ +} while (0) +#else #define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0xff000000)) #define PUT_VALUE(_x, _y, d) { \ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ *_ptr = d; \ } while (0) +#endif #include "spantmp2.h" /* 32 bit, ARGB8888 color spanline and pixel functions @@ -314,11 +470,19 @@ s8z24_to_z24s8(uint32_t val) #define TAG(x) radeon##x##_ARGB8888 #define TAG2(x,y) radeon##x##_ARGB8888##y +#if defined(RADEON_COMMON_FOR_R600) +#define GET_VALUE(_x, _y) (*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off))) +#define PUT_VALUE(_x, _y, d) { \ + GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \ + *_ptr = d; \ +} while (0) +#else #define GET_VALUE(_x, _y) (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))) #define PUT_VALUE(_x, _y, d) { \ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ *_ptr = d; \ } while (0) +#endif #include "spantmp2.h" /* ================================================================ @@ -342,6 +506,9 @@ s8z24_to_z24s8(uint32_t val) #if defined(RADEON_COMMON_FOR_R200) #define WRITE_DEPTH( _x, _y, d ) \ *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) = d +#elif defined(RADEON_COMMON_FOR_R600) +#define WRITE_DEPTH( _x, _y, d ) \ + *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off) = d #else #define WRITE_DEPTH( _x, _y, d ) \ *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d @@ -350,6 +517,9 @@ s8z24_to_z24s8(uint32_t val) #if defined(RADEON_COMMON_FOR_R200) #define READ_DEPTH( d, _x, _y ) \ d = *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) +#elif defined(RADEON_COMMON_FOR_R600) +#define READ_DEPTH( d, _x, _y ) \ + d = *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off) #else #define READ_DEPTH( d, _x, _y ) \ d = *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) @@ -374,6 +544,15 @@ do { \ tmp |= ((d << 8) & 0xffffff00); \ *_ptr = tmp; \ } while (0) +#elif defined(RADEON_COMMON_FOR_R600) +#define WRITE_DEPTH( _x, _y, d ) \ +do { \ + GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = *_ptr; \ + tmp &= 0xff000000; \ + tmp |= ((d) & 0x00ffffff); \ + *_ptr = tmp; \ +} while (0) #elif defined(RADEON_COMMON_FOR_R200) #define WRITE_DEPTH( _x, _y, d ) \ do { \ @@ -399,6 +578,11 @@ do { \ do { \ d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \ }while(0) +#elif defined(RADEON_COMMON_FOR_R600) +#define READ_DEPTH( d, _x, _y ) \ + do { \ + d = (*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off)) & 0x00ffffff); \ + }while(0) #elif defined(RADEON_COMMON_FOR_R200) #define READ_DEPTH( d, _x, _y ) \ do { \ @@ -426,6 +610,20 @@ do { \ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ *_ptr = d; \ } while (0) +#elif defined(RADEON_COMMON_FOR_R600) +#define WRITE_DEPTH( _x, _y, d ) \ +do { \ + GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = *_ptr; \ + tmp &= 0xff000000; \ + tmp |= (((d) >> 8) & 0x00ffffff); \ + *_ptr = tmp; \ + _ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \ + tmp = *_ptr; \ + tmp &= 0xffffff00; \ + tmp |= (d) & 0xff; \ + *_ptr = tmp; \ +} while (0) #elif defined(RADEON_COMMON_FOR_R200) #define WRITE_DEPTH( _x, _y, d ) \ do { \ @@ -447,6 +645,12 @@ do { \ do { \ d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))); \ }while(0) +#elif defined(RADEON_COMMON_FOR_R600) +#define READ_DEPTH( d, _x, _y ) \ + do { \ + d = ((*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off))) << 8) & 0xffffff00; \ + d |= (*(GLuint*)(r600_ptr_stencil(rrb, _x + x_off, _y + y_off))) & 0x000000ff; \ + }while(0) #elif defined(RADEON_COMMON_FOR_R200) #define READ_DEPTH( d, _x, _y ) \ do { \ @@ -476,6 +680,15 @@ do { \ tmp |= (d) & 0xff; \ *_ptr = tmp; \ } while (0) +#elif defined(RADEON_COMMON_FOR_R600) +#define WRITE_STENCIL( _x, _y, d ) \ +do { \ + GLuint *_ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \ + GLuint tmp = *_ptr; \ + tmp &= 0xffffff00; \ + tmp |= (d) & 0xff; \ + *_ptr = tmp; \ +} while (0) #elif defined(RADEON_COMMON_FOR_R200) #define WRITE_STENCIL( _x, _y, d ) \ do { \ @@ -503,6 +716,13 @@ do { \ GLuint tmp = *_ptr; \ d = tmp & 0x000000ff; \ } while (0) +#elif defined(RADEON_COMMON_FOR_R600) +#define READ_STENCIL( d, _x, _y ) \ +do { \ + GLuint *_ptr = (GLuint*)r600_ptr_stencil( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = *_ptr; \ + d = tmp & 0x000000ff; \ +} while (0) #elif defined(RADEON_COMMON_FOR_R200) #define READ_STENCIL( d, _x, _y ) \ do { \ -- cgit v1.2.3 From 2cd2dc34ac93dd929ec8f01cf1f7f8dfa6b34d0d Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Tue, 15 Sep 2009 11:27:51 -0400 Subject: r600: support position_invariant programs --- src/mesa/drivers/dri/r600/r700_vertprog.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index 8c2b0071df9..9ee26286d9b 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -42,6 +42,7 @@ #include "radeon_debug.h" #include "r600_context.h" #include "r600_cmdbuf.h" +#include "shader/programopt.c" #include "r700_debug.h" #include "r700_vertprog.h" @@ -272,6 +273,11 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, vp = _mesa_calloc(sizeof(*vp)); vp->mesa_program = (struct gl_vertex_program *)_mesa_clone_program(ctx, &mesa_vp->Base); + if (mesa_vp->IsPositionInvariant) + { + _mesa_insert_mvp_code(ctx, vp->mesa_program); + } + for(i=0; ir700AsmCode), &(vp->r700Shader) ); - Map_Vertex_Program( vp, mesa_vp ); + Map_Vertex_Program( vp, vp->mesa_program ); - if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, mesa_vp)) + if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, vp->mesa_program)) { return NULL; } - if(GL_FALSE == AssembleInstr(mesa_vp->Base.NumInstructions, - &(mesa_vp->Base.Instructions[0]), + if(GL_FALSE == AssembleInstr(vp->mesa_program->Base.NumInstructions, + &(vp->mesa_program->Base.Instructions[0]), &(vp->r700AsmCode)) ) { return NULL; } - if(GL_FALSE == Process_Vertex_Exports(&(vp->r700AsmCode), mesa_vp->Base.OutputsWritten) ) + if(GL_FALSE == Process_Vertex_Exports(&(vp->r700AsmCode), vp->mesa_program->Base.OutputsWritten) ) { return NULL; } @@ -329,23 +335,23 @@ void r700SelectVertexShader(GLcontext *ctx) unsigned int unBit; unsigned int i; GLboolean match; + GLbitfield InputsRead; vpc = (struct r700_vertex_program_cont *)ctx->VertexProgram._Current; -#if 0 - if (context->radeon.NewGLState & (_NEW_PROGRAM_CONSTANTS|_NEW_PROGRAM)) + InputsRead = vpc->mesa_program.Base.InputsRead; + if (vpc->mesa_program.IsPositionInvariant) { - vpc->needUpdateVF = 1; - } -#endif - + InputsRead |= VERT_BIT_POS; + } + for (vp = vpc->progs; vp; vp = vp->next) { match = GL_TRUE; for(i=0; imesa_program.Base.InputsRead & unBit) + if(InputsRead & unBit) { if (vp->aos_desc[i].size != vb->AttribPtr[i]->size) match = GL_FALSE; -- cgit v1.2.3 From dbec27be856584bc5205c7eeeca2b7e98299d4cb Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 15 Sep 2009 16:58:37 -0400 Subject: r600: minor span cleanups --- src/mesa/drivers/dri/radeon/radeon_span.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c index aa2035338c2..9959da011e4 100644 --- a/src/mesa/drivers/dri/radeon/radeon_span.c +++ b/src/mesa/drivers/dri/radeon/radeon_span.c @@ -113,8 +113,8 @@ static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb, * only 1D tiling is implemented below */ #if defined(RADEON_COMMON_FOR_R600) -static GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb, - GLint x, GLint y, GLint is_depth, GLint is_stencil) +static inline GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb, + GLint x, GLint y, GLint is_depth, GLint is_stencil) { GLint element_bytes = rrb->cpp; GLint num_samples = 1; @@ -138,7 +138,7 @@ static GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb, GLint offset = 0; tile_bytes = tile_width * tile_height * tile_thickness * element_bytes * num_samples; - tiles_per_row = pitch_elements /tile_width; + tiles_per_row = pitch_elements / tile_width; tiles_per_slice = tiles_per_row * (height / tile_height); slice_offset = (z / tile_thickness) * tiles_per_slice * tile_bytes; tile_row_index = y / tile_height; @@ -157,7 +157,6 @@ static GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb, switch (element_bytes) { case 2: pixel_offset = pixel_number * element_bytes * num_samples; - element_offset = pixel_offset + (sample_number * element_bytes); break; case 4: /* stencil and depth data are stored separately within a tile. -- cgit v1.2.3 From ec14d59afa952b4e53ad268971098584686a6fca Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 15 Sep 2009 17:12:03 -0400 Subject: radeon: don't build non-r600 span code on r600 --- src/mesa/drivers/dri/radeon/radeon_span.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c index 9959da011e4..d603f52df7b 100644 --- a/src/mesa/drivers/dri/radeon/radeon_span.c +++ b/src/mesa/drivers/dri/radeon/radeon_span.c @@ -239,7 +239,7 @@ static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb, return &ptr[offset]; } -#endif +#else /* radeon tiling on r300-r500 has 4 states, macro-linear/micro-linear @@ -332,7 +332,10 @@ static GLubyte *radeon_ptr_2byte_8x2(const struct radeon_renderbuffer * rrb, return &ptr[offset]; } +#endif + #ifndef COMPILE_R300 +#ifndef COMPILE_R600 static uint32_t z24s8_to_s8z24(uint32_t val) { @@ -345,6 +348,7 @@ s8z24_to_z24s8(uint32_t val) return (val >> 24) | (val << 8); } #endif +#endif /* * Note that all information needed to access pixels in a renderbuffer -- cgit v1.2.3 From 095db818c6c7ed5706b5f31d17d0cb19c03cb67a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sat, 19 Sep 2009 14:46:06 -0400 Subject: r600: fix polygon offset --- src/mesa/drivers/dri/r600/r700_state.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c index 2a0b4192562..d8190efe473 100644 --- a/src/mesa/drivers/dri/r600/r700_state.c +++ b/src/mesa/drivers/dri/r600/r700_state.c @@ -1122,20 +1122,25 @@ static void r700PolygonOffset(GLcontext * ctx, GLfloat factor, GLfloat units) // context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); GLfloat constant = units; + GLchar depth = 0; + + R600_STATECHANGE(context, poly); switch (ctx->Visual.depthBits) { case 16: constant *= 4.0; + depth = -16; break; case 24: constant *= 2.0; + depth = -24; break; } factor *= 12.0; - - R600_STATECHANGE(context, poly); - + SETfield(r700->PA_SU_POLY_OFFSET_DB_FMT_CNTL.u32All, depth, + POLY_OFFSET_NEG_NUM_DB_BITS_shift, POLY_OFFSET_NEG_NUM_DB_BITS_mask); + //r700->PA_SU_POLY_OFFSET_CLAMP.f32All = constant; //??? r700->PA_SU_POLY_OFFSET_FRONT_SCALE.f32All = factor; r700->PA_SU_POLY_OFFSET_FRONT_OFFSET.f32All = constant; r700->PA_SU_POLY_OFFSET_BACK_SCALE.f32All = factor; -- cgit v1.2.3 From 48559c76056e09ca4f9e4f39e9008f6d32ecd5b0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sat, 19 Sep 2009 15:18:42 -0400 Subject: r600: fix point sizes registers takes radius --- src/mesa/drivers/dri/r600/r700_state.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c index d8190efe473..8571563149e 100644 --- a/src/mesa/drivers/dri/r600/r700_state.c +++ b/src/mesa/drivers/dri/r600/r700_state.c @@ -837,9 +837,9 @@ static void r700PointSize(GLcontext * ctx, GLfloat size) size = CLAMP(size, ctx->Const.MinPointSize, ctx->Const.MaxPointSize); /* format is 12.4 fixed point */ - SETfield(r700->PA_SU_POINT_SIZE.u32All, (int)(size * 16), + SETfield(r700->PA_SU_POINT_SIZE.u32All, (int)(size * 8.0), PA_SU_POINT_SIZE__HEIGHT_shift, PA_SU_POINT_SIZE__HEIGHT_mask); - SETfield(r700->PA_SU_POINT_SIZE.u32All, (int)(size * 16), + SETfield(r700->PA_SU_POINT_SIZE.u32All, (int)(size * 8.0), PA_SU_POINT_SIZE__WIDTH_shift, PA_SU_POINT_SIZE__WIDTH_mask); } @@ -854,11 +854,11 @@ static void r700PointParameter(GLcontext * ctx, GLenum pname, const GLfloat * pa /* format is 12.4 fixed point */ switch (pname) { case GL_POINT_SIZE_MIN: - SETfield(r700->PA_SU_POINT_MINMAX.u32All, (int)(ctx->Point.MinSize * 16.0), + SETfield(r700->PA_SU_POINT_MINMAX.u32All, (int)(ctx->Point.MinSize * 8.0), MIN_SIZE_shift, MIN_SIZE_mask); break; case GL_POINT_SIZE_MAX: - SETfield(r700->PA_SU_POINT_MINMAX.u32All, (int)(ctx->Point.MaxSize * 16.0), + SETfield(r700->PA_SU_POINT_MINMAX.u32All, (int)(ctx->Point.MaxSize * 8.0), MAX_SIZE_shift, MAX_SIZE_mask); break; case GL_POINT_DISTANCE_ATTENUATION: -- cgit v1.2.3 From ed91d103477d563f73be3555d1022ec9af073467 Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Mon, 21 Sep 2009 10:14:25 -0400 Subject: r600: fix some issues with LIT instruction - MUL_LIT is ALU.Trans instruction - some Trans instructions can take 3 arguments - don't clobber dst.x, use dst.z as temp, it'll get written correct value in last insn - respect source swizzles --- src/mesa/drivers/dri/r600/r700_assembler.c | 69 ++++++++++++++++-------------- 1 file changed, 36 insertions(+), 33 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index efeccb25f1e..f46bc322011 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -2024,7 +2024,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) return GL_FALSE; } - if (pAsm->D.dst.math == 0) + if (uNumSrc > 1) { // Process source 1 current_source_index = 1; @@ -2880,6 +2880,11 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) return GL_FALSE; } + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + /* dst.y = max(src.x, 0.0) */ pAsm->D.dst.opcode = SQ_OP2_INST_MAX; pAsm->D.dst.rtype = dstType; @@ -2891,11 +2896,6 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) pAsm->S[0].src.rtype = srcType; pAsm->S[0].src.reg = srcReg; setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - noneg_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[0].src.swizzlex = SQ_SEL_X; - pAsm->S[0].src.swizzley = SQ_SEL_X; - pAsm->S[0].src.swizzlez = SQ_SEL_X; - pAsm->S[0].src.swizzlew = SQ_SEL_X; pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; pAsm->S[1].src.reg = tmp; setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); @@ -2909,34 +2909,47 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) return GL_FALSE; } - /* before: dst.w = log(src.y) - * after : dst.x = log(src.y) - * why change dest register is that dst.w has been initialized as 1 before - */ + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y); + + /* dst.z = log(src.y) */ pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED; pAsm->D.dst.math = 1; pAsm->D.dst.rtype = dstType; pAsm->D.dst.reg = dstReg; - pAsm->D.dst.writex = 1; + pAsm->D.dst.writex = 0; pAsm->D.dst.writey = 0; - pAsm->D.dst.writez = 0; + pAsm->D.dst.writez = 1; pAsm->D.dst.writew = 0; pAsm->S[0].src.rtype = srcType; pAsm->S[0].src.reg = srcReg; setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - noneg_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[0].src.swizzlex = SQ_SEL_Y; - pAsm->S[0].src.swizzley = SQ_SEL_Y; - pAsm->S[0].src.swizzlez = SQ_SEL_Y; - pAsm->S[0].src.swizzlew = SQ_SEL_Y; if( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } - /* before: tmp.x = amd MUL_LIT(src.w, dst.w, src.x ) */ - /* after : tmp.x = amd MUL_LIT(src.w, dst.x, src.x ) */ + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, 2) ) + { + return GL_FALSE; + } + + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W); + + swizzleagain_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X); + + /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT; + pAsm->D.dst.math = 1; pAsm->D.dst.op3 = 1; pAsm->D.dst.rtype = DST_REG_TEMPORARY; pAsm->D.dst.reg = tmp; @@ -2948,29 +2961,19 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) pAsm->S[0].src.rtype = srcType; pAsm->S[0].src.reg = srcReg; setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - noneg_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[0].src.swizzlex = SQ_SEL_W; - pAsm->S[0].src.swizzley = SQ_SEL_W; - pAsm->S[0].src.swizzlez = SQ_SEL_W; - pAsm->S[0].src.swizzlew = SQ_SEL_W; pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; pAsm->S[1].src.reg = dstReg; setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); noneg_PVSSRC(&(pAsm->S[1].src)); - pAsm->S[1].src.swizzlex = SQ_SEL_X; - pAsm->S[1].src.swizzley = SQ_SEL_X; - pAsm->S[1].src.swizzlez = SQ_SEL_X; - pAsm->S[1].src.swizzlew = SQ_SEL_X; + pAsm->S[1].src.swizzlex = SQ_SEL_Z; + pAsm->S[1].src.swizzley = SQ_SEL_Z; + pAsm->S[1].src.swizzlez = SQ_SEL_Z; + pAsm->S[1].src.swizzlew = SQ_SEL_Z; pAsm->S[2].src.rtype = srcType; pAsm->S[2].src.reg = srcReg; setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE); - noneg_PVSSRC(&(pAsm->S[2].src)); - pAsm->S[2].src.swizzlex = SQ_SEL_X; - pAsm->S[2].src.swizzley = SQ_SEL_X; - pAsm->S[2].src.swizzlez = SQ_SEL_X; - pAsm->S[2].src.swizzlew = SQ_SEL_X; if( GL_FALSE == next_ins(pAsm) ) { -- cgit v1.2.3 From 28308c92605229129a12a2273dda47c6a2ca4790 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 21 Sep 2009 16:30:14 -0400 Subject: r600: various cleanups - max texture size is 8k, but mesa doesn't support that at the moment. - attempt to set shader limits to what the hw actually supports - clean up some old r300 cruft - no need to explicitly disable irqs. This is fixed in the drm now. Signed-off-by: Alex Deucher --- src/mesa/drivers/dri/r600/r600_context.c | 43 +++++++++++----------- src/mesa/drivers/dri/r600/r600_context.h | 19 ---------- .../drivers/dri/radeon/radeon_common_context.c | 7 +--- 3 files changed, 24 insertions(+), 45 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index 251c124cbf4..414c5aec594 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -281,8 +281,8 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, ctx->Const.MaxTextureMaxAnisotropy = 16.0; ctx->Const.MaxTextureLodBias = 16.0; - ctx->Const.MaxTextureLevels = 13; - ctx->Const.MaxTextureRectSize = 4096; + ctx->Const.MaxTextureLevels = 13; /* hw support 14 */ + ctx->Const.MaxTextureRectSize = 4096; /* hw support 8192 */ ctx->Const.MinPointSize = 0x0001 / 8.0; ctx->Const.MinPointSizeAA = 0x0001 / 8.0; @@ -328,25 +328,26 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, _tnl_allow_vertex_fog(ctx, GL_TRUE); /* currently bogus data */ - ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; - ctx->Const.VertexProgram.MaxNativeInstructions = - VSF_MAX_FRAGMENT_LENGTH / 4; - ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */ - ctx->Const.VertexProgram.MaxTemps = 32; - ctx->Const.VertexProgram.MaxNativeTemps = - /*VSF_MAX_FRAGMENT_TEMPS */ 32; - ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */ - ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; - - ctx->Const.FragmentProgram.MaxNativeTemps = PFS_NUM_TEMP_REGS; - ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */ - ctx->Const.FragmentProgram.MaxNativeParameters = PFS_NUM_CONST_REGS; - ctx->Const.FragmentProgram.MaxNativeAluInstructions = PFS_MAX_ALU_INST; - ctx->Const.FragmentProgram.MaxNativeTexInstructions = PFS_MAX_TEX_INST; - ctx->Const.FragmentProgram.MaxNativeInstructions = - PFS_MAX_ALU_INST + PFS_MAX_TEX_INST; - ctx->Const.FragmentProgram.MaxNativeTexIndirections = - PFS_MAX_TEX_INDIRECT; + ctx->Const.VertexProgram.MaxInstructions = 8192; /* in theory no limit */ + ctx->Const.VertexProgram.MaxNativeInstructions = 8192; + ctx->Const.VertexProgram.MaxNativeAttribs = 160; + ctx->Const.VertexProgram.MaxTemps = 256; /* 256 for reg-based constants, inline consts also supported */ + ctx->Const.VertexProgram.MaxNativeTemps = 256; + ctx->Const.VertexProgram.MaxNativeParameters = 256; /* ??? */ + ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; /* ??? */ + + ctx->Const.FragmentProgram.MaxNativeTemps = 256; + ctx->Const.FragmentProgram.MaxNativeAttribs = 32; + ctx->Const.FragmentProgram.MaxNativeParameters = 256; + ctx->Const.FragmentProgram.MaxNativeAluInstructions = 8192; + /* 8 per clause on r6xx, 16 on rv670/r7xx */ + if ((screen->chip_family == CHIP_FAMILY_RV670) || + (screen->chip_family >= CHIP_FAMILY_RV770)) + ctx->Const.FragmentProgram.MaxNativeTexInstructions = 16; + else + ctx->Const.FragmentProgram.MaxNativeTexInstructions = 8; + ctx->Const.FragmentProgram.MaxNativeInstructions = 8192; + ctx->Const.FragmentProgram.MaxNativeTexIndirections = 8; /* ??? */ ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* and these are?? */ ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h index c59df7505af..9397ecde81b 100644 --- a/src/mesa/drivers/dri/r600/r600_context.h +++ b/src/mesa/drivers/dri/r600/r600_context.h @@ -86,29 +86,10 @@ extern int hw_tcl_on; #include "tnl_dd/t_dd_vertex.h" #undef TAG -#define PFS_MAX_ALU_INST 64 -#define PFS_MAX_TEX_INST 64 -#define PFS_MAX_TEX_INDIRECT 4 -#define PFS_NUM_TEMP_REGS 32 -#define PFS_NUM_CONST_REGS 16 - -#define R600_MAX_AOS_ARRAYS 16 - -#define REG_COORDS 0 -#define REG_COLOR0 1 -#define REG_TEX0 2 - #define R600_FALLBACK_NONE 0 #define R600_FALLBACK_TCL 1 #define R600_FALLBACK_RAST 2 -enum -{ - NO_SHIFT = 0, - LEFT_SHIFT = 1, - RIGHT_SHIFT = 2, -}; - struct r600_hw_state { struct radeon_state_atom sq; struct radeon_state_atom db; diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index 71ee06d9a79..330721acee2 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -227,11 +227,8 @@ GLboolean radeonInitContext(radeonContextPtr radeon, fthrottle_mode = driQueryOptioni(&radeon->optionCache, "fthrottle_mode"); radeon->iw.irq_seq = -1; radeon->irqsEmitted = 0; - if (IS_R600_CLASS(radeon->radeonScreen)) - radeon->do_irqs = 0; - else - radeon->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS && - radeon->radeonScreen->irq); + radeon->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS && + radeon->radeonScreen->irq); radeon->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS); -- cgit v1.2.3 From 639fb1472d09281a8df3792c9bcbc59cd4424688 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 21 Sep 2009 16:48:55 -0400 Subject: r600: fix typo in the last commit 128 gprs, 256 reg-based consts --- src/mesa/drivers/dri/r600/r600_context.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index 414c5aec594..e0b77d43850 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -327,16 +327,16 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, _tnl_allow_pixel_fog(ctx, GL_FALSE); _tnl_allow_vertex_fog(ctx, GL_TRUE); - /* currently bogus data */ + /* 256 for reg-based consts, inline consts also supported */ ctx->Const.VertexProgram.MaxInstructions = 8192; /* in theory no limit */ ctx->Const.VertexProgram.MaxNativeInstructions = 8192; ctx->Const.VertexProgram.MaxNativeAttribs = 160; - ctx->Const.VertexProgram.MaxTemps = 256; /* 256 for reg-based constants, inline consts also supported */ - ctx->Const.VertexProgram.MaxNativeTemps = 256; - ctx->Const.VertexProgram.MaxNativeParameters = 256; /* ??? */ + ctx->Const.VertexProgram.MaxTemps = 128; + ctx->Const.VertexProgram.MaxNativeTemps = 128; + ctx->Const.VertexProgram.MaxNativeParameters = 256; ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; /* ??? */ - ctx->Const.FragmentProgram.MaxNativeTemps = 256; + ctx->Const.FragmentProgram.MaxNativeTemps = 128; ctx->Const.FragmentProgram.MaxNativeAttribs = 32; ctx->Const.FragmentProgram.MaxNativeParameters = 256; ctx->Const.FragmentProgram.MaxNativeAluInstructions = 8192; -- cgit v1.2.3 From 2058dfaa47704abc62aa5aa9719013624f26764d Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Wed, 23 Sep 2009 14:20:59 +0300 Subject: r600: add support for CUBE textures, also TXP seems to work here ... --- src/mesa/drivers/dri/r600/r700_assembler.c | 306 ++++++++++++++++++++++++----- src/mesa/drivers/dri/r600/r700_assembler.h | 4 + 2 files changed, 263 insertions(+), 47 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index f46bc322011..00eda544d4c 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -213,7 +213,7 @@ GLboolean is_reduction_opcode(PVSDWORD* dest) { if (dest->dst.op3 == 0) { - if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE) ) + if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE || dest->dst.opcode == SQ_OP2_INST_CUBE) ) { return GL_TRUE; } @@ -350,6 +350,7 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) case SQ_OP2_INST_PRED_SETNE: case SQ_OP2_INST_DOT4: case SQ_OP2_INST_DOT4_IEEE: + case SQ_OP2_INST_CUBE: return 2; case SQ_OP2_INST_MOV: @@ -469,6 +470,9 @@ int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700 pAsm->number_of_inputs = 0; + pAsm->is_tex = GL_FALSE; + pAsm->need_tex_barrier = GL_FALSE; + return 0; } @@ -682,7 +686,7 @@ GLboolean add_tex_instruction(r700_AssemblerBase* pAsm, // If this clause constains any TEX instruction that is dependent on a previous instruction, // set the barrier bit - if( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) ) + if( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) || pAsm->need_tex_barrier == GL_TRUE ) { pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x1; } @@ -1152,42 +1156,48 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) GLboolean bValidTexCoord = GL_FALSE; + if(pAsm->aArgSubst[1] >= 0) + { + bValidTexCoord = GL_TRUE; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = pAsm->aArgSubst[1]; + } + else + { switch (pILInst->SrcReg[0].File) { - case PROGRAM_CONSTANT: - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_STATE_VAR: - bValidTexCoord = GL_TRUE; - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[0].src.reg = pAsm->aArgSubst[1]; - break; - case PROGRAM_TEMPORARY: - bValidTexCoord = GL_TRUE; - pAsm->S[0].src.reg = pILInst->SrcReg[0].Index + - pAsm->starting_temp_register_number; - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - break; - case PROGRAM_INPUT: - switch (pILInst->SrcReg[0].Index) - { - case FRAG_ATTRIB_COL0: - case FRAG_ATTRIB_COL1: - case FRAG_ATTRIB_TEX0: - case FRAG_ATTRIB_TEX1: - case FRAG_ATTRIB_TEX2: - case FRAG_ATTRIB_TEX3: - case FRAG_ATTRIB_TEX4: - case FRAG_ATTRIB_TEX5: - case FRAG_ATTRIB_TEX6: - case FRAG_ATTRIB_TEX7: - bValidTexCoord = GL_TRUE; - pAsm->S[0].src.reg = - pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; - pAsm->S[0].src.rtype = SRC_REG_INPUT; - break; - } - break; + case PROGRAM_CONSTANT: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_STATE_VAR: + break; + case PROGRAM_TEMPORARY: + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = pILInst->SrcReg[0].Index + + pAsm->starting_temp_register_number; + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + break; + case PROGRAM_INPUT: + switch (pILInst->SrcReg[0].Index) + { + case FRAG_ATTRIB_COL0: + case FRAG_ATTRIB_COL1: + case FRAG_ATTRIB_TEX0: + case FRAG_ATTRIB_TEX1: + case FRAG_ATTRIB_TEX2: + case FRAG_ATTRIB_TEX3: + case FRAG_ATTRIB_TEX4: + case FRAG_ATTRIB_TEX5: + case FRAG_ATTRIB_TEX6: + case FRAG_ATTRIB_TEX7: + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = + pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; + pAsm->S[0].src.rtype = SRC_REG_INPUT; + break; + } + break; + } } if(GL_TRUE == bValidTexCoord) @@ -1955,7 +1965,9 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) { is_single_scalar_operation = GL_FALSE; number_of_scalar_operations = 4; - + +/* current assembler doesn't do more than 1 register per source */ +#if 0 /* check read port, only very preliminary algorithm, not count in src0/1 same comp case and prev slot repeat case; also not count relative addressing. TODO: improve performance. */ @@ -1990,6 +2002,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) { bSplitInst = GL_TRUE; } +#endif } contiguous_slots_needed = 0; @@ -2210,9 +2223,7 @@ GLboolean next_ins(r700_AssemblerBase *pAsm) { struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); - if( GL_TRUE == IsTex(pILInst->Opcode) && - /* handle const moves to temp register */ - !(pAsm->D.dst.opcode == SQ_OP2_INST_MOV) ) + if( GL_TRUE == pAsm->is_tex ) { if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX) { if( GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE) ) @@ -2256,7 +2267,8 @@ GLboolean next_ins(r700_AssemblerBase *pAsm) pAsm->S[0].bits = 0; pAsm->S[1].bits = 0; pAsm->S[2].bits = 0; - + pAsm->is_tex = GL_FALSE; + pAsm->need_tex_barrier = GL_FALSE; return GL_TRUE; } @@ -3379,7 +3391,10 @@ GLboolean assemble_STP(r700_AssemblerBase *pAsm) GLboolean assemble_TEX(r700_AssemblerBase *pAsm) { GLboolean src_const; + GLboolean need_barrier = GL_FALSE; + checkop1(pAsm); + switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File) { case PROGRAM_CONSTANT: @@ -3399,20 +3414,18 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) { if ( GL_FALSE == mov_temp(pAsm, 0) ) return GL_FALSE; + need_barrier = GL_TRUE; } switch (pAsm->pILInst[pAsm->uiCurInst].Opcode) { case OPCODE_TEX: - pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; break; case OPCODE_TXB: radeon_error("do not support TXB yet\n"); return GL_FALSE; break; case OPCODE_TXP: - /* TODO : tex proj version : divid first 3 components by 4th */ - pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; break; default: radeon_error("Internal error: bad texture op (not TEX)\n"); @@ -3420,6 +3433,190 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) break; } + if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP) + { + GLuint tmp = gethelpr(pAsm); + pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE; + pAsm->D.dst.math = 1; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writew = 1; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W); + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 1; + pAsm->D.dst.writez = 1; + pAsm->D.dst.writew = 0; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_W); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->aArgSubst[1] = tmp; + need_barrier = GL_TRUE; + } + + if (pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX ) + { + GLuint tmp1 = gethelpr(pAsm); + GLuint tmp2 = gethelpr(pAsm); + + /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ + pAsm->D.dst.opcode = SQ_OP2_INST_CUBE; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + nomask_PVSDST(&(pAsm->D.dst)); + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, 1) ) + { + return GL_FALSE; + } + + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y); + swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_X, SQ_SEL_Z, SQ_SEL_Z); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + /* tmp1.z = ABS(tmp1.z) dont have abs support in assembler currently + * have to do explicit instruction + */ + pAsm->D.dst.opcode = SQ_OP2_INST_MAX; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + pAsm->D.dst.writez = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[1].bits = pAsm->S[0].bits; + flipneg_PVSSRC(&(pAsm->S[1].src)); + + next_ins(pAsm); + + /* tmp1.z = RCP_e(|tmp1.z|) */ + pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE; + pAsm->D.dst.math = 1; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + pAsm->D.dst.writez = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + pAsm->S[0].src.swizzlex = SQ_SEL_Z; + + next_ins(pAsm); + + /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x + * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x + * muladd has no writemask, have to use another temp + * also no support for imm constants, so add 1 here + */ + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp2; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = tmp1; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z); + setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE); + pAsm->S[2].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[2].src.reg = tmp1; + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_1); + + next_ins(pAsm); + + /* ADD the remaining .5 */ + pAsm->D.dst.opcode = SQ_OP2_INST_ADD; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp2; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 1; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp2; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = 252; // SQ_ALU_SRC_0_5 + noswizzle_PVSSRC(&(pAsm->S[1].src)); + + next_ins(pAsm); + + /* tmp1.xy = temp2.xy */ + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 1; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp2; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + + next_ins(pAsm); + pAsm->aArgSubst[1] = tmp1; + need_barrier = GL_TRUE; + + } + + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + pAsm->is_tex = GL_TRUE; + if ( GL_TRUE == need_barrier ) + { + pAsm->need_tex_barrier = GL_TRUE; + } // Set src1 to tex unit id pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit; pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; @@ -3440,10 +3637,25 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) return GL_FALSE; } - if ( GL_FALSE == next_ins(pAsm) ) + if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP) { - return GL_FALSE; + /* hopefully did swizzles before */ + noswizzle_PVSSRC(&(pAsm->S[0].src)); } + + if(pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX) + { + /* SAMPLE dst, tmp.yxwy, CUBE */ + pAsm->S[0].src.swizzlex = SQ_SEL_Y; + pAsm->S[0].src.swizzley = SQ_SEL_X; + pAsm->S[0].src.swizzlez = SQ_SEL_W; + pAsm->S[0].src.swizzlew = SQ_SEL_Y; + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index f9c4d849c65..73bb8bac55d 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -374,6 +374,10 @@ typedef struct r700_AssemblerBase struct prog_instruction * pILInst; GLuint uiCurInst; GLboolean bR6xx; + /* helper to decide which type of instruction to assemble */ + GLboolean is_tex; + /* we inserted helper intructions and need barrier on next TEX ins */ + GLboolean need_tex_barrier; } r700_AssemblerBase; //Internal use -- cgit v1.2.3 From 41c5f113b5d41649db2027c3f32deaf4d38035ce Mon Sep 17 00:00:00 2001 From: Richard Li Date: Thu, 24 Sep 2009 10:12:40 -0400 Subject: r600 : disable draw_prim for now. --- src/mesa/drivers/dri/r600/r700_render.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index bbe364bc6aa..5627984cf96 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -1145,8 +1145,11 @@ static void r700DrawPrims(GLcontext *ctx, void r700InitDraw(GLcontext *ctx) { struct vbo_context *vbo = vbo_context(ctx); - + + /* to be enabled */ + /* vbo->draw_prims = r700DrawPrims; + */ } -- cgit v1.2.3 From 5f06064b616099712dbb2854351d0740c1dbfc60 Mon Sep 17 00:00:00 2001 From: Richard Li Date: Thu, 24 Sep 2009 11:26:15 -0400 Subject: r600 : fix draw_prim bug: vertex fetcher setting. --- src/mesa/drivers/dri/r600/r700_chip.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index 783427a94c9..e3b8a4081a5 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -254,9 +254,22 @@ static void r700SetupVTXConstants2(GLcontext * ctx, SETfield(uSQ_VTX_CONSTANT_WORD2_0, GetSurfaceFormat(pStreamDesc->type, pStreamDesc->size, NULL), SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift, SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask); /* TODO : trace back api for initial data type, not only GL_FLOAT */ - SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_SCALED, - SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask); - SETbit(uSQ_VTX_CONSTANT_WORD2_0, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit); + + if(GL_TRUE == pStreamDesc->normalize) + { + SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_NORM, + SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask); + } + //else + //{ + // SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_INT, + // SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask); + //} + + if(1 == pStreamDesc->_signed) + { + SETbit(uSQ_VTX_CONSTANT_WORD2_0, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit); + } SETfield(uSQ_VTX_CONSTANT_WORD3_0, 1, MEM_REQUEST_SIZE_shift, MEM_REQUEST_SIZE_mask); SETfield(uSQ_VTX_CONSTANT_WORD6_0, SQ_TEX_VTX_VALID_BUFFER, -- cgit v1.2.3 From 1d2dca194cebe6e25735b6820f85b8d1231aae63 Mon Sep 17 00:00:00 2001 From: Pauli Nieminen Date: Thu, 24 Sep 2009 19:58:09 +0300 Subject: radeon: Fix scissors for r600 KMS. Radeon generic scissors code had problem that some of code was using exclusive and some inclusive bottom right corner. Only r600 driver is using exclusive coordinate so changed generic code to pass inclusive coordinate and r600 driver changes BR coordinate to be exclusive. --- src/mesa/drivers/dri/r600/r700_state.c | 5 +++-- src/mesa/drivers/dri/radeon/radeon_common.c | 8 ++++---- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c index 8571563149e..124469b5a69 100644 --- a/src/mesa/drivers/dri/r600/r700_state.c +++ b/src/mesa/drivers/dri/r600/r700_state.c @@ -1269,10 +1269,11 @@ void r700SetScissor(context_t *context) //--------------- return; } if (context->radeon.state.scissor.enabled) { + /* r600 has exclusive scissors */ x1 = context->radeon.state.scissor.rect.x1; y1 = context->radeon.state.scissor.rect.y1; - x2 = context->radeon.state.scissor.rect.x2; - y2 = context->radeon.state.scissor.rect.y2; + x2 = context->radeon.state.scissor.rect.x2 + 1; + y2 = context->radeon.state.scissor.rect.y2 + 1; } else { if (context->radeon.radeonScreen->driScreen->dri2.enabled) { x1 = 0; diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index a4c7b40798a..9817ff856b9 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -232,13 +232,13 @@ void radeonUpdateScissor( GLcontext *ctx ) __DRIdrawablePrivate *dPriv = radeon_get_drawable(rmesa); x1 += dPriv->x; - x2 += dPriv->x + 1; + x2 += dPriv->x; min_x += dPriv->x; - max_x += dPriv->x + 1; + max_x += dPriv->x; y1 += dPriv->y; - y2 += dPriv->y + 1; + y2 += dPriv->y; min_y += dPriv->y; - max_y += dPriv->y + 1; + max_y += dPriv->y; } rmesa->state.scissor.rect.x1 = CLAMP(x1, min_x, max_x); -- cgit v1.2.3 From b849c6f1b3b38a68fae32d4dea16dd7431e41b6e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 24 Sep 2009 12:41:14 -0600 Subject: intel: use default array/element buffers in intel_generate_mipmap() If there happened to be a bound VBO when intel_generate_mipmap() was called we blew up because of a bad vertex array pointer. Fixes regnumonline, bug 23859. --- src/mesa/drivers/dri/intel/intel_generatemipmap.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/intel/intel_generatemipmap.c b/src/mesa/drivers/dri/intel/intel_generatemipmap.c index fe986092db6..12059e122ca 100644 --- a/src/mesa/drivers/dri/intel/intel_generatemipmap.c +++ b/src/mesa/drivers/dri/intel/intel_generatemipmap.c @@ -125,6 +125,8 @@ intel_generate_mipmap_2d(GLcontext *ctx, GLuint fb_name; GLboolean success = GL_FALSE; struct gl_framebuffer *saved_fbo = NULL; + struct gl_buffer_object *saved_array_buffer = NULL; + struct gl_buffer_object *saved_element_buffer = NULL; _mesa_PushAttrib(GL_ENABLE_BIT | GL_TEXTURE_BIT | GL_CURRENT_BIT | GL_COLOR_BUFFER_BIT | @@ -133,6 +135,16 @@ intel_generate_mipmap_2d(GLcontext *ctx, old_active_texture = ctx->Texture.CurrentUnit; _mesa_reference_framebuffer(&saved_fbo, ctx->DrawBuffer); + /* use default array/index buffers */ + _mesa_reference_buffer_object(ctx, &saved_array_buffer, + ctx->Array.ArrayBufferObj); + _mesa_reference_buffer_object(ctx, &ctx->Array.ArrayBufferObj, + ctx->Shared->NullBufferObj); + _mesa_reference_buffer_object(ctx, &saved_element_buffer, + ctx->Array.ElementArrayBufferObj); + _mesa_reference_buffer_object(ctx, &ctx->Array.ElementArrayBufferObj, + ctx->Shared->NullBufferObj); + _mesa_Disable(GL_POLYGON_STIPPLE); _mesa_Disable(GL_DEPTH_TEST); _mesa_Disable(GL_STENCIL_TEST); @@ -205,6 +217,15 @@ fail: meta_restore_fragment_program(&intel->meta); meta_restore_vertex_program(&intel->meta); + /* restore array/index buffers */ + _mesa_reference_buffer_object(ctx, &ctx->Array.ArrayBufferObj, + saved_array_buffer); + _mesa_reference_buffer_object(ctx, &saved_array_buffer, NULL); + _mesa_reference_buffer_object(ctx, &ctx->Array.ElementArrayBufferObj, + saved_element_buffer); + _mesa_reference_buffer_object(ctx, &saved_element_buffer, NULL); + + _mesa_DeleteFramebuffersEXT(1, &fb_name); _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture); if (saved_fbo) -- cgit v1.2.3 From 9018a7dd175caa9a0fbf940b7e66aa9411d2d965 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 24 Sep 2009 10:40:32 -0700 Subject: i965: Load NV program matrices when required. --- src/mesa/drivers/dri/i965/brw_curbe.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 0b0e6931a06..4be6c77aa1e 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -248,6 +248,9 @@ static void prepare_constant_buffer(struct brw_context *brw) GLuint offset = brw->curbe.vs_start * 16; GLuint nr = brw->vs.prog_data->nr_params / 4; + if (brw->vertex_program->IsNVProgram) + _mesa_load_tracked_matrices(ctx); + /* Updates the ParamaterValues[i] pointers for all parameters of the * basic type of PROGRAM_STATE_VAR. */ -- cgit v1.2.3 From a9a47afe7e87075432ce2d393b55409fcb7149ac Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 23 Sep 2009 16:50:59 -0700 Subject: i965: Remove assert about NV_vp now that it somewhat works. --- src/mesa/drivers/dri/i965/brw_vs.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index e3111c66800..f0c79efbd96 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -90,8 +90,6 @@ static void brw_upload_vs_prog(struct brw_context *brw) struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; - assert (vp && !vp->program.IsNVProgram); - memset(&key, 0, sizeof(key)); /* Just upload the program verbatim for now. Always send it all -- cgit v1.2.3 From 726a04a2cd1bf159a6c40584b4b2b9bc5948a82e Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 24 Sep 2009 11:58:33 -0700 Subject: i965: Emit zero initialization for NV VP temporaries as required. This is similar to what r300 does inside the driver, but I've added it as a generic option since it seems most hardware will want it. Fixes piglit nv-init-zero-reg.vpfp and nv-init-zero-addr.vpfp. --- src/mesa/drivers/dri/i965/brw_context.c | 1 + src/mesa/main/mtypes.h | 1 + src/mesa/shader/nvprogram.c | 44 +++++++++++++++++++++++++++++++++ src/mesa/shader/nvprogram.h | 4 +++ src/mesa/shader/nvvertparse.c | 1 + 5 files changed, 51 insertions(+) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 3c5b8483197..c300c33adce 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -125,6 +125,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, /* We want the GLSL compiler to emit code that uses condition codes */ ctx->Shader.EmitCondCodes = GL_TRUE; + ctx->Shader.EmitNVTempInitialization = GL_TRUE; ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024); ctx->Const.VertexProgram.MaxAluInstructions = 0; diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 6b64bf8139f..f8e4e41583d 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2079,6 +2079,7 @@ struct gl_shader_state GLboolean EmitContReturn; /**< Emit CONT/RET opcodes? */ GLboolean EmitCondCodes; /**< Use condition codes? */ GLboolean EmitComments; /**< Annotated instructions */ + GLboolean EmitNVTempInitialization; /**< 0-fill NV temp registers */ void *MemPool; GLbitfield Flags; /**< Mask of GLSL_x flags */ struct gl_sl_pragmas DefaultPragmas; /**< Default #pragma settings */ diff --git a/src/mesa/shader/nvprogram.c b/src/mesa/shader/nvprogram.c index fdb2f4210ad..471a7358a2f 100644 --- a/src/mesa/shader/nvprogram.c +++ b/src/mesa/shader/nvprogram.c @@ -509,7 +509,51 @@ _mesa_GetVertexAttribPointervNV(GLuint index, GLenum pname, GLvoid **pointer) *pointer = (GLvoid *) ctx->Array.ArrayObj->VertexAttrib[index].Ptr; } +void +_mesa_emit_nv_temp_initialization(GLcontext *ctx, + struct gl_program *program) +{ + struct prog_instruction *inst; + int i; + + if (!ctx->Shader.EmitNVTempInitialization) + return; + /* We'll swizzle up a zero temporary so we can use it for the + * ARL. + */ + if (program->NumTemporaries == 0) + program->NumTemporaries = 1; + + _mesa_insert_instructions(program, 0, program->NumTemporaries + 1); + + for (i = 0; i < program->NumTemporaries; i++) { + struct prog_instruction *inst = &program->Instructions[i]; + + inst->Opcode = OPCODE_SWZ; + inst->DstReg.File = PROGRAM_TEMPORARY; + inst->DstReg.Index = i; + inst->DstReg.WriteMask = WRITEMASK_XYZW; + inst->SrcReg[0].File = PROGRAM_TEMPORARY; + inst->SrcReg[0].Index = 0; + inst->SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ZERO, + SWIZZLE_ZERO, + SWIZZLE_ZERO, + SWIZZLE_ZERO); + } + + inst = &program->Instructions[i]; + inst->Opcode = OPCODE_ARL; + inst->DstReg.File = PROGRAM_ADDRESS; + inst->DstReg.Index = 0; + inst->DstReg.WriteMask = WRITEMASK_XYZW; + inst->SrcReg[0].File = PROGRAM_TEMPORARY; + inst->SrcReg[0].Index = 0; + inst->SrcReg[0].Swizzle = SWIZZLE_XXXX; + + if (program->NumAddressRegs == 0) + program->NumAddressRegs = 1; +} void _mesa_setup_nv_temporary_count(GLcontext *ctx, struct gl_program *program) diff --git a/src/mesa/shader/nvprogram.h b/src/mesa/shader/nvprogram.h index 0ed143d52f7..8ee59661bd0 100644 --- a/src/mesa/shader/nvprogram.h +++ b/src/mesa/shader/nvprogram.h @@ -106,4 +106,8 @@ _mesa_GetProgramNamedParameterdvNV(GLuint id, GLsizei len, const GLubyte *name, extern void _mesa_setup_nv_temporary_count(GLcontext *ctx, struct gl_program *program); +extern void +_mesa_emit_nv_temp_initialization(GLcontext *ctx, + struct gl_program *program); + #endif diff --git a/src/mesa/shader/nvvertparse.c b/src/mesa/shader/nvvertparse.c index a94e6b8b803..6ab8a14cb9b 100644 --- a/src/mesa/shader/nvvertparse.c +++ b/src/mesa/shader/nvvertparse.c @@ -1423,6 +1423,7 @@ _mesa_parse_nv_vertex_program(GLcontext *ctx, GLenum dstTarget, program->Base.NumParameters = program->Base.Parameters->NumParameters; _mesa_setup_nv_temporary_count(ctx, &program->Base); + _mesa_emit_nv_temp_initialization(ctx, &program->Base); } else { /* Error! */ -- cgit v1.2.3 From 54107a097904129ff794534542acd09ed152ea2e Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 24 Sep 2009 14:53:49 -0700 Subject: i965: Clean up some mess with the batch cache. Its flagging of extra state that's already flagged by the vtbl new_batch when appropriate was confusing my tracking down of the OA clear bug. --- src/mesa/drivers/dri/i965/brw_state.h | 5 +---- src/mesa/drivers/dri/i965/brw_state_batch.c | 15 ++------------- src/mesa/drivers/dri/i965/brw_state_upload.c | 3 ++- 3 files changed, 5 insertions(+), 18 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 78572356a3d..5335eac8951 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -86,9 +86,6 @@ const struct brw_tracked_state brw_psp_urb_cbs; const struct brw_tracked_state brw_pipe_control; -const struct brw_tracked_state brw_clear_surface_cache; -const struct brw_tracked_state brw_clear_batch_cache; - const struct brw_tracked_state brw_drawing_rect; const struct brw_tracked_state brw_indices; const struct brw_tracked_state brw_vertices; @@ -165,7 +162,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, const void *data, GLuint sz ); void brw_destroy_batch_cache( struct brw_context *brw ); -void brw_clear_batch_cache_flush( struct brw_context *brw ); +void brw_clear_batch_cache( struct brw_context *brw ); /* brw_wm_surface_state.c */ dri_bo * diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c index 811940edc05..7821898cf9b 100644 --- a/src/mesa/drivers/dri/i965/brw_state_batch.c +++ b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -79,7 +79,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, return GL_TRUE; } -static void clear_batch_cache( struct brw_context *brw ) +void brw_clear_batch_cache( struct brw_context *brw ) { struct brw_cached_batch_item *item = brw->cached_batch_items; @@ -93,18 +93,7 @@ static void clear_batch_cache( struct brw_context *brw ) brw->cached_batch_items = NULL; } -void brw_clear_batch_cache_flush( struct brw_context *brw ) -{ - clear_batch_cache(brw); - - brw->state.dirty.mesa |= ~0; - brw->state.dirty.brw |= ~0; - brw->state.dirty.cache |= ~0; -} - - - void brw_destroy_batch_cache( struct brw_context *brw ) { - clear_batch_cache(brw); + brw_clear_batch_cache(brw); } diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 414620d0b39..b817b741e77 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -287,6 +287,7 @@ void brw_validate_state( struct brw_context *brw ) if (brw->emit_state_always) { state->mesa |= ~0; state->brw |= ~0; + state->cache |= ~0; } if (brw->fragment_program != ctx->FragmentProgram._Current) { @@ -305,7 +306,7 @@ void brw_validate_state( struct brw_context *brw ) return; if (brw->state.dirty.brw & BRW_NEW_CONTEXT) - brw_clear_batch_cache_flush(brw); + brw_clear_batch_cache(brw); brw->intel.Fallback = 0; -- cgit v1.2.3 From cc8084932cc552587e3036dbbf62c77db3b4a08e Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 24 Sep 2009 16:15:52 -0700 Subject: intel: Flush the batch when we're about to subdata into a VBO. This fixes the clears in openarena with the new metaops clear code, and the new piglit vbo-subdata-sync test. Bug #23857. --- src/mesa/drivers/dri/intel/intel_buffer_objects.c | 6 +++++- src/mesa/drivers/dri/intel/intel_clear.c | 1 - 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index c55c5c426e0..7f6fb66d52f 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -207,8 +207,12 @@ intel_bufferobj_subdata(GLcontext * ctx, if (intel_obj->sys_buffer) memcpy((char *)intel_obj->sys_buffer + offset, data, size); - else + else { + /* Flush any existing batchbuffer that might reference this data. */ + intelFlush(ctx); + dri_bo_subdata(intel_obj->buffer, offset, size, data); + } } diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c index 9010b910c7d..1b0e221789a 100644 --- a/src/mesa/drivers/dri/intel/intel_clear.c +++ b/src/mesa/drivers/dri/intel/intel_clear.c @@ -173,7 +173,6 @@ intelClear(GLcontext *ctx, GLbitfield mask) } _mesa_meta_clear(&intel->ctx, tri_mask); - intel_batchbuffer_flush(intel->batch); } if (swrast_mask) { -- cgit v1.2.3 From 126d62edd18f22ff9e744efea81e0383cd0a19c5 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 24 Sep 2009 20:03:21 -0700 Subject: i915: Fix GetBufferSubData in the case of a system-memory BO. Bug #23760 (crashes in wine) --- src/mesa/drivers/dri/intel/intel_buffer_objects.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index 2e6b77824df..db0de0343a8 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -209,7 +209,10 @@ intel_bufferobj_get_subdata(GLcontext * ctx, struct intel_buffer_object *intel_obj = intel_buffer_object(obj); assert(intel_obj); - dri_bo_get_subdata(intel_obj->buffer, offset, size, data); + if (intel_obj->sys_buffer) + memcpy(data, (char *)intel_obj->sys_buffer + offset, size); + else + dri_bo_get_subdata(intel_obj->buffer, offset, size, data); } -- cgit v1.2.3 From 02b81187dcf606ebf064ac23888e5c57d0528edf Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 25 Sep 2009 18:51:55 +0200 Subject: radeon: Fix newlines. --- src/mesa/drivers/dri/r600/r700_chip.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index e3b8a4081a5..3b7f6fffe03 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -255,20 +255,20 @@ static void r700SetupVTXConstants2(GLcontext * ctx, SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift, SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask); /* TODO : trace back api for initial data type, not only GL_FLOAT */ - if(GL_TRUE == pStreamDesc->normalize) - { - SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_NORM, - SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask); - } - //else - //{ - // SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_INT, - // SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask); - //} - - if(1 == pStreamDesc->_signed) - { - SETbit(uSQ_VTX_CONSTANT_WORD2_0, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit); + if(GL_TRUE == pStreamDesc->normalize) + { + SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_NORM, + SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask); + } + //else + //{ + // SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_INT, + // SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask); + //} + + if(1 == pStreamDesc->_signed) + { + SETbit(uSQ_VTX_CONSTANT_WORD2_0, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit); } SETfield(uSQ_VTX_CONSTANT_WORD3_0, 1, MEM_REQUEST_SIZE_shift, MEM_REQUEST_SIZE_mask); -- cgit v1.2.3 From 151e0c0aeaa78f4eb6a87d2b3dd86b4807db1523 Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Fri, 25 Sep 2009 20:59:44 +0200 Subject: intel: Handle GL_RGB8 for glCopyTex(Sub)Image. Avoids an unnecessary fallback. --- src/mesa/drivers/dri/intel/intel_tex_copy.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index 028b49c14d3..74f7f58bbe4 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -74,6 +74,7 @@ get_teximage_source(struct intel_context *intel, GLenum internalFormat) case GL_RGBA: case GL_RGBA8: case GL_RGB: + case GL_RGB8: return intel_readbuf_region(intel); default: return NULL; -- cgit v1.2.3 From eea30906de37ea3b2f8a594c2b33b643d3dde987 Mon Sep 17 00:00:00 2001 From: Richard Li Date: Sun, 27 Sep 2009 14:47:12 -0400 Subject: r600 : Enable draw_prim. --- src/mesa/drivers/dri/r600/r700_render.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index 5627984cf96..4949bf013d9 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -1147,9 +1147,7 @@ void r700InitDraw(GLcontext *ctx) struct vbo_context *vbo = vbo_context(ctx); /* to be enabled */ - /* vbo->draw_prims = r700DrawPrims; - */ } -- cgit v1.2.3 From 8b23755ce978247a92c00e390de2e459c0a9d5ad Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 21 Sep 2009 17:13:31 -0700 Subject: intel: Remove some dead metaops code. --- src/mesa/drivers/dri/i965/brw_context.h | 4 --- src/mesa/drivers/dri/i965/brw_tex.c | 32 ------------------------ src/mesa/drivers/dri/i965/brw_vtbl.c | 2 -- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 13 ++-------- src/mesa/drivers/dri/intel/intel_context.h | 3 --- src/mesa/drivers/dri/intel/intel_pixel.c | 14 ----------- src/mesa/drivers/dri/intel/intel_pixel.h | 2 -- 7 files changed, 2 insertions(+), 68 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index a5209ac41be..fa3e32c7ff1 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -705,10 +705,6 @@ void brw_debug_batch(struct intel_context *intel); /*====================================================================== * brw_tex.c */ -void brwUpdateTextureState( struct intel_context *intel ); -void brw_FrameBufferTexInit( struct brw_context *brw, - struct intel_region *region ); -void brw_FrameBufferTexDestroy( struct brw_context *brw ); void brw_validate_textures( struct brw_context *brw ); diff --git a/src/mesa/drivers/dri/i965/brw_tex.c b/src/mesa/drivers/dri/i965/brw_tex.c index 71bff166dda..e911b105b23 100644 --- a/src/mesa/drivers/dri/i965/brw_tex.c +++ b/src/mesa/drivers/dri/i965/brw_tex.c @@ -39,38 +39,6 @@ #include "intel_tex.h" #include "brw_context.h" - -void brw_FrameBufferTexInit( struct brw_context *brw, - struct intel_region *region ) -{ - struct intel_context *intel = &brw->intel; - GLcontext *ctx = &intel->ctx; - struct gl_texture_object *obj; - struct gl_texture_image *img; - - intel->frame_buffer_texobj = obj = - ctx->Driver.NewTextureObject( ctx, (GLuint) -1, GL_TEXTURE_2D ); - - obj->MinFilter = GL_NEAREST; - obj->MagFilter = GL_NEAREST; - - img = ctx->Driver.NewTextureImage( ctx ); - - _mesa_init_teximage_fields( ctx, GL_TEXTURE_2D, img, - region->pitch, region->height, 1, 0, - region->cpp == 4 ? GL_RGBA : GL_RGB ); - - _mesa_set_tex_image( obj, GL_TEXTURE_2D, 0, img ); -} - -void brw_FrameBufferTexDestroy( struct brw_context *brw ) -{ - if (brw->intel.frame_buffer_texobj != NULL) - brw->intel.ctx.Driver.DeleteTexture( &brw->intel.ctx, - brw->intel.frame_buffer_texobj ); - brw->intel.frame_buffer_texobj = NULL; -} - /** * Finalizes all textures, completing any rendering that needs to be done * to prepare them. diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index ac117901515..124fde25fe0 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -69,8 +69,6 @@ static void brw_destroy_context( struct intel_context *intel ) _mesa_free(brw->wm.compile_data); - brw_FrameBufferTexDestroy( brw ); - for (i = 0; i < brw->state.nr_color_regions; i++) intel_region_release(&brw->state.color_regions[i]); brw->state.nr_color_regions = 0; diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 51539ac1e73..4f651170c60 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -724,17 +724,8 @@ static void prepare_wm_surfaces(struct brw_context *brw ) /* _NEW_TEXTURE, BRW_NEW_TEXDATA */ if (texUnit->_ReallyEnabled) { - if (texUnit->_Current == intel->frame_buffer_texobj) { - /* render to texture */ - dri_bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = brw->wm.surf_bo[0]; - dri_bo_reference(brw->wm.surf_bo[surf]); - brw->wm.nr_surfaces = surf + 1; - } else { - /* regular texture */ - brw_update_texture_surface(ctx, i); - brw->wm.nr_surfaces = surf + 1; - } + brw_update_texture_surface(ctx, i); + brw->wm.nr_surfaces = surf + 1; } else { dri_bo_unreference(brw->wm.surf_bo[surf]); brw->wm.surf_bo[surf] = NULL; diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 03e7cf39d68..c743ab1c246 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -254,9 +254,6 @@ struct intel_context intel_line_func draw_line; intel_tri_func draw_tri; - /* These refer to the current drawing buffer: - */ - struct gl_texture_object *frame_buffer_texobj; /** * Set to true if a single constant cliprect should be used in the * batchbuffer. Otherwise, cliprects must be calculated at batchbuffer diff --git a/src/mesa/drivers/dri/intel/intel_pixel.c b/src/mesa/drivers/dri/intel/intel_pixel.c index a3001416559..993e427a992 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel.c +++ b/src/mesa/drivers/dri/intel/intel_pixel.c @@ -129,20 +129,6 @@ intel_check_blit_fragment_ops(GLcontext * ctx, GLboolean src_alpha_is_one) return GL_TRUE; } - -GLboolean -intel_check_meta_tex_fragment_ops(GLcontext * ctx) -{ - if (ctx->NewState) - _mesa_update_state(ctx); - - /* Some of _ImageTransferState (scale, bias) could be done with - * fragment programs on i915. - */ - return !(ctx->_ImageTransferState || ctx->Fog.Enabled || /* not done yet */ - ctx->Texture._EnabledUnits || ctx->FragmentProgram._Enabled); -} - /* The intel_region struct doesn't really do enough to capture the * format of the pixels in the region. For now this code assumes that * the region is a display surface and hence is either ARGB8888 or diff --git a/src/mesa/drivers/dri/intel/intel_pixel.h b/src/mesa/drivers/dri/intel/intel_pixel.h index 96a6dd17b25..743b6497c52 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel.h +++ b/src/mesa/drivers/dri/intel/intel_pixel.h @@ -34,8 +34,6 @@ void intelInitPixelFuncs(struct dd_function_table *functions); GLboolean intel_check_blit_fragment_ops(GLcontext * ctx, GLboolean src_alpha_is_one); -GLboolean intel_check_meta_tex_fragment_ops(GLcontext * ctx); - GLboolean intel_check_blit_format(struct intel_region *region, GLenum format, GLenum type); -- cgit v1.2.3 From e885cb48a0b9292b3df9204f1c2783bf1fe29a28 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 28 Sep 2009 11:42:31 -0700 Subject: intel: Drop my generatemipmap code in favor of the new shared code. --- src/mesa/drivers/common/driverfuncs.c | 2 +- src/mesa/drivers/dri/i915/Makefile | 1 - src/mesa/drivers/dri/i915/intel_generatemipmap.c | 1 - src/mesa/drivers/dri/i965/Makefile | 1 - src/mesa/drivers/dri/i965/intel_generatemipmap.c | 1 - src/mesa/drivers/dri/intel/intel_generatemipmap.c | 300 ---------------------- src/mesa/drivers/dri/intel/intel_tex.c | 1 - src/mesa/drivers/dri/intel/intel_tex.h | 3 - 8 files changed, 1 insertion(+), 309 deletions(-) delete mode 120000 src/mesa/drivers/dri/i915/intel_generatemipmap.c delete mode 120000 src/mesa/drivers/dri/i965/intel_generatemipmap.c delete mode 100644 src/mesa/drivers/dri/intel/intel_generatemipmap.c (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index f09106b77c3..0f8447cb70f 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -106,7 +106,7 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->CopyTexSubImage1D = _mesa_meta_CopyTexSubImage1D; driver->CopyTexSubImage2D = _mesa_meta_CopyTexSubImage2D; driver->CopyTexSubImage3D = _mesa_meta_CopyTexSubImage3D; - driver->GenerateMipmap = _mesa_generate_mipmap; + driver->GenerateMipmap = _mesa_meta_GenerateMipmap; driver->TestProxyTexImage = _mesa_test_proxy_teximage; driver->CompressedTexImage1D = _mesa_store_compressed_teximage1d; driver->CompressedTexImage2D = _mesa_store_compressed_teximage2d; diff --git a/src/mesa/drivers/dri/i915/Makefile b/src/mesa/drivers/dri/i915/Makefile index 9d049dea8fe..393312e7328 100644 --- a/src/mesa/drivers/dri/i915/Makefile +++ b/src/mesa/drivers/dri/i915/Makefile @@ -19,7 +19,6 @@ DRIVER_SOURCES = \ intel_batchbuffer.c \ intel_clear.c \ intel_extensions.c \ - intel_generatemipmap.c \ intel_mipmap_tree.c \ intel_tex_layout.c \ intel_tex_image.c \ diff --git a/src/mesa/drivers/dri/i915/intel_generatemipmap.c b/src/mesa/drivers/dri/i915/intel_generatemipmap.c deleted file mode 120000 index 4c6b37ada01..00000000000 --- a/src/mesa/drivers/dri/i915/intel_generatemipmap.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_generatemipmap.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 6e9a9a29a39..57dcc915869 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -14,7 +14,6 @@ DRIVER_SOURCES = \ intel_decode.c \ intel_extensions.c \ intel_fbo.c \ - intel_generatemipmap.c \ intel_mipmap_tree.c \ intel_regions.c \ intel_screen.c \ diff --git a/src/mesa/drivers/dri/i965/intel_generatemipmap.c b/src/mesa/drivers/dri/i965/intel_generatemipmap.c deleted file mode 120000 index 4c6b37ada01..00000000000 --- a/src/mesa/drivers/dri/i965/intel_generatemipmap.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_generatemipmap.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/intel/intel_generatemipmap.c b/src/mesa/drivers/dri/intel/intel_generatemipmap.c deleted file mode 100644 index 5958b36c971..00000000000 --- a/src/mesa/drivers/dri/intel/intel_generatemipmap.c +++ /dev/null @@ -1,300 +0,0 @@ -/* - * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. - * Copyright © 2009 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Eric Anholt - * - */ - -#include "main/glheader.h" -#include "main/enums.h" -#include "main/image.h" -#include "main/mtypes.h" -#include "main/macros.h" -#include "main/bufferobj.h" -#include "main/teximage.h" -#include "main/texenv.h" -#include "main/texobj.h" -#include "main/texstate.h" -#include "main/texparam.h" -#include "main/varray.h" -#include "main/attrib.h" -#include "main/enable.h" -#include "main/buffers.h" -#include "main/fbobject.h" -#include "main/framebuffer.h" -#include "main/renderbuffer.h" -#include "main/depth.h" -#include "main/hash.h" -#include "main/mipmap.h" -#include "main/blend.h" -#include "glapi/dispatch.h" -#include "swrast/swrast.h" - -#include "intel_screen.h" -#include "intel_context.h" -#include "intel_batchbuffer.h" -#include "intel_pixel.h" -#include "intel_tex.h" -#include "intel_mipmap_tree.h" - -static const char *intel_fp_tex2d = - "!!ARBfp1.0\n" - "TEX result.color, fragment.texcoord[0], texture[0], 2D;\n" - "END\n"; - -static GLboolean -intel_generate_mipmap_level(GLcontext *ctx, GLuint tex_name, - int level, int width, int height) -{ - struct intel_context *intel = intel_context(ctx); - GLfloat vertices[4][2]; - GLint status; - - /* Set to source from the previous level */ - _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, level - 1); - _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, level - 1); - - /* Set to draw into the current level */ - _mesa_FramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, - GL_COLOR_ATTACHMENT0_EXT, - GL_TEXTURE_2D, - tex_name, - level); - /* Choose to render to the color attachment. */ - _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0_EXT); - - status = _mesa_CheckFramebufferStatusEXT (GL_FRAMEBUFFER_EXT); - if (status != GL_FRAMEBUFFER_COMPLETE_EXT) - return GL_FALSE; - - meta_set_passthrough_transform(&intel->meta); - - /* XXX: Doing it right would involve setting up the transformation to do - * 0-1 mapping or something, and not changing the vertex data. - */ - vertices[0][0] = 0; - vertices[0][1] = 0; - vertices[1][0] = width; - vertices[1][1] = 0; - vertices[2][0] = width; - vertices[2][1] = height; - vertices[3][0] = 0; - vertices[3][1] = height; - - _mesa_VertexPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &vertices); - _mesa_Enable(GL_VERTEX_ARRAY); - meta_set_default_texrect(&intel->meta); - - _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); - - meta_restore_texcoords(&intel->meta); - meta_restore_transform(&intel->meta); - - return GL_TRUE; -} - -static GLboolean -intel_generate_mipmap_2d(GLcontext *ctx, - GLenum target, - struct gl_texture_object *texObj) -{ - struct intel_context *intel = intel_context(ctx); - GLint old_active_texture; - int level, max_levels, start_level, end_level; - GLuint fb_name; - GLboolean success = GL_FALSE; - struct gl_framebuffer *saved_fbo = NULL; - struct gl_buffer_object *saved_array_buffer = NULL; - struct gl_buffer_object *saved_element_buffer = NULL; - - _mesa_PushAttrib(GL_ENABLE_BIT | GL_TEXTURE_BIT | - GL_CURRENT_BIT | GL_COLOR_BUFFER_BIT | - GL_DEPTH_BUFFER_BIT); - _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT); - old_active_texture = ctx->Texture.CurrentUnit; - _mesa_reference_framebuffer(&saved_fbo, ctx->DrawBuffer); - - /* use default array/index buffers */ - _mesa_reference_buffer_object(ctx, &saved_array_buffer, - ctx->Array.ArrayBufferObj); - _mesa_reference_buffer_object(ctx, &ctx->Array.ArrayBufferObj, - ctx->Shared->NullBufferObj); - _mesa_reference_buffer_object(ctx, &saved_element_buffer, - ctx->Array.ElementArrayBufferObj); - _mesa_reference_buffer_object(ctx, &ctx->Array.ElementArrayBufferObj, - ctx->Shared->NullBufferObj); - - _mesa_Disable(GL_POLYGON_STIPPLE); - _mesa_Disable(GL_DEPTH_TEST); - _mesa_Disable(GL_STENCIL_TEST); - _mesa_ColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); - _mesa_DepthMask(GL_FALSE); - - /* Bind the given texture to GL_TEXTURE_2D with linear filtering for our - * minification. - */ - _mesa_ActiveTextureARB(GL_TEXTURE0_ARB); - _mesa_Enable(GL_TEXTURE_2D); - _mesa_BindTexture(GL_TEXTURE_2D, texObj->Name); - _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, - GL_LINEAR_MIPMAP_NEAREST); - _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - - /* Bind the new renderbuffer to the color attachment point. */ - _mesa_GenFramebuffersEXT(1, &fb_name); - _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, fb_name); - - meta_set_fragment_program(&intel->meta, &intel->meta.tex2d_fp, - intel_fp_tex2d); - meta_set_passthrough_vertex_program(&intel->meta); - - max_levels = _mesa_max_texture_levels(ctx, texObj->Target); - start_level = texObj->BaseLevel; - end_level = texObj->MaxLevel; - - /* Loop generating level+1 from level. */ - for (level = start_level; level < end_level && level < max_levels - 1; level++) { - const struct gl_texture_image *srcImage; - int width, height; - - srcImage = _mesa_select_tex_image(ctx, texObj, target, level); - if (srcImage->Border != 0) - goto fail; - - width = srcImage->Width / 2; - if (width < 1) - width = 1; - height = srcImage->Height / 2; - if (height < 1) - height = 1; - - if (width == srcImage->Width && - height == srcImage->Height) { - /* Neither _mesa_max_texture_levels nor texObj->MaxLevel are the - * maximum texture level for the object, so break out when we've gone - * over the edge. - */ - break; - } - - /* Make sure that there's space allocated for the target level. - * We could skip this if there's already space allocated and save some - * time. - */ - _mesa_TexImage2D(GL_TEXTURE_2D, level + 1, srcImage->InternalFormat, - width, height, 0, - GL_RGBA, GL_UNSIGNED_INT, NULL); - - if (!intel_generate_mipmap_level(ctx, texObj->Name, level + 1, - width, height)) - goto fail; - } - - success = GL_TRUE; - -fail: - meta_restore_fragment_program(&intel->meta); - meta_restore_vertex_program(&intel->meta); - - /* restore array/index buffers */ - _mesa_reference_buffer_object(ctx, &ctx->Array.ArrayBufferObj, - saved_array_buffer); - _mesa_reference_buffer_object(ctx, &saved_array_buffer, NULL); - _mesa_reference_buffer_object(ctx, &ctx->Array.ElementArrayBufferObj, - saved_element_buffer); - _mesa_reference_buffer_object(ctx, &saved_element_buffer, NULL); - - - _mesa_DeleteFramebuffersEXT(1, &fb_name); - _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture); - if (saved_fbo) - _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, saved_fbo->Name); - _mesa_reference_framebuffer(&saved_fbo, NULL); - _mesa_PopClientAttrib(); - _mesa_PopAttrib(); - - return success; -} - - -/** - * Generate new mipmap data from BASE+1 to BASE+p (the minimally-sized mipmap - * level). - * - * The texture object's miptree must be mapped. - * - * This function should also include an accelerated path. - */ -void -intel_generate_mipmap(GLcontext *ctx, GLenum target, - struct gl_texture_object *texObj) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_texture_object *intelObj = intel_texture_object(texObj); - GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; - int face, i; - - /* HW path */ - if (target == GL_TEXTURE_2D && - ctx->Extensions.EXT_framebuffer_object && - ctx->Extensions.ARB_fragment_program && - ctx->Extensions.ARB_vertex_program) { - GLboolean success; - - /* We'll be accessing this texture using GL entrypoints, which should - * be resilient against other access to this texture. - */ - _mesa_unlock_texture(ctx, texObj); - success = intel_generate_mipmap_2d(ctx, target, texObj); - _mesa_lock_texture(ctx, texObj); - - if (success) - return; - } - - /* SW path */ - intel_tex_map_level_images(intel, intelObj, texObj->BaseLevel); - _mesa_generate_mipmap(ctx, target, texObj); - intel_tex_unmap_level_images(intel, intelObj, texObj->BaseLevel); - - /* Update the level information in our private data in the new images, since - * it didn't get set as part of a normal TexImage path. - */ - for (face = 0; face < nr_faces; face++) { - for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) { - struct intel_texture_image *intelImage; - - intelImage = intel_texture_image(texObj->Image[face][i]); - if (intelImage == NULL) - break; - - intelImage->level = i; - intelImage->face = face; - /* Unreference the miptree to signal that the new Data is a bare - * pointer from mesa. - */ - intel_miptree_release(intel, &intelImage->mt); - } - } -} diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c index df63f29a42c..f5d877edc36 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.c +++ b/src/mesa/drivers/dri/intel/intel_tex.c @@ -162,7 +162,6 @@ void intelInitTextureFuncs(struct dd_function_table *functions) { functions->ChooseTextureFormat = intelChooseTextureFormat; - functions->GenerateMipmap = intel_generate_mipmap; functions->NewTextureObject = intelNewTextureObject; functions->NewTextureImage = intelNewTextureImage; diff --git a/src/mesa/drivers/dri/intel/intel_tex.h b/src/mesa/drivers/dri/intel/intel_tex.h index 471aa2a240b..57ed0b1aabd 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.h +++ b/src/mesa/drivers/dri/intel/intel_tex.h @@ -71,7 +71,4 @@ void intel_tex_unmap_images(struct intel_context *intel, int intel_compressed_num_bytes(GLuint mesaFormat); -void intel_generate_mipmap(GLcontext *ctx, GLenum target, - struct gl_texture_object *texObj); - #endif -- cgit v1.2.3 From a230ad2bc440e9d332482ea453e7ab7f4a5b8bd2 Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Tue, 29 Sep 2009 09:46:29 +0300 Subject: r600: clear position enable bit when when wpos is not used by FP Makes doom3 alot nicer.. --- src/mesa/drivers/dri/r600/r700_fragprog.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index 78ce3ae4366..62a1ea1a22a 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -341,6 +341,11 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit); SETbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit); } + else + { + CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit); + CLEARbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit); + } ui = (unNumOfReg < ui) ? ui : unNumOfReg; -- cgit v1.2.3 From 7c5f3c3d8a63b0feee154092153e958fa4f24abd Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Mon, 28 Sep 2009 10:42:35 +0300 Subject: r600: user correct alpha blend factor Signed-off-by: Dave Airlie --- src/mesa/drivers/dri/r600/r700_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c index fbff109455d..3ad6d74f536 100644 --- a/src/mesa/drivers/dri/r600/r700_state.c +++ b/src/mesa/drivers/dri/r600/r700_state.c @@ -493,10 +493,10 @@ static void r700SetBlendState(GLcontext * ctx) eqn, COLOR_COMB_FCN_shift, COLOR_COMB_FCN_mask); SETfield(blend_reg, - blend_factor(ctx->Color.BlendSrcRGB, GL_TRUE), + blend_factor(ctx->Color.BlendSrcA, GL_TRUE), ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask); SETfield(blend_reg, - blend_factor(ctx->Color.BlendDstRGB, GL_FALSE), + blend_factor(ctx->Color.BlendDstA, GL_FALSE), ALPHA_DESTBLEND_shift, ALPHA_DESTBLEND_mask); switch (ctx->Color.BlendEquationA) { -- cgit v1.2.3 From ac9c8b6359be770f1ed3e97100c497bd91338874 Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Mon, 28 Sep 2009 11:23:49 +0300 Subject: r600: use CB_TARGET_MASK instead of CB_SHADER_MASK for setting color mask makes blend functions work better Signed-off-by: Dave Airlie --- src/mesa/drivers/dri/r600/r700_state.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c index 3ad6d74f536..7e8b48f91eb 100644 --- a/src/mesa/drivers/dri/r600/r700_state.c +++ b/src/mesa/drivers/dri/r600/r700_state.c @@ -771,9 +771,9 @@ static void r700ColorMask(GLcontext * ctx, (b ? 4 : 0) | (a ? 8 : 0)); - if (mask != r700->CB_SHADER_MASK.u32All) { + if (mask != r700->CB_TARGET_MASK.u32All) { R600_STATECHANGE(context, cb); - SETfield(r700->CB_SHADER_MASK.u32All, mask, OUTPUT0_ENABLE_shift, OUTPUT0_ENABLE_mask); + SETfield(r700->CB_TARGET_MASK.u32All, mask, TARGET0_ENABLE_shift, TARGET0_ENABLE_mask); } } @@ -1780,7 +1780,7 @@ void r700InitState(GLcontext * ctx) //------------------- r700->CB_CLRCMP_MSK.u32All = 0xFFFFFFFF; /* screen/window/view */ - SETfield(r700->CB_TARGET_MASK.u32All, 0xF, (4 * id), TARGET0_ENABLE_mask); + SETfield(r700->CB_SHADER_MASK.u32All, 0xF, (4 * id), OUTPUT0_ENABLE_mask); context->radeon.hw.all_dirty = GL_TRUE; -- cgit v1.2.3 From 49fbdd18ed738feaf73b7faba4d3577cd9cc3e59 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 12 Feb 2009 03:54:58 -0800 Subject: i965: Fix massive memory allocation for streaming texture usage. Once we've freed a miptree, we won't see any more state cache requests that would hit the things that pointed at it until we've let the miptree get released back into the BO cache to be reused. By leaving those surface state and binding table pointers that pointed at it around, we would end up with up to (500 * texture size) in memory uselessly consumed by the state cache. Bug #20057 Bug #23530 --- src/mesa/drivers/dri/i965/brw_state.h | 1 + src/mesa/drivers/dri/i965/brw_state_cache.c | 49 ++++++++++++++++++++++++++ src/mesa/drivers/dri/intel/intel_mipmap_tree.c | 16 +++++++++ 3 files changed, 66 insertions(+) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 5335eac8951..d639656b9d4 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -151,6 +151,7 @@ void brw_state_cache_check_size( struct brw_context *brw ); void brw_init_caches( struct brw_context *brw ); void brw_destroy_caches( struct brw_context *brw ); +void brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo); /*********************************************************************** * brw_state_batch.c diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index e40d7a04164..f8e46aacf71 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -517,6 +517,55 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) brw->state.dirty.cache |= ~0; } +/* Clear all entries from the cache that point to the given bo. + * + * This lets us release memory for reuse earlier for known-dead buffers, + * at the cost of walking the entire hash table. + */ +void +brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo) +{ + struct brw_cache_item **prev; + GLuint i; + + if (INTEL_DEBUG & DEBUG_STATE) + _mesa_printf("%s\n", __FUNCTION__); + + for (i = 0; i < cache->size; i++) { + for (prev = &cache->items[i]; *prev;) { + struct brw_cache_item *c = *prev; + int j; + + for (j = 0; j < c->nr_reloc_bufs; j++) { + if (c->reloc_bufs[j] == bo) + break; + } + + if (j != c->nr_reloc_bufs) { + + *prev = c->next; + + for (j = 0; j < c->nr_reloc_bufs; j++) + dri_bo_unreference(c->reloc_bufs[j]); + dri_bo_unreference(c->bo); + free((void *)c->key); + free(c); + cache->n_items--; + + /* Delete up the tree. Notably we're trying to get from + * a request to delete the surface, to deleting the surface state + * object, to deleting the binding table. We're slack and restart + * the deletion process when we do this because the other delete + * may kill our *prev. + */ + brw_state_cache_bo_delete(cache, c->bo); + prev = &cache->items[i]; + } else { + prev = &(*prev)->next; + } + } + } +} void brw_state_cache_check_size(struct brw_context *brw) diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index c985da5aa25..4f5101a3128 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -29,6 +29,9 @@ #include "intel_mipmap_tree.h" #include "intel_regions.h" #include "intel_chipset.h" +#ifndef I915 +#include "brw_state.h" +#endif #include "main/enums.h" #define FILE_DEBUG_FLAG DEBUG_MIPTREE @@ -269,6 +272,19 @@ intel_miptree_release(struct intel_context *intel, DBG("%s deleting %p\n", __FUNCTION__, *mt); +#ifndef I915 + /* Free up cached binding tables holding a reference on our buffer, to + * avoid excessive memory consumption. + * + * This isn't as aggressive as we could be, as we'd like to do + * it from any time we free the last ref on a region. But intel_region.c + * is context-agnostic. Perhaps our constant state cache should be, as + * well. + */ + brw_state_cache_bo_delete(&brw_context(&intel->ctx)->surface_cache, + (*mt)->region->buffer); +#endif + intel_region_release(&((*mt)->region)); for (i = 0; i < MAX_TEXTURE_LEVELS; i++) -- cgit v1.2.3 From 994d1db079b4947e6f10ab22a4b366a676382345 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 30 Jul 2009 12:32:40 -0700 Subject: i915: Let i915_program_error take a format string, and don't use _mesa_problem. It's misleading to report things like the program having too many native instructions as a Mesa implementation error, when the program may just be too big for the hardware. --- src/mesa/drivers/dri/i915/i915_fragprog.c | 17 ++++++++++------- src/mesa/drivers/dri/i915/i915_program.c | 17 +++++++++++++---- src/mesa/drivers/dri/i915/i915_program.h | 2 +- 3 files changed, 24 insertions(+), 12 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 2db10c60e99..beed8ef1979 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -89,7 +89,8 @@ src_vector(struct i915_fragment_program *p, */ case PROGRAM_TEMPORARY: if (source->Index >= I915_MAX_TEMPORARY) { - i915_program_error(p, "Exceeded max temporary reg"); + i915_program_error(p, "Exceeded max temporary reg: %d/%d", + source->Index, I915_MAX_TEMPORARY); return 0; } src = UREG(REG_TYPE_R, source->Index); @@ -124,7 +125,7 @@ src_vector(struct i915_fragment_program *p, break; default: - i915_program_error(p, "Bad source->Index"); + i915_program_error(p, "Bad source->Index: %d", source->Index); return 0; } break; @@ -153,7 +154,7 @@ src_vector(struct i915_fragment_program *p, break; default: - i915_program_error(p, "Bad source->File"); + i915_program_error(p, "Bad source->File: %d", source->File); return 0; } @@ -186,13 +187,14 @@ get_result_vector(struct i915_fragment_program *p, p->depth_written = 1; return UREG(REG_TYPE_OD, 0); default: - i915_program_error(p, "Bad inst->DstReg.Index"); + i915_program_error(p, "Bad inst->DstReg.Index: %d", + inst->DstReg.Index); return 0; } case PROGRAM_TEMPORARY: return UREG(REG_TYPE_R, inst->DstReg.Index); default: - i915_program_error(p, "Bad inst->DstReg.File"); + i915_program_error(p, "Bad inst->DstReg.File: %d", inst->DstReg.File); return 0; } } @@ -231,7 +233,7 @@ translate_tex_src_target(struct i915_fragment_program *p, GLubyte bit) case TEXTURE_CUBE_INDEX: return D0_SAMPLE_TYPE_CUBE; default: - i915_program_error(p, "TexSrcBit"); + i915_program_error(p, "TexSrcBit: %d", bit); return 0; } } @@ -870,7 +872,8 @@ upload_program(struct i915_fragment_program *p) return; default: - i915_program_error(p, "bad opcode"); + i915_program_error(p, "bad opcode: %s", + _mesa_opcode_string(inst->Opcode)); return; } diff --git a/src/mesa/drivers/dri/i915/i915_program.c b/src/mesa/drivers/dri/i915/i915_program.c index e87700f8e0a..85a1b0cf5d9 100644 --- a/src/mesa/drivers/dri/i915/i915_program.c +++ b/src/mesa/drivers/dri/i915/i915_program.c @@ -424,12 +424,21 @@ i915_emit_param4fv(struct i915_fragment_program * p, const GLfloat * values) return 0; } - - +/* Warning the user about program errors seems to be quite valuable, from + * our bug reports. It unfortunately means piglit reporting errors + * when we fall back to software due to an unsupportable program, though. + */ void -i915_program_error(struct i915_fragment_program *p, const char *msg) +i915_program_error(struct i915_fragment_program *p, const char *fmt, ...) { - _mesa_problem(NULL, "i915_program_error: %s", msg); + va_list args; + + fprintf(stderr, "i915_program_error: "); + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + + fprintf(stderr, "\n"); p->error = 1; } diff --git a/src/mesa/drivers/dri/i915/i915_program.h b/src/mesa/drivers/dri/i915/i915_program.h index 14a3f08801f..1074d24f9e4 100644 --- a/src/mesa/drivers/dri/i915/i915_program.h +++ b/src/mesa/drivers/dri/i915/i915_program.h @@ -145,7 +145,7 @@ extern GLuint i915_emit_param4fv(struct i915_fragment_program *p, const GLfloat * values); extern void i915_program_error(struct i915_fragment_program *p, - const char *msg); + const char *fmt, ...); extern void i915_init_program(struct i915_context *i915, struct i915_fragment_program *p); -- cgit v1.2.3 From 4ff816751f74b0645c198372937eec589c458a60 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 29 Jul 2009 22:39:15 -0700 Subject: i915: Bail when the fragment program has too many total instructions. Previously, we'd go trashing the heap. --- src/mesa/drivers/dri/i915/i915_program.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/i915/i915_program.c b/src/mesa/drivers/dri/i915/i915_program.c index 85a1b0cf5d9..6ccc9eea3e0 100644 --- a/src/mesa/drivers/dri/i915/i915_program.c +++ b/src/mesa/drivers/dri/i915/i915_program.c @@ -186,6 +186,11 @@ i915_emit_arith(struct i915_fragment_program * p, p->utemp_flag = old_utemp_flag; /* restore */ } + if (p->csr >= p->program + I915_PROGRAM_SIZE) { + i915_program_error(p, "Program contains too many instructions"); + return UREG_BAD; + } + *(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0)); *(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1)); *(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2)); @@ -270,6 +275,11 @@ GLuint i915_emit_texld( struct i915_fragment_program *p, p->register_phases[GET_UREG_NR(coord)] == p->nr_tex_indirect) p->nr_tex_indirect++; + if (p->csr >= p->program + I915_PROGRAM_SIZE) { + i915_program_error(p, "Program contains too many instructions"); + return UREG_BAD; + } + *(p->csr++) = (op | T0_DEST( dest ) | T0_SAMPLER( sampler )); -- cgit v1.2.3 From d6fbf87575a59e24c5d47b8b6b8700ee4583709b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 29 Jul 2009 22:46:14 -0700 Subject: Revert "i915: don't validate PS program when falling back to software" This reverts commit e7044d552c6d16389447880b8744a51de1cf0199. It prevented the driver from ever recovering from a software fallback due to a program error. The original bug it claimed to fix doesn't appear to exist post-revert. --- src/mesa/drivers/dri/i915/i915_vtbl.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 9a723d3cd73..9e2523932f1 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -54,8 +54,7 @@ i915_render_prevalidate(struct intel_context *intel) { struct i915_context *i915 = i915_context(&intel->ctx); - if (!intel->Fallback) - i915ValidateFragmentProgram(i915); + i915ValidateFragmentProgram(i915); } static void -- cgit v1.2.3 From 61b512c47c9888f3ff117faf3aceccfb52d59c3a Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 29 Jul 2009 23:37:04 -0700 Subject: i915: Update and translate the fragment program along with state updates. Previously, we were doing it in the midst of the pipeline run, which gave an opportunity to enable/disable fallbacks, which is certainly the wrong time to be doing so. This manifested itself in a NULL dereference for PutRow after transitioning out of a fallback during a run_pipeline in glean glsl1. --- src/mesa/drivers/dri/i915/i915_context.c | 3 +++ src/mesa/drivers/dri/i915/i915_fragprog.c | 32 +++++++++++++++++++++---------- src/mesa/drivers/dri/i915/i915_program.c | 2 -- src/mesa/drivers/dri/i915/i915_program.h | 3 +-- 4 files changed, 26 insertions(+), 14 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index 3ab7d682ee7..b342503772b 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -40,6 +40,7 @@ #include "utils.h" #include "i915_reg.h" +#include "i915_program.h" #include "intel_regions.h" #include "intel_batchbuffer.h" @@ -80,6 +81,8 @@ i915InvalidateState(GLcontext * ctx, GLuint new_state) i915_update_stencil(ctx); if (new_state & (_NEW_LIGHT)) i915_update_provoking_vertex(ctx); + if (new_state & (_NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)) + i915_update_program(ctx); } diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index beed8ef1979..929a6eb8352 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -1058,6 +1058,28 @@ i915ProgramStringNotify(GLcontext * ctx, _tnl_program_string(ctx, target, prog); } +void +i915_update_program(GLcontext *ctx) +{ + struct intel_context *intel = intel_context(ctx); + struct i915_context *i915 = i915_context(&intel->ctx); + struct i915_fragment_program *fp = + (struct i915_fragment_program *) ctx->FragmentProgram._Current; + + if (i915->current_program != fp) { + if (i915->current_program) { + i915->current_program->on_hardware = 0; + i915->current_program->params_uptodate = 0; + } + + i915->current_program = fp; + } + + if (!fp->translated) + translate_program(fp); + + FALLBACK(&i915->intel, I915_FALLBACK_PROGRAM, fp->error); +} void i915ValidateFragmentProgram(struct i915_context *i915) @@ -1075,16 +1097,6 @@ i915ValidateFragmentProgram(struct i915_context *i915) GLuint s2 = S2_TEXCOORD_NONE; int i, offset = 0; - if (i915->current_program != p) { - if (i915->current_program) { - i915->current_program->on_hardware = 0; - i915->current_program->params_uptodate = 0; - } - - i915->current_program = p; - } - - /* Important: */ VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr; diff --git a/src/mesa/drivers/dri/i915/i915_program.c b/src/mesa/drivers/dri/i915/i915_program.c index 6ccc9eea3e0..fd3019b234a 100644 --- a/src/mesa/drivers/dri/i915/i915_program.c +++ b/src/mesa/drivers/dri/i915/i915_program.c @@ -530,8 +530,6 @@ i915_upload_program(struct i915_context *i915, GLuint program_size = p->csr - p->program; GLuint decl_size = p->decl - p->declarations; - FALLBACK(&i915->intel, I915_FALLBACK_PROGRAM, p->error); - /* Could just go straight to the batchbuffer from here: */ if (i915->state.ProgramSize != (program_size + decl_size) || diff --git a/src/mesa/drivers/dri/i915/i915_program.h b/src/mesa/drivers/dri/i915/i915_program.h index 1074d24f9e4..0d17d048653 100644 --- a/src/mesa/drivers/dri/i915/i915_program.h +++ b/src/mesa/drivers/dri/i915/i915_program.h @@ -155,7 +155,6 @@ extern void i915_upload_program(struct i915_context *i915, extern void i915_fini_program(struct i915_fragment_program *p); - - +extern void i915_update_program(GLcontext *ctx); #endif -- cgit v1.2.3 From 96a3c69d48bb7c021181e061d010cca08198ae4c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 30 Jul 2009 00:03:21 -0700 Subject: i915: Increase maximum program size to the hardware limits. This fixes potential heap trashing if the program of choice exceeds limits, and fixes the native instructions limit being lower than what can be used by valid programs. --- src/mesa/drivers/dri/i915/i915_context.h | 15 ++++++++++----- src/mesa/drivers/dri/i915/i915_program.c | 8 ++++++-- 2 files changed, 16 insertions(+), 7 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/i915/i915_context.h b/src/mesa/drivers/dri/i915/i915_context.h index 8de4a9d0d36..082d6144425 100644 --- a/src/mesa/drivers/dri/i915/i915_context.h +++ b/src/mesa/drivers/dri/i915/i915_context.h @@ -121,10 +121,14 @@ enum { #define I915_MAX_CONSTANT 32 #define I915_CONSTANT_SIZE (2+(4*I915_MAX_CONSTANT)) +#define I915_MAX_INSN (I915_MAX_DECL_INSN + \ + I915_MAX_TEX_INSN + \ + I915_MAX_ALU_INSN) -#define I915_PROGRAM_SIZE 192 - -#define I915_MAX_INSN (I915_MAX_TEX_INSN+I915_MAX_ALU_INSN) +/* Maximum size of the program packet, which matches the limits on + * decl, tex, and ALU instructions. + */ +#define I915_PROGRAM_SIZE (I915_MAX_INSN * 3 + 1) /* Hardware version of a parsed fragment program. "Derived" from the * mesa fragment_program struct. @@ -154,8 +158,9 @@ struct i915_fragment_program */ GLcontext *ctx; - GLuint declarations[I915_PROGRAM_SIZE]; - GLuint program[I915_PROGRAM_SIZE]; + /* declarations contains the packet header. */ + GLuint declarations[I915_MAX_DECL_INSN * 3 + 1]; + GLuint program[(I915_MAX_TEX_INSN + I915_MAX_ALU_INSN) * 3]; GLfloat constant[I915_MAX_CONSTANT][4]; GLuint constant_flags[I915_MAX_CONSTANT]; diff --git a/src/mesa/drivers/dri/i915/i915_program.c b/src/mesa/drivers/dri/i915/i915_program.c index fd3019b234a..e7908bd48fc 100644 --- a/src/mesa/drivers/dri/i915/i915_program.c +++ b/src/mesa/drivers/dri/i915/i915_program.c @@ -130,6 +130,7 @@ i915_emit_decl(struct i915_fragment_program *p, *(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags); *(p->decl++) = D1_MBZ; *(p->decl++) = D2_MBZ; + assert(p->decl <= p->declarations + ARRAY_SIZE(p->declarations)); p->nr_decl_insn++; return reg; @@ -186,7 +187,7 @@ i915_emit_arith(struct i915_fragment_program * p, p->utemp_flag = old_utemp_flag; /* restore */ } - if (p->csr >= p->program + I915_PROGRAM_SIZE) { + if (p->csr >= p->program + ARRAY_SIZE(p->program)) { i915_program_error(p, "Program contains too many instructions"); return UREG_BAD; } @@ -275,7 +276,7 @@ GLuint i915_emit_texld( struct i915_fragment_program *p, p->register_phases[GET_UREG_NR(coord)] == p->nr_tex_indirect) p->nr_tex_indirect++; - if (p->csr >= p->program + I915_PROGRAM_SIZE) { + if (p->csr >= p->program + ARRAY_SIZE(p->program)) { i915_program_error(p, "Program contains too many instructions"); return UREG_BAD; } @@ -530,6 +531,9 @@ i915_upload_program(struct i915_context *i915, GLuint program_size = p->csr - p->program; GLuint decl_size = p->decl - p->declarations; + if (p->error) + return; + /* Could just go straight to the batchbuffer from here: */ if (i915->state.ProgramSize != (program_size + decl_size) || -- cgit v1.2.3 From 7d4b7460b0e565d0574c00d1d40c426cfebc290d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 29 Jul 2009 12:15:14 -0700 Subject: i915: Enable ARB_vertex_shader for both i915 and i830. Since the TNL is all done in software anyway, it should be the same to the user who's probably using ARB_vertex_program otherwise, but gives them a nicer programming environment. --- src/mesa/drivers/dri/i915/intel_tris.c | 2 ++ src/mesa/drivers/dri/intel/intel_extensions.c | 8 +++--- src/mesa/drivers/dri/intel/intel_span.c | 37 +++++++++++++++++++++++++++ src/mesa/drivers/dri/intel/intel_span.h | 2 ++ 4 files changed, 45 insertions(+), 4 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c index a905455342d..0641e6df9d7 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.c +++ b/src/mesa/drivers/dri/i915/intel_tris.c @@ -1076,7 +1076,9 @@ intelRunPipeline(GLcontext * ctx) intel->NewGLState = 0; } + intel_map_vertex_shader_textures(ctx); _tnl_run_pipeline(ctx); + intel_unmap_vertex_shader_textures(ctx); _mesa_unlock_context_textures(ctx); } diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c index 5431cf90a1b..f10eda7cc7a 100644 --- a/src/mesa/drivers/dri/intel/intel_extensions.c +++ b/src/mesa/drivers/dri/intel/intel_extensions.c @@ -80,6 +80,9 @@ static const struct dri_extension card_extensions[] = { { "GL_ARB_multitexture", NULL }, { "GL_ARB_point_parameters", GL_ARB_point_parameters_functions }, { "GL_ARB_point_sprite", NULL }, + { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions }, + { "GL_ARB_shading_language_100", GL_VERSION_2_0_functions }, + { "GL_ARB_shading_language_120", GL_VERSION_2_1_functions }, { "GL_ARB_sync", GL_ARB_sync_functions }, { "GL_ARB_texture_border_clamp", NULL }, { "GL_ARB_texture_cube_map", NULL }, @@ -91,6 +94,7 @@ static const struct dri_extension card_extensions[] = { { "GL_ARB_texture_rectangle", NULL }, { "GL_ARB_vertex_array_object", GL_ARB_vertex_array_object_functions}, { "GL_ARB_vertex_program", GL_ARB_vertex_program_functions }, + { "GL_ARB_vertex_shader", GL_ARB_vertex_shader_functions }, { "GL_ARB_window_pos", GL_ARB_window_pos_functions }, { "GL_EXT_blend_color", GL_EXT_blend_color_functions }, { "GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions }, @@ -150,13 +154,9 @@ static const struct dri_extension brw_extensions[] = { { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions }, { "GL_ARB_point_sprite", NULL }, { "GL_ARB_seamless_cube_map", NULL }, - { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions }, - { "GL_ARB_shading_language_100", GL_VERSION_2_0_functions }, - { "GL_ARB_shading_language_120", GL_VERSION_2_1_functions }, { "GL_ARB_shadow", NULL }, { "GL_MESA_texture_signed_rgba", NULL }, { "GL_ARB_texture_non_power_of_two", NULL }, - { "GL_ARB_vertex_shader", GL_ARB_vertex_shader_functions }, { "GL_EXT_shadow_funcs", NULL }, { "GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions }, { "GL_EXT_texture_sRGB", NULL }, diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index 28eabbc0054..dcfcad1d952 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -558,6 +558,43 @@ intelInitSpanFuncs(GLcontext * ctx) swdd->SpanRenderFinish = intelSpanRenderFinish; } +void +intel_map_vertex_shader_textures(GLcontext *ctx) +{ + struct intel_context *intel = intel_context(ctx); + int i; + + if (ctx->VertexProgram._Current == NULL) + return; + + for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled && + ctx->VertexProgram._Current->Base.TexturesUsed[i] != 0) { + struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current; + + intel_tex_map_images(intel, intel_texture_object(texObj)); + } + } +} + +void +intel_unmap_vertex_shader_textures(GLcontext *ctx) +{ + struct intel_context *intel = intel_context(ctx); + int i; + + if (ctx->VertexProgram._Current == NULL) + return; + + for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled && + ctx->VertexProgram._Current->Base.TexturesUsed[i] != 0) { + struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current; + + intel_tex_unmap_images(intel, intel_texture_object(texObj)); + } + } +} /** * Plug in appropriate span read/write functions for the given renderbuffer. diff --git a/src/mesa/drivers/dri/intel/intel_span.h b/src/mesa/drivers/dri/intel/intel_span.h index acbeb4abe1c..bffe109aa5b 100644 --- a/src/mesa/drivers/dri/intel/intel_span.h +++ b/src/mesa/drivers/dri/intel/intel_span.h @@ -36,5 +36,7 @@ void intel_renderbuffer_map(struct intel_context *intel, struct gl_renderbuffer *rb); void intel_renderbuffer_unmap(struct intel_context *intel, struct gl_renderbuffer *rb); +void intel_map_vertex_shader_textures(GLcontext *ctx); +void intel_unmap_vertex_shader_textures(GLcontext *ctx); #endif -- cgit v1.2.3 From f9f31b25740887373806cb489e5480dc9b261805 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 1 Oct 2009 14:00:28 -0700 Subject: i915: Add support for varying inputs. --- src/mesa/drivers/dri/i915/i915_context.c | 2 +- src/mesa/drivers/dri/i915/i915_fragprog.c | 23 ++++++++++++++++++++++- 2 files changed, 23 insertions(+), 2 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index b342503772b..7d4c7cfbabb 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -142,7 +142,7 @@ i915CreateContext(const __GLcontextModes * mesaVis, ctx->Const.MaxTextureUnits = I915_TEX_UNITS; ctx->Const.MaxTextureImageUnits = I915_TEX_UNITS; ctx->Const.MaxTextureCoordUnits = I915_TEX_UNITS; - + ctx->Const.MaxVarying = I915_TEX_UNITS; /* Advertise the full hardware capabilities. The new memory * manager should cope much better with overload situations: diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 929a6eb8352..66db7e1645c 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -122,6 +122,19 @@ src_vector(struct i915_fragment_program *p, src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + (source->Index - FRAG_ATTRIB_TEX0), D0_CHANNEL_ALL); + break; + + case FRAG_ATTRIB_VAR0: + case FRAG_ATTRIB_VAR0 + 1: + case FRAG_ATTRIB_VAR0 + 2: + case FRAG_ATTRIB_VAR0 + 3: + case FRAG_ATTRIB_VAR0 + 4: + case FRAG_ATTRIB_VAR0 + 5: + case FRAG_ATTRIB_VAR0 + 6: + case FRAG_ATTRIB_VAR0 + 7: + src = i915_emit_decl(p, REG_TYPE_T, + T_TEX0 + (source->Index - FRAG_ATTRIB_VAR0), + D0_CHANNEL_ALL); break; default: @@ -909,7 +922,7 @@ check_wpos(struct i915_fragment_program *p) p->wpos_tex = -1; for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) { - if (inputs & FRAG_BIT_TEX(i)) + if (inputs & (FRAG_BIT_TEX(i) | FRAG_BIT_VAR(i))) continue; else if (inputs & FRAG_BIT_WPOS) { p->wpos_tex = i; @@ -1140,6 +1153,14 @@ i915ValidateFragmentProgram(struct i915_context *i915) EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, EMIT_SZ(sz), 0, sz * 4); } + else if (inputsRead & FRAG_BIT_VAR(i)) { + int sz = VB->AttribPtr[_TNL_ATTRIB_GENERIC0 + i]->size; + + s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK); + s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz)); + + EMIT_ATTR(_TNL_ATTRIB_GENERIC0 + i, EMIT_SZ(sz), 0, sz * 4); + } else if (i == p->wpos_tex) { /* If WPOS is required, duplicate the XYZ position data in an -- cgit v1.2.3 From 67f4d626d39f2c340fa1632d3e4344c547301508 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 29 Jul 2009 20:44:39 -0700 Subject: i915: Add support or fallbacks for GLSL fragment shader opcodes. --- src/mesa/drivers/dri/i915/i915_fragprog.c | 162 +++++++++++++++++++++++++++++- 1 file changed, 158 insertions(+), 4 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 66db7e1645c..d5659e762f3 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -366,7 +366,7 @@ upload_program(struct i915_fragment_program *p) while (1) { GLuint src0, src1, src2, flags; - GLuint tmp = 0, consts0 = 0, consts1 = 0; + GLuint tmp = 0, dst, consts0 = 0, consts1 = 0; switch (inst->Opcode) { case OPCODE_ABS: @@ -518,6 +518,10 @@ upload_program(struct i915_fragment_program *p) EMIT_1ARG_ARITH(A0_FLR); break; + case OPCODE_TRUNC: + EMIT_1ARG_ARITH(A0_TRC); + break; + case OPCODE_FRC: EMIT_1ARG_ARITH(A0_FRC); break; @@ -531,6 +535,22 @@ upload_program(struct i915_fragment_program *p) 0, src0, T0_TEXKILL); break; + case OPCODE_KIL_NV: + if (inst->DstReg.CondMask == COND_TR) { + tmp = i915_get_utemp(p); + + i915_emit_texld(p, get_live_regs(p, inst), + tmp, A0_DEST_CHANNEL_ALL, + 0, /* use a dummy dest reg */ + swizzle(tmp, ONE, ONE, ONE, ONE), /* always */ + T0_TEXKILL); + } else { + p->error = 1; + i915_program_error(p, "Unsupported KIL_NV condition code: %d", + inst->DstReg.CondMask); + } + break; + case OPCODE_LG2: src0 = src_vector(p, &inst->SrcReg[0], program); @@ -630,6 +650,20 @@ upload_program(struct i915_fragment_program *p) EMIT_2ARG_ARITH(A0_MUL); break; + case OPCODE_NOISE1: + case OPCODE_NOISE2: + case OPCODE_NOISE3: + case OPCODE_NOISE4: + /* Don't implement noise because we just don't have the instructions + * to spare. We aren't the first vendor to do so. + */ + i915_program_error(p, "Stubbed-out noise functions"); + i915_emit_arith(p, + A0_MOV, + get_result_vector(p, inst), + get_result_flags(inst), 0, + swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0); + case OPCODE_POW: src0 = src_vector(p, &inst->SrcReg[0], program); src1 = src_vector(p, &inst->SrcReg[1], program); @@ -736,9 +770,38 @@ upload_program(struct i915_fragment_program *p) } break; - case OPCODE_SGE: - EMIT_2ARG_ARITH(A0_SGE); - break; + case OPCODE_SEQ: + tmp = i915_get_utemp(p); + flags = get_result_flags(inst); + dst = get_result_vector(p, inst); + + /* dst = src1 >= src2 */ + i915_emit_arith(p, + A0_SGE, + dst, + flags, 0, + src_vector(p, &inst->SrcReg[0], program), + src_vector(p, &inst->SrcReg[1], program), + 0); + /* tmp = src1 <= src2 */ + i915_emit_arith(p, + A0_SGE, + tmp, + flags, 0, + negate(src_vector(p, &inst->SrcReg[0], program), + 1, 1, 1, 1), + negate(src_vector(p, &inst->SrcReg[1], program), + 1, 1, 1, 1), + 0); + /* dst = tmp && dst */ + i915_emit_arith(p, + A0_MUL, + dst, + flags, 0, + dst, + tmp, + 0); + break; case OPCODE_SIN: src0 = src_vector(p, &inst->SrcReg[0], program); @@ -824,10 +887,71 @@ upload_program(struct i915_fragment_program *p) break; + case OPCODE_SGE: + EMIT_2ARG_ARITH(A0_SGE); + break; + + case OPCODE_SGT: + i915_emit_arith(p, + A0_SLT, + get_result_vector( p, inst ), + get_result_flags( inst ), 0, + negate(src_vector( p, &inst->SrcReg[0], program), + 1, 1, 1, 1), + negate(src_vector( p, &inst->SrcReg[1], program), + 1, 1, 1, 1), + 0); + break; + + case OPCODE_SLE: + i915_emit_arith(p, + A0_SGE, + get_result_vector( p, inst ), + get_result_flags( inst ), 0, + negate(src_vector( p, &inst->SrcReg[0], program), + 1, 1, 1, 1), + negate(src_vector( p, &inst->SrcReg[1], program), + 1, 1, 1, 1), + 0); + break; + case OPCODE_SLT: EMIT_2ARG_ARITH(A0_SLT); break; + case OPCODE_SNE: + tmp = i915_get_utemp(p); + flags = get_result_flags(inst); + dst = get_result_vector(p, inst); + + /* dst = src1 < src2 */ + i915_emit_arith(p, + A0_SLT, + dst, + flags, 0, + src_vector(p, &inst->SrcReg[0], program), + src_vector(p, &inst->SrcReg[1], program), + 0); + /* tmp = src1 > src2 */ + i915_emit_arith(p, + A0_SLT, + tmp, + flags, 0, + negate(src_vector(p, &inst->SrcReg[0], program), + 1, 1, 1, 1), + negate(src_vector(p, &inst->SrcReg[1], program), + 1, 1, 1, 1), + 0); + /* dst = tmp || dst */ + i915_emit_arith(p, + A0_ADD, + dst, + flags | A0_DEST_SATURATE, 0, + dst, + tmp, + 0); + break; + case OPCODE_SUB: src0 = src_vector(p, &inst->SrcReg[0], program); src1 = src_vector(p, &inst->SrcReg[1], program); @@ -884,6 +1008,36 @@ upload_program(struct i915_fragment_program *p) case OPCODE_END: return; + case OPCODE_BGNLOOP: + case OPCODE_BGNSUB: + case OPCODE_BRA: + case OPCODE_BRK: + case OPCODE_CAL: + case OPCODE_CONT: + case OPCODE_DDX: + case OPCODE_DDY: + case OPCODE_ELSE: + case OPCODE_ENDIF: + case OPCODE_ENDLOOP: + case OPCODE_ENDSUB: + case OPCODE_IF: + case OPCODE_RET: + p->error = 1; + i915_program_error(p, "Unsupported opcode: %s", + _mesa_opcode_string(inst->Opcode)); + return; + + case OPCODE_EXP: + case OPCODE_LOG: + /* These opcodes are claimed as GLSL, NV_vp, and ARB_vp in + * prog_instruction.h, but apparently GLSL doesn't ever emit them. + * Instead, it translates to EX2 or LG2. + */ + case OPCODE_TXD: + case OPCODE_TXL: + /* These opcodes are claimed by GLSL in prog_instruction.h, but + * only NV_vp/fp appears to emit them. + */ default: i915_program_error(p, "bad opcode: %s", _mesa_opcode_string(inst->Opcode)); -- cgit v1.2.3 From 862a2a55b35d1dec9224b025a6e7a0cf8593a6a7 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 29 Jul 2009 13:00:09 -0700 Subject: i915: Add optional support for ARB_fragment_shader under a driconf option. Other vendors have enabled ARB_fragment_shader as part of OpenGL 2.0 enablement even on hardware like the 915 with no dynamic branching or dFdx/dFdy support. But for now we'll leave it disabled because we don't do any flattening of ifs or loops, which is rather restrictive. This support is not complete, and may be unstable depending on your shaders. It passes 10/15 of the piglit glsl tests, but hangs on glean glsl1. --- src/mesa/drivers/dri/i915/i915_fragprog.c | 1 + src/mesa/drivers/dri/intel/intel_context.h | 1 - src/mesa/drivers/dri/intel/intel_extensions.c | 10 +++++++++- src/mesa/drivers/dri/intel/intel_screen.c | 6 +++++- 4 files changed, 15 insertions(+), 3 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index d5659e762f3..d9c61446f52 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -160,6 +160,7 @@ src_vector(struct i915_fragment_program *p, case PROGRAM_CONSTANT: case PROGRAM_STATE_VAR: case PROGRAM_NAMED_PARAM: + case PROGRAM_UNIFORM: src = i915_emit_param4fv(p, program->Base.Parameters->ParameterValues[source-> diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index c743ab1c246..b104096912c 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -293,7 +293,6 @@ struct intel_context GLboolean use_texture_tiling; GLboolean use_early_z; - drm_clip_rect_t fboRect; /**< cliprect for FBO rendering */ int perf_boxes; diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c index f10eda7cc7a..6831cbbfc85 100644 --- a/src/mesa/drivers/dri/intel/intel_extensions.c +++ b/src/mesa/drivers/dri/intel/intel_extensions.c @@ -182,6 +182,10 @@ static const struct dri_extension ttm_extensions[] = { { NULL, NULL } }; +static const struct dri_extension fragment_shader_extensions[] = { + { "GL_ARB_fragment_shader", NULL }, + { NULL, NULL } +}; /** * Initializes potential list of extensions if ctx == NULL, or actually enables @@ -205,6 +209,10 @@ intelInitExtensions(GLcontext *ctx, GLboolean enable_imaging) driInitExtensions(ctx, brw_extensions, GL_FALSE); if (intel == NULL || IS_915(intel->intelScreen->deviceID) - || IS_945(intel->intelScreen->deviceID)) + || IS_945(intel->intelScreen->deviceID)) { driInitExtensions(ctx, i915_extensions, GL_FALSE); + + if (intel == NULL || driQueryOptionb(&intel->optionCache, "fragment_shader")) + driInitExtensions(ctx, fragment_shader_extensions, GL_FALSE); + } } diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 1b8c56e68d6..2478e636373 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -79,6 +79,10 @@ PUBLIC const char __driConfigOptions[] = DRI_CONF_DESC(en, "Enable early Z in classic mode (unstable, 945-only).") DRI_CONF_OPT_END + DRI_CONF_OPT_BEGIN(fragment_shader, bool, false) + DRI_CONF_DESC(en, "Enable limited ARB_fragment_shader support on 915/945.") + DRI_CONF_OPT_END + DRI_CONF_SECTION_END DRI_CONF_SECTION_QUALITY DRI_CONF_FORCE_S3TC_ENABLE(false) @@ -91,7 +95,7 @@ PUBLIC const char __driConfigOptions[] = DRI_CONF_SECTION_END DRI_CONF_END; -const GLuint __driNConfigOptions = 10; +const GLuint __driNConfigOptions = 11; #ifdef USE_NEW_INTERFACE static PFNGLXCREATECONTEXTMODES create_context_modes = NULL; -- cgit v1.2.3 From 81aa5d717bd0098608e9cc292b316293800c7e11 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 29 Jul 2009 13:07:49 -0700 Subject: i915: Add stub ARB_occlusion_query support under a driconf debug option. This is useful for enabling our GLSL testcases using the 2.0 entrypoints even though we don't have full GL 2.0. --- src/mesa/drivers/dri/intel/intel_extensions.c | 5 +++++ src/mesa/drivers/dri/intel/intel_screen.c | 6 +++++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c index 6831cbbfc85..2eb08a8f057 100644 --- a/src/mesa/drivers/dri/intel/intel_extensions.c +++ b/src/mesa/drivers/dri/intel/intel_extensions.c @@ -171,6 +171,7 @@ static const struct dri_extension brw_extensions[] = { static const struct dri_extension arb_oq_extensions[] = { + { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions }, { NULL, NULL } }; @@ -214,5 +215,9 @@ intelInitExtensions(GLcontext *ctx, GLboolean enable_imaging) if (intel == NULL || driQueryOptionb(&intel->optionCache, "fragment_shader")) driInitExtensions(ctx, fragment_shader_extensions, GL_FALSE); + + if (intel == NULL || driQueryOptionb(&intel->optionCache, + "stub_occlusion_query")) + driInitExtensions(ctx, arb_oq_extensions, GL_FALSE); } } diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 2478e636373..24f7fbc9922 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -92,10 +92,14 @@ PUBLIC const char __driConfigOptions[] = DRI_CONF_NO_RAST(false) DRI_CONF_ALWAYS_FLUSH_BATCH(false) DRI_CONF_ALWAYS_FLUSH_CACHE(false) + + DRI_CONF_OPT_BEGIN(stub_occlusion_query, bool, false) + DRI_CONF_DESC(en, "Enable stub ARB_occlusion_query support on 915/945.") + DRI_CONF_OPT_END DRI_CONF_SECTION_END DRI_CONF_END; -const GLuint __driNConfigOptions = 11; +const GLuint __driNConfigOptions = 12; #ifdef USE_NEW_INTERFACE static PFNGLXCREATECONTEXTMODES create_context_modes = NULL; -- cgit v1.2.3 From 7d4b348c67dbc2eff1d7dd0c043a76bc0eae57ab Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 2 Oct 2009 08:55:25 -0600 Subject: intel: wrap _mesa_meta_GenerateMipmap() Need to check if we'll take the software path so which requires mapping the src texture image. Fixes crash in piglit gen-compressed-teximage, bug 24219. However, the test still does not pass (it may never have). --- src/mesa/drivers/dri/intel/intel_tex.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c index f5d877edc36..3cbc379dbd3 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.c +++ b/src/mesa/drivers/dri/intel/intel_tex.c @@ -2,6 +2,7 @@ #include "main/texobj.h" #include "main/teximage.h" #include "main/mipmap.h" +#include "drivers/common/meta.h" #include "intel_context.h" #include "intel_mipmap_tree.h" #include "intel_tex.h" @@ -158,10 +159,36 @@ timed_memcpy(void *dest, const void *src, size_t n) } #endif /* DO_DEBUG */ + +/** + * Called via ctx->Driver.GenerateMipmap() + * This is basically a wrapper for _mesa_meta_GenerateMipmap() which checks + * if we'll be using software mipmap generation. In that case, we need to + * map/unmap the base level texture image. + */ +static void +intelGenerateMipmap(GLcontext *ctx, GLenum target, + struct gl_texture_object *texObj) +{ + if (_mesa_meta_check_generate_mipmap_fallback(ctx, target, texObj)) { + /* sw path: need to map texture images */ + struct intel_context *intel = intel_context(ctx); + struct intel_texture_object *intelObj = intel_texture_object(texObj); + intel_tex_map_level_images(intel, intelObj, texObj->BaseLevel); + _mesa_generate_mipmap(ctx, target, texObj); + intel_tex_unmap_level_images(intel, intelObj, texObj->BaseLevel); + } + else { + _mesa_meta_GenerateMipmap(ctx, target, texObj); + } +} + + void intelInitTextureFuncs(struct dd_function_table *functions) { functions->ChooseTextureFormat = intelChooseTextureFormat; + functions->GenerateMipmap = intelGenerateMipmap; functions->NewTextureObject = intelNewTextureObject; functions->NewTextureImage = intelNewTextureImage; -- cgit v1.2.3 From 3f623cfffee8db83ba8e0302fc5e3d1f40d1b0b5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 2 Oct 2009 14:25:52 -0400 Subject: r600: remove support for host-based ibs no longer used now that the hw supports this natively. Also, clean up some formatting. --- src/mesa/drivers/dri/r600/r600_context.h | 26 ++-- src/mesa/drivers/dri/r600/r700_render.c | 204 +++++++++++-------------------- 2 files changed, 80 insertions(+), 150 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h index a296ea23fa4..7f68820fda1 100644 --- a/src/mesa/drivers/dri/r600/r600_context.h +++ b/src/mesa/drivers/dri/r600/r600_context.h @@ -126,32 +126,30 @@ struct r600_hw_state { struct radeon_state_atom tx_brdr_clr; }; -typedef struct StreamDesc +typedef struct StreamDesc { GLint size; //number of data element GLenum type; //data element type GLsizei stride; - struct radeon_bo *bo; - GLint bo_offset; + struct radeon_bo *bo; + GLint bo_offset; - GLuint dwords; + GLuint dwords; GLuint dst_loc; GLuint _signed; GLboolean normalize; - GLboolean is_named_bo; - GLubyte element; + GLboolean is_named_bo; + GLubyte element; } StreamDesc; -typedef struct r700_index_buffer +typedef struct r700_index_buffer { - struct radeon_bo *bo; - int bo_offset; + struct radeon_bo *bo; + int bo_offset; - GLboolean is_32bit; - GLuint count; - - GLboolean bHostIb; + GLboolean is_32bit; + GLuint count; } r700_index_buffer; /** @@ -172,7 +170,7 @@ struct r600_context { GLvector4f dummy_attrib[_TNL_ATTRIB_MAX]; GLvector4f *temp_attrib[_TNL_ATTRIB_MAX]; - GLint nNumActiveAos; + GLint nNumActiveAos; StreamDesc stream_desc[VERT_ATTRIB_MAX]; struct r700_index_buffer ind_buf; }; diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index 4949bf013d9..0aef0b7ea1f 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -251,19 +251,19 @@ static int r700NumVerts(int num_verts, int prim) static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) { - context_t *context = R700_CONTEXT(ctx); - BATCH_LOCALS(&context->radeon); - int type, i, total_emit; - int num_indices; - uint32_t vgt_draw_initiator = 0; - uint32_t vgt_index_type = 0; - uint32_t vgt_primitive_type = 0; - uint32_t vgt_num_indices = 0; - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; - + context_t *context = R700_CONTEXT(ctx); + BATCH_LOCALS(&context->radeon); + int type, i, total_emit; + int num_indices; + uint32_t vgt_draw_initiator = 0; + uint32_t vgt_index_type = 0; + uint32_t vgt_primitive_type = 0; + uint32_t vgt_num_indices = 0; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; GLboolean bUseDrawIndex; - if( (NULL != context->ind_buf.bo) && (GL_TRUE != context->ind_buf.bHostIb) ) + + if(NULL != context->ind_buf.bo) { bUseDrawIndex = GL_TRUE; } @@ -272,35 +272,35 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim bUseDrawIndex = GL_FALSE; } - type = r700PrimitiveType(prim); - num_indices = r700NumVerts(end - start, prim); + type = r700PrimitiveType(prim); + num_indices = r700NumVerts(end - start, prim); - radeon_print(RADEON_RENDER, RADEON_TRACE, - "%s type %x num_indices %d\n", - __func__, type, num_indices); + radeon_print(RADEON_RENDER, RADEON_TRACE, + "%s type %x num_indices %d\n", + __func__, type, num_indices); - if (type < 0 || num_indices <= 0) - return; + if (type < 0 || num_indices <= 0) + return; if(GL_TRUE == bUseDrawIndex) { total_emit = 3 /* VGT_PRIMITIVE_TYPE */ - + 2 /* VGT_INDEX_TYPE */ - + 2 /* NUM_INSTANCES */ - + 5+2; /* DRAW_INDEX */ + + 2 /* VGT_INDEX_TYPE */ + + 2 /* NUM_INSTANCES */ + + 5 + 2; /* DRAW_INDEX */ } else { total_emit = 3 /* VGT_PRIMITIVE_TYPE */ - + 2 /* VGT_INDEX_TYPE */ - + 2 /* NUM_INSTANCES */ - + num_indices + 3; /* DRAW_INDEX_IMMD */ + + 2 /* VGT_INDEX_TYPE */ + + 2 /* NUM_INSTANCES */ + + num_indices + 3; /* DRAW_INDEX_IMMD */ } BEGIN_BATCH_NO_AUTOSTATE(total_emit); - // prim + // prim SETfield(vgt_primitive_type, type, - VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask); + VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask); R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX); R600_OUT_BATCH(vgt_primitive_type); @@ -319,11 +319,11 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); R600_OUT_BATCH(vgt_index_type); - // num instances - R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); - R600_OUT_BATCH(1); + // num instances + R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); + R600_OUT_BATCH(1); - // draw packet + // draw packet vgt_num_indices = num_indices; if(GL_TRUE == bUseDrawIndex) @@ -354,44 +354,17 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1))); R600_OUT_BATCH(vgt_num_indices); R600_OUT_BATCH(vgt_draw_initiator); - } - if(NULL == context->ind_buf.bo) - { - for (i = start; i < (start + num_indices); i++) { + for (i = start; i < (start + num_indices); i++) + { if(vb->Elts) { R600_OUT_BATCH(vb->Elts[i]); } else + { R600_OUT_BATCH(i); - } - } - else - { - if(GL_TRUE == context->ind_buf.bHostIb) - { - if(GL_TRUE != context->ind_buf.is_32bit) - { - GLushort * pIndex = (GLushort*)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); - pIndex += start; - for (i = 0; i < num_indices; i++) - { - R600_OUT_BATCH(*pIndex); - pIndex++; - } - } - else - { - GLuint * pIndex = (GLuint*)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); - pIndex += start; - - for (i = 0; i < num_indices; i++) - { - R600_OUT_BATCH(*pIndex); - pIndex++; - } - } + } } } @@ -826,30 +799,21 @@ static void r700FreeData(GLcontext *ctx) * called during context destroy */ context_t *context = R700_CONTEXT(ctx); - + int i; - for (i = 0; i < context->nNumActiveAos; i++) + for (i = 0; i < context->nNumActiveAos; i++) { - if (!context->stream_desc[i].is_named_bo) + if (!context->stream_desc[i].is_named_bo) { radeon_bo_unref(context->stream_desc[i].bo); } context->radeon.tcl.aos[i].bo = NULL; } - - if (context->ind_buf.bo != NULL) + + if (context->ind_buf.bo != NULL) { - if(context->ind_buf.bHostIb != GL_TRUE) - { radeon_bo_unref(context->ind_buf.bo); - } - else - { - FREE(context->ind_buf.bo->ptr); - FREE(context->ind_buf.bo); - context->ind_buf.bo = NULL; - } } } @@ -861,7 +825,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer int i; GLboolean mapped_named_bo = GL_FALSE; - if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) + if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); mapped_named_bo = GL_TRUE; @@ -869,66 +833,46 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer } src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); - if (mesa_ind_buf->type == GL_UNSIGNED_BYTE) + if (mesa_ind_buf->type == GL_UNSIGNED_BYTE) { GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); GLubyte *in = (GLubyte *)src_ptr; - if(context->ind_buf.bHostIb != GL_TRUE) - { - radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, - &context->ind_buf.bo_offset, size, 4); + radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, + &context->ind_buf.bo_offset, size, 4); - assert(context->ind_buf.bo->ptr != NULL); - out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); - } - else - { - context->ind_buf.bo = MALLOC_STRUCT(radeon_bo); - context->ind_buf.bo->ptr = ALIGN_MALLOC(size, 4); - context->ind_buf.bo_offset = 0; - out = (GLuint *)context->ind_buf.bo->ptr; - } + assert(context->ind_buf.bo->ptr != NULL); + out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); - for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) + for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) { *out++ = in[i] | in[i + 1] << 16; } - if (i < mesa_ind_buf->count) + if (i < mesa_ind_buf->count) { *out++ = in[i]; } #if MESA_BIG_ENDIAN - } - else + } + else { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */ GLushort *in = (GLushort *)src_ptr; GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); - if(context->ind_buf.bHostIb != GL_TRUE) - { - radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, - &context->ind_buf.bo_offset, size, 4); + radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, + &context->ind_buf.bo_offset, size, 4); - assert(context->ind_buf.bo->ptr != NULL); - out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); - } - else - { - context->ind_buf.bo = MALLOC_STRUCT(radeon_bo); - context->ind_buf.bo->ptr = ALIGN_MALLOC(size, 4); - context->ind_buf.bo_offset = 0; - out = (GLuint *)context->ind_buf.bo->ptr; - } + assert(context->ind_buf.bo->ptr != NULL); + out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); - for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) + for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) { *out++ = in[i] | in[i + 1] << 16; } - if (i < mesa_ind_buf->count) + if (i < mesa_ind_buf->count) { *out++ = in[i]; } @@ -938,7 +882,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer context->ind_buf.is_32bit = GL_FALSE; context->ind_buf.count = mesa_ind_buf->count; - if (mapped_named_bo) + if (mapped_named_bo) { ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); } @@ -953,20 +897,18 @@ static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer return; } - context->ind_buf.bHostIb = GL_FALSE; - #if MESA_BIG_ENDIAN - if (mesa_ind_buf->type == GL_UNSIGNED_INT) + if (mesa_ind_buf->type == GL_UNSIGNED_INT) { #else - if (mesa_ind_buf->type != GL_UNSIGNED_BYTE) + if (mesa_ind_buf->type != GL_UNSIGNED_BYTE) { #endif const GLvoid *src_ptr; GLvoid *dst_ptr; GLboolean mapped_named_bo = GL_FALSE; - if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) + if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); assert(mesa_ind_buf->obj->Pointer != NULL); @@ -977,32 +919,22 @@ static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type); - if(context->ind_buf.bHostIb != GL_TRUE) - { - radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, - &context->ind_buf.bo_offset, size, 4); - assert(context->ind_buf.bo->ptr != NULL); - dst_ptr = ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); - } - else - { - context->ind_buf.bo = MALLOC_STRUCT(radeon_bo); - context->ind_buf.bo->ptr = ALIGN_MALLOC(size, 4); - context->ind_buf.bo_offset = 0; - dst_ptr = context->ind_buf.bo->ptr; - } + radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, + &context->ind_buf.bo_offset, size, 4); + assert(context->ind_buf.bo->ptr != NULL); + dst_ptr = ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); _mesa_memcpy(dst_ptr, src_ptr, size); context->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT); context->ind_buf.count = mesa_ind_buf->count; - if (mapped_named_bo) + if (mapped_named_bo) { ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); } - } - else + } + else { r700FixupIndexBuffer(ctx, mesa_ind_buf); } @@ -1145,7 +1077,7 @@ static void r700DrawPrims(GLcontext *ctx, void r700InitDraw(GLcontext *ctx) { struct vbo_context *vbo = vbo_context(ctx); - + /* to be enabled */ vbo->draw_prims = r700DrawPrims; } -- cgit v1.2.3 From 3d78a86cd777aecce544d14b85177a71e9c142ce Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 1 Oct 2009 18:07:57 -0700 Subject: intel: Remove an unexplained flush from intelClearWithBlit. --- src/mesa/drivers/dri/intel/intel_blit.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index 43141c509c7..9e114db6c78 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -371,8 +371,6 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) skipBuffers = BUFFER_BIT_STENCIL; } - /* XXX Move this flush/lock into the following conditional? */ - intelFlush(&intel->ctx); LOCK_HARDWARE(intel); intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off); -- cgit v1.2.3 From 4182b58169c1c37833c590d00d0a6a52b2b55326 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 2 Oct 2009 10:53:56 -0700 Subject: i965: Use a little stack space to avoid a malloc in wm_get_binding_table. --- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 4f651170c60..9c28a22a298 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -660,7 +660,7 @@ brw_wm_get_binding_table(struct brw_context *brw) if (bind_bo == NULL) { GLuint data_size = brw->wm.nr_surfaces * sizeof(GLuint); - uint32_t *data = malloc(data_size); + uint32_t data[BRW_WM_MAX_SURF]; int i; for (i = 0; i < brw->wm.nr_surfaces; i++) @@ -685,8 +685,6 @@ brw_wm_get_binding_table(struct brw_context *brw) brw->wm.surf_bo[i]); } } - - free(data); } return bind_bo; -- cgit v1.2.3 From b330cebe01c5574e203fa6b9d49fee4c01e1adb6 Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Sat, 3 Oct 2009 18:01:58 +0200 Subject: radeon: Cope better with texture images with no miptrees. Fixes crash with compiz magnifier plugin. --- src/mesa/drivers/dri/radeon/radeon_texture.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c index 049284ef8c5..7b7392b2179 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.c +++ b/src/mesa/drivers/dri/radeon/radeon_texture.c @@ -101,7 +101,12 @@ void radeonFreeTexImageData(GLcontext *ctx, struct gl_texture_image *timage) /* Set Data pointer and additional data for mapped texture image */ static void teximage_set_map_data(radeon_texture_image *image) { - radeon_mipmap_level *lvl = &image->mt->levels[image->mtlevel]; + radeon_mipmap_level *lvl; + + if (!image->mt) + return; + + lvl = &image->mt->levels[image->mtlevel]; image->base.Data = image->mt->bo->ptr + lvl->faces[image->mtface].offset; image->base.RowStride = lvl->rowstride / image->mt->bpp; @@ -969,7 +974,7 @@ int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *t radeon_texture_image *image = get_radeon_texture_image(texObj->Image[face][level]); if (RADEON_DEBUG & RADEON_TEXTURE) fprintf(stderr, " face %i, level %i... %p vs %p ", face, level, t->mt, image->mt); - if (t->mt == image->mt) { + if (t->mt == image->mt || (!image->mt && !image->base.Data)) { if (RADEON_DEBUG & RADEON_TEXTURE) fprintf(stderr, "OK\n"); -- cgit v1.2.3