diff options
Diffstat (limited to 'src/mesa/drivers/dri')
99 files changed, 1723 insertions, 1175 deletions
diff --git a/src/mesa/drivers/dri/common/utils.c b/src/mesa/drivers/dri/common/utils.c index c195c4fd8f5..42be77fd7c4 100644 --- a/src/mesa/drivers/dri/common/utils.c +++ b/src/mesa/drivers/dri/common/utils.c @@ -738,12 +738,18 @@ static const struct { unsigned int attrib, offset; } attribMap[] = { #define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0])) + +/** + * Return the value of a configuration attribute. The attribute is + * indicated by the index. + */ static int driGetConfigAttribIndex(const __DRIconfig *config, unsigned int index, unsigned int *value) { switch (attribMap[index].attrib) { case __DRI_ATTRIB_RENDER_TYPE: + /* no support for color index mode */ *value = __DRI_ATTRIB_RGBA_BIT; break; case __DRI_ATTRIB_CONFIG_CAVEAT: @@ -755,13 +761,16 @@ driGetConfigAttribIndex(const __DRIconfig *config, *value = 0; break; case __DRI_ATTRIB_SWAP_METHOD: + /* XXX no return value??? */ break; case __DRI_ATTRIB_FLOAT_MODE: + /* this field is not int-sized */ *value = config->modes.floatMode; break; default: + /* any other int-sized field */ *value = *(unsigned int *) ((char *) &config->modes + attribMap[index].offset); @@ -771,6 +780,13 @@ driGetConfigAttribIndex(const __DRIconfig *config, return GL_TRUE; } + +/** + * Get the value of a configuration attribute. + * \param attrib the attribute (one of the _DRI_ATTRIB_x tokens) + * \param value returns the attribute's value + * \return 1 for success, 0 for failure + */ int driGetConfigAttrib(const __DRIconfig *config, unsigned int attrib, unsigned int *value) @@ -784,6 +800,14 @@ driGetConfigAttrib(const __DRIconfig *config, return GL_FALSE; } + +/** + * Get a configuration attribute name and value, given an index. 
+ * \param index which field of the __DRIconfig to query + * \param attrib returns the attribute name (one of the _DRI_ATTRIB_x tokens) + * \param value returns the attribute's value + * \return 1 for success, 0 for failure + */ int driIndexConfigAttrib(const __DRIconfig *config, int index, unsigned int *attrib, unsigned int *value) diff --git a/src/mesa/drivers/dri/i915/intel_structs.h b/src/mesa/drivers/dri/i915/intel_structs.h deleted file mode 100644 index 522e3bd92c2..00000000000 --- a/src/mesa/drivers/dri/i915/intel_structs.h +++ /dev/null @@ -1,132 +0,0 @@ -#ifndef INTEL_STRUCTS_H -#define INTEL_STRUCTS_H - -struct br0 { - GLuint length:8; - GLuint pad0:3; - GLuint dst_tiled:1; - GLuint pad1:8; - GLuint write_rgb:1; - GLuint write_alpha:1; - GLuint opcode:7; - GLuint client:3; -}; - - -struct br13 { - GLint dest_pitch:16; - GLuint rop:8; - GLuint color_depth:2; - GLuint pad1:3; - GLuint mono_source_transparency:1; - GLuint clipping_enable:1; - GLuint pad0:1; -}; - - - -/* This is an attempt to move some of the 2D interaction in this - * driver to using structs for packets rather than a bunch of #defines - * and dwords. 
- */ -struct xy_color_blit { - struct br0 br0; - struct br13 br13; - - struct { - GLuint dest_x1:16; - GLuint dest_y1:16; - } dw2; - - struct { - GLuint dest_x2:16; - GLuint dest_y2:16; - } dw3; - - GLuint dest_base_addr; - GLuint color; -}; - -struct xy_src_copy_blit { - struct br0 br0; - struct br13 br13; - - struct { - GLuint dest_x1:16; - GLuint dest_y1:16; - } dw2; - - struct { - GLuint dest_x2:16; - GLuint dest_y2:16; - } dw3; - - GLuint dest_base_addr; - - struct { - GLuint src_x1:16; - GLuint src_y1:16; - } dw5; - - struct { - GLint src_pitch:16; - GLuint pad:16; - } dw6; - - GLuint src_base_addr; -}; - -struct xy_setup_blit { - struct br0 br0; - struct br13 br13; - - struct { - GLuint clip_x1:16; - GLuint clip_y1:16; - } dw2; - - struct { - GLuint clip_x2:16; - GLuint clip_y2:16; - } dw3; - - GLuint dest_base_addr; - GLuint background_color; - GLuint foreground_color; - GLuint pattern_base_addr; -}; - - -struct xy_text_immediate_blit { - struct { - GLuint length:8; - GLuint pad2:3; - GLuint dst_tiled:1; - GLuint pad1:4; - GLuint byte_packed:1; - GLuint pad0:5; - GLuint opcode:7; - GLuint client:3; - } dw0; - - struct { - GLuint dest_x1:16; - GLuint dest_y1:16; - } dw1; - - struct { - GLuint dest_x2:16; - GLuint dest_y2:16; - } dw2; - - /* Src bitmap data follows as inline dwords. 
- */ -}; - - -#define CLIENT_2D 0x2 -#define OPCODE_XY_SETUP_BLT 0x1 -#define OPCODE_XY_COLOR_BLT 0x50 -#define OPCODE_XY_TEXT_IMMEDIATE_BLT 0x31 - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index 00418760da3..a8369b07c35 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -204,7 +204,7 @@ static void upload_cc_unit(struct brw_context *brw) cc.cc2.depth_write_enable = ctx->Depth.Mask; } - if (intel->stats_wm || (INTEL_DEBUG & DEBUG_STATS)) + if (intel->stats_wm || unlikely(INTEL_DEBUG & DEBUG_STATS)) cc.cc5.statistics_enable = 1; /* CACHE_NEW_CC_VP */ diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c index 15e60bf3ce3..1be165cc9a1 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ b/src/mesa/drivers/dri/i965/brw_clip.c @@ -133,13 +133,13 @@ static void compile_clip_prog( struct brw_context *brw, */ program = brw_get_program(&c.func, &program_size); - if (INTEL_DEBUG & DEBUG_CLIP) { + if (unlikely(INTEL_DEBUG & DEBUG_CLIP)) { printf("clip:\n"); for (i = 0; i < program_size / sizeof(struct brw_instruction); i++) brw_disasm(stdout, &((struct brw_instruction *)program)[i], intel->gen); printf("\n"); - } + } /* Upload */ diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index 885167da908..60fd5fa7d9e 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -114,10 +114,10 @@ clip_unit_create_from_key(struct brw_context *brw, clip.thread4.max_threads = 1 - 1; } - if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) + if (unlikely(INTEL_DEBUG & DEBUG_SINGLE_THREAD)) clip.thread4.max_threads = 0; - if (INTEL_DEBUG & DEBUG_STATS) + if (unlikely(INTEL_DEBUG & DEBUG_STATS)) clip.thread4.stats_enable = 1; clip.clip5.userclip_enable_flags = 0x7f; diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 
3c4ae8a7a4f..cb0a8b96c9c 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -124,7 +124,7 @@ GLboolean brwCreateContext( int api, (i == MESA_SHADER_FRAGMENT); if (intel->gen == 6) - ctx->ShaderCompilerOptions[i].EmitNoIfs = GL_TRUE; + ctx->ShaderCompilerOptions[i].EmitNoIfs = (i == MESA_SHADER_VERTEX); } ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024); diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 6c3db61035a..239586a0366 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -930,6 +930,11 @@ #define CMD_3D_CLIP_STATE 0x7812 /* GEN6+ */ /* DW1 */ # define GEN6_CLIP_STATISTICS_ENABLE (1 << 10) +/** + * Just does cheap culling based on the clip distance. Bits must be + * disjoint with USER_CLIP_CLIP_DISTANCE bits. + */ +# define GEN6_USER_CLIP_CULL_DISTANCES_SHIFT 0 /* DW2 */ # define GEN6_CLIP_ENABLE (1 << 31) # define GEN6_CLIP_API_OGL (0 << 30) @@ -937,6 +942,8 @@ # define GEN6_CLIP_XY_TEST (1 << 28) # define GEN6_CLIP_Z_TEST (1 << 27) # define GEN6_CLIP_GB_TEST (1 << 26) +/** 8-bit field of which user clip distances to clip against. 
*/ +# define GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT 16 # define GEN6_CLIP_MODE_NORMAL (0 << 13) # define GEN6_CLIP_MODE_REJECT_ALL (3 << 13) # define GEN6_CLIP_MODE_ACCEPT_ALL (4 << 13) diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 04bc8cb2db0..a1f403ca4e6 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -42,7 +42,7 @@ #include "intel_batchbuffer.h" -#define FILE_DEBUG_FLAG DEBUG_BATCH +#define FILE_DEBUG_FLAG DEBUG_PRIMS static GLuint prim_to_hw_prim[GL_POLYGON+1] = { _3DPRIM_POINTLIST, @@ -83,8 +83,7 @@ static GLuint brw_set_prim(struct brw_context *brw, struct gl_context *ctx = &brw->intel.ctx; GLenum mode = prim->mode; - if (INTEL_DEBUG & DEBUG_PRIMS) - printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode)); + DBG("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode)); /* Slight optimization to avoid the GS program when not needed: */ @@ -133,9 +132,8 @@ static void brw_emit_prim(struct brw_context *brw, struct brw_3d_primitive prim_packet; struct intel_context *intel = &brw->intel; - if (INTEL_DEBUG & DEBUG_PRIMS) - printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), - prim->start, prim->count); + DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), + prim->start, prim->count); prim_packet.header.opcode = CMD_3D_PRIM; prim_packet.header.length = sizeof(prim_packet)/4 - 2; diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index c4654360d46..2cefe614dd2 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -168,7 +168,7 @@ static GLuint byte_types_scale[5] = { static GLuint get_surface_type( GLenum type, GLuint size, GLenum format, GLboolean normalized ) { - if (INTEL_DEBUG & DEBUG_VERTS) + if (unlikely(INTEL_DEBUG & DEBUG_VERTS)) printf("type %s size %d normalized %d\n", _mesa_lookup_enum_by_nr(type), size, normalized); diff --git 
a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 9cb99a2b999..9cb941dacfd 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -969,7 +969,7 @@ void brw_ENDIF(struct brw_compile *p, brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_set_src1(insn, brw_imm_d(0x0)); } else { - brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_W)); + brw_set_dest(insn, brw_imm_w(0)); brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); } diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 2ed59d3f5d4..283d5aad496 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -89,8 +89,6 @@ brw_compile_shader(struct gl_context *ctx, struct gl_shader *shader) GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) { - struct intel_context *intel = intel_context(ctx); - struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; if (shader != NULL) { @@ -132,9 +130,6 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) GL_TRUE, /* temp */ GL_TRUE /* uniform */ ) || progress; - if (intel->gen == 6) { - progress = do_if_to_cond_assign(shader->ir) || progress; - } } while (progress); validate_ir_tree(shader->ir); @@ -3129,7 +3124,7 @@ fs_visitor::generate_code() const char *last_annotation_string = NULL; ir_instruction *last_annotation_ir = NULL; - if (INTEL_DEBUG & DEBUG_WM) { + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { printf("Native code for fragment shader %d:\n", ctx->Shader.CurrentFragmentProgram->Name); } @@ -3141,7 +3136,7 @@ fs_visitor::generate_code() fs_inst *inst = (fs_inst *)iter.get(); struct brw_reg src[3], dst; - if (INTEL_DEBUG & DEBUG_WM) { + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { if (last_annotation_ir != inst->ir) { 
last_annotation_ir = inst->ir; if (last_annotation_ir) { @@ -3335,7 +3330,7 @@ fs_visitor::generate_code() this->fail = true; } - if (INTEL_DEBUG & DEBUG_WM) { + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { for (unsigned int i = last_native_inst; i < p->nr_insn; i++) { if (0) { printf("0x%08x 0x%08x 0x%08x 0x%08x ", @@ -3376,7 +3371,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) */ c->dispatch_width = 8; - if (INTEL_DEBUG & DEBUG_WM) { + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { printf("GLSL IR for native fragment shader %d:\n", prog->Name); _mesa_print_ir(shader->ir, NULL); printf("\n"); diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index cfcc8ea4d6a..b0c76f4094d 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -127,8 +127,8 @@ static void compile_gs_prog( struct brw_context *brw, */ program = brw_get_program(&c.func, &program_size); - if (INTEL_DEBUG & DEBUG_GS) { - int i; + if (unlikely(INTEL_DEBUG & DEBUG_GS)) { + int i; printf("gs:\n"); for (i = 0; i < program_size / sizeof(struct brw_instruction); i++) diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c index 63562ebcfc2..69a5f7a6667 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_state.c @@ -101,7 +101,7 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) if (intel->gen == 5) gs.thread4.rendering_enable = 1; - if (INTEL_DEBUG & DEBUG_STATS) + if (unlikely(INTEL_DEBUG & DEBUG_STATS)) gs.thread4.stats_enable = 1; bo = brw_upload_cache(&brw->cache, BRW_GS_UNIT, diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 24041e57b00..1d350bc0413 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -555,7 +555,7 @@ static void upload_invarient_state( struct brw_context *brw ) memset(&vfs, 0, 
sizeof(vfs)); vfs.opcode = brw->CMD_VF_STATISTICS; - if (INTEL_DEBUG & DEBUG_STATS) + if (unlikely(INTEL_DEBUG & DEBUG_STATS)) vfs.statistics_enable = 1; BRW_BATCH_STRUCT(brw, &vfs); diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 7dbd70daaea..6da155b1a9b 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -108,7 +108,7 @@ static void compile_sf_prog( struct brw_context *brw, */ program = brw_get_program(&c.func, &program_size); - if (INTEL_DEBUG & DEBUG_SF) { + if (unlikely(INTEL_DEBUG & DEBUG_SF)) { printf("sf:\n"); for (i = 0; i < program_size / sizeof(struct brw_instruction); i++) brw_disasm(stdout, &((struct brw_instruction *)program)[i], diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index 6ad9e1b48a4..bd3a21ed9e2 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -210,10 +210,10 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, sf.thread4.max_threads = MIN2(chipset_max_threads, key->nr_urb_entries) - 1; - if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) + if (unlikely(INTEL_DEBUG & DEBUG_SINGLE_THREAD)) sf.thread4.max_threads = 0; - if (INTEL_DEBUG & DEBUG_STATS) + if (unlikely(INTEL_DEBUG & DEBUG_STATS)) sf.thread4.stats_enable = 1; /* CACHE_NEW_SF_VP */ diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index b31d84953a1..58ff528d44b 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -61,6 +61,7 @@ #include "intel_batchbuffer.h" #include "brw_wm.h" +#define FILE_DEBUG_FLAG DEBUG_STATE static GLuint hash_key(struct brw_cache_item *item) @@ -265,10 +266,9 @@ brw_upload_cache_with_auxdata(struct brw_cache *cache, *(void **)aux_return = (void *)((char *)item->key + item->key_size); } - if (INTEL_DEBUG & DEBUG_STATE) - printf("upload %s: %d bytes to 
cache id %d\n", - cache->name[cache_id], - data_size, cache_id); + DBG("upload %s: %d bytes to cache id %d\n", + cache->name[cache_id], + data_size, cache_id); /* Copy data to the buffer */ drm_intel_bo_subdata(bo, 0, data_size, data); @@ -407,8 +407,7 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) struct brw_cache_item *c, *next; GLuint i; - if (INTEL_DEBUG & DEBUG_STATE) - printf("%s\n", __FUNCTION__); + DBG("%s\n", __FUNCTION__); for (i = 0; i < cache->size; i++) { for (c = cache->items[i]; c; c = next) { @@ -434,8 +433,7 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) void brw_state_cache_check_size(struct brw_context *brw) { - if (INTEL_DEBUG & DEBUG_STATE) - printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items); + DBG("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items); /* un-tuned guess. Each object is generally a page, so 1000 of them is 4 MB of * state cache. @@ -450,8 +448,7 @@ brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) { GLuint i; - if (INTEL_DEBUG & DEBUG_STATE) - printf("%s\n", __FUNCTION__); + DBG("%s\n", __FUNCTION__); brw_clear_cache(brw, cache); for (i = 0; i < BRW_MAX_CACHE; i++) { diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index f3b6a90f61a..338f3876b31 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -435,7 +435,7 @@ void brw_upload_state(struct brw_context *brw) brw_clear_validated_bos(brw); - if (INTEL_DEBUG) { + if (unlikely(INTEL_DEBUG)) { /* Debug version which enforces various sanity checks on the * state flags which are generated and checked to help ensure * state atoms are ordered correctly in the list. 
@@ -487,7 +487,7 @@ void brw_upload_state(struct brw_context *brw) } } - if (INTEL_DEBUG & DEBUG_STATE) { + if (unlikely(INTEL_DEBUG & DEBUG_STATE)) { brw_update_dirty_count(mesa_bits, state->mesa); brw_update_dirty_count(brw_bits, state->brw); brw_update_dirty_count(cache_bits, state->cache); diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c index 0f597184b42..dfc1551aca6 100644 --- a/src/mesa/drivers/dri/i965/brw_urb.c +++ b/src/mesa/drivers/dri/i965/brw_urb.c @@ -190,12 +190,12 @@ static void recalculate_urb_fence( struct brw_context *brw ) exit(1); } - if (INTEL_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS)) + if (unlikely(INTEL_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS))) printf("URB CONSTRAINED\n"); } done: - if (INTEL_DEBUG & DEBUG_URB) + if (unlikely(INTEL_DEBUG & DEBUG_URB)) printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n", brw->urb.vs_start, brw->urb.gs_start, diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index ce334799965..7e43324a1f9 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -165,13 +165,20 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* User clip planes from curbe: */ if (c->key.nr_userclip) { - for (i = 0; i < c->key.nr_userclip; i++) { - c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1); - } + if (intel->gen >= 6) { + for (i = 0; i < c->key.nr_userclip; i++) { + c->userplane[i] = stride(brw_vec4_grf(reg + i / 2, + (i % 2) * 4), 0, 4, 1); + } + reg += ALIGN(c->key.nr_userclip, 2) / 2; + } else { + for (i = 0; i < c->key.nr_userclip; i++) { + c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2, + (i % 2) * 4), 0, 4, 1); + } + reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2; + } - /* Deal with curbe alignment: - */ - reg += ((6 + c->key.nr_userclip + 3) / 4) * 2; } /* Vertex program parameters from curbe: @@ -253,9 +260,11 @@ static void brw_vs_alloc_regs( 
struct brw_vs_compile *c ) c->first_output = reg; c->first_overflow_output = 0; - if (intel->gen >= 6) - mrf = 3; /* no more pos store in attribute */ - else if (intel->gen == 5) + if (intel->gen >= 6) { + mrf = 3; + if (c->key.nr_userclip) + mrf += 2; + } else if (intel->gen == 5) mrf = 8; else mrf = 4; @@ -372,16 +381,20 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* See emit_vertex_write() for where the VUE's overhead on top of the * attributes comes from. */ - if (intel->gen >= 6) - c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 7) / 8; - else if (intel->gen == 5) + if (intel->gen >= 6) { + int header_regs = 2; + if (c->key.nr_userclip) + header_regs += 2; + + c->prog_data.urb_entry_size = (attributes_in_vue + header_regs + 7) / 8; + } else if (intel->gen == 5) c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4; else c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4; c->prog_data.total_grf = reg; - if (INTEL_DEBUG & DEBUG_VS) { + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs); printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries); printf("%s reg = %d\n", __FUNCTION__, reg); @@ -576,12 +589,11 @@ static void emit_min( struct brw_compile *p, brw_set_predicate_control(p, BRW_PREDICATE_NONE); } - -static void emit_math1( struct brw_vs_compile *c, - GLuint function, - struct brw_reg dst, - struct brw_reg arg0, - GLuint precision) +static void emit_math1_gen4(struct brw_vs_compile *c, + GLuint function, + struct brw_reg dst, + struct brw_reg arg0, + GLuint precision) { /* There are various odd behaviours with SEND on the simulator. 
In * addition there are documented issues with the fact that the GEN4 @@ -591,14 +603,11 @@ static void emit_math1( struct brw_vs_compile *c, * whether that turns out to be a simulator bug or not: */ struct brw_compile *p = &c->func; - struct intel_context *intel = &p->brw->intel; struct brw_reg tmp = dst; GLboolean need_tmp = GL_FALSE; - if (dst.file != BRW_GENERAL_REGISTER_FILE) - need_tmp = GL_TRUE; - - if (intel->gen < 6 && dst.dw1.bits.writemask != 0xf) + if (dst.file != BRW_GENERAL_REGISTER_FILE || + dst.dw1.bits.writemask != 0xf) need_tmp = GL_TRUE; if (need_tmp) @@ -619,6 +628,57 @@ static void emit_math1( struct brw_vs_compile *c, } } +static void +emit_math1_gen6(struct brw_vs_compile *c, + GLuint function, + struct brw_reg dst, + struct brw_reg arg0, + GLuint precision) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp_src, tmp_dst; + + /* Something is strange on gen6 math in 16-wide mode, though the + * docs say it's supposed to work. Punt to using align1 mode, + * which doesn't do writemasking and swizzles. 
+ */ + tmp_src = get_tmp(c); + tmp_dst = get_tmp(c); + + brw_MOV(p, tmp_src, arg0); + + brw_set_access_mode(p, BRW_ALIGN_1); + brw_math(p, + tmp_dst, + function, + BRW_MATH_SATURATE_NONE, + 2, + tmp_src, + BRW_MATH_DATA_SCALAR, + precision); + brw_set_access_mode(p, BRW_ALIGN_16); + + brw_MOV(p, dst, tmp_dst); + + release_tmp(c, tmp_src); + release_tmp(c, tmp_dst); +} + +static void +emit_math1(struct brw_vs_compile *c, + GLuint function, + struct brw_reg dst, + struct brw_reg arg0, + GLuint precision) +{ + struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; + + if (intel->gen >= 6) + emit_math1_gen6(c, function, dst, arg0, precision); + else + emit_math1_gen4(c, function, dst, arg0, precision); +} static void emit_math2( struct brw_vs_compile *c, GLuint function, @@ -1392,9 +1452,33 @@ static void emit_vertex_write( struct brw_vs_compile *c) /* Update the header for point size, user clipping flags, and -ve rhw * workaround. */ - if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) || - c->key.nr_userclip || brw->has_negative_rhw_bug) - { + if (intel->gen >= 6) { + struct brw_reg m1 = brw_message_reg(1); + + /* On gen6, m1 has each value in a separate dword, so we never + * need to mess with a temporary for computing the m1 value. + */ + brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD), brw_imm_ud(0)); + if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { + brw_MOV(p, brw_writemask(m1, WRITEMASK_W), + brw_swizzle1(c->regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ], 0)); + } + + /* Set the user clip distances in dword 8-15. 
(m3-4)*/ + if (c->key.nr_userclip) { + for (i = 0; i < c->key.nr_userclip; i++) { + struct brw_reg m; + if (i < 4) + m = brw_message_reg(3); + else + m = brw_message_reg(4); + + brw_DP4(p, brw_writemask(m, (1 << (i & 7))),pos, c->userplane[i]); + } + } + } else if ((c->prog_data.outputs_written & + BITFIELD64_BIT(VERT_RESULT_PSIZ)) || + c->key.nr_userclip || brw->has_negative_rhw_bug) { struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); GLuint i; @@ -1404,11 +1488,10 @@ static void emit_vertex_write( struct brw_vs_compile *c) if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { struct brw_reg psiz = c->regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ]; - if (intel->gen < 6) { - brw_MUL(p, brw_writemask(header1, WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11)); - brw_AND(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8)); - } else - brw_MOV(p, brw_writemask(header1, WRITEMASK_W), brw_swizzle1(psiz, 0)); + brw_MUL(p, brw_writemask(header1, WRITEMASK_W), + brw_swizzle1(psiz, 0), brw_imm_f(1<<11)); + brw_AND(p, brw_writemask(header1, WRITEMASK_W), + header1, brw_imm_ud(0x7ff<<8)); } for (i = 0; i < c->key.nr_userclip; i++) { @@ -1461,12 +1544,14 @@ static void emit_vertex_write( struct brw_vs_compile *c) * dword 0-3 (m1) of the header is indices, point width, clip flags. * dword 4-7 (m2) is the 4D space position * dword 8-15 (m3,m4) of the vertex header is the user clip distance if - * enabled. We don't use it, so skip it. - * m3 is the first vertex element data we fill, which is the vertex - * position. + * enabled. + * m3 or 5 is the first vertex element data we fill, which is + * the vertex position. */ brw_MOV(p, brw_message_reg(2), pos); len_vertex_header = 1; + if (c->key.nr_userclip > 0) + len_vertex_header += 2; } else if (intel->gen == 5) { /* There are 20 DWs (D0-D19) in VUE header on Ironlake: * dword 0-3 (m1) of the header is indices, point width, clip flags. 
@@ -1640,17 +1725,13 @@ void brw_vs_emit(struct brw_vs_compile *c ) GLuint index; GLuint file; - if (INTEL_DEBUG & DEBUG_VS) { + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { printf("vs-mesa:\n"); _mesa_fprint_program_opt(stdout, &c->vp->program.Base, PROG_PRINT_DEBUG, GL_TRUE); printf("\n"); } - /* FIXME Need to fix conditional instruction to remove this */ - if (intel->gen >= 6) - p->single_program_flow = GL_TRUE; - brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_access_mode(p, BRW_ALIGN_16); if_depth_in_loop[loop_depth] = 0; @@ -2010,7 +2091,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) brw_optimize(p); - if (INTEL_DEBUG & DEBUG_VS) { + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { int i; printf("vs-native:\n"); diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index ebae94269f9..be923138617 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -154,7 +154,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) */ vs.vs5.sampler_count = 0; - if (INTEL_DEBUG & DEBUG_STATS) + if (unlikely(INTEL_DEBUG & DEBUG_STATS)) vs.thread4.stats_enable = 1; /* Vertex program always enabled: diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 7f3ba5f0581..a6d2a2377f6 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -198,7 +198,7 @@ static void do_wm_prog( struct brw_context *brw, c->prog_data.total_scratch = 0; } - if (INTEL_DEBUG & DEBUG_WM) + if (unlikely(INTEL_DEBUG & DEBUG_WM)) fprintf(stderr, "\n"); /* get the program diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index d06c49fd5be..96fecc97ee2 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -83,6 +83,7 @@ brw_wm_arg_can_be_immediate(enum prog_opcode opcode, int arg) [OPCODE_SLE] = 2, [OPCODE_SLT] = 2, [OPCODE_SNE] 
= 2, + [OPCODE_SWZ] = 1, [OPCODE_XPD] = 2, }; @@ -895,11 +896,12 @@ void emit_math1(struct brw_wm_compile *c, BRW_MATH_SATURATE_NONE); struct brw_reg src; - if (intel->gen >= 6 && arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0) { - /* Gen6 math requires that source and dst horizontal stride be 1. - * + if (intel->gen >= 6 && (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 || + arg0[0].file != BRW_GENERAL_REGISTER_FILE)) { + /* Gen6 math requires that source and dst horizontal stride be 1, + * and that the argument be in the GRF. */ - src = *dst; + src = dst[dst_chan]; brw_MOV(p, src, arg0[0]); } else { src = arg0[0]; @@ -1920,7 +1922,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) brw_remove_grf_to_mrf_moves(p); } - if (INTEL_DEBUG & DEBUG_WM) { + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { int i; printf("wm-native:\n"); diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 15a238cda62..2cae6988804 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -663,7 +663,7 @@ static void precalc_tex( struct brw_wm_compile *c, const struct prog_instruction *inst ) { struct prog_src_register coord; - struct prog_dst_register tmpcoord; + struct prog_dst_register tmpcoord = { 0 }; const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; assert(unit < BRW_MAX_TEX_UNIT); @@ -963,7 +963,7 @@ static void emit_render_target_writes( struct brw_wm_compile *c ) struct prog_src_register outcolor; GLuint i; - struct prog_instruction *inst, *last_inst; + struct prog_instruction *inst, *last_inst = NULL; /* The inst->Aux field is used for FB write target and the EOT marker */ @@ -1058,7 +1058,7 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) struct brw_fragment_program *fp = c->fp; GLuint insn; - if (INTEL_DEBUG & DEBUG_WM) { + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { printf("pre-fp:\n"); _mesa_fprint_program_opt(stdout, &fp->program.Base, PROG_PRINT_DEBUG, GL_TRUE); @@ -1174,7 +1174,7 @@ void 
brw_wm_pass_fp( struct brw_wm_compile *c ) } } - if (INTEL_DEBUG & DEBUG_WM) { + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { printf("pass_fp:\n"); print_insns( c->prog_instructions, c->nr_fp_insns ); printf("\n"); diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index d325f85ce00..7fe8ab1f334 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -19,7 +19,7 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) { int i; - if (INTEL_DEBUG & DEBUG_GLSL_FORCE) + if (unlikely(INTEL_DEBUG & DEBUG_GLSL_FORCE)) return GL_TRUE; for (i = 0; i < fp->Base.NumInstructions; i++) { @@ -1002,7 +1002,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) } post_wm_emit(c); - if (INTEL_DEBUG & DEBUG_WM) { + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { printf("wm-native:\n"); for (i = 0; i < p->nr_insn; i++) brw_disasm(stdout, &p->store[i], intel->gen); @@ -1016,7 +1016,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) */ void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) { - if (INTEL_DEBUG & DEBUG_WM) { + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { printf("brw_wm_glsl_emit:\n"); } @@ -1026,7 +1026,7 @@ void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) /* actual code generation */ brw_wm_emit_glsl(brw, c); - if (INTEL_DEBUG & DEBUG_WM) { + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { brw_wm_print_program(c, "brw_wm_glsl_emit done"); } diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c index d6aa9f957a2..83152526b3a 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c @@ -440,7 +440,7 @@ void brw_wm_pass0( struct brw_wm_compile *c ) } } - if (INTEL_DEBUG & DEBUG_WM) { + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { brw_wm_print_program(c, "pass0"); } } diff --git 
a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c index 962515a99e9..3a2874b6ddf 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c @@ -291,7 +291,7 @@ void brw_wm_pass1( struct brw_wm_compile *c ) track_arg(c, inst, 2, read2); } - if (INTEL_DEBUG & DEBUG_WM) { + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { brw_wm_print_program(c, "pass1"); } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass2.c b/src/mesa/drivers/dri/i965/brw_wm_pass2.c index 54acb3038b5..44e39538145 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass2.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass2.c @@ -331,13 +331,13 @@ void brw_wm_pass2( struct brw_wm_compile *c ) } } - if (INTEL_DEBUG & DEBUG_WM) { + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { brw_wm_print_program(c, "pass2"); } c->state = PASS2_DONE; - if (INTEL_DEBUG & DEBUG_WM) { + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { brw_wm_print_program(c, "pass2/done"); } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 9a27b937103..76de7b7b6f6 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -249,7 +249,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, wm.wm5.line_stipple = key->line_stipple; - if (INTEL_DEBUG & DEBUG_STATS || key->stats_wm) + if (unlikely(INTEL_DEBUG & DEBUG_STATS) || key->stats_wm) wm.wm4.stats_enable = 1; bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT, diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c index 0d6e923f734..800a2555214 100644 --- a/src/mesa/drivers/dri/i965/gen6_cc.c +++ b/src/mesa/drivers/dri/i965/gen6_cc.c @@ -254,7 +254,7 @@ prepare_color_calc_state(struct brw_context *brw) const struct brw_tracked_state gen6_color_calc_state = { .dirty = { - .mesa = _NEW_COLOR, + .mesa = _NEW_COLOR | _NEW_STENCIL, .brw = 0, .cache = 0, }, diff --git 
a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c index cd2ac9d92fe..c65b41e2b6b 100644 --- a/src/mesa/drivers/dri/i965/gen6_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c @@ -28,6 +28,7 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" +#include "brw_util.h" #include "intel_batchbuffer.h" static void @@ -36,7 +37,7 @@ upload_clip_state(struct brw_context *brw) struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; uint32_t depth_clamp = 0; - uint32_t provoking; + uint32_t provoking, userclip; if (!ctx->Transform.DepthClamp) depth_clamp = GEN6_CLIP_Z_TEST; @@ -50,6 +51,9 @@ upload_clip_state(struct brw_context *brw) (1 << GEN6_CLIP_LINE_PROVOKE_SHIFT); } + /* _NEW_TRANSFORM */ + userclip = (1 << brw_count_bits(ctx->Transform.ClipPlanesEnabled)) - 1; + BEGIN_BATCH(4); OUT_BATCH(CMD_3D_CLIP_STATE << 16 | (4 - 2)); OUT_BATCH(GEN6_CLIP_STATISTICS_ENABLE); @@ -57,6 +61,7 @@ upload_clip_state(struct brw_context *brw) GEN6_CLIP_API_OGL | GEN6_CLIP_MODE_NORMAL | GEN6_CLIP_XY_TEST | + userclip << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT | depth_clamp | provoking); OUT_BATCH(GEN6_CLIP_FORCE_ZERO_RTAINDEX); diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 55a70bea62f..471067e8f02 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -73,12 +73,19 @@ upload_sf_state(struct brw_context *brw) /* _NEW_BUFFER */ GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; int attr = 0; + int urb_start; + + /* _NEW_TRANSFORM */ + if (ctx->Transform.ClipPlanesEnabled) + urb_start = 2; + else + urb_start = 1; dw1 = GEN6_SF_SWIZZLE_ENABLE | num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT | (num_inputs + 1) / 2 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | - 1 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT; + urb_start << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT; dw2 = 
GEN6_SF_VIEWPORT_TRANSFORM_ENABLE | GEN6_SF_STATISTICS_ENABLE; dw3 = 0; @@ -195,7 +202,9 @@ const struct brw_tracked_state gen6_sf_state = { _NEW_POLYGON | _NEW_LINE | _NEW_SCISSOR | - _NEW_BUFFERS), + _NEW_BUFFERS | + _NEW_POINT | + _NEW_TRANSFORM), .brw = BRW_NEW_CONTEXT, .cache = CACHE_NEW_VS_PROG }, diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index 304eaddf409..e94d0c0ddbb 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -40,11 +40,11 @@ upload_vs_state(struct brw_context *brw) struct gl_context *ctx = &intel->ctx; const struct brw_vertex_program *vp = brw_vertex_program_const(brw->vertex_program); - unsigned int nr_params = vp->program.Base.Parameters->NumParameters; + unsigned int nr_params = brw->vs.prog_data->nr_params / 4; drm_intel_bo *constant_bo; int i; - if (vp->use_const_buffer || nr_params == 0) { + if (brw->vs.prog_data->nr_params == 0 && !ctx->Transform.ClipPlanesEnabled) { /* Disable the push constant buffers. */ BEGIN_BATCH(5); OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | (5 - 2)); @@ -54,6 +54,9 @@ upload_vs_state(struct brw_context *brw) OUT_BATCH(0); ADVANCE_BATCH(); } else { + int params_uploaded = 0; + float *param; + if (brw->vertex_program->IsNVProgram) _mesa_load_tracked_matrices(ctx); @@ -63,14 +66,55 @@ upload_vs_state(struct brw_context *brw) _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); constant_bo = drm_intel_bo_alloc(intel->bufmgr, "VS constant_bo", - nr_params * 4 * sizeof(float), + (MAX_CLIP_PLANES + nr_params) * + 4 * sizeof(float), 4096); drm_intel_gem_bo_map_gtt(constant_bo); - for (i = 0; i < nr_params; i++) { - memcpy((char *)constant_bo->virtual + i * 4 * sizeof(float), - vp->program.Base.Parameters->ParameterValues[i], - 4 * sizeof(float)); + param = constant_bo->virtual; + + /* This should be loaded like any other param, but it's ad-hoc + * until we redo the VS backend. 
+ */ + for (i = 0; i < MAX_CLIP_PLANES; i++) { + if (ctx->Transform.ClipPlanesEnabled & (1 << i)) { + memcpy(param, ctx->Transform._ClipUserPlane[i], 4 * sizeof(float)); + param += 4; + params_uploaded++; + } } + /* Align to a reg for convenience for brw_vs_emit.c */ + if (params_uploaded & 1) { + param += 4; + params_uploaded++; + } + + if (vp->use_const_buffer) { + for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { + if (brw->vs.constant_map[i] != -1) { + memcpy(param + brw->vs.constant_map[i] * 4, + vp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + params_uploaded++; + } + } + } else { + for (i = 0; i < nr_params; i++) { + memcpy(param, vp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + param += 4; + params_uploaded++; + } + } + + if (0) { + printf("VS constant buffer:\n"); + for (i = 0; i < params_uploaded; i++) { + float *buf = (float *)constant_bo->virtual + i * 4; + printf("%d: %f %f %f %f\n", + i, buf[0], buf[1], buf[2], buf[3]); + } + } + drm_intel_gem_bo_unmap_gtt(constant_bo); BEGIN_BATCH(5); @@ -79,7 +123,7 @@ upload_vs_state(struct brw_context *brw) (5 - 2)); OUT_RELOC(constant_bo, I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */ - ALIGN(nr_params, 2) / 2 - 1); + ALIGN(params_uploaded, 2) / 2 - 1); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); @@ -91,7 +135,7 @@ upload_vs_state(struct brw_context *brw) BEGIN_BATCH(6); OUT_BATCH(CMD_3D_VS_STATE << 16 | (6 - 2)); OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - OUT_BATCH(GEN6_VS_SPF_MODE | (0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | + OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); OUT_BATCH(0); /* scratch space base offset */ OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) | diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 36d4ab93ba9..ea5418bacf1 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ 
b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -73,7 +73,7 @@ prepare_wm_constants(struct brw_context *brw) const struct brw_tracked_state gen6_wm_constants = { .dirty = { .mesa = _NEW_PROGRAM_CONSTANTS, - .brw = 0, + .brw = BRW_NEW_FRAGMENT_PROGRAM, .cache = 0, }, .prepare = prepare_wm_constants, diff --git a/src/mesa/drivers/dri/i965/intel_structs.h b/src/mesa/drivers/dri/i965/intel_structs.h deleted file mode 100644 index 522e3bd92c2..00000000000 --- a/src/mesa/drivers/dri/i965/intel_structs.h +++ /dev/null @@ -1,132 +0,0 @@ -#ifndef INTEL_STRUCTS_H -#define INTEL_STRUCTS_H - -struct br0 { - GLuint length:8; - GLuint pad0:3; - GLuint dst_tiled:1; - GLuint pad1:8; - GLuint write_rgb:1; - GLuint write_alpha:1; - GLuint opcode:7; - GLuint client:3; -}; - - -struct br13 { - GLint dest_pitch:16; - GLuint rop:8; - GLuint color_depth:2; - GLuint pad1:3; - GLuint mono_source_transparency:1; - GLuint clipping_enable:1; - GLuint pad0:1; -}; - - - -/* This is an attempt to move some of the 2D interaction in this - * driver to using structs for packets rather than a bunch of #defines - * and dwords. 
- */ -struct xy_color_blit { - struct br0 br0; - struct br13 br13; - - struct { - GLuint dest_x1:16; - GLuint dest_y1:16; - } dw2; - - struct { - GLuint dest_x2:16; - GLuint dest_y2:16; - } dw3; - - GLuint dest_base_addr; - GLuint color; -}; - -struct xy_src_copy_blit { - struct br0 br0; - struct br13 br13; - - struct { - GLuint dest_x1:16; - GLuint dest_y1:16; - } dw2; - - struct { - GLuint dest_x2:16; - GLuint dest_y2:16; - } dw3; - - GLuint dest_base_addr; - - struct { - GLuint src_x1:16; - GLuint src_y1:16; - } dw5; - - struct { - GLint src_pitch:16; - GLuint pad:16; - } dw6; - - GLuint src_base_addr; -}; - -struct xy_setup_blit { - struct br0 br0; - struct br13 br13; - - struct { - GLuint clip_x1:16; - GLuint clip_y1:16; - } dw2; - - struct { - GLuint clip_x2:16; - GLuint clip_y2:16; - } dw3; - - GLuint dest_base_addr; - GLuint background_color; - GLuint foreground_color; - GLuint pattern_base_addr; -}; - - -struct xy_text_immediate_blit { - struct { - GLuint length:8; - GLuint pad2:3; - GLuint dst_tiled:1; - GLuint pad1:4; - GLuint byte_packed:1; - GLuint pad0:5; - GLuint opcode:7; - GLuint client:3; - } dw0; - - struct { - GLuint dest_x1:16; - GLuint dest_y1:16; - } dw1; - - struct { - GLuint dest_x2:16; - GLuint dest_y2:16; - } dw2; - - /* Src bitmap data follows as inline dwords. 
- */ -}; - - -#define CLIENT_2D 0x2 -#define OPCODE_XY_SETUP_BLT 0x1 -#define OPCODE_XY_COLOR_BLT 0x50 -#define OPCODE_XY_TEXT_IMMEDIATE_BLT 0x31 - -#endif diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 9b398239172..4b498f8c5b2 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -44,7 +44,9 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch) batch->buf = drm_intel_bo_alloc(intel->bufmgr, "batchbuffer", intel->maxBatchSize, 4096); - batch->map = batch->buffer; + drm_intel_gem_bo_map_gtt(batch->buf); + batch->map = batch->buf->virtual; + batch->size = intel->maxBatchSize; batch->ptr = batch->map; batch->reserved_space = BATCH_RESERVED; @@ -58,7 +60,6 @@ intel_batchbuffer_alloc(struct intel_context *intel) struct intel_batchbuffer *batch = calloc(sizeof(*batch), 1); batch->intel = intel; - batch->buffer = malloc(intel->maxBatchSize); intel_batchbuffer_reset(batch); return batch; @@ -67,8 +68,11 @@ intel_batchbuffer_alloc(struct intel_context *intel) void intel_batchbuffer_free(struct intel_batchbuffer *batch) { - free (batch->buffer); - drm_intel_bo_unreference(batch->buf); + if (batch->map) { + drm_intel_gem_bo_unmap_gtt(batch->buf); + batch->map = NULL; + } + dri_bo_unreference(batch->buf); batch->buf = NULL; free(batch); } @@ -84,13 +88,7 @@ do_flush_locked(struct intel_batchbuffer *batch, GLuint used) int ret = 0; int x_off = 0, y_off = 0; - drm_intel_bo_subdata(batch->buf, 0, used, batch->buffer); - if (batch->state_batch_offset != batch->size) { - drm_intel_bo_subdata(batch->buf, - batch->state_batch_offset, - batch->size - batch->state_batch_offset, - batch->buffer + batch->state_batch_offset); - } + drm_intel_gem_bo_unmap_gtt(batch->buf); batch->ptr = NULL; @@ -99,7 +97,7 @@ do_flush_locked(struct intel_batchbuffer *batch, GLuint used) (x_off & 0xffff) | (y_off << 16)); } - if (INTEL_DEBUG & DEBUG_BATCH) { + if 
(unlikely(INTEL_DEBUG & DEBUG_BATCH)) { drm_intel_bo_map(batch->buf, GL_FALSE); intel_decode(batch->buf->virtual, used / 4, batch->buf->offset, intel->intelScreen->deviceID, GL_TRUE); @@ -130,7 +128,7 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, if (used == 0) return; - if (INTEL_DEBUG & DEBUG_BATCH) + if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line, used); @@ -174,7 +172,7 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, do_flush_locked(batch, used); - if (INTEL_DEBUG & DEBUG_SYNC) { + if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) { fprintf(stderr, "waiting for idle\n"); drm_intel_bo_map(batch->buf, GL_TRUE); drm_intel_bo_unmap(batch->buf); diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index ae53f455117..428c027c2f1 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -17,8 +17,6 @@ struct intel_batchbuffer drm_intel_bo *buf; - GLubyte *buffer; - GLubyte *map; GLubyte *ptr; diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index a74e21720fb..c2917e9b07e 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -483,8 +483,11 @@ intel_emit_linear_blit(struct intel_context *intel, /* Blits are in a different ringbuffer so we don't use them. */ assert(intel->gen < 6); - /* The pitch is a signed value. */ - pitch = MIN2(size, (1 << 15) - 1); + /* The pitch given to the GPU must be DWORD aligned, and + * we want width to match pitch. 
Max width is (1 << 15 - 1), + * rounding that down to the nearest DWORD is 1 << 15 - 4 + */ + pitch = MIN2(size, (1 << 15) - 4); height = size / pitch; ok = intelEmitCopyBlit(intel, 1, pitch, src_bo, src_offset, I915_TILING_NONE, @@ -499,6 +502,7 @@ intel_emit_linear_blit(struct intel_context *intel, dst_offset += pitch * height; size -= pitch * height; assert (size < (1 << 15)); + assert ((size & 3) == 0); /* Pitch must be DWORD aligned */ if (size != 0) { ok = intelEmitCopyBlit(intel, 1, size, src_bo, src_offset, I915_TILING_NONE, diff --git a/src/mesa/drivers/dri/intel/intel_chipset.h b/src/mesa/drivers/dri/intel/intel_chipset.h index 1e7ceed32a2..4fecdbed203 100644 --- a/src/mesa/drivers/dri/intel/intel_chipset.h +++ b/src/mesa/drivers/dri/intel/intel_chipset.h @@ -67,6 +67,7 @@ #define PCI_CHIP_G45_G 0x2E22 #define PCI_CHIP_G41_G 0x2E32 #define PCI_CHIP_B43_G 0x2E42 +#define PCI_CHIP_B43_G1 0x2E92 #define PCI_CHIP_ILD_G 0x0042 #define PCI_CHIP_ILM_G 0x0046 @@ -93,7 +94,8 @@ devid == PCI_CHIP_Q45_G || \ devid == PCI_CHIP_G45_G || \ devid == PCI_CHIP_G41_G || \ - devid == PCI_CHIP_B43_G) + devid == PCI_CHIP_B43_G || \ + devid == PCI_CHIP_B43_G1) #define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM) #define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid)) diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c index d7814635b72..fa451f0045e 100644 --- a/src/mesa/drivers/dri/intel/intel_clear.c +++ b/src/mesa/drivers/dri/intel/intel_clear.c @@ -58,6 +58,21 @@ static const char *buffer_names[] = { [BUFFER_COLOR7] = "color7", }; +static void +debug_mask(const char *name, GLbitfield mask) +{ + GLuint i; + + if (unlikely(INTEL_DEBUG & DEBUG_BLIT)) { + DBG("%s clear:", name); + for (i = 0; i < BUFFER_COUNT; i++) { + if (mask & (1 << i)) + DBG(" %s", buffer_names[i]); + } + DBG("\n"); + } +} + /** * Called by ctx->Driver.Clear. 
*/ @@ -70,7 +85,6 @@ intelClear(struct gl_context *ctx, GLbitfield mask) GLbitfield blit_mask = 0; GLbitfield swrast_mask = 0; struct gl_framebuffer *fb = ctx->DrawBuffer; - GLuint i; if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) { intel->front_buffer_dirty = GL_TRUE; @@ -162,39 +176,17 @@ intelClear(struct gl_context *ctx, GLbitfield mask) } if (blit_mask) { - if (INTEL_DEBUG & DEBUG_BLIT) { - DBG("blit clear:"); - for (i = 0; i < BUFFER_COUNT; i++) { - if (blit_mask & (1 << i)) - DBG(" %s", buffer_names[i]); - } - DBG("\n"); - } + debug_mask("blit", blit_mask); intelClearWithBlit(ctx, blit_mask); } if (tri_mask) { - if (INTEL_DEBUG & DEBUG_BLIT) { - DBG("tri clear:"); - for (i = 0; i < BUFFER_COUNT; i++) { - if (tri_mask & (1 << i)) - DBG(" %s", buffer_names[i]); - } - DBG("\n"); - } - + debug_mask("tri", tri_mask); _mesa_meta_Clear(&intel->ctx, tri_mask); } if (swrast_mask) { - if (INTEL_DEBUG & DEBUG_BLIT) { - DBG("swrast clear:"); - for (i = 0; i < BUFFER_COUNT; i++) { - if (swrast_mask & (1 << i)) - DBG(" %s", buffer_names[i]); - } - DBG("\n"); - } + debug_mask("swrast", swrast_mask); _swrast_Clear(ctx, swrast_mask); } } diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 7ace50bde97..152cdcaf37d 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -155,6 +155,7 @@ intelGetString(struct gl_context * ctx, GLenum name) chipset = "Intel(R) G41"; break; case PCI_CHIP_B43_G: + case PCI_CHIP_B43_G1: chipset = "Intel(R) B43"; break; case PCI_CHIP_ILD_G: @@ -249,7 +250,7 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) * thus ignore the invalidate. 
*/ drawable->lastStamp = drawable->dri2.stamp; - if (INTEL_DEBUG & DEBUG_DRI) + if (unlikely(INTEL_DEBUG & DEBUG_DRI)) fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable); screen = intel->intelScreen->driScrnPriv; @@ -378,14 +379,14 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) if (rb->region && rb->region->name == buffers[i].name) continue; - if (INTEL_DEBUG & DEBUG_DRI) + if (unlikely(INTEL_DEBUG & DEBUG_DRI)) fprintf(stderr, "attaching buffer %d, at %d, cpp %d, pitch %d\n", buffers[i].name, buffers[i].attachment, buffers[i].cpp, buffers[i].pitch); if (buffers[i].attachment == __DRI_BUFFER_STENCIL && depth_region) { - if (INTEL_DEBUG & DEBUG_DRI) + if (unlikely(INTEL_DEBUG & DEBUG_DRI)) fprintf(stderr, "(reusing depth buffer as stencil)\n"); intel_region_reference(&region, depth_region); } diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 46d10d74ba3..9d5139c0000 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -98,6 +98,16 @@ extern void intelFallback(struct intel_context *intel, GLbitfield bit, #define INTEL_MAX_FIXUP 64 +#ifndef likely +#ifdef __GNUC__ +#define likely(expr) (__builtin_expect(expr, 1)) +#define unlikely(expr) (__builtin_expect(expr, 0)) +#else +#define likely(expr) (expr) +#define unlikely(expr) (expr) +#endif +#endif + struct intel_sync_object { struct gl_sync_object Base; @@ -180,9 +190,6 @@ struct intel_context } prim; GLuint stats_wm; - GLboolean locked; - char *prevLockFile; - int prevLockLine; /* Offsets of fields within the current vertex: */ @@ -359,10 +366,15 @@ extern int INTEL_DEBUG; #define DEBUG_CLIP 0x8000000 #define DBG(...) do { \ - if (INTEL_DEBUG & FILE_DEBUG_FLAG) \ + if (unlikely(INTEL_DEBUG & FILE_DEBUG_FLAG)) \ printf(__VA_ARGS__); \ } while(0) +#define fallback_debug(...) 
do { \ + if (unlikely(INTEL_DEBUG & DEBUG_FALLBACKS)) \ + printf(__VA_ARGS__); \ +} while(0) + #define PCI_CHIP_845_G 0x2562 #define PCI_CHIP_I830_M 0x3577 #define PCI_CHIP_I855_GM 0x3582 diff --git a/src/mesa/drivers/dri/intel/intel_pixel.c b/src/mesa/drivers/dri/intel/intel_pixel.c index 60583ef4c0d..d5c35775ce4 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel.c +++ b/src/mesa/drivers/dri/intel/intel_pixel.c @@ -147,10 +147,9 @@ intel_check_blit_format(struct intel_region * region, return GL_TRUE; } - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s: bad format for blit (cpp %d, type %s format %s)\n", - __FUNCTION__, region->cpp, - _mesa_lookup_enum_by_nr(type), _mesa_lookup_enum_by_nr(format)); + DBG("%s: bad format for blit (cpp %d, type %s format %s)\n", + __FUNCTION__, region->cpp, + _mesa_lookup_enum_by_nr(type), _mesa_lookup_enum_by_nr(format)); return GL_FALSE; } diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c index 63fb4b37b18..e7356a6da0d 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c @@ -113,9 +113,8 @@ static GLuint get_bitmap_rect(GLsizei width, GLsizei height, GLint incr; GLuint count = 0; - if (INTEL_DEBUG & DEBUG_PIXEL) - printf("%s %d,%d %dx%d bitmap %dx%d skip %d src_offset %d mask %d\n", - __FUNCTION__, x,y,w,h,width,height,unpack->SkipPixels, src_offset, mask); + DBG("%s %d,%d %dx%d bitmap %dx%d skip %d src_offset %d mask %d\n", + __FUNCTION__, x,y,w,h,width,height,unpack->SkipPixels, src_offset, mask); if (invert) { first = h-1; @@ -285,7 +284,7 @@ do_blit_bitmap( struct gl_context *ctx, } out: - if (INTEL_DEBUG & DEBUG_SYNC) + if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) intel_batchbuffer_flush(intel->batch); if (_mesa_is_bufferobj(unpack->BufferObj)) { @@ -299,6 +298,7 @@ out: return GL_TRUE; } + /* There are a large number of possible ways to implement bitmap on * this hardware, most of them have some sort of 
drawback. Here are a * few that spring to mind: diff --git a/src/mesa/drivers/dri/intel/intel_pixel_copy.c b/src/mesa/drivers/dri/intel/intel_pixel_copy.c index c6b36ed4291..a7ca780e944 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_copy.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_copy.c @@ -119,8 +119,7 @@ do_blit_copypixels(struct gl_context * ctx, GLboolean flip = GL_FALSE; if (type == GL_DEPTH || type == GL_STENCIL) { - if (INTEL_DEBUG & DEBUG_FALLBACKS) - fprintf(stderr, "glCopyPixels() fallback: GL_DEPTH || GL_STENCIL\n"); + fallback_debug("glCopyPixels() fallback: GL_DEPTH || GL_STENCIL\n"); return GL_FALSE; } @@ -203,8 +202,7 @@ intelCopyPixels(struct gl_context * ctx, GLsizei width, GLsizei height, GLint destx, GLint desty, GLenum type) { - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s\n", __FUNCTION__); + DBG("%s\n", __FUNCTION__); if (do_blit_copypixels(ctx, srcx, srcy, width, height, destx, desty, type)) return; diff --git a/src/mesa/drivers/dri/intel/intel_pixel_read.c b/src/mesa/drivers/dri/intel/intel_pixel_read.c index b249f9a5a0b..54da29236d2 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_read.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_read.c @@ -42,6 +42,8 @@ #include "intel_pixel.h" #include "intel_buffer_objects.h" +#define FILE_DEBUG_FLAG DEBUG_PIXEL + /* For many applications, the new ability to pull the source buffers * back out of the GTT and then do the packing/conversion operations * in software will be as much of an improvement as trying to get the @@ -79,8 +81,7 @@ do_blit_readpixels(struct gl_context * ctx, GLboolean all; GLint dst_x, dst_y; - if (INTEL_DEBUG & DEBUG_PIXEL) - printf("%s\n", __FUNCTION__); + DBG("%s\n", __FUNCTION__); if (!src) return GL_FALSE; @@ -88,22 +89,19 @@ do_blit_readpixels(struct gl_context * ctx, if (!_mesa_is_bufferobj(pack->BufferObj)) { /* PBO only for now: */ - if (INTEL_DEBUG & DEBUG_PIXEL) - printf("%s - not PBO\n", __FUNCTION__); + DBG("%s - not PBO\n", __FUNCTION__); return 
GL_FALSE; } if (ctx->_ImageTransferState || !intel_check_blit_format(src, format, type)) { - if (INTEL_DEBUG & DEBUG_PIXEL) - printf("%s - bad format for blit\n", __FUNCTION__); + DBG("%s - bad format for blit\n", __FUNCTION__); return GL_FALSE; } if (pack->Alignment != 1 || pack->SwapBytes || pack->LsbFirst) { - if (INTEL_DEBUG & DEBUG_PIXEL) - printf("%s: bad packing params\n", __FUNCTION__); + DBG("%s: bad packing params\n", __FUNCTION__); return GL_FALSE; } @@ -113,8 +111,7 @@ do_blit_readpixels(struct gl_context * ctx, rowLength = width; if (pack->Invert) { - if (INTEL_DEBUG & DEBUG_PIXEL) - printf("%s: MESA_PACK_INVERT not done yet\n", __FUNCTION__); + DBG("%s: MESA_PACK_INVERT not done yet\n", __FUNCTION__); return GL_FALSE; } else { @@ -158,8 +155,7 @@ do_blit_readpixels(struct gl_context * ctx, return GL_FALSE; } - if (INTEL_DEBUG & DEBUG_PIXEL) - printf("%s - DONE\n", __FUNCTION__); + DBG("%s - DONE\n", __FUNCTION__); return GL_TRUE; } @@ -173,8 +169,7 @@ intelReadPixels(struct gl_context * ctx, struct intel_context *intel = intel_context(ctx); GLboolean dirty; - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s\n", __FUNCTION__); + DBG("%s\n", __FUNCTION__); intel_flush(ctx); @@ -188,8 +183,7 @@ intelReadPixels(struct gl_context * ctx, (ctx, x, y, width, height, format, type, pack, pixels)) return; - if (INTEL_DEBUG & DEBUG_PIXEL) - printf("%s: fallback to swrast\n", __FUNCTION__); + fallback_debug("%s: fallback to swrast\n", __FUNCTION__); /* Update Mesa state before calling down into _swrast_ReadPixels, as * the spans code requires the computed buffer states to be up to date, diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c index 3d9a2549db0..2c21ea0576e 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.c +++ b/src/mesa/drivers/dri/intel/intel_tex.c @@ -61,88 +61,6 @@ intelFreeTextureImageData(struct gl_context * ctx, struct gl_texture_image *texI } } - -/* The system memcpy (at least on ubuntu 5.10) has 
problems copying - * to agp (writecombined) memory from a source which isn't 64-byte - * aligned - there is a 4x performance falloff. - * - * The x86 __memcpy is immune to this but is slightly slower - * (10%-ish) than the system memcpy. - * - * The sse_memcpy seems to have a slight cliff at 64/32 bytes, but - * isn't much faster than x86_memcpy for agp copies. - * - * TODO: switch dynamically. - */ -static void * -do_memcpy(void *dest, const void *src, size_t n) -{ - if ((((unsigned long) src) & 63) || (((unsigned long) dest) & 63)) { - return __memcpy(dest, src, n); - } - else - return memcpy(dest, src, n); -} - - -#if DO_DEBUG && !defined(__ia64__) - -#ifndef __x86_64__ -static unsigned -fastrdtsc(void) -{ - unsigned eax; - __asm__ volatile ("\t" - "pushl %%ebx\n\t" - "cpuid\n\t" ".byte 0x0f, 0x31\n\t" - "popl %%ebx\n":"=a" (eax) - :"0"(0) - :"ecx", "edx", "cc"); - - return eax; -} -#else -static unsigned -fastrdtsc(void) -{ - unsigned eax; - __asm__ volatile ("\t" "cpuid\n\t" ".byte 0x0f, 0x31\n\t":"=a" (eax) - :"0"(0) - :"ecx", "edx", "ebx", "cc"); - - return eax; -} -#endif - -static unsigned -time_diff(unsigned t, unsigned t2) -{ - return ((t < t2) ? 
t2 - t : 0xFFFFFFFFU - (t - t2 - 1)); -} - - -static void * -timed_memcpy(void *dest, const void *src, size_t n) -{ - void *ret; - unsigned t1, t2; - double rate; - - if ((((unsigned) src) & 63) || (((unsigned) dest) & 63)) - printf("Warning - non-aligned texture copy!\n"); - - t1 = fastrdtsc(); - ret = do_memcpy(dest, src, n); - t2 = fastrdtsc(); - - rate = time_diff(t1, t2); - rate /= (double) n; - printf("timed_memcpy: %u %u --> %f clocks/byte\n", t1, t2, rate); - return ret; -} -#endif /* DO_DEBUG */ - - /** * Called via ctx->Driver.GenerateMipmap() * This is basically a wrapper for _mesa_meta_GenerateMipmap() which checks @@ -158,8 +76,7 @@ intelGenerateMipmap(struct gl_context *ctx, GLenum target, struct intel_context *intel = intel_context(ctx); struct intel_texture_object *intelObj = intel_texture_object(texObj); - if (INTEL_DEBUG & DEBUG_FALLBACKS) - fprintf(stderr, "%s - fallback to swrast\n", __FUNCTION__); + fallback_debug("%s - fallback to swrast\n", __FUNCTION__); intel_tex_map_level_images(intel, intelObj, texObj->BaseLevel); _mesa_generate_mipmap(ctx, target, texObj); @@ -203,11 +120,4 @@ intelInitTextureFuncs(struct dd_function_table *functions) functions->NewTextureImage = intelNewTextureImage; functions->DeleteTexture = intelDeleteTextureObject; functions->FreeTexImageData = intelFreeTextureImageData; - -#if DO_DEBUG && !defined(__ia64__) - if (INTEL_DEBUG & DEBUG_BUFMGR) - functions->TextureMemCpy = timed_memcpy; - else -#endif - functions->TextureMemCpy = do_memcpy; } diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index 2d046fd52d9..284ba19e8a3 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -105,16 +105,15 @@ do_copy_texsubimage(struct intel_context *intel, const struct intel_region *src = get_teximage_source(intel, internalFormat); if (!intelImage->mt || !src || !src->buffer) { - if (INTEL_DEBUG & DEBUG_FALLBACKS) + if 
(unlikely(INTEL_DEBUG & DEBUG_FALLBACKS)) fprintf(stderr, "%s fail %p %p (0x%08x)\n", __FUNCTION__, intelImage->mt, src, internalFormat); return GL_FALSE; } if (intelImage->mt->cpp != src->cpp) { - if (INTEL_DEBUG & DEBUG_FALLBACKS) - fprintf(stderr, "%s fail %d vs %d cpp\n", - __FUNCTION__, intelImage->mt->cpp, src->cpp); + fallback_debug("%s fail %d vs %d cpp\n", + __FUNCTION__, intelImage->mt->cpp, src->cpp); return GL_FALSE; } @@ -212,8 +211,7 @@ intelCopyTexImage1D(struct gl_context * ctx, GLenum target, GLint level, return; fail: - if (INTEL_DEBUG & DEBUG_FALLBACKS) - fprintf(stderr, "%s - fallback to swrast\n", __FUNCTION__); + fallback_debug("%s - fallback to swrast\n", __FUNCTION__); _mesa_meta_CopyTexImage1D(ctx, target, level, internalFormat, x, y, width, border); } @@ -261,8 +259,7 @@ intelCopyTexImage2D(struct gl_context * ctx, GLenum target, GLint level, return; fail: - if (INTEL_DEBUG & DEBUG_FALLBACKS) - fprintf(stderr, "%s - fallback to swrast\n", __FUNCTION__); + fallback_debug("%s - fallback to swrast\n", __FUNCTION__); _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y, width, height, border); } @@ -287,8 +284,7 @@ intelCopyTexSubImage1D(struct gl_context * ctx, GLenum target, GLint level, if (!do_copy_texsubimage(intel_context(ctx), target, intel_texture_image(texImage), internalFormat, xoffset, 0, x, y, width, 1)) { - if (INTEL_DEBUG & DEBUG_FALLBACKS) - fprintf(stderr, "%s - fallback to swrast\n", __FUNCTION__); + fallback_debug("%s - fallback to swrast\n", __FUNCTION__); _mesa_meta_CopyTexSubImage1D(ctx, target, level, xoffset, x, y, width); } } @@ -314,8 +310,7 @@ intelCopyTexSubImage2D(struct gl_context * ctx, GLenum target, GLint level, internalFormat, xoffset, yoffset, x, y, width, height)) { - if (INTEL_DEBUG & DEBUG_FALLBACKS) - fprintf(stderr, "%s - fallback to swrast\n", __FUNCTION__); + fallback_debug("%s - fallback to swrast\n", __FUNCTION__); _mesa_meta_CopyTexSubImage2D(ctx, target, level, xoffset, yoffset, x, y, 
width, height); } diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index 35f3d7d3829..50fe9bd9f33 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -66,7 +66,6 @@ guess_and_alloc_mipmap_tree(struct intel_context *intel, GLuint width = intelImage->base.Width; GLuint height = intelImage->base.Height; GLuint depth = intelImage->base.Depth; - GLuint l2width, l2height, l2depth; GLuint i, comp_byte = 0; GLuint texelBytes; @@ -114,10 +113,7 @@ guess_and_alloc_mipmap_tree(struct intel_context *intel, lastLevel = firstLevel; } else { - l2width = logbase2(width); - l2height = logbase2(height); - l2depth = logbase2(depth); - lastLevel = firstLevel + MAX2(MAX2(l2width, l2height), l2depth); + lastLevel = firstLevel + logbase2(MAX2(MAX2(width, height), depth)); } assert(!intelObj->mt); @@ -347,21 +343,6 @@ intelTexImage(struct gl_context * ctx, texImage->Data = NULL; } - /* If this is the only texture image in the tree, could call - * bmBufferData with NULL data to free the old block and avoid - * waiting on any outstanding fences. 
- */ - if (intelObj->mt && - intelObj->mt->first_level == level && - intelObj->mt->last_level == level && - intelObj->mt->target != GL_TEXTURE_CUBE_MAP_ARB && - !intel_miptree_match_image(intelObj->mt, &intelImage->base)) { - - DBG("release it\n"); - intel_miptree_release(intel, &intelObj->mt); - assert(!intelObj->mt); - } - if (!intelObj->mt) { guess_and_alloc_mipmap_tree(intel, intelObj, intelImage, pixels == NULL); if (!intelObj->mt) { diff --git a/src/mesa/drivers/dri/nouveau/Makefile b/src/mesa/drivers/dri/nouveau/Makefile index 7be19b26fda..3b506a91ffa 100644 --- a/src/mesa/drivers/dri/nouveau/Makefile +++ b/src/mesa/drivers/dri/nouveau/Makefile @@ -19,6 +19,8 @@ DRIVER_SOURCES = \ nouveau_bo_state.c \ nouveau_texture.c \ nouveau_surface.c \ + nouveau_scratch.c \ + nouveau_array.c \ nv04_context.c \ nv04_render.c \ nv04_state_fb.c \ diff --git a/src/mesa/drivers/dri/nouveau/nouveau_array.c b/src/mesa/drivers/dri/nouveau/nouveau_array.c new file mode 100644 index 00000000000..17e6d163a02 --- /dev/null +++ b/src/mesa/drivers/dri/nouveau/nouveau_array.c @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2009-2010 Francisco Jerez. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "main/bufferobj.h" +#include "nouveau_driver.h" +#include "nouveau_array.h" +#include "nouveau_bufferobj.h" + +static void +get_array_extract(struct nouveau_array *a, extract_u_t *extract_u, + extract_f_t *extract_f) +{ +#define EXTRACT(in_t, out_t, k) \ + ({ \ + auto out_t f(struct nouveau_array *, int, int); \ + out_t f(struct nouveau_array *a, int i, int j) { \ + in_t x = ((in_t *)(a->buf + i * a->stride))[j]; \ + \ + return (out_t)x / (k); \ + }; \ + f; \ + }); + + switch (a->type) { + case GL_BYTE: + *extract_u = EXTRACT(char, unsigned, 1); + *extract_f = EXTRACT(char, float, SCHAR_MAX); + break; + case GL_UNSIGNED_BYTE: + *extract_u = EXTRACT(unsigned char, unsigned, 1); + *extract_f = EXTRACT(unsigned char, float, UCHAR_MAX); + break; + case GL_SHORT: + *extract_u = EXTRACT(short, unsigned, 1); + *extract_f = EXTRACT(short, float, SHRT_MAX); + break; + case GL_UNSIGNED_SHORT: + *extract_u = EXTRACT(unsigned short, unsigned, 1); + *extract_f = EXTRACT(unsigned short, float, USHRT_MAX); + break; + case GL_INT: + *extract_u = EXTRACT(int, unsigned, 1); + *extract_f = EXTRACT(int, float, INT_MAX); + break; + case GL_UNSIGNED_INT: + *extract_u = EXTRACT(unsigned int, unsigned, 1); + *extract_f = EXTRACT(unsigned int, float, UINT_MAX); + break; + case GL_FLOAT: + *extract_u = EXTRACT(float, unsigned, 1.0 / UINT_MAX); + *extract_f = EXTRACT(float, float, 1); + break; + default: + assert(0); + } +} + +void +nouveau_init_array(struct nouveau_array *a, 
int attr, int stride, + int fields, int type, struct gl_buffer_object *obj, + const void *ptr, GLboolean map) +{ + a->attr = attr; + a->stride = stride; + a->fields = fields; + a->type = type; + a->buf = NULL; + + if (obj) { + if (nouveau_bufferobj_hw(obj)) { + struct nouveau_bufferobj *nbo = + to_nouveau_bufferobj(obj); + + nouveau_bo_ref(nbo->bo, &a->bo); + a->offset = (intptr_t)ptr; + + if (map) { + nouveau_bo_map(a->bo, NOUVEAU_BO_RD); + a->buf = a->bo->map + a->offset; + } + + } else { + nouveau_bo_ref(NULL, &a->bo); + a->offset = 0; + + if (map) + a->buf = ADD_POINTERS( + nouveau_bufferobj_sys(obj), ptr); + } + } + + if (a->buf) + get_array_extract(a, &a->extract_u, &a->extract_f); +} + +void +nouveau_deinit_array(struct nouveau_array *a) +{ + if (a->bo) { + if (a->bo->map) + nouveau_bo_unmap(a->bo); + } + + a->buf = NULL; + a->fields = 0; +} + +void +nouveau_cleanup_array(struct nouveau_array *a) +{ + nouveau_deinit_array(a); + nouveau_bo_ref(NULL, &a->bo); +} diff --git a/src/mesa/drivers/dri/nouveau/nouveau_array.h b/src/mesa/drivers/dri/nouveau/nouveau_array.h new file mode 100644 index 00000000000..ad3d69b33d9 --- /dev/null +++ b/src/mesa/drivers/dri/nouveau/nouveau_array.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2009-2010 Francisco Jerez. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __NOUVEAU_ARRAY_H__ +#define __NOUVEAU_ARRAY_H__ + +struct nouveau_array; + +typedef unsigned (*extract_u_t)(struct nouveau_array *, int, int); +typedef float (*extract_f_t)(struct nouveau_array *, int, int); + +struct nouveau_array { + int attr; + int stride, fields, type; + + struct nouveau_bo *bo; + unsigned offset; + const void *buf; + + extract_u_t extract_u; + extract_f_t extract_f; +}; + +void +nouveau_init_array(struct nouveau_array *a, int attr, int stride, + int fields, int type, struct gl_buffer_object *obj, + const void *ptr, GLboolean map); + +void +nouveau_deinit_array(struct nouveau_array *a); + +void +nouveau_cleanup_array(struct nouveau_array *a); + +#endif diff --git a/src/mesa/drivers/dri/nouveau/nouveau_bo_state.c b/src/mesa/drivers/dri/nouveau/nouveau_bo_state.c index f31772fe1d1..7eef8c1ee81 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_bo_state.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_bo_state.c @@ -126,13 +126,13 @@ void nouveau_bo_context_reset(struct nouveau_bo_context *bctx) { struct nouveau_bo_state *s = &to_nouveau_context(bctx->ctx)->bo; - int i; - - for (i = 0; i < bctx->count; i++) - nouveau_bo_ref(NULL, &bctx->marker[i].bo); + int i, n = bctx->count; - s->count -= bctx->count; + s->count -= n; bctx->count = 0; + + for (i = 0; i < n; i++) + nouveau_bo_ref(NULL, &bctx->marker[i].bo); } GLboolean diff --git a/src/mesa/drivers/dri/nouveau/nouveau_bo_state.h b/src/mesa/drivers/dri/nouveau/nouveau_bo_state.h index 
6119a8336e3..388a16a56ea 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_bo_state.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_bo_state.h @@ -29,7 +29,7 @@ enum { NOUVEAU_BO_CONTEXT_FRAMEBUFFER = 0, - NOUVEAU_BO_CONTEXT_LMA_DEPTH, + NOUVEAU_BO_CONTEXT_HIERZ, NOUVEAU_BO_CONTEXT_SURFACE, NOUVEAU_BO_CONTEXT_TEXTURE0, NOUVEAU_BO_CONTEXT_TEXTURE1, diff --git a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c index ad6e5bd805a..e60b91f64be 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c @@ -30,6 +30,23 @@ #include "main/bufferobj.h" +static inline char * +get_bufferobj_map(struct gl_buffer_object *obj, unsigned flags) +{ + struct nouveau_bufferobj *nbo = to_nouveau_bufferobj(obj); + void *map = NULL; + + if (nbo->sys) { + map = nbo->sys; + } else if (nbo->bo) { + nouveau_bo_map(nbo->bo, flags); + map = nbo->bo->map; + nouveau_bo_unmap(nbo->bo); + } + + return map; +} + static struct gl_buffer_object * nouveau_bufferobj_new(struct gl_context *ctx, GLuint buffer, GLenum target) { @@ -50,6 +67,7 @@ nouveau_bufferobj_del(struct gl_context *ctx, struct gl_buffer_object *obj) struct nouveau_bufferobj *nbo = to_nouveau_bufferobj(obj); nouveau_bo_ref(NULL, &nbo->bo); + FREE(nbo->sys); FREE(nbo); } @@ -64,18 +82,27 @@ nouveau_bufferobj_data(struct gl_context *ctx, GLenum target, GLsizeiptrARB size obj->Size = size; obj->Usage = usage; + /* Free previous storage */ nouveau_bo_ref(NULL, &nbo->bo); - ret = nouveau_bo_new(context_dev(ctx), - NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, - size, &nbo->bo); - assert(!ret); - - if (data) { - nouveau_bo_map(nbo->bo, NOUVEAU_BO_WR); - memcpy(nbo->bo->map, data, size); - nouveau_bo_unmap(nbo->bo); + FREE(nbo->sys); + + if (target == GL_ELEMENT_ARRAY_BUFFER_ARB || + (size < 512 && usage == GL_DYNAMIC_DRAW_ARB) || + context_chipset(ctx) < 0x10) { + /* Heuristic: keep it in system ram */ + nbo->sys = MALLOC(size); + + } else { + /* 
Get a hardware BO */ + ret = nouveau_bo_new(context_dev(ctx), + NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, + size, &nbo->bo); + assert(!ret); } + if (data) + memcpy(get_bufferobj_map(obj, NOUVEAU_BO_WR), data, size); + return GL_TRUE; } @@ -84,11 +111,7 @@ nouveau_bufferobj_subdata(struct gl_context *ctx, GLenum target, GLintptrARB off GLsizeiptrARB size, const GLvoid *data, struct gl_buffer_object *obj) { - struct nouveau_bufferobj *nbo = to_nouveau_bufferobj(obj); - - nouveau_bo_map(nbo->bo, NOUVEAU_BO_WR); - memcpy(nbo->bo->map + offset, data, size); - nouveau_bo_unmap(nbo->bo); + memcpy(get_bufferobj_map(obj, NOUVEAU_BO_WR) + offset, data, size); } static void @@ -96,44 +119,48 @@ nouveau_bufferobj_get_subdata(struct gl_context *ctx, GLenum target, GLintptrARB GLsizeiptrARB size, GLvoid *data, struct gl_buffer_object *obj) { - struct nouveau_bufferobj *nbo = to_nouveau_bufferobj(obj); - - nouveau_bo_map(nbo->bo, NOUVEAU_BO_RD); - memcpy(data, nbo->bo->map + offset, size); - nouveau_bo_unmap(nbo->bo); + memcpy(data, get_bufferobj_map(obj, NOUVEAU_BO_RD) + offset, size); } static void * nouveau_bufferobj_map(struct gl_context *ctx, GLenum target, GLenum access, struct gl_buffer_object *obj) { - return ctx->Driver.MapBufferRange(ctx, target, 0, obj->Size, access, + unsigned flags = 0; + + if (access == GL_READ_ONLY_ARB || + access == GL_READ_WRITE_ARB) + flags |= GL_MAP_READ_BIT; + if (access == GL_WRITE_ONLY_ARB || + access == GL_READ_WRITE_ARB) + flags |= GL_MAP_WRITE_BIT; + + return ctx->Driver.MapBufferRange(ctx, target, 0, obj->Size, flags, obj); } static void * nouveau_bufferobj_map_range(struct gl_context *ctx, GLenum target, GLintptr offset, - GLsizeiptr length, GLenum access, + GLsizeiptr length, GLbitfield access, struct gl_buffer_object *obj) { - struct nouveau_bufferobj *nbo = to_nouveau_bufferobj(obj); - uint32_t flags = 0; + unsigned flags = 0; + char *map; assert(!obj->Pointer); - if (!nbo->bo) - return NULL; - - if (access == GL_READ_ONLY_ARB || - access 
== GL_READ_WRITE_ARB) + if (access & GL_MAP_READ_BIT) flags |= NOUVEAU_BO_RD; - if (access == GL_WRITE_ONLY_ARB || - access == GL_READ_WRITE_ARB) + if (access & GL_MAP_WRITE_BIT) flags |= NOUVEAU_BO_WR; + if (access & GL_MAP_UNSYNCHRONIZED_BIT) + flags |= NOUVEAU_BO_NOSYNC; - nouveau_bo_map_range(nbo->bo, offset, length, flags); + map = get_bufferobj_map(obj, flags); + if (!map) + return NULL; - obj->Pointer = nbo->bo->map; + obj->Pointer = map + offset; obj->Offset = offset; obj->Length = length; obj->AccessFlags = access; @@ -144,12 +171,8 @@ nouveau_bufferobj_map_range(struct gl_context *ctx, GLenum target, GLintptr offs static GLboolean nouveau_bufferobj_unmap(struct gl_context *ctx, GLenum target, struct gl_buffer_object *obj) { - struct nouveau_bufferobj *nbo = to_nouveau_bufferobj(obj); - assert(obj->Pointer); - nouveau_bo_unmap(nbo->bo); - obj->Pointer = NULL; obj->Offset = 0; obj->Length = 0; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.h b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.h index acfc4cb9a90..01ef0bad0fd 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.h @@ -30,9 +30,16 @@ struct nouveau_bufferobj { struct gl_buffer_object base; struct nouveau_bo *bo; + void *sys; }; #define to_nouveau_bufferobj(x) ((struct nouveau_bufferobj *)(x)) +#define nouveau_bufferobj_hw(x) \ + (_mesa_is_bufferobj(x) ? to_nouveau_bufferobj(x)->bo : NULL) + +#define nouveau_bufferobj_sys(x) \ + (_mesa_is_bufferobj(x) ? to_nouveau_bufferobj(x)->sys : NULL) + void nouveau_bufferobj_functions_init(struct dd_function_table *functions); diff --git a/src/mesa/drivers/dri/nouveau/nouveau_class.h b/src/mesa/drivers/dri/nouveau/nouveau_class.h index d41d431f796..687b847797b 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_class.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_class.h @@ -4954,6 +4954,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define NV25TCL_DMA_IN_MEMORY5 0x000001a0 #define NV25TCL_DMA_IN_MEMORY8 0x000001ac #define NV25TCL_DMA_IN_MEMORY9 0x000001b0 +#define NV25TCL_HIERZ_PITCH 0x0000022c +#define NV25TCL_HIERZ_OFFSET 0x00000230 #endif /* NOUVEAU_REG_H */ diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.c b/src/mesa/drivers/dri/nouveau/nouveau_context.c index 0ace139b886..f80aaedb257 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_context.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_context.c @@ -119,6 +119,7 @@ nouveau_context_init(struct gl_context *ctx, struct nouveau_screen *screen, nouveau_state_init(ctx); nouveau_bo_state_init(ctx); + nouveau_scratch_init(ctx); _mesa_meta_init(ctx); _swrast_CreateContext(ctx); _vbo_CreateContext(ctx); @@ -163,6 +164,7 @@ nouveau_context_deinit(struct gl_context *ctx) if (nctx->hw.chan) nouveau_channel_free(&nctx->hw.chan); + nouveau_scratch_destroy(ctx); nouveau_bo_state_destroy(ctx); _mesa_free_context_data(ctx); } @@ -325,10 +327,12 @@ nouveau_fallback(struct gl_context *ctx, enum nouveau_fallback mode) nctx->fallback = MAX2(HWTNL, mode); - if (mode < SWRAST) + if (mode < SWRAST) { nouveau_state_emit(ctx); - else + nouveau_bo_state_emit(ctx); + } else { FIRE_RING(context_chan(ctx)); + } } static void @@ -365,5 +369,6 @@ nouveau_validate_framebuffer(struct gl_context *ctx) validate_framebuffer(dri_ctx, dri_read, &dri_ctx->dri2.read_stamp); - nouveau_state_emit(ctx); + if (ctx->NewState & _NEW_BUFFERS) + _mesa_update_state(ctx); } diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.h b/src/mesa/drivers/dri/nouveau/nouveau_context.h index 23a87256728..7ebc676379e 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_context.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_context.h @@ -30,6 +30,7 @@ #include "nouveau_screen.h" #include "nouveau_state.h" #include "nouveau_bo_state.h" +#include "nouveau_scratch.h" #include "nouveau_render.h" #include "main/bitset.h" @@ -67,6 +68,7 @@ struct nouveau_context { struct nouveau_hw_state hw; 
struct nouveau_bo_state bo; struct nouveau_render_state render; + struct nouveau_scratch_state scratch; struct { GLboolean clear_blocked; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_render.h b/src/mesa/drivers/dri/nouveau/nouveau_render.h index 81c6119fcc6..0539c377585 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_render.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_render.h @@ -28,46 +28,22 @@ #define __NOUVEAU_RENDER_H__ #include "vbo/vbo_context.h" - -struct nouveau_array_state; +#include "nouveau_array.h" typedef void (*dispatch_t)(struct gl_context *, unsigned int, int, unsigned int); -typedef unsigned (*extract_u_t)(struct nouveau_array_state *, int, int); -typedef float (*extract_f_t)(struct nouveau_array_state *, int, int); +typedef void (*emit_t)(struct gl_context *, struct nouveau_array *, const void *); struct nouveau_attr_info { int vbo_index; int imm_method; int imm_fields; - void (*emit)(struct gl_context *, struct nouveau_array_state *, const void *); -}; - -struct nouveau_array_state { - int attr; - int stride, fields, type; - - struct nouveau_bo *bo; - unsigned offset; - const void *buf; - - extract_u_t extract_u; - extract_f_t extract_f; -}; - -#define RENDER_SCRATCH_COUNT 2 -#define RENDER_SCRATCH_SIZE 2*1024*1024 - -struct nouveau_scratch_state { - struct nouveau_bo *bo[RENDER_SCRATCH_COUNT]; - - int index; - int offset; - void *buf; + emit_t emit; }; struct nouveau_swtnl_state { struct nouveau_bo *vbo; + unsigned offset; void *buf; unsigned vertex_count; GLenum primitive; @@ -79,8 +55,8 @@ struct nouveau_render_state { IMM } mode; - struct nouveau_array_state ib; - struct nouveau_array_state attrs[VERT_ATTRIB_MAX]; + struct nouveau_array ib; + struct nouveau_array attrs[VERT_ATTRIB_MAX]; /* Maps a HW VBO index or IMM emission order to an index in * the attrs array above (or -1 if unused). 
*/ @@ -89,10 +65,16 @@ struct nouveau_render_state { int attr_count; int vertex_size; - struct nouveau_scratch_state scratch; struct nouveau_swtnl_state swtnl; }; #define to_render_state(ctx) (&to_nouveau_context(ctx)->render) +#define FOR_EACH_ATTR(render, i, attr) \ + for (i = 0; attr = (render)->map[i], i < NUM_VERTEX_ATTRS; i++) + +#define FOR_EACH_BOUND_ATTR(render, i, attr) \ + for (i = 0; attr = (render)->map[i], i < render->attr_count; i++) \ + if (attr >= 0) + #endif diff --git a/src/mesa/drivers/dri/nouveau/nouveau_render_t.c b/src/mesa/drivers/dri/nouveau/nouveau_render_t.c index dd38c14aa7c..e0cf727d11d 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_render_t.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_render_t.c @@ -100,8 +100,8 @@ /* * Select an appropriate dispatch function for the given index buffer. */ -static void -get_array_dispatch(struct nouveau_array_state *a, dispatch_t *dispatch) +static dispatch_t +get_array_dispatch(struct nouveau_array *a) { if (!a->fields) { auto void f(struct gl_context *, unsigned int, int, unsigned int); @@ -114,7 +114,7 @@ get_array_dispatch(struct nouveau_array_state *a, dispatch_t *dispatch) EMIT_VBO(L, ctx, start, delta, n); }; - *dispatch = f; + return f; } else if (a->type == GL_UNSIGNED_INT) { auto void f(struct gl_context *, unsigned int, int, unsigned int); @@ -127,7 +127,7 @@ get_array_dispatch(struct nouveau_array_state *a, dispatch_t *dispatch) EMIT_VBO(I32, ctx, start, delta, n); }; - *dispatch = f; + return f; } else { auto void f(struct gl_context *, unsigned int, int, unsigned int); @@ -141,115 +141,11 @@ get_array_dispatch(struct nouveau_array_state *a, dispatch_t *dispatch) EMIT_VBO(I16, ctx, start, delta, n & ~1); }; - *dispatch = f; + return f; } } /* - * Select appropriate element extraction functions for the given - * array. 
- */ -static void -get_array_extract(struct nouveau_array_state *a, - extract_u_t *extract_u, extract_f_t *extract_f) -{ -#define EXTRACT(in_t, out_t, k) \ - ({ \ - auto out_t f(struct nouveau_array_state *, int, int); \ - out_t f(struct nouveau_array_state *a, int i, int j) { \ - in_t x = ((in_t *)(a->buf + i * a->stride))[j]; \ - \ - return (out_t)x / (k); \ - }; \ - f; \ - }); - - switch (a->type) { - case GL_BYTE: - *extract_u = EXTRACT(char, unsigned, 1); - *extract_f = EXTRACT(char, float, SCHAR_MAX); - break; - case GL_UNSIGNED_BYTE: - *extract_u = EXTRACT(unsigned char, unsigned, 1); - *extract_f = EXTRACT(unsigned char, float, UCHAR_MAX); - break; - case GL_SHORT: - *extract_u = EXTRACT(short, unsigned, 1); - *extract_f = EXTRACT(short, float, SHRT_MAX); - break; - case GL_UNSIGNED_SHORT: - *extract_u = EXTRACT(unsigned short, unsigned, 1); - *extract_f = EXTRACT(unsigned short, float, USHRT_MAX); - break; - case GL_INT: - *extract_u = EXTRACT(int, unsigned, 1); - *extract_f = EXTRACT(int, float, INT_MAX); - break; - case GL_UNSIGNED_INT: - *extract_u = EXTRACT(unsigned int, unsigned, 1); - *extract_f = EXTRACT(unsigned int, float, UINT_MAX); - break; - case GL_FLOAT: - *extract_u = EXTRACT(float, unsigned, 1.0 / UINT_MAX); - *extract_f = EXTRACT(float, float, 1); - break; - - default: - assert(0); - } -} - -/* - * Returns a pointer to a chunk of <size> bytes long GART memory. <bo> - * will be updated with the buffer object the memory is located in. - * - * If <offset> is provided, it will be updated with the offset within - * <bo> of the allocated memory. Otherwise the returned memory will - * always be located right at the beginning of <bo>. 
- */ -static inline void * -get_scratch_vbo(struct gl_context *ctx, unsigned size, struct nouveau_bo **bo, - unsigned *offset) -{ - struct nouveau_scratch_state *scratch = &to_render_state(ctx)->scratch; - void *buf; - - if (scratch->buf && offset && - size <= RENDER_SCRATCH_SIZE - scratch->offset) { - nouveau_bo_ref(scratch->bo[scratch->index], bo); - - buf = scratch->buf + scratch->offset; - *offset = scratch->offset; - scratch->offset += size; - - } else if (size <= RENDER_SCRATCH_SIZE) { - scratch->index = (scratch->index + 1) % RENDER_SCRATCH_COUNT; - nouveau_bo_ref(scratch->bo[scratch->index], bo); - - nouveau_bo_map(*bo, NOUVEAU_BO_WR); - buf = scratch->buf = (*bo)->map; - nouveau_bo_unmap(*bo); - - if (offset) - *offset = 0; - scratch->offset = size; - - } else { - nouveau_bo_new(context_dev(ctx), - NOUVEAU_BO_MAP | NOUVEAU_BO_GART, 0, size, bo); - - nouveau_bo_map(*bo, NOUVEAU_BO_WR); - buf = (*bo)->map; - nouveau_bo_unmap(*bo); - - if (offset) - *offset = 0; - } - - return buf; -} - -/* * Returns how many vertices you can draw using <n> pushbuf dwords. 
*/ static inline unsigned @@ -277,6 +173,11 @@ get_max_vertices(struct gl_context *ctx, const struct _mesa_index_buffer *ib, case GL_UNSIGNED_BYTE: max_out = MAX_OUT_I16; break; + + default: + assert(0); + max_out = 0; + break; } } else { max_out = MAX_OUT_L; @@ -286,76 +187,26 @@ get_max_vertices(struct gl_context *ctx, const struct _mesa_index_buffer *ib, } } -#include "nouveau_vbo_t.c" -#include "nouveau_swtnl_t.c" - static void -TAG(emit_material)(struct gl_context *ctx, struct nouveau_array_state *a, +TAG(emit_material)(struct gl_context *ctx, struct nouveau_array *a, const void *v) { - const int attr = a->attr - VERT_ATTRIB_GENERIC0; - const int state = ((int []) { - NOUVEAU_STATE_MATERIAL_FRONT_AMBIENT, - NOUVEAU_STATE_MATERIAL_BACK_AMBIENT, - NOUVEAU_STATE_MATERIAL_FRONT_DIFFUSE, - NOUVEAU_STATE_MATERIAL_BACK_DIFFUSE, - NOUVEAU_STATE_MATERIAL_FRONT_SPECULAR, - NOUVEAU_STATE_MATERIAL_BACK_SPECULAR, - NOUVEAU_STATE_MATERIAL_FRONT_AMBIENT, - NOUVEAU_STATE_MATERIAL_BACK_AMBIENT, - NOUVEAU_STATE_MATERIAL_FRONT_SHININESS, - NOUVEAU_STATE_MATERIAL_BACK_SHININESS - }) [attr]; + int attr = a->attr - VERT_ATTRIB_GENERIC0; + int state = ((int []) { + NOUVEAU_STATE_MATERIAL_FRONT_AMBIENT, + NOUVEAU_STATE_MATERIAL_BACK_AMBIENT, + NOUVEAU_STATE_MATERIAL_FRONT_DIFFUSE, + NOUVEAU_STATE_MATERIAL_BACK_DIFFUSE, + NOUVEAU_STATE_MATERIAL_FRONT_SPECULAR, + NOUVEAU_STATE_MATERIAL_BACK_SPECULAR, + NOUVEAU_STATE_MATERIAL_FRONT_AMBIENT, + NOUVEAU_STATE_MATERIAL_BACK_AMBIENT, + NOUVEAU_STATE_MATERIAL_FRONT_SHININESS, + NOUVEAU_STATE_MATERIAL_BACK_SHININESS + }) [attr]; COPY_4V(ctx->Light.Material.Attrib[attr], (float *)v); _mesa_update_material(ctx, 1 << attr); context_drv(ctx)->emit[state](ctx, state); } - -static void -TAG(render_prims)(struct gl_context *ctx, const struct gl_client_array **arrays, - const struct _mesa_prim *prims, GLuint nr_prims, - const struct _mesa_index_buffer *ib, - GLboolean index_bounds_valid, - GLuint min_index, GLuint max_index) -{ - struct 
nouveau_context *nctx = to_nouveau_context(ctx); - - nouveau_validate_framebuffer(ctx); - - if (nctx->fallback == HWTNL) - TAG(vbo_render_prims)(ctx, arrays, prims, nr_prims, ib, - index_bounds_valid, min_index, max_index); - - if (nctx->fallback == SWTNL) - _tnl_vbo_draw_prims(ctx, arrays, prims, nr_prims, ib, - index_bounds_valid, min_index, max_index); -} - -void -TAG(render_init)(struct gl_context *ctx) -{ - struct nouveau_render_state *render = to_render_state(ctx); - struct nouveau_scratch_state *scratch = &render->scratch; - int ret, i; - - for (i = 0; i < RENDER_SCRATCH_COUNT; i++) { - ret = nouveau_bo_new(context_dev(ctx), - NOUVEAU_BO_MAP | NOUVEAU_BO_GART, - 0, RENDER_SCRATCH_SIZE, &scratch->bo[i]); - assert(!ret); - } - - for (i = 0; i < VERT_ATTRIB_MAX; i++) - render->map[i] = -1; - - TAG(swtnl_init)(ctx); - vbo_set_draw_func(ctx, TAG(render_prims)); -} - -void -TAG(render_destroy)(struct gl_context *ctx) -{ - TAG(swtnl_destroy)(ctx); -} diff --git a/src/mesa/drivers/dri/nouveau/nouveau_scratch.c b/src/mesa/drivers/dri/nouveau/nouveau_scratch.c new file mode 100644 index 00000000000..ddda67b2f14 --- /dev/null +++ b/src/mesa/drivers/dri/nouveau/nouveau_scratch.c @@ -0,0 +1,98 @@ +/* + * Copyright (C) 2009-2010 Francisco Jerez. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "nouveau_driver.h" +#include "nouveau_context.h" + +/* + * Returns a pointer to a chunk of 'size' bytes long GART memory. 'bo' + * and 'offset' will point to the returned memory. + */ +void * +nouveau_get_scratch(struct gl_context *ctx, unsigned size, + struct nouveau_bo **bo, unsigned *offset) +{ + struct nouveau_scratch_state *scratch = + &to_nouveau_context(ctx)->scratch; + void *buf; + + if (scratch->buf && size <= NOUVEAU_SCRATCH_SIZE - scratch->offset) { + nouveau_bo_ref(scratch->bo[scratch->index], bo); + + buf = scratch->buf + scratch->offset; + *offset = scratch->offset; + scratch->offset += size; + + } else if (size <= NOUVEAU_SCRATCH_SIZE) { + scratch->index = (scratch->index + 1) % NOUVEAU_SCRATCH_COUNT; + nouveau_bo_ref(scratch->bo[scratch->index], bo); + + nouveau_bo_map(*bo, NOUVEAU_BO_WR); + buf = scratch->buf = (*bo)->map; + nouveau_bo_unmap(*bo); + + *offset = 0; + scratch->offset = size; + + } else { + nouveau_bo_new(context_dev(ctx), + NOUVEAU_BO_MAP | NOUVEAU_BO_GART, 0, size, bo); + + nouveau_bo_map(*bo, NOUVEAU_BO_WR); + buf = (*bo)->map; + nouveau_bo_unmap(*bo); + + *offset = 0; + } + + return buf; +} + +void +nouveau_scratch_init(struct gl_context *ctx) +{ + struct nouveau_scratch_state *scratch = + &to_nouveau_context(ctx)->scratch; + int ret, i; + + for (i = 0; i < NOUVEAU_SCRATCH_COUNT; i++) { + ret = nouveau_bo_new(context_dev(ctx), + NOUVEAU_BO_MAP | NOUVEAU_BO_GART, + 0, NOUVEAU_SCRATCH_SIZE, &scratch->bo[i]); + 
assert(!ret); + } +} + +void +nouveau_scratch_destroy(struct gl_context *ctx) +{ + struct nouveau_scratch_state *scratch = + &to_nouveau_context(ctx)->scratch; + int i; + + for (i = 0; i < NOUVEAU_SCRATCH_COUNT; i++) + nouveau_bo_ref(NULL, &scratch->bo[i]); +} diff --git a/src/mesa/drivers/dri/nouveau/nouveau_scratch.h b/src/mesa/drivers/dri/nouveau/nouveau_scratch.h new file mode 100644 index 00000000000..b60b33dd1ac --- /dev/null +++ b/src/mesa/drivers/dri/nouveau/nouveau_scratch.h @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2009-2010 Francisco Jerez. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __NOUVEAU_SCRATCH_H__ +#define __NOUVEAU_SCRATCH_H__ + +#define NOUVEAU_SCRATCH_COUNT 2 +#define NOUVEAU_SCRATCH_SIZE 3*1024*1024 + +struct nouveau_scratch_state { + struct nouveau_bo *bo[NOUVEAU_SCRATCH_COUNT]; + + int index; + int offset; + void *buf; +}; + +void * +nouveau_get_scratch(struct gl_context *ctx, unsigned size, + struct nouveau_bo **bo, unsigned *offset); + +void +nouveau_scratch_init(struct gl_context *ctx); + +void +nouveau_scratch_destroy(struct gl_context *ctx); + +#endif diff --git a/src/mesa/drivers/dri/nouveau/nouveau_state.c b/src/mesa/drivers/dri/nouveau/nouveau_state.c index 7b7ddd2f54d..1579d29efc2 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_state.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_state.c @@ -113,6 +113,12 @@ nouveau_depth_range(struct gl_context *ctx, GLclampd nearval, GLclampd farval) } static void +nouveau_read_buffer(struct gl_context *ctx, GLenum buffer) +{ + nouveau_validate_framebuffer(ctx); +} + +static void nouveau_draw_buffers(struct gl_context *ctx, GLsizei n, const GLenum *buffers) { nouveau_validate_framebuffer(ctx); @@ -512,6 +518,7 @@ nouveau_state_init(struct gl_context *ctx) ctx->Driver.DepthFunc = nouveau_depth_func; ctx->Driver.DepthMask = nouveau_depth_mask; ctx->Driver.DepthRange = nouveau_depth_range; + ctx->Driver.ReadBuffer = nouveau_read_buffer; ctx->Driver.DrawBuffers = nouveau_draw_buffers; ctx->Driver.Enable = nouveau_enable; ctx->Driver.Fogfv = nouveau_fog; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_swtnl_t.c b/src/mesa/drivers/dri/nouveau/nouveau_swtnl_t.c index b3588e8fd39..f084f89d29e 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_swtnl_t.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_swtnl_t.c @@ -28,6 +28,8 @@ #include "tnl/t_pipeline.h" #include "tnl/t_vertex.h" +#define SWTNL_VBO_SIZE 65536 + static enum tnl_attr_format swtnl_get_format(int type, int fields) { switch (type) { @@ -105,7 +107,7 @@ swtnl_choose_attrs(struct gl_context *ctx) TNLcontext *tnl = 
TNL_CONTEXT(ctx); struct tnl_clipspace *vtx = &tnl->clipspace; static struct tnl_attr_map map[NUM_VERTEX_ATTRS]; - int fields, i, n = 0; + int fields, attr, i, n = 0; render->mode = VBO; render->attr_count = NUM_VERTEX_ATTRS; @@ -116,7 +118,7 @@ swtnl_choose_attrs(struct gl_context *ctx) for (i = 0; i < VERT_ATTRIB_MAX; i++) { struct nouveau_attr_info *ha = &TAG(vertex_attrs)[i]; struct swtnl_attr_info *sa = &swtnl_attrs[i]; - struct nouveau_array_state *a = &render->attrs[i]; + struct nouveau_array *a = &render->attrs[i]; if (!sa->fields) continue; /* Unsupported attribute. */ @@ -141,13 +143,8 @@ swtnl_choose_attrs(struct gl_context *ctx) _tnl_install_attrs(ctx, map, n, NULL, 0); - for (i = 0; i < vtx->attr_count; i++) { - struct tnl_clipspace_attr *ta = &vtx->attr[i]; - struct nouveau_array_state *a = &render->attrs[ta->attrib]; - - a->stride = vtx->vertex_size; - a->offset = ta->vertoffset; - } + FOR_EACH_BOUND_ATTR(render, i, attr) + render->attrs[attr].stride = vtx->vertex_size; TAG(render_set_format)(ctx); } @@ -158,8 +155,8 @@ swtnl_alloc_vertices(struct gl_context *ctx) struct nouveau_swtnl_state *swtnl = &to_render_state(ctx)->swtnl; nouveau_bo_ref(NULL, &swtnl->vbo); - swtnl->buf = get_scratch_vbo(ctx, RENDER_SCRATCH_SIZE, - &swtnl->vbo, NULL); + swtnl->buf = nouveau_get_scratch(ctx, SWTNL_VBO_SIZE, &swtnl->vbo, + &swtnl->offset); swtnl->vertex_count = 0; } @@ -168,14 +165,15 @@ swtnl_bind_vertices(struct gl_context *ctx) { struct nouveau_render_state *render = to_render_state(ctx); struct nouveau_swtnl_state *swtnl = &render->swtnl; + struct tnl_clipspace *vtx = &TNL_CONTEXT(ctx)->clipspace; int i; - for (i = 0; i < render->attr_count; i++) { - int attr = render->map[i]; + for (i = 0; i < vtx->attr_count; i++) { + struct tnl_clipspace_attr *ta = &vtx->attr[i]; + struct nouveau_array *a = &render->attrs[ta->attrib]; - if (attr >= 0) - nouveau_bo_ref(swtnl->vbo, - &render->attrs[attr].bo); + nouveau_bo_ref(swtnl->vbo, &a->bo); + a->offset = swtnl->offset 
+ ta->vertoffset; } TAG(render_bind_vertices)(ctx); @@ -185,15 +183,11 @@ static void swtnl_unbind_vertices(struct gl_context *ctx) { struct nouveau_render_state *render = to_render_state(ctx); - int i; - - for (i = 0; i < render->attr_count; i++) { - int *attr = &render->map[i]; + int i, attr; - if (*attr >= 0) { - nouveau_bo_ref(NULL, &render->attrs[*attr].bo); - *attr = -1; - } + FOR_EACH_BOUND_ATTR(render, i, attr) { + nouveau_bo_ref(NULL, &render->attrs[attr].bo); + render->map[i] = -1; } render->attr_count = 0; @@ -260,7 +254,7 @@ swtnl_reset_stipple(struct gl_context *ctx) struct nouveau_swtnl_state *swtnl = &to_render_state(ctx)->swtnl; \ int vertex_len = TNL_CONTEXT(ctx)->clipspace.vertex_size; \ \ - if (swtnl->vertex_count + (n) > swtnl->vbo->size/vertex_len \ + if (swtnl->vertex_count + (n) > SWTNL_VBO_SIZE/vertex_len \ || (swtnl->vertex_count && swtnl->primitive != p)) \ swtnl_flush_vertices(ctx); \ \ @@ -280,7 +274,7 @@ swtnl_points(struct gl_context *ctx, GLuint first, GLuint last) while (first < last) { BEGIN_PRIMITIVE(GL_POINTS, last - first); - count = MIN2(swtnl->vbo->size / vertex_len, last - first); + count = MIN2(SWTNL_VBO_SIZE / vertex_len, last - first); for (i = 0; i < count; i++) OUT_VERTEX(first + i); @@ -316,7 +310,7 @@ swtnl_quad(struct gl_context *ctx, GLuint v1, GLuint v2, GLuint v3, GLuint v4) } /* TnL initialization. 
*/ -static void +void TAG(swtnl_init)(struct gl_context *ctx) { TNLcontext *tnl = TNL_CONTEXT(ctx); @@ -347,7 +341,7 @@ TAG(swtnl_init)(struct gl_context *ctx) swtnl_alloc_vertices(ctx); } -static void +void TAG(swtnl_destroy)(struct gl_context *ctx) { nouveau_bo_ref(NULL, &to_render_state(ctx)->swtnl.vbo); diff --git a/src/mesa/drivers/dri/nouveau/nouveau_texture.c b/src/mesa/drivers/dri/nouveau/nouveau_texture.c index cd063702af0..060c2c5bcc0 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_texture.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_texture.c @@ -79,26 +79,65 @@ nouveau_teximage_free(struct gl_context *ctx, struct gl_texture_image *ti) } static void -nouveau_teximage_map(struct gl_context *ctx, struct gl_texture_image *ti) +nouveau_teximage_map(struct gl_context *ctx, struct gl_texture_image *ti, + int access, int x, int y, int w, int h) { - struct nouveau_surface *s = &to_nouveau_teximage(ti)->surface; - int ret; + struct nouveau_teximage *nti = to_nouveau_teximage(ti); + struct nouveau_surface *s = &nti->surface; + struct nouveau_surface *st = &nti->transfer.surface; if (s->bo) { - ret = nouveau_bo_map(s->bo, NOUVEAU_BO_RDWR); - assert(!ret); - - ti->Data = s->bo->map; + if (!(access & GL_MAP_READ_BIT) && + nouveau_bo_pending(s->bo)) { + /* + * Heuristic: use a bounce buffer to pipeline + * teximage transfers. 
+ */ + st->layout = LINEAR; + st->format = s->format; + st->cpp = s->cpp; + st->width = w; + st->height = h; + st->pitch = s->pitch; + nti->transfer.x = x; + nti->transfer.y = y; + + ti->Data = nouveau_get_scratch(ctx, st->pitch * h, + &st->bo, &st->offset); + + } else { + int ret, flags = 0; + + if (access & GL_MAP_READ_BIT) + flags |= NOUVEAU_BO_RD; + if (access & GL_MAP_WRITE_BIT) + flags |= NOUVEAU_BO_WR; + + ret = nouveau_bo_map(s->bo, flags); + assert(!ret); + + ti->Data = s->bo->map + y * s->pitch + x * s->cpp; + } } } static void nouveau_teximage_unmap(struct gl_context *ctx, struct gl_texture_image *ti) { - struct nouveau_surface *s = &to_nouveau_teximage(ti)->surface; + struct nouveau_teximage *nti = to_nouveau_teximage(ti); + struct nouveau_surface *s = &nti->surface; + struct nouveau_surface *st = &nti->transfer.surface; - if (s->bo) + if (st->bo) { + context_drv(ctx)->surface_copy(ctx, s, st, nti->transfer.x, + nti->transfer.y, 0, 0, + st->width, st->height); + nouveau_surface_ref(NULL, st); + + } else if (s->bo) { nouveau_bo_unmap(s->bo); + } + ti->Data = NULL; } @@ -115,6 +154,7 @@ nouveau_choose_tex_format(struct gl_context *ctx, GLint internalFormat, case GL_RGBA12: case GL_RGBA16: case GL_RGB10_A2: + case GL_COMPRESSED_RGBA: return MESA_FORMAT_ARGB8888; case GL_RGB5_A1: return MESA_FORMAT_ARGB1555; @@ -124,6 +164,7 @@ nouveau_choose_tex_format(struct gl_context *ctx, GLint internalFormat, case GL_RGB10: case GL_RGB12: case GL_RGB16: + case GL_COMPRESSED_RGB: return MESA_FORMAT_XRGB8888; case 3: case GL_R3_G3_B2: @@ -139,6 +180,7 @@ nouveau_choose_tex_format(struct gl_context *ctx, GLint internalFormat, case GL_LUMINANCE12_ALPHA12: case GL_LUMINANCE16_ALPHA16: case GL_LUMINANCE8_ALPHA8: + case GL_COMPRESSED_LUMINANCE_ALPHA: return MESA_FORMAT_ARGB8888; case 1: @@ -147,6 +189,7 @@ nouveau_choose_tex_format(struct gl_context *ctx, GLint internalFormat, case GL_LUMINANCE12: case GL_LUMINANCE16: case GL_LUMINANCE8: + case GL_COMPRESSED_LUMINANCE: 
return MESA_FORMAT_L8; case GL_ALPHA: @@ -154,6 +197,7 @@ nouveau_choose_tex_format(struct gl_context *ctx, GLint internalFormat, case GL_ALPHA12: case GL_ALPHA16: case GL_ALPHA8: + case GL_COMPRESSED_ALPHA: return MESA_FORMAT_A8; case GL_INTENSITY: @@ -356,7 +400,8 @@ nouveau_teximage(struct gl_context *ctx, GLint dims, GLenum target, GLint level, "glTexImage"); if (pixels) { /* Store the pixel data. */ - nouveau_teximage_map(ctx, ti); + nouveau_teximage_map(ctx, ti, GL_MAP_WRITE_BIT, + 0, 0, width, height); ret = _mesa_texstore(ctx, dims, ti->_BaseFormat, ti->TexFormat, ti->Data, @@ -443,13 +488,13 @@ nouveau_texsubimage(struct gl_context *ctx, GLint dims, GLenum target, GLint lev format, type, pixels, packing, "glTexSubImage"); if (pixels) { - nouveau_teximage_map(ctx, ti); + nouveau_teximage_map(ctx, ti, GL_MAP_WRITE_BIT, + xoffset, yoffset, width, height); ret = _mesa_texstore(ctx, 3, ti->_BaseFormat, ti->TexFormat, - ti->Data, xoffset, yoffset, zoffset, - s->pitch, ti->ImageOffsets, - width, height, depth, format, type, - pixels, packing); + ti->Data, 0, 0, 0, s->pitch, + ti->ImageOffsets, width, height, depth, + format, type, pixels, packing); assert(ret); nouveau_teximage_unmap(ctx, ti); @@ -508,7 +553,8 @@ nouveau_get_teximage(struct gl_context *ctx, GLenum target, GLint level, struct gl_texture_object *t, struct gl_texture_image *ti) { - nouveau_teximage_map(ctx, ti); + nouveau_teximage_map(ctx, ti, GL_MAP_READ_BIT, + 0, 0, ti->Width, ti->Height); _mesa_get_teximage(ctx, target, level, format, type, pixels, t, ti); nouveau_teximage_unmap(ctx, ti); @@ -579,8 +625,11 @@ nouveau_texture_map(struct gl_context *ctx, struct gl_texture_object *t) int i; for (i = t->BaseLevel; i < t->_MaxLevel; i++) { - if (t->Image[0][i]) - nouveau_teximage_map(ctx, t->Image[0][i]); + struct gl_texture_image *ti = t->Image[0][i]; + + if (ti) + nouveau_teximage_map(ctx, ti, GL_MAP_READ_BIT, + 0, 0, ti->Width, ti->Height); } } @@ -630,7 +679,8 @@ nouveau_generate_mipmap(struct 
gl_context *ctx, GLenum target, if (_mesa_meta_check_generate_mipmap_fallback(ctx, target, t)) { struct gl_texture_image *base = t->Image[0][t->BaseLevel]; - nouveau_teximage_map(ctx, base); + nouveau_teximage_map(ctx, base, GL_MAP_READ_BIT, + 0, 0, base->Width, base->Height); _mesa_generate_mipmap(ctx, target, t); nouveau_teximage_unmap(ctx, base); diff --git a/src/mesa/drivers/dri/nouveau/nouveau_texture.h b/src/mesa/drivers/dri/nouveau/nouveau_texture.h index fc170215f35..56e61c7337b 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_texture.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_texture.h @@ -30,6 +30,10 @@ struct nouveau_teximage { struct gl_texture_image base; struct nouveau_surface surface; + struct { + struct nouveau_surface surface; + int x, y; + } transfer; }; #define to_nouveau_teximage(x) ((struct nouveau_teximage *)(x)) diff --git a/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c b/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c index 394f3c9b500..7a0eb9fc23d 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c @@ -31,59 +31,11 @@ #include "main/image.h" /* Arbitrary pushbuf length we can assume we can get with a single - * WAIT_RING. */ + * call to WAIT_RING. */ #define PUSHBUF_DWORDS 65536 -/* Functions to set up struct nouveau_array_state from something like - * a GL array or index buffer. 
*/ - -static void -vbo_init_array(struct nouveau_array_state *a, int attr, int stride, - int fields, int type, struct gl_buffer_object *obj, - const void *ptr, GLboolean map) -{ - a->attr = attr; - a->stride = stride; - a->fields = fields; - a->type = type; - - if (_mesa_is_bufferobj(obj)) { - nouveau_bo_ref(to_nouveau_bufferobj(obj)->bo, &a->bo); - a->offset = (intptr_t)ptr; - - if (map) { - nouveau_bo_map(a->bo, NOUVEAU_BO_RD); - a->buf = a->bo->map + a->offset; - } else { - a->buf = NULL; - } - - } else { - nouveau_bo_ref(NULL, &a->bo); - a->offset = 0; - - if (map) - a->buf = ptr; - else - a->buf = NULL; - } - - if (a->buf) - get_array_extract(a, &a->extract_u, &a->extract_f); -} - -static void -vbo_deinit_array(struct nouveau_array_state *a) -{ - if (a->bo) { - if (a->bo->map) - nouveau_bo_unmap(a->bo); - nouveau_bo_ref(NULL, &a->bo); - } - - a->buf = NULL; - a->fields = 0; -} +/* Functions to turn GL arrays or index buffers into nouveau_array + * structures. */ static int get_array_stride(struct gl_context *ctx, const struct gl_client_array *a) @@ -102,48 +54,45 @@ vbo_init_arrays(struct gl_context *ctx, const struct _mesa_index_buffer *ib, const struct gl_client_array **arrays) { struct nouveau_render_state *render = to_render_state(ctx); - int i; + GLboolean imm = (render->mode == IMM); + int i, attr; if (ib) - vbo_init_array(&render->ib, 0, 0, ib->count, ib->type, - ib->obj, ib->ptr, GL_TRUE); + nouveau_init_array(&render->ib, 0, 0, ib->count, ib->type, + ib->obj, ib->ptr, GL_TRUE); - for (i = 0; i < render->attr_count; i++) { - int attr = render->map[i]; + FOR_EACH_BOUND_ATTR(render, i, attr) { + const struct gl_client_array *array = arrays[attr]; - if (attr >= 0) { - const struct gl_client_array *array = arrays[attr]; - - vbo_init_array(&render->attrs[attr], attr, - get_array_stride(ctx, array), - array->Size, array->Type, - array->BufferObj, array->Ptr, - render->mode == IMM); - } + nouveau_init_array(&render->attrs[attr], attr, + get_array_stride(ctx, 
array), + array->Size, array->Type, + imm ? array->BufferObj : NULL, + array->Ptr, imm); } } static void vbo_deinit_arrays(struct gl_context *ctx, const struct _mesa_index_buffer *ib, - const struct gl_client_array **arrays) + const struct gl_client_array **arrays) { struct nouveau_render_state *render = to_render_state(ctx); - int i; + int i, attr; if (ib) - vbo_deinit_array(&render->ib); + nouveau_cleanup_array(&render->ib); - for (i = 0; i < render->attr_count; i++) { - int *attr = &render->map[i]; + FOR_EACH_BOUND_ATTR(render, i, attr) { + struct nouveau_array *a = &render->attrs[attr]; - if (*attr >= 0) { - vbo_deinit_array(&render->attrs[*attr]); - *attr = -1; - } + if (render->mode == IMM) + nouveau_bo_ref(NULL, &a->bo); + + nouveau_deinit_array(a); + render->map[i] = -1; } render->attr_count = 0; - context_bctx(ctx, VERTEX); } /* Make some rendering decisions from the GL context. */ @@ -164,20 +113,16 @@ vbo_choose_render_mode(struct gl_context *ctx, const struct gl_client_array **ar } } } - - if (render->mode == VBO) - render->attr_count = NUM_VERTEX_ATTRS; - else - render->attr_count = 0; } static void -vbo_emit_attr(struct gl_context *ctx, const struct gl_client_array **arrays, int attr) +vbo_emit_attr(struct gl_context *ctx, const struct gl_client_array **arrays, + int attr) { struct nouveau_channel *chan = context_chan(ctx); struct nouveau_render_state *render = to_render_state(ctx); const struct gl_client_array *array = arrays[attr]; - struct nouveau_array_state *a = &render->attrs[attr]; + struct nouveau_array *a = &render->attrs[attr]; RENDER_LOCALS(ctx); if (!array->StrideB) { @@ -186,11 +131,11 @@ vbo_emit_attr(struct gl_context *ctx, const struct gl_client_array **arrays, int return; /* Constant attribute. 
*/ - vbo_init_array(a, attr, array->StrideB, array->Size, - array->Type, array->BufferObj, array->Ptr, - GL_TRUE); + nouveau_init_array(a, attr, array->StrideB, array->Size, + array->Type, array->BufferObj, array->Ptr, + GL_TRUE); EMIT_IMM(ctx, a, 0); - vbo_deinit_array(a); + nouveau_deinit_array(a); } else { /* Varying attribute. */ @@ -199,10 +144,13 @@ vbo_emit_attr(struct gl_context *ctx, const struct gl_client_array **arrays, int if (render->mode == VBO) { render->map[info->vbo_index] = attr; render->vertex_size += array->_ElementSize; + render->attr_count = MAX2(render->attr_count, + info->vbo_index + 1); } else { render->map[render->attr_count++] = attr; render->vertex_size += 4 * info->imm_fields; } + } } @@ -216,6 +164,7 @@ vbo_choose_attrs(struct gl_context *ctx, const struct gl_client_array **arrays) /* Reset the vertex size. */ render->vertex_size = 0; + render->attr_count = 0; vbo_emit_attr(ctx, arrays, VERT_ATTRIB_COLOR0); if (ctx->Fog.ColorSumEnabled && !ctx->Light.Enabled) @@ -233,7 +182,7 @@ vbo_choose_attrs(struct gl_context *ctx, const struct gl_client_array **arrays) (ctx->Texture._GenFlags & TEXGEN_NEED_NORMALS)) vbo_emit_attr(ctx, arrays, VERT_ATTRIB_NORMAL); - if (ctx->Light.Enabled) { + if (ctx->Light.Enabled && render->mode == IMM) { vbo_emit_attr(ctx, arrays, MAT(FRONT_AMBIENT)); vbo_emit_attr(ctx, arrays, MAT(FRONT_DIFFUSE)); vbo_emit_attr(ctx, arrays, MAT(FRONT_SPECULAR)); @@ -254,17 +203,13 @@ static int get_max_client_stride(struct gl_context *ctx, const struct gl_client_array **arrays) { struct nouveau_render_state *render = to_render_state(ctx); - int i, s = 0; + int i, attr, s = 0; - for (i = 0; i < render->attr_count; i++) { - int attr = render->map[i]; + FOR_EACH_BOUND_ATTR(render, i, attr) { + const struct gl_client_array *a = arrays[attr]; - if (attr >= 0) { - const struct gl_client_array *a = arrays[attr]; - - if (!_mesa_is_bufferobj(a->BufferObj)) - s = MAX2(s, get_array_stride(ctx, a)); - } + if 
(!_mesa_is_bufferobj(a->BufferObj)) + s = MAX2(s, get_array_stride(ctx, a)); } return s; @@ -295,7 +240,7 @@ vbo_maybe_split(struct gl_context *ctx, const struct gl_client_array **arrays, if (render->mode == VBO && (stride = get_max_client_stride(ctx, arrays))) vert_avail = MIN2(vert_avail, - RENDER_SCRATCH_SIZE / stride); + NOUVEAU_SCRATCH_SIZE / stride); if (max_index - min_index > vert_avail || (ib && ib->count > idx_avail)) { @@ -315,42 +260,93 @@ vbo_maybe_split(struct gl_context *ctx, const struct gl_client_array **arrays, /* VBO rendering path. */ +static GLboolean +check_update_array(struct nouveau_array *a, unsigned offset, + struct nouveau_bo *bo, int *pdelta) +{ + int delta = *pdelta; + GLboolean dirty; + + if (a->bo == bo) { + if (delta < 0) + delta = ((int)offset - (int)a->offset) / a->stride; + + dirty = (delta < 0 || + offset != (a->offset + delta * a->stride)); + } else { + dirty = GL_TRUE; + } + + *pdelta = (dirty ? 0 : delta); + return dirty; +} + static void vbo_bind_vertices(struct gl_context *ctx, const struct gl_client_array **arrays, - GLint basevertex, GLuint min_index, GLuint max_index) + int base, unsigned min_index, unsigned max_index, int *pdelta) { struct nouveau_render_state *render = to_render_state(ctx); - int i; + struct nouveau_channel *chan = context_chan(ctx); + struct nouveau_bo *bo[NUM_VERTEX_ATTRS]; + unsigned offset[NUM_VERTEX_ATTRS]; + GLboolean dirty = GL_FALSE; + int i, j, attr; + RENDER_LOCALS(ctx); - for (i = 0; i < NUM_VERTEX_ATTRS; i++) { - int attr = render->map[i]; - - if (attr >= 0) { - const struct gl_client_array *array = arrays[attr]; - struct nouveau_array_state *a = &render->attrs[attr]; - unsigned delta = (basevertex + min_index) - * array->StrideB; - - if (a->bo) { - /* Array in a buffer obj. 
*/ - a->offset = (intptr_t)array->Ptr + delta; - } else { - int j, n = max_index - min_index + 1; - char *sp = (char *)array->Ptr + delta; - char *dp = get_scratch_vbo(ctx, n * a->stride, - &a->bo, &a->offset); - - /* Array in client memory, move it to - * a scratch buffer obj. */ - for (j = 0; j < n; j++) - memcpy(dp + j * a->stride, - sp + j * array->StrideB, - a->stride); - } + *pdelta = -1; + + FOR_EACH_BOUND_ATTR(render, i, attr) { + const struct gl_client_array *array = arrays[attr]; + struct gl_buffer_object *obj = array->BufferObj; + struct nouveau_array *a = &render->attrs[attr]; + unsigned delta = (base + min_index) * array->StrideB; + + bo[i] = NULL; + + if (nouveau_bufferobj_hw(obj)) { + /* Array in a buffer obj. */ + nouveau_bo_ref(to_nouveau_bufferobj(obj)->bo, &bo[i]); + offset[i] = delta + (intptr_t)array->Ptr; + + } else { + int n = max_index - min_index + 1; + char *sp = (char *)ADD_POINTERS( + nouveau_bufferobj_sys(obj), array->Ptr) + delta; + char *dp = nouveau_get_scratch(ctx, n * a->stride, + &bo[i], &offset[i]); + + /* Array in client memory, move it to a + * scratch buffer obj. */ + for (j = 0; j < n; j++) + memcpy(dp + j * a->stride, + sp + j * array->StrideB, + a->stride); } + + dirty |= check_update_array(a, offset[i], bo[i], pdelta); + } + + *pdelta -= min_index; + + if (dirty) { + /* Buffers changed, update the attribute binding. */ + FOR_EACH_BOUND_ATTR(render, i, attr) { + struct nouveau_array *a = &render->attrs[attr]; + + nouveau_bo_ref(NULL, &a->bo); + a->offset = offset[i]; + a->bo = bo[i]; + } + + TAG(render_bind_vertices)(ctx); + + } else { + /* Just cleanup. 
*/ + FOR_EACH_BOUND_ATTR(render, i, attr) + nouveau_bo_ref(NULL, &bo[i]); } - TAG(render_bind_vertices)(ctx); + BATCH_VALIDATE(); } static void @@ -360,12 +356,10 @@ vbo_draw_vbo(struct gl_context *ctx, const struct gl_client_array **arrays, GLuint max_index) { struct nouveau_channel *chan = context_chan(ctx); - dispatch_t dispatch; - int delta = -min_index, basevertex = 0, i; + dispatch_t dispatch = get_array_dispatch(&to_render_state(ctx)->ib); + int i, delta = 0, basevertex = 0; RENDER_LOCALS(ctx); - get_array_dispatch(&to_render_state(ctx)->ib, &dispatch); - TAG(render_set_format)(ctx); for (i = 0; i < nr_prims; i++) { @@ -374,8 +368,8 @@ vbo_draw_vbo(struct gl_context *ctx, const struct gl_client_array **arrays, if (i == 0 || basevertex != prims[i].basevertex) { basevertex = prims[i].basevertex; - vbo_bind_vertices(ctx, arrays, basevertex, - min_index, max_index); + vbo_bind_vertices(ctx, arrays, basevertex, min_index, + max_index, &delta); } if (count > get_max_vertices(ctx, ib, AVAIL_RING(chan))) @@ -390,7 +384,7 @@ vbo_draw_vbo(struct gl_context *ctx, const struct gl_client_array **arrays, /* Immediate rendering path. */ static unsigned -extract_id(struct nouveau_array_state *a, int i, int j) +extract_id(struct nouveau_array *a, int i, int j) { return j; } @@ -404,7 +398,7 @@ vbo_draw_imm(struct gl_context *ctx, const struct gl_client_array **arrays, struct nouveau_render_state *render = to_render_state(ctx); struct nouveau_channel *chan = context_chan(ctx); extract_u_t extract = ib ? 
render->ib.extract_u : extract_id; - int i, j, k; + int i, j, k, attr; RENDER_LOCALS(ctx); for (i = 0; i < nr_prims; i++) { @@ -421,9 +415,8 @@ vbo_draw_imm(struct gl_context *ctx, const struct gl_client_array **arrays, j = prims[i].basevertex + extract(&render->ib, 0, start); - for (k = 0; k < render->attr_count; k++) - EMIT_IMM(ctx, &render->attrs[render->map[k]], - j); + FOR_EACH_BOUND_ATTR(render, k, attr) + EMIT_IMM(ctx, &render->attrs[attr], j); } BATCH_END(); @@ -433,7 +426,8 @@ vbo_draw_imm(struct gl_context *ctx, const struct gl_client_array **arrays, /* draw_prims entry point when we're doing hw-tnl. */ static void -TAG(vbo_render_prims)(struct gl_context *ctx, const struct gl_client_array **arrays, +TAG(vbo_render_prims)(struct gl_context *ctx, + const struct gl_client_array **arrays, const struct _mesa_prim *prims, GLuint nr_prims, const struct _mesa_index_buffer *ib, GLboolean index_bounds_valid, @@ -462,3 +456,44 @@ TAG(vbo_render_prims)(struct gl_context *ctx, const struct gl_client_array **arr vbo_deinit_arrays(ctx, ib, arrays); } + +/* VBO rendering entry points. 
*/ + +static void +TAG(vbo_check_render_prims)(struct gl_context *ctx, + const struct gl_client_array **arrays, + const struct _mesa_prim *prims, GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, GLuint max_index) +{ + struct nouveau_context *nctx = to_nouveau_context(ctx); + + nouveau_validate_framebuffer(ctx); + + if (nctx->fallback == HWTNL) + TAG(vbo_render_prims)(ctx, arrays, prims, nr_prims, ib, + index_bounds_valid, min_index, max_index); + + if (nctx->fallback == SWTNL) + _tnl_vbo_draw_prims(ctx, arrays, prims, nr_prims, ib, + index_bounds_valid, min_index, max_index); +} + +void +TAG(vbo_init)(struct gl_context *ctx) +{ + struct nouveau_render_state *render = to_render_state(ctx); + int i; + + for (i = 0; i < VERT_ATTRIB_MAX; i++) + render->map[i] = -1; + + vbo_set_draw_func(ctx, TAG(vbo_check_render_prims)); + vbo_use_buffer_objects(ctx); +} + +void +TAG(vbo_destroy)(struct gl_context *ctx) +{ +} diff --git a/src/mesa/drivers/dri/nouveau/nv10_context.c b/src/mesa/drivers/dri/nouveau/nv10_context.c index fdcb43b7718..de2c93ec815 100644 --- a/src/mesa/drivers/dri/nouveau/nv10_context.c +++ b/src/mesa/drivers/dri/nouveau/nv10_context.c @@ -24,6 +24,7 @@ * */ +#include "main/state.h" #include "nouveau_driver.h" #include "nouveau_context.h" #include "nouveau_fbo.h" @@ -184,6 +185,9 @@ nv10_clear(struct gl_context *ctx, GLbitfield buffers) nv17_zclear(ctx, &buffers); else nv10_zclear(ctx, &buffers); + + /* Emit the zclear state if it's dirty */ + _mesa_update_state(ctx); } nouveau_clear(ctx, buffers); @@ -407,7 +411,8 @@ nv10_context_destroy(struct gl_context *ctx) struct nouveau_context *nctx = to_nouveau_context(ctx); nv04_surface_takedown(ctx); - nv10_render_destroy(ctx); + nv10_swtnl_destroy(ctx); + nv10_vbo_destroy(ctx); nouveau_grobj_free(&nctx->hw.eng3d); @@ -463,7 +468,8 @@ nv10_context_create(struct nouveau_screen *screen, const struct gl_config *visua goto fail; nv10_hwctx_init(ctx); - 
nv10_render_init(ctx); + nv10_vbo_init(ctx); + nv10_swtnl_init(ctx); return ctx; diff --git a/src/mesa/drivers/dri/nouveau/nv10_driver.h b/src/mesa/drivers/dri/nouveau/nv10_driver.h index dec3d64e7d2..6fdc4641623 100644 --- a/src/mesa/drivers/dri/nouveau/nv10_driver.h +++ b/src/mesa/drivers/dri/nouveau/nv10_driver.h @@ -45,10 +45,16 @@ nv10_transform_depth(struct gl_context *ctx, float z); /* nv10_render.c */ void -nv10_render_init(struct gl_context *ctx); +nv10_vbo_init(struct gl_context *ctx); void -nv10_render_destroy(struct gl_context *ctx); +nv10_vbo_destroy(struct gl_context *ctx); + +void +nv10_swtnl_init(struct gl_context *ctx); + +void +nv10_swtnl_destroy(struct gl_context *ctx); /* nv10_state_fb.c */ void diff --git a/src/mesa/drivers/dri/nouveau/nv10_render.c b/src/mesa/drivers/dri/nouveau/nv10_render.c index a03ace35366..7115739b5aa 100644 --- a/src/mesa/drivers/dri/nouveau/nv10_render.c +++ b/src/mesa/drivers/dri/nouveau/nv10_render.c @@ -32,7 +32,7 @@ #define NUM_VERTEX_ATTRS 8 static void -nv10_emit_material(struct gl_context *ctx, struct nouveau_array_state *a, +nv10_emit_material(struct gl_context *ctx, struct nouveau_array *a, const void *v); /* Vertex attribute format. 
*/ @@ -111,13 +111,11 @@ nv10_render_set_format(struct gl_context *ctx) struct nouveau_render_state *render = to_render_state(ctx); struct nouveau_channel *chan = context_chan(ctx); struct nouveau_grobj *celsius = context_eng3d(ctx); - int i, hw_format; - - for (i = 0; i < NUM_VERTEX_ATTRS; i++) { - int attr = render->map[i]; + int i, attr, hw_format; + FOR_EACH_ATTR(render, i, attr) { if (attr >= 0) { - struct nouveau_array_state *a = &render->attrs[attr]; + struct nouveau_array *a = &render->attrs[attr]; hw_format = a->stride << 8 | a->fields << 4 | @@ -140,31 +138,27 @@ nv10_render_bind_vertices(struct gl_context *ctx) { struct nouveau_render_state *render = to_render_state(ctx); struct nouveau_bo_context *bctx = context_bctx(ctx, VERTEX); - struct nouveau_channel *chan = context_chan(ctx); struct nouveau_grobj *celsius = context_eng3d(ctx); - int i; + int i, attr; - for (i = 0; i < NUM_VERTEX_ATTRS; i++) { - int attr = render->map[i]; - - if (attr >= 0) { - struct nouveau_array_state *a = &render->attrs[attr]; + FOR_EACH_BOUND_ATTR(render, i, attr) { + struct nouveau_array *a = &render->attrs[attr]; - nouveau_bo_markl(bctx, celsius, - NV10TCL_VTXBUF_ADDRESS(i), - a->bo, a->offset, - NOUVEAU_BO_GART | NOUVEAU_BO_RD); - } + nouveau_bo_markl(bctx, celsius, + NV10TCL_VTXBUF_ADDRESS(i), + a->bo, a->offset, + NOUVEAU_BO_GART | NOUVEAU_BO_RD); } - - BEGIN_RING(chan, celsius, NV10TCL_VERTEX_ARRAY_VALIDATE, 1); - OUT_RING(chan, 0); } /* Vertex array rendering defs. 
*/ #define RENDER_LOCALS(ctx) \ struct nouveau_grobj *celsius = context_eng3d(ctx) +#define BATCH_VALIDATE() \ + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_ARRAY_VALIDATE, 1); \ + OUT_RING(chan, 0) + #define BATCH_BEGIN(prim) \ BEGIN_RING(chan, celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); \ OUT_RING(chan, prim) @@ -199,3 +193,5 @@ nv10_render_bind_vertices(struct gl_context *ctx) #define TAG(x) nv10_##x #include "nouveau_render_t.c" +#include "nouveau_vbo_t.c" +#include "nouveau_swtnl_t.c" diff --git a/src/mesa/drivers/dri/nouveau/nv10_state_fb.c b/src/mesa/drivers/dri/nouveau/nv10_state_fb.c index d87fe96b1c0..0fda9faf49b 100644 --- a/src/mesa/drivers/dri/nouveau/nv10_state_fb.c +++ b/src/mesa/drivers/dri/nouveau/nv10_state_fb.c @@ -51,11 +51,11 @@ get_rt_format(gl_format format) } static void -setup_lma_buffer(struct gl_context *ctx) +setup_hierz_buffer(struct gl_context *ctx) { struct nouveau_channel *chan = context_chan(ctx); struct nouveau_grobj *celsius = context_eng3d(ctx); - struct nouveau_bo_context *bctx = context_bctx(ctx, LMA_DEPTH); + struct nouveau_bo_context *bctx = context_bctx(ctx, HIERZ); struct gl_framebuffer *fb = ctx->DrawBuffer; struct nouveau_framebuffer *nfb = to_nouveau_framebuffer(fb); unsigned pitch = align(fb->Width, 128), @@ -135,7 +135,7 @@ nv10_emit_framebuffer(struct gl_context *ctx, int emit) s->bo, 0, bo_flags); if (context_chipset(ctx) >= 0x17) { - setup_lma_buffer(ctx); + setup_hierz_buffer(ctx); context_dirty(ctx, ZCLEAR); } } diff --git a/src/mesa/drivers/dri/nouveau/nv20_context.c b/src/mesa/drivers/dri/nouveau/nv20_context.c index c6111a2a9a0..89200fb70da 100644 --- a/src/mesa/drivers/dri/nouveau/nv20_context.c +++ b/src/mesa/drivers/dri/nouveau/nv20_context.c @@ -26,6 +26,8 @@ #include "nouveau_driver.h" #include "nouveau_context.h" +#include "nouveau_fbo.h" +#include "nouveau_util.h" #include "nouveau_class.h" #include "nv04_driver.h" #include "nv10_driver.h" @@ -40,6 +42,57 @@ static const struct dri_extension 
nv20_extensions[] = { }; static void +nv20_clear(struct gl_context *ctx, GLbitfield buffers) +{ + struct nouveau_channel *chan = context_chan(ctx); + struct nouveau_grobj *kelvin = context_eng3d(ctx); + struct gl_framebuffer *fb = ctx->DrawBuffer; + uint32_t clear = 0; + + nouveau_validate_framebuffer(ctx); + + if (buffers & BUFFER_BITS_COLOR) { + struct nouveau_surface *s = &to_nouveau_renderbuffer( + fb->_ColorDrawBuffers[0])->surface; + + if (ctx->Color.ColorMask[0][RCOMP]) + clear |= NV20TCL_CLEAR_BUFFERS_COLOR_R; + if (ctx->Color.ColorMask[0][GCOMP]) + clear |= NV20TCL_CLEAR_BUFFERS_COLOR_G; + if (ctx->Color.ColorMask[0][BCOMP]) + clear |= NV20TCL_CLEAR_BUFFERS_COLOR_B; + if (ctx->Color.ColorMask[0][ACOMP]) + clear |= NV20TCL_CLEAR_BUFFERS_COLOR_A; + + BEGIN_RING(chan, kelvin, NV20TCL_CLEAR_VALUE, 1); + OUT_RING(chan, pack_rgba_f(s->format, ctx->Color.ClearColor)); + + buffers &= ~BUFFER_BITS_COLOR; + } + + if (buffers & (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL)) { + struct nouveau_surface *s = &to_nouveau_renderbuffer( + fb->_DepthBuffer->Wrapped)->surface; + + if (buffers & BUFFER_BIT_DEPTH && ctx->Depth.Mask) + clear |= NV20TCL_CLEAR_BUFFERS_DEPTH; + if (buffers & BUFFER_BIT_STENCIL && ctx->Stencil.WriteMask[0]) + clear |= NV20TCL_CLEAR_BUFFERS_STENCIL; + + BEGIN_RING(chan, kelvin, NV20TCL_CLEAR_DEPTH_VALUE, 1); + OUT_RING(chan, pack_zs_f(s->format, ctx->Depth.Clear, + ctx->Stencil.Clear)); + + buffers &= ~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL); + } + + BEGIN_RING(chan, kelvin, NV20TCL_CLEAR_BUFFERS, 1); + OUT_RING(chan, clear); + + nouveau_clear(ctx, buffers); +} + +static void nv20_hwctx_init(struct gl_context *ctx) { struct nouveau_channel *chan = context_chan(ctx); @@ -134,10 +187,6 @@ nv20_hwctx_init(struct gl_context *ctx) OUT_RING (chan, 2); if (context_chipset(ctx) >= 0x25) { - BEGIN_RING(chan, kelvin, 0x022c, 2); - OUT_RING (chan, 0x280); - OUT_RING (chan, 0x07d28000); - BEGIN_RING(chan, kelvin, 0x1da4, 1); OUT_RING (chan, 0); } @@ -376,7 +425,8 @@ 
nv20_context_destroy(struct gl_context *ctx) struct nouveau_context *nctx = to_nouveau_context(ctx); nv04_surface_takedown(ctx); - nv20_render_destroy(ctx); + nv20_swtnl_destroy(ctx); + nv20_vbo_destroy(ctx); nouveau_grobj_free(&nctx->hw.eng3d); @@ -410,6 +460,7 @@ nv20_context_create(struct nouveau_screen *screen, const struct gl_config *visua ctx->Const.MaxTextureUnits = NV20_TEXTURE_UNITS; ctx->Const.MaxTextureMaxAnisotropy = 8; ctx->Const.MaxTextureLodBias = 15; + ctx->Driver.Clear = nv20_clear; /* 2D engine. */ ret = nv04_surface_init(ctx); @@ -428,7 +479,8 @@ nv20_context_create(struct nouveau_screen *screen, const struct gl_config *visua goto fail; nv20_hwctx_init(ctx); - nv20_render_init(ctx); + nv20_vbo_init(ctx); + nv20_swtnl_init(ctx); return ctx; diff --git a/src/mesa/drivers/dri/nouveau/nv20_driver.h b/src/mesa/drivers/dri/nouveau/nv20_driver.h index 7fbe6ccfa68..f2a6097b937 100644 --- a/src/mesa/drivers/dri/nouveau/nv20_driver.h +++ b/src/mesa/drivers/dri/nouveau/nv20_driver.h @@ -39,10 +39,16 @@ extern const struct nouveau_driver nv20_driver; /* nv20_render.c */ void -nv20_render_init(struct gl_context *ctx); +nv20_vbo_init(struct gl_context *ctx); void -nv20_render_destroy(struct gl_context *ctx); +nv20_vbo_destroy(struct gl_context *ctx); + +void +nv20_swtnl_init(struct gl_context *ctx); + +void +nv20_swtnl_destroy(struct gl_context *ctx); /* nv20_state_fb.c */ void diff --git a/src/mesa/drivers/dri/nouveau/nv20_render.c b/src/mesa/drivers/dri/nouveau/nv20_render.c index 6b668544627..dbdb85da203 100644 --- a/src/mesa/drivers/dri/nouveau/nv20_render.c +++ b/src/mesa/drivers/dri/nouveau/nv20_render.c @@ -32,7 +32,7 @@ #define NUM_VERTEX_ATTRS 16 static void -nv20_emit_material(struct gl_context *ctx, struct nouveau_array_state *a, +nv20_emit_material(struct gl_context *ctx, struct nouveau_array *a, const void *v); /* Vertex attribute format. 
*/ @@ -135,13 +135,11 @@ nv20_render_set_format(struct gl_context *ctx) struct nouveau_render_state *render = to_render_state(ctx); struct nouveau_channel *chan = context_chan(ctx); struct nouveau_grobj *kelvin = context_eng3d(ctx); - int i, hw_format; - - for (i = 0; i < NUM_VERTEX_ATTRS; i++) { - int attr = render->map[i]; + int i, attr, hw_format; + FOR_EACH_ATTR(render, i, attr) { if (attr >= 0) { - struct nouveau_array_state *a = &render->attrs[attr]; + struct nouveau_array *a = &render->attrs[attr]; hw_format = a->stride << 8 | a->fields << 4 | @@ -162,33 +160,29 @@ nv20_render_bind_vertices(struct gl_context *ctx) { struct nouveau_render_state *render = to_render_state(ctx); struct nouveau_bo_context *bctx = context_bctx(ctx, VERTEX); - struct nouveau_channel *chan = context_chan(ctx); struct nouveau_grobj *kelvin = context_eng3d(ctx); - int i; + int i, attr; - for (i = 0; i < NUM_VERTEX_ATTRS; i++) { - int attr = render->map[i]; + FOR_EACH_BOUND_ATTR(render, i, attr) { + struct nouveau_array *a = &render->attrs[attr]; - if (attr >= 0) { - struct nouveau_array_state *a = &render->attrs[attr]; - - nouveau_bo_mark(bctx, kelvin, - NV20TCL_VTXBUF_ADDRESS(i), - a->bo, a->offset, 0, - 0, NV20TCL_VTXBUF_ADDRESS_DMA1, - NOUVEAU_BO_LOW | NOUVEAU_BO_OR | - NOUVEAU_BO_GART | NOUVEAU_BO_RD); - } + nouveau_bo_mark(bctx, kelvin, + NV20TCL_VTXBUF_ADDRESS(i), + a->bo, a->offset, 0, + 0, NV20TCL_VTXBUF_ADDRESS_DMA1, + NOUVEAU_BO_LOW | NOUVEAU_BO_OR | + NOUVEAU_BO_GART | NOUVEAU_BO_RD); } - - BEGIN_RING(chan, kelvin, NV20TCL_VTX_CACHE_INVALIDATE, 1); - OUT_RING(chan, 0); } /* Vertex array rendering defs. 
*/ #define RENDER_LOCALS(ctx) \ struct nouveau_grobj *kelvin = context_eng3d(ctx) +#define BATCH_VALIDATE() \ + BEGIN_RING(chan, kelvin, NV20TCL_VTX_CACHE_INVALIDATE, 1); \ + OUT_RING(chan, 0) + #define BATCH_BEGIN(prim) \ BEGIN_RING(chan, kelvin, NV20TCL_VERTEX_BEGIN_END, 1); \ OUT_RING(chan, prim) @@ -223,3 +217,5 @@ nv20_render_bind_vertices(struct gl_context *ctx) #define TAG(x) nv20_##x #include "nouveau_render_t.c" +#include "nouveau_vbo_t.c" +#include "nouveau_swtnl_t.c" diff --git a/src/mesa/drivers/dri/nouveau/nv20_state_fb.c b/src/mesa/drivers/dri/nouveau/nv20_state_fb.c index 7822ca2a098..854392f9ff3 100644 --- a/src/mesa/drivers/dri/nouveau/nv20_state_fb.c +++ b/src/mesa/drivers/dri/nouveau/nv20_state_fb.c @@ -51,6 +51,31 @@ get_rt_format(gl_format format) } } +static void +setup_hierz_buffer(struct gl_context *ctx) +{ + struct nouveau_channel *chan = context_chan(ctx); + struct nouveau_grobj *kelvin = context_eng3d(ctx); + struct nouveau_bo_context *bctx = context_bctx(ctx, HIERZ); + struct gl_framebuffer *fb = ctx->DrawBuffer; + struct nouveau_framebuffer *nfb = to_nouveau_framebuffer(fb); + unsigned pitch = align(fb->Width, 128), + height = align(fb->Height, 2), + size = pitch * height; + + if (!nfb->hierz.bo || nfb->hierz.bo->size != size) { + nouveau_bo_ref(NULL, &nfb->hierz.bo); + nouveau_bo_new(context_dev(ctx), NOUVEAU_BO_VRAM, 0, size, + &nfb->hierz.bo); + } + + BEGIN_RING(chan, kelvin, NV25TCL_HIERZ_PITCH, 1); + OUT_RING(chan, pitch); + + nouveau_bo_markl(bctx, kelvin, NV25TCL_HIERZ_OFFSET, nfb->hierz.bo, + 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); +} + void nv20_emit_framebuffer(struct gl_context *ctx, int emit) { @@ -88,6 +113,9 @@ nv20_emit_framebuffer(struct gl_context *ctx, int emit) nouveau_bo_markl(bctx, kelvin, NV20TCL_ZETA_OFFSET, s->bo, 0, bo_flags); + + if (context_chipset(ctx) >= 0x25) + setup_hierz_buffer(ctx); } else { rt_format |= get_rt_format(MESA_FORMAT_Z24_S8); zeta_pitch = rt_pitch; diff --git 
a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c index 723e31401de..5abfc9dac51 100644 --- a/src/mesa/drivers/dri/r200/r200_context.c +++ b/src/mesa/drivers/dri/r200/r200_context.c @@ -71,6 +71,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define need_GL_NV_vertex_program #define need_GL_ARB_point_parameters #define need_GL_EXT_framebuffer_object +#define need_GL_OES_EGL_image + #include "main/remap_helper.h" #define DRIVER_DATE "20060602" @@ -137,6 +139,9 @@ static const struct dri_extension card_extensions[] = { "GL_ATI_texture_mirror_once", NULL }, { "GL_MESA_pack_invert", NULL }, { "GL_NV_blend_square", NULL }, +#if FEATURE_OES_EGL_image + { "GL_OES_EGL_image", GL_OES_EGL_image_functions }, +#endif { NULL, NULL } }; diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.c b/src/mesa/drivers/dri/r200/r200_swtcl.c index 38864162ced..c56a49d5ad6 100644 --- a/src/mesa/drivers/dri/r200/r200_swtcl.c +++ b/src/mesa/drivers/dri/r200/r200_swtcl.c @@ -319,10 +319,9 @@ static INLINE GLuint reduced_hw_prim( struct gl_context *ctx, GLuint prim) { switch (prim) { case GL_POINTS: - return (ctx->Point.PointSprite || - ((ctx->_TriangleCaps & (DD_POINT_SIZE | DD_POINT_ATTEN)) && - !(ctx->_TriangleCaps & (DD_POINT_SMOOTH)))) ? - R200_VF_PRIM_POINT_SPRITES : R200_VF_PRIM_POINTS; + return (((R200_CONTEXT(ctx))->radeon.radeonScreen->drmSupportsPointSprites && + !(ctx->_TriangleCaps & DD_POINT_SMOOTH)) ? + R200_VF_PRIM_POINT_SPRITES : R200_VF_PRIM_POINTS); case GL_LINES: /* fallthrough */ case GL_LINE_LOOP: diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c index 84db7c9d4eb..7aed116f0b3 100644 --- a/src/mesa/drivers/dri/r200/r200_tcl.c +++ b/src/mesa/drivers/dri/r200/r200_tcl.c @@ -68,9 +68,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define HAVE_ELTS 1 -#define HW_POINTS ((ctx->Point.PointSprite || \ - ((ctx->_TriangleCaps & (DD_POINT_SIZE | DD_POINT_ATTEN)) && \ - !(ctx->_TriangleCaps & (DD_POINT_SMOOTH)))) ? \ +#define HW_POINTS (((R200_CONTEXT(ctx))->radeon.radeonScreen->drmSupportsPointSprites && \ + !(ctx->_TriangleCaps & DD_POINT_SMOOTH)) ? \ R200_VF_PRIM_POINT_SPRITES : R200_VF_PRIM_POINTS) #define HW_LINES R200_VF_PRIM_LINES #define HW_LINE_LOOP 0 diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c index 5207c2901a3..064324731b5 100644 --- a/src/mesa/drivers/dri/r200/r200_tex.c +++ b/src/mesa/drivers/dri/r200/r200_tex.c @@ -537,6 +537,10 @@ void r200InitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *fu functions->MapTexture = radeonMapTexture; functions->UnmapTexture = radeonUnmapTexture; +#if FEATURE_OES_EGL_image + functions->EGLImageTargetTexture2D = radeon_image_target_texture_2d; +#endif + driInitTextureFormats(); } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c index 5927498818b..fd94194dc34 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -465,15 +465,16 @@ static void get_readers_normal_read_callback( { struct get_readers_callback_data * d = userdata; unsigned int read_mask; + unsigned int shared_mask; if (src->RelAddr) d->ReaderData->Abort = 1; - unsigned int shared_mask = rc_src_reads_dst_mask(src->File, src->Index, - src->Swizzle, - d->ReaderData->Writer->U.I.DstReg.File, - d->ReaderData->Writer->U.I.DstReg.Index, - d->AliveWriteMask); + shared_mask = rc_src_reads_dst_mask(src->File, src->Index, + src->Swizzle, + d->ReaderData->Writer->U.I.DstReg.File, + d->ReaderData->Writer->U.I.DstReg.Index, + d->AliveWriteMask); if (shared_mask == RC_MASK_NONE) return; @@ -624,6 +625,9 @@ void rc_get_readers_normal( data->Abort = 1; return; case RC_OPCODE_IF: + /* XXX We can do 
better here, but this will have to + * do until this dataflow analysis is more mature. */ + data->Abort = 1; branch_depth++; break; case RC_OPCODE_ELSE: diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index 5556927357b..15b9c5e7dc3 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -32,9 +32,11 @@ #include "radeon_compiler_util.h" #include "radeon_swizzle.h" -struct src_clobbered_data { - unsigned int NumSrcRegs; - unsigned int SrcMasks[3]; +struct src_clobbered_reads_cb_data { + rc_register_file File; + unsigned int Index; + unsigned int Mask; + struct rc_reader_data * ReaderData; }; typedef void (*rc_presub_replace_fn)(struct rc_instruction *, @@ -99,6 +101,25 @@ static void copy_propagate_scan_read(void * data, struct rc_instruction * inst, } } +static void src_clobbered_reads_cb( + void * data, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct src_clobbered_reads_cb_data * sc_data = data; + + if (src->File == sc_data->File + && src->Index == sc_data->Index + && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) { + + sc_data->ReaderData->AbortOnRead = 1; + } + + if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) { + sc_data->ReaderData->AbortOnRead = 1; + } +} + static void is_src_clobbered_scan_write( void * data, struct rc_instruction * inst, @@ -106,29 +127,19 @@ static void is_src_clobbered_scan_write( unsigned int index, unsigned int mask) { - unsigned int i; + struct src_clobbered_reads_cb_data sc_data; struct rc_reader_data * reader_data = data; - struct src_clobbered_data * d = reader_data->CbData; - for (i = 0; i < d->NumSrcRegs; i++) { - if (file == reader_data->Writer->U.I.SrcReg[i].File - && index == reader_data->Writer->U.I.SrcReg[i].Index - && (mask & d->SrcMasks[i])){ - - reader_data->AbortOnRead = 1; - return; - } - if (reader_data->Writer->U.I.SrcReg[i].RelAddr 
&& - file == RC_FILE_ADDRESS) { - reader_data->AbortOnRead = 1; - return; - } - } + sc_data.File = file; + sc_data.Index = index; + sc_data.Mask = mask; + sc_data.ReaderData = reader_data; + rc_for_all_reads_src(reader_data->Writer, + src_clobbered_reads_cb, &sc_data); } static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov) { struct rc_reader_data reader_data; - struct src_clobbered_data sc_data; unsigned int i; if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || @@ -137,12 +148,6 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i inst_mov->U.I.SaturateMode) return; - sc_data.NumSrcRegs = 1; - sc_data.SrcMasks[0] = rc_swizzle_to_writemask( - inst_mov->U.I.SrcReg[0].Swizzle); - - reader_data.CbData = &sc_data; - /* Get a list of all the readers of this MOV instruction. */ rc_get_readers_normal(c, inst_mov, &reader_data, copy_propagate_scan_read, is_src_clobbered_scan_write); @@ -203,8 +208,8 @@ static int is_src_uniform_constant(struct rc_src_register src, static void constant_folding_mad(struct rc_instruction * inst) { - rc_swizzle swz; - unsigned int negate; + rc_swizzle swz = 0; + unsigned int negate= 0; if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) { if (swz == RC_SWIZZLE_ZERO) { @@ -244,8 +249,8 @@ static void constant_folding_mad(struct rc_instruction * inst) static void constant_folding_mul(struct rc_instruction * inst) { - rc_swizzle swz; - unsigned int negate; + rc_swizzle swz = 0; + unsigned int negate = 0; if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { if (swz == RC_SWIZZLE_ONE) { @@ -277,8 +282,8 @@ static void constant_folding_mul(struct rc_instruction * inst) static void constant_folding_add(struct rc_instruction * inst) { - rc_swizzle swz; - unsigned int negate; + rc_swizzle swz = 0; + unsigned int negate = 0; if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { if (swz == RC_SWIZZLE_ZERO) { @@ -448,15 +453,8 @@ static int 
presub_helper( rc_presub_replace_fn presub_replace) { struct rc_reader_data reader_data; - struct src_clobbered_data sc_data; unsigned int i; - sc_data.NumSrcRegs = 2; - sc_data.SrcMasks[0] = rc_swizzle_to_writemask( - inst_add->U.I.SrcReg[0].Swizzle); - sc_data.SrcMasks[1] = rc_swizzle_to_writemask( - inst_add->U.I.SrcReg[1].Swizzle); - reader_data.CbData = &sc_data; rc_get_readers_normal(c, inst_add, &reader_data, presub_scan_read, is_src_clobbered_scan_write); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c index d4a38607d9e..553e9dcf7c1 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -290,6 +290,7 @@ static int merge_presub_sources( { unsigned int srcp_src, srcp_regs, is_rgb, is_alpha; struct rc_pair_sub_instruction * dst_sub; + const struct rc_opcode_info * info; assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP); @@ -309,8 +310,8 @@ static int merge_presub_sources( return 0; } - const struct rc_opcode_info * info = - rc_get_opcode_info(dst_full->RGB.Opcode); + info = rc_get_opcode_info(dst_full->RGB.Opcode); + if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used) return 0; diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index 9fbd36bfe63..c288834d243 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -86,6 +86,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define need_GL_EXT_stencil_two_side #define need_GL_ATI_separate_stencil #define need_GL_NV_vertex_program +#define need_GL_OES_EGL_image #include "main/remap_helper.h" @@ -134,6 +135,9 @@ static const struct dri_extension card_extensions[] = { {"GL_MESAX_texture_float", NULL}, {"GL_NV_blend_square", NULL}, {"GL_NV_vertex_program", GL_NV_vertex_program_functions}, +#if FEATURE_OES_EGL_image + {"GL_OES_EGL_image", GL_OES_EGL_image_functions }, +#endif {NULL, NULL} /* *INDENT-ON* */ }; diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c index 81769e1ee5f..0c4d8537c61 100644 --- a/src/mesa/drivers/dri/r300/r300_draw.c +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -717,6 +717,10 @@ static void r300DrawPrims(struct gl_context *ctx, GLuint max_index) { GLboolean retval; + struct r300_context *r300 = R300_CONTEXT(ctx); + radeonContextPtr radeon = &r300->radeon; + + radeon_prepare_render(radeon); /* This check should get folded into just the places that * min/max index are really needed. 
diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index 821318e7a59..44090ec2894 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -327,8 +327,6 @@ void r300RunRenderPrimitive(struct gl_context * ctx, int start, int end, int pri BATCH_LOCALS(&rmesa->radeon); int type, num_verts; - radeon_prepare_render(&rmesa->radeon); - type = r300PrimitiveType(rmesa, prim); num_verts = r300NumVerts(rmesa, end - start, prim); diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index a6bda0e4990..de662939992 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -382,5 +382,9 @@ void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *fun functions->GenerateMipmap = radeonGenerateMipmap; +#if FEATURE_OES_EGL_image + functions->EGLImageTargetTexture2D = radeon_image_target_texture_2d; +#endif + driInitTextureFormats(); } diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index c882a9cce9e..b6443bf0c53 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -94,6 +94,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define need_GL_EXT_stencil_two_side #define need_GL_ATI_separate_stencil #define need_GL_NV_vertex_program +#define need_GL_OES_EGL_image #include "main/remap_helper.h" @@ -148,6 +149,9 @@ static const struct dri_extension card_extensions[] = { {"GL_NV_vertex_program", GL_NV_vertex_program_functions}, {"GL_ARB_pixel_buffer_object", NULL}, {"GL_ARB_draw_elements_base_vertex", GL_ARB_draw_elements_base_vertex_functions }, +#if FEATURE_OES_EGL_image + {"GL_OES_EGL_image", GL_OES_EGL_image_functions}, +#endif {NULL, NULL} /* *INDENT-ON* */ }; diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c index d6a58f410cc..c3d68c41e57 100644 --- a/src/mesa/drivers/dri/r600/r600_tex.c +++ b/src/mesa/drivers/dri/r600/r600_tex.c @@ -475,5 +475,9 @@ void r600InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *fun functions->GenerateMipmap = radeonGenerateMipmap; +#if FEATURE_OES_EGL_image + functions->EGLImageTargetTexture2D = radeon_image_target_texture_2d; +#endif + driInitTextureFormats(); } diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index 43a6355ad8b..7361adffcf7 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -171,6 +171,10 @@ void radeonSetCliprects(radeonContextPtr radeon) { __DRIdrawable *const drawable = radeon_get_drawable(radeon); __DRIdrawable *const readable = radeon_get_readable(radeon); + + if(drawable == NULL && readable == NULL) + return; + struct radeon_framebuffer *const draw_rfb = drawable->driverPrivate; struct radeon_framebuffer *const read_rfb = readable->driverPrivate; int x_off, y_off; diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index 40544860b3b..a436ec112cc 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -39,6 +39,7 @@ WITH THE 
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "drirenderbuffer.h" #include "drivers/common/meta.h" #include "main/context.h" +#include "main/framebuffer.h" #include "main/renderbuffer.h" #include "main/state.h" #include "main/simple_list.h" @@ -251,9 +252,9 @@ GLboolean radeonInitContext(radeonContextPtr radeon, radeon->texture_rect_row_align = 512; radeon->texture_compressed_row_align = 512; } else { - radeon->texture_row_align = 256; - radeon->texture_rect_row_align = 256; - radeon->texture_compressed_row_align = 256; + radeon->texture_row_align = radeon->radeonScreen->group_bytes; + radeon->texture_rect_row_align = radeon->radeonScreen->group_bytes; + radeon->texture_compressed_row_align = radeon->radeonScreen->group_bytes; } } else if (IS_R200_CLASS(radeon->radeonScreen) || IS_R100_CLASS(radeon->radeonScreen)) { @@ -379,12 +380,12 @@ GLboolean radeonUnbindContext(__DRIcontext * driContextPriv) static void radeon_make_kernel_renderbuffer_current(radeonContextPtr radeon, - struct radeon_framebuffer *draw) + struct gl_framebuffer *draw) { /* if radeon->fake */ struct radeon_renderbuffer *rb; - if ((rb = (void *)draw->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer)) { + if ((rb = (void *)draw->Attachment[BUFFER_FRONT_LEFT].Renderbuffer)) { if (!rb->bo) { rb->bo = radeon_bo_open(radeon->radeonScreen->bom, radeon->radeonScreen->frontOffset, @@ -396,7 +397,7 @@ radeon_make_kernel_renderbuffer_current(radeonContextPtr radeon, rb->cpp = radeon->radeonScreen->cpp; rb->pitch = radeon->radeonScreen->frontPitch * rb->cpp; } - if ((rb = (void *)draw->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer)) { + if ((rb = (void *)draw->Attachment[BUFFER_BACK_LEFT].Renderbuffer)) { if (!rb->bo) { rb->bo = radeon_bo_open(radeon->radeonScreen->bom, radeon->radeonScreen->backOffset, @@ -408,7 +409,7 @@ radeon_make_kernel_renderbuffer_current(radeonContextPtr radeon, rb->cpp = radeon->radeonScreen->cpp; rb->pitch = radeon->radeonScreen->backPitch * rb->cpp; } - if ((rb = 
(void *)draw->base.Attachment[BUFFER_DEPTH].Renderbuffer)) { + if ((rb = (void *)draw->Attachment[BUFFER_DEPTH].Renderbuffer)) { if (!rb->bo) { rb->bo = radeon_bo_open(radeon->radeonScreen->bom, radeon->radeonScreen->depthOffset, @@ -420,7 +421,7 @@ radeon_make_kernel_renderbuffer_current(radeonContextPtr radeon, rb->cpp = radeon->radeonScreen->cpp; rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp; } - if ((rb = (void *)draw->base.Attachment[BUFFER_STENCIL].Renderbuffer)) { + if ((rb = (void *)draw->Attachment[BUFFER_STENCIL].Renderbuffer)) { if (!rb->bo) { rb->bo = radeon_bo_open(radeon->radeonScreen->bom, radeon->radeonScreen->depthOffset, @@ -436,7 +437,7 @@ radeon_make_kernel_renderbuffer_current(radeonContextPtr radeon, static void radeon_make_renderbuffer_current(radeonContextPtr radeon, - struct radeon_framebuffer *draw) + struct gl_framebuffer *draw) { int size = 4096*4096*4; /* if radeon->fake */ @@ -448,7 +449,7 @@ radeon_make_renderbuffer_current(radeonContextPtr radeon, } - if ((rb = (void *)draw->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer)) { + if ((rb = (void *)draw->Attachment[BUFFER_FRONT_LEFT].Renderbuffer)) { if (!rb->bo) { rb->bo = radeon_bo_open(radeon->radeonScreen->bom, radeon->radeonScreen->frontOffset + @@ -461,7 +462,7 @@ radeon_make_renderbuffer_current(radeonContextPtr radeon, rb->cpp = radeon->radeonScreen->cpp; rb->pitch = radeon->radeonScreen->frontPitch * rb->cpp; } - if ((rb = (void *)draw->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer)) { + if ((rb = (void *)draw->Attachment[BUFFER_BACK_LEFT].Renderbuffer)) { if (!rb->bo) { rb->bo = radeon_bo_open(radeon->radeonScreen->bom, radeon->radeonScreen->backOffset + @@ -474,7 +475,7 @@ radeon_make_renderbuffer_current(radeonContextPtr radeon, rb->cpp = radeon->radeonScreen->cpp; rb->pitch = radeon->radeonScreen->backPitch * rb->cpp; } - if ((rb = (void *)draw->base.Attachment[BUFFER_DEPTH].Renderbuffer)) { + if ((rb = (void *)draw->Attachment[BUFFER_DEPTH].Renderbuffer)) { if 
(!rb->bo) { rb->bo = radeon_bo_open(radeon->radeonScreen->bom, radeon->radeonScreen->depthOffset + @@ -487,7 +488,7 @@ radeon_make_renderbuffer_current(radeonContextPtr radeon, rb->cpp = radeon->radeonScreen->cpp; rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp; } - if ((rb = (void *)draw->base.Attachment[BUFFER_STENCIL].Renderbuffer)) { + if ((rb = (void *)draw->Attachment[BUFFER_STENCIL].Renderbuffer)) { if (!rb->bo) { rb->bo = radeon_bo_open(radeon->radeonScreen->bom, radeon->radeonScreen->depthOffset + @@ -793,8 +794,8 @@ GLboolean radeonMakeCurrent(__DRIcontext * driContextPriv, __DRIdrawable * driReadPriv) { radeonContextPtr radeon; - struct radeon_framebuffer *drfb; - struct gl_framebuffer *readfb; + struct radeon_framebuffer *rdrfb; + struct gl_framebuffer *drfb, *readfb; if (!driContextPriv) { if (RADEON_DEBUG & RADEON_DRI) @@ -804,17 +805,25 @@ GLboolean radeonMakeCurrent(__DRIcontext * driContextPriv, } radeon = (radeonContextPtr) driContextPriv->driverPrivate; - drfb = driDrawPriv->driverPrivate; - readfb = driReadPriv->driverPrivate; + + if(driDrawPriv == NULL && driReadPriv == NULL) { + drfb = _mesa_create_framebuffer(&radeon->glCtx->Visual); + readfb = drfb; + } + else { + drfb = driDrawPriv->driverPrivate; + readfb = driReadPriv->driverPrivate; + } if (driContextPriv->driScreenPriv->dri2.enabled) { - radeon_update_renderbuffers(driContextPriv, driDrawPriv, GL_FALSE); + if(driDrawPriv) + radeon_update_renderbuffers(driContextPriv, driDrawPriv, GL_FALSE); if (driDrawPriv != driReadPriv) radeon_update_renderbuffers(driContextPriv, driReadPriv, GL_FALSE); _mesa_reference_renderbuffer(&radeon->state.color.rb, - &(radeon_get_renderbuffer(&drfb->base, BUFFER_BACK_LEFT)->base)); + &(radeon_get_renderbuffer(drfb, BUFFER_BACK_LEFT)->base)); _mesa_reference_renderbuffer(&radeon->state.depth.rb, - &(radeon_get_renderbuffer(&drfb->base, BUFFER_DEPTH)->base)); + &(radeon_get_renderbuffer(drfb, BUFFER_DEPTH)->base)); } else { 
radeon_make_renderbuffer_current(radeon, drfb); } @@ -822,35 +831,40 @@ GLboolean radeonMakeCurrent(__DRIcontext * driContextPriv, if (RADEON_DEBUG & RADEON_DRI) fprintf(stderr, "%s ctx %p dfb %p rfb %p\n", __FUNCTION__, radeon->glCtx, drfb, readfb); - driUpdateFramebufferSize(radeon->glCtx, driDrawPriv); + if(driDrawPriv) + driUpdateFramebufferSize(radeon->glCtx, driDrawPriv); if (driReadPriv != driDrawPriv) driUpdateFramebufferSize(radeon->glCtx, driReadPriv); - _mesa_make_current(radeon->glCtx, &drfb->base, readfb); + _mesa_make_current(radeon->glCtx, drfb, readfb); + if (driDrawPriv == NULL && driReadPriv == NULL) + _mesa_reference_framebuffer(&drfb, NULL); _mesa_update_state(radeon->glCtx); - if (radeon->glCtx->DrawBuffer == &drfb->base) { - if (driDrawPriv->swap_interval == (unsigned)-1) { - int i; - driDrawPriv->vblFlags = - (radeon->radeonScreen->irq != 0) - ? driGetDefaultVBlankFlags(&radeon-> - optionCache) - : VBLANK_FLAG_NO_IRQ; - - driDrawableInitVBlank(driDrawPriv); - drfb->vbl_waited = driDrawPriv->vblSeq; - - for (i = 0; i < 2; i++) { - if (drfb->color_rb[i]) - drfb->color_rb[i]->vbl_pending = driDrawPriv->vblSeq; + if (radeon->glCtx->DrawBuffer == drfb) { + if(driDrawPriv != NULL) { + rdrfb = (struct radeon_framebuffer *)drfb; + if (driDrawPriv->swap_interval == (unsigned)-1) { + int i; + driDrawPriv->vblFlags = + (radeon->radeonScreen->irq != 0) + ? 
driGetDefaultVBlankFlags(&radeon-> + optionCache) + : VBLANK_FLAG_NO_IRQ; + + driDrawableInitVBlank(driDrawPriv); + rdrfb->vbl_waited = driDrawPriv->vblSeq; + + for (i = 0; i < 2; i++) { + if (rdrfb->color_rb[i]) + rdrfb->color_rb[i]->vbl_pending = driDrawPriv->vblSeq; + } } - + radeon_window_moved(radeon); } - radeon_window_moved(radeon); - radeon_draw_buffer(radeon->glCtx, &drfb->base); + radeon_draw_buffer(radeon->glCtx, drfb); } diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c index cc9590213c4..e3de534b5f7 100644 --- a/src/mesa/drivers/dri/radeon/radeon_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_context.c @@ -66,6 +66,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define need_GL_EXT_fog_coord #define need_GL_EXT_secondary_color #define need_GL_EXT_framebuffer_object +#define need_GL_OES_EGL_image #include "main/remap_helper.h" #define DRIVER_DATE "20061018" @@ -101,6 +102,9 @@ static const struct dri_extension card_extensions[] = { "GL_ATI_texture_mirror_once", NULL }, { "GL_MESA_ycbcr_texture", NULL }, { "GL_NV_blend_square", NULL }, +#if FEATURE_OES_EGL_image + { "GL_OES_EGL_image", GL_OES_EGL_image_functions }, +#endif { NULL, NULL } }; diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c index 2a6fbaeaf09..a36a1dc94ac 100644 --- a/src/mesa/drivers/dri/radeon/radeon_fbo.c +++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c @@ -199,6 +199,48 @@ radeon_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffe } +#if FEATURE_OES_EGL_image +static void +radeon_image_target_renderbuffer_storage(struct gl_context *ctx, + struct gl_renderbuffer *rb, + void *image_handle) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + struct radeon_renderbuffer *rrb; + __DRIscreen *screen; + __DRIimage *image; + + screen = radeon->radeonScreen->driScreen; + image = screen->dri2.image->lookupEGLImage(screen, image_handle, + 
screen->loaderPrivate); + if (image == NULL) + return; + + rrb = radeon_renderbuffer(rb); + + if (ctx->Driver.Flush) + ctx->Driver.Flush(ctx); /* +r6/r7 */ + + if (rrb->bo) + radeon_bo_unref(rrb->bo); + rrb->bo = image->bo; + radeon_bo_ref(rrb->bo); + fprintf(stderr, "image->bo: %p, name: %d, rbs: w %d -> p %d\n", image->bo, image->bo->handle, + image->width, image->pitch); + + rrb->cpp = image->cpp; + rrb->pitch = image->pitch * image->cpp; + + rb->Format = image->format; + rb->InternalFormat = image->internal_format; + rb->Width = image->width; + rb->Height = image->height; + rb->Format = image->format; + rb->DataType = image->data_type; + rb->_BaseFormat = _mesa_base_fbo_format(radeon->glCtx, + image->internal_format); +} +#endif /** * Called for each hardware renderbuffer when a _window_ is resized. @@ -622,6 +664,10 @@ void radeon_fbo_init(struct radeon_context *radeon) #if FEATURE_EXT_framebuffer_blit radeon->glCtx->Driver.BlitFramebuffer = _mesa_meta_BlitFramebuffer; #endif +#if FEATURE_OES_EGL_image + radeon->glCtx->Driver.EGLImageTargetRenderbufferStorage = + radeon_image_target_renderbuffer_storage; +#endif } diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 43ebc810939..1ea52f96d7e 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -41,12 +41,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "main/mtypes.h" #include "main/framebuffer.h" #include "main/renderbuffer.h" +#include "main/fbobject.h" #define STANDALONE_MMIO #include "radeon_chipset.h" #include "radeon_macros.h" #include "radeon_screen.h" #include "radeon_common.h" +#include "radeon_common_context.h" #if defined(RADEON_R100) #include "radeon_context.h" #include "radeon_tex.h" @@ -398,6 +400,188 @@ static const struct __DRI2flushExtensionRec radeonFlushExtension = { dri2InvalidateDrawable, }; +static __DRIimage * +radeon_create_image_from_name(__DRIcontext *context, + int width, int height, int format, + int name, int pitch, void *loaderPrivate) +{ + __DRIimage *image; + radeonContextPtr radeon = context->driverPrivate; + + if (name == 0) + return NULL; + + image = CALLOC(sizeof *image); + if (image == NULL) + return NULL; + + switch (format) { + case __DRI_IMAGE_FORMAT_RGB565: + image->format = MESA_FORMAT_RGB565; + image->internal_format = GL_RGB; + image->data_type = GL_UNSIGNED_BYTE; + break; + case __DRI_IMAGE_FORMAT_XRGB8888: + image->format = MESA_FORMAT_XRGB8888; + image->internal_format = GL_RGB; + image->data_type = GL_UNSIGNED_BYTE; + break; + case __DRI_IMAGE_FORMAT_ARGB8888: + image->format = MESA_FORMAT_ARGB8888; + image->internal_format = GL_RGBA; + image->data_type = GL_UNSIGNED_BYTE; + break; + default: + free(image); + return NULL; + } + + image->data = loaderPrivate; + image->cpp = _mesa_get_format_bytes(image->format); + image->width = width; + image->pitch = pitch; + image->height = height; + + image->bo = radeon_bo_open(radeon->radeonScreen->bom, + (uint32_t)name, + image->pitch * image->height * image->cpp, + 0, + RADEON_GEM_DOMAIN_VRAM, + 0); + + if (image->bo == NULL) { + FREE(image); + return NULL; + } + + return image; +} + +static __DRIimage * +radeon_create_image_from_renderbuffer(__DRIcontext *context, + int renderbuffer, void *loaderPrivate) +{ + __DRIimage *image; + radeonContextPtr radeon = context->driverPrivate; + struct gl_renderbuffer *rb; + struct 
radeon_renderbuffer *rrb; + + rb = _mesa_lookup_renderbuffer(radeon->glCtx, renderbuffer); + if (!rb) { + _mesa_error(radeon->glCtx, + GL_INVALID_OPERATION, "glRenderbufferExternalMESA"); + return NULL; + } + + rrb = radeon_renderbuffer(rb); + image = CALLOC(sizeof *image); + if (image == NULL) + return NULL; + + image->internal_format = rb->InternalFormat; + image->format = rb->Format; + image->cpp = rrb->cpp; + image->data_type = rb->DataType; + image->data = loaderPrivate; + radeon_bo_ref(rrb->bo); + image->bo = rrb->bo; + + image->width = rb->Width; + image->height = rb->Height; + image->pitch = rrb->pitch / image->cpp; + + return image; +} + +static void +radeon_destroy_image(__DRIimage *image) +{ + radeon_bo_unref(image->bo); + FREE(image); +} + +static __DRIimage * +radeon_create_image(__DRIscreen *screen, + int width, int height, int format, + unsigned int use, + void *loaderPrivate) +{ + __DRIimage *image; + radeonScreenPtr radeonScreen = screen->private; + + image = CALLOC(sizeof *image); + if (image == NULL) + return NULL; + + switch (format) { + case __DRI_IMAGE_FORMAT_RGB565: + image->format = MESA_FORMAT_RGB565; + image->internal_format = GL_RGB; + image->data_type = GL_UNSIGNED_BYTE; + break; + case __DRI_IMAGE_FORMAT_XRGB8888: + image->format = MESA_FORMAT_XRGB8888; + image->internal_format = GL_RGB; + image->data_type = GL_UNSIGNED_BYTE; + break; + case __DRI_IMAGE_FORMAT_ARGB8888: + image->format = MESA_FORMAT_ARGB8888; + image->internal_format = GL_RGBA; + image->data_type = GL_UNSIGNED_BYTE; + break; + default: + free(image); + return NULL; + } + + image->data = loaderPrivate; + image->cpp = _mesa_get_format_bytes(image->format); + image->width = width; + image->height = height; + image->pitch = ((image->cpp * image->width + 255) & ~255) / image->cpp; + + image->bo = radeon_bo_open(radeonScreen->bom, + 0, + image->pitch * image->height * image->cpp, + 0, + RADEON_GEM_DOMAIN_VRAM, + 0); + + if (image->bo == NULL) { + FREE(image); + return NULL; + 
} + + return image; +} + +static GLboolean +radeon_query_image(__DRIimage *image, int attrib, int *value) +{ + switch (attrib) { + case __DRI_IMAGE_ATTRIB_STRIDE: + *value = image->pitch * image->cpp; + return GL_TRUE; + case __DRI_IMAGE_ATTRIB_HANDLE: + *value = image->bo->handle; + return GL_TRUE; + case __DRI_IMAGE_ATTRIB_NAME: + radeon_gem_get_kernel_name(image->bo, (uint32_t *) value); + return GL_TRUE; + default: + return GL_FALSE; + } +} + +static struct __DRIimageExtensionRec radeonImageExtension = { + { __DRI_IMAGE, __DRI_IMAGE_VERSION }, + radeon_create_image_from_name, + radeon_create_image_from_renderbuffer, + radeon_destroy_image, + radeon_create_image, + radeon_query_image +}; + static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) { screen->device_id = device_id; @@ -1138,6 +1322,8 @@ radeonCreateScreen( __DRIscreen *sPriv ) else screen->chip_flags |= RADEON_CLASS_R600; + /* set group bytes for r6xx+ */ + screen->group_bytes = 256; screen->cpp = dri_priv->bpp / 8; screen->AGPMode = dri_priv->AGPMode; @@ -1382,7 +1568,8 @@ radeonCreateScreen2(__DRIscreen *sPriv) else screen->chip_flags |= RADEON_CLASS_R600; - /* r6xx+ tiling */ + /* r6xx+ tiling, default to 256 group bytes */ + screen->group_bytes = 256; if (IS_R600_CLASS(screen) && (sPriv->drm_version.minor >= 6)) { ret = radeonGetParam(sPriv, RADEON_INFO_TILE_CONFIG, &temp); if (ret) @@ -1507,6 +1694,7 @@ radeonCreateScreen2(__DRIscreen *sPriv) #endif screen->extensions[i++] = &radeonFlushExtension.base; + screen->extensions[i++] = &radeonImageExtension.base; screen->extensions[i++] = NULL; sPriv->extensions = screen->extensions; diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h index 2b33201a538..417ebf3b067 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.h +++ b/src/mesa/drivers/dri/radeon/radeon_screen.h @@ -121,6 +121,17 @@ typedef struct radeon_screen { GLint r7xx_bank_op; } radeonScreenRec, *radeonScreenPtr; 
+struct __DRIimageRec { + struct radeon_bo *bo; + GLenum internal_format; + GLuint format; + GLenum data_type; + int width, height; /* in pixels */ + int pitch; /* in pixels */ + int cpp; + void *data; +}; + #define IS_R100_CLASS(screen) \ ((screen->chip_flags & RADEON_CLASS_MASK) == RADEON_CLASS_R100) #define IS_R200_CLASS(screen) \ diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c index d5285e24cd5..83b1d1b1d74 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex.c @@ -465,5 +465,9 @@ void radeonInitTextureFuncs( radeonContextPtr radeon, struct dd_function_table * functions->MapTexture = radeonMapTexture; functions->UnmapTexture = radeonUnmapTexture; +#if FEATURE_OES_EGL_image + functions->EGLImageTargetTexture2D = radeon_image_target_texture_2d; +#endif + driInitTextureFormats(); } diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c index 18ccb512d7a..8b1e34fe766 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.c +++ b/src/mesa/drivers/dri/radeon/radeon_texture.c @@ -1007,3 +1007,67 @@ unsigned radeonIsFormatRenderable(gl_format mesa_format) return 0; } } + +#if FEATURE_OES_EGL_image +void radeon_image_target_texture_2d(struct gl_context *ctx, GLenum target, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage, + GLeglImageOES image_handle) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + radeonTexObj *t = radeon_tex_obj(texObj); + radeon_texture_image *radeonImage = get_radeon_texture_image(texImage); + __DRIscreen *screen; + __DRIimage *image; + + screen = radeon->dri.screen; + image = screen->dri2.image->lookupEGLImage(screen, image_handle, + screen->loaderPrivate); + if (image == NULL) + return; + + radeonFreeTexImageData(ctx, texImage); + + texImage->Width = image->width; + texImage->Height = image->height; + texImage->Depth = 1; + texImage->_BaseFormat = GL_RGBA; + 
texImage->TexFormat = image->format; + texImage->RowStride = image->pitch; + texImage->InternalFormat = image->internal_format; + + if(t->mt) + { + radeon_miptree_unreference(&t->mt); + t->mt = NULL; + } + + /* NOTE: The following is *very* ugly and will probably break. But + I don't know how to deal with it, without creating a whole new + function like radeon_miptree_from_bo() so I'm going with the + easy but error-prone way. */ + + radeon_try_alloc_miptree(radeon, t); + + radeonImage->mtface = _mesa_tex_target_to_face(target); + radeonImage->mtlevel = 0; + radeon_miptree_reference(t->mt, &radeonImage->mt); + + if (t->mt == NULL) + { + radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, + "%s Failed to allocate miptree.\n", __func__); + return; + } + + /* Particularly ugly: this is guaranteed to break, if image->bo is + not of the required size for a miptree. */ + radeon_bo_unref(t->mt->bo); + radeon_bo_ref(image->bo); + t->mt->bo = image->bo; + + if (!radeon_miptree_matches_image(t->mt, &radeonImage->base, + radeonImage->mtface, 0)) + fprintf(stderr, "miptree doesn't match image\n"); +} +#endif diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.h b/src/mesa/drivers/dri/radeon/radeon_texture.h index 9138a7d5548..a1908c6bc72 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.h +++ b/src/mesa/drivers/dri/radeon/radeon_texture.h @@ -137,4 +137,11 @@ void radeonCopyTexSubImage2D(struct gl_context *ctx, GLenum target, GLint level, unsigned radeonIsFormatRenderable(gl_format mesa_format); +#if FEATURE_OES_EGL_image +void radeon_image_target_texture_2d(struct gl_context *ctx, GLenum target, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage, + GLeglImageOES image_handle); +#endif + #endif diff --git a/src/mesa/drivers/dri/savage/savage_xmesa.c b/src/mesa/drivers/dri/savage/savage_xmesa.c index b3aaa0e504e..92fb4f44884 100644 --- a/src/mesa/drivers/dri/savage/savage_xmesa.c +++ b/src/mesa/drivers/dri/savage/savage_xmesa.c @@ -50,7 +50,6 @@ #include 
"savagespan.h" #include "savagetris.h" #include "savageioctl.h" -#include "savage_bci.h" #include "savage_dri.h" |