diff options
Diffstat (limited to 'src/gallium/drivers/r300')
-rw-r--r-- | src/gallium/drivers/r300/r300_blit.c | 74 | ||||
-rw-r--r-- | src/gallium/drivers/r300/r300_chipset.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r300/r300_context.c | 38 | ||||
-rw-r--r-- | src/gallium/drivers/r300/r300_context.h | 50 | ||||
-rw-r--r-- | src/gallium/drivers/r300/r300_emit.c | 132 | ||||
-rw-r--r-- | src/gallium/drivers/r300/r300_flush.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r300/r300_hyperz.c | 212 | ||||
-rw-r--r-- | src/gallium/drivers/r300/r300_hyperz.h | 35 | ||||
-rw-r--r-- | src/gallium/drivers/r300/r300_reg.h | 4 | ||||
-rw-r--r-- | src/gallium/drivers/r300/r300_render.c | 22 | ||||
-rw-r--r-- | src/gallium/drivers/r300/r300_screen.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/r300/r300_state.c | 68 | ||||
-rw-r--r-- | src/gallium/drivers/r300/r300_state_derived.c | 24 | ||||
-rw-r--r-- | src/gallium/drivers/r300/r300_texture.c | 60 | ||||
-rw-r--r-- | src/gallium/drivers/r300/r300_texture_desc.c | 118 | ||||
-rw-r--r-- | src/gallium/drivers/r300/r300_winsys.h | 4 |
16 files changed, 383 insertions, 466 deletions
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 4f86db39926..6391ea7f3be 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -22,7 +22,6 @@ #include "r300_context.h" #include "r300_emit.h" -#include "r300_hyperz.h" #include "r300_texture.h" #include "r300_winsys.h" @@ -117,6 +116,14 @@ static boolean r300_fast_zclear_allowed(struct r300_context *r300) return r300_resource(fb->zsbuf->texture)->tex.zmask_dwords[fb->zsbuf->u.tex.level]; } +static boolean r300_hiz_clear_allowed(struct r300_context *r300) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + + return r300_resource(fb->zsbuf->texture)->tex.hiz_dwords[fb->zsbuf->u.tex.level]; +} + static uint32_t r300_depth_clear_value(enum pipe_format format, double depth, unsigned stencil) { @@ -134,6 +141,13 @@ static uint32_t r300_depth_clear_value(enum pipe_format format, } } +static uint32_t r300_hiz_clear_value(double depth) +{ + uint32_t r = (uint32_t)(CLAMP(depth, 0, 1) * 255.5); + assert(r <= 255); + return r | (r << 8) | (r << 16) | (r << 24); +} + /* Clear currently bound buffers. */ static void r300_clear(struct pipe_context* pipe, unsigned buffers, @@ -190,8 +204,6 @@ static void r300_clear(struct pipe_context* pipe, (struct pipe_framebuffer_state*)r300->fb_state.state; struct r300_hyperz_state *hyperz = (struct r300_hyperz_state*)r300->hyperz_state.state; - struct r300_resource *zstex = - fb->zsbuf ? r300_resource(fb->zsbuf->texture) : NULL; uint32_t width = fb->width; uint32_t height = fb->height; boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); @@ -200,20 +212,18 @@ static void r300_clear(struct pipe_context* pipe, /* Enable fast Z clear. * The zbuffer must be in micro-tiled mode, otherwise it locks up. */ if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && can_hyperz) { - hyperz_dcv = hyperz->zb_depthclearvalue = - r300_depth_clear_value(fb->zsbuf->format, depth, stencil); - if (r300_fast_zclear_allowed(r300)) { + hyperz_dcv = hyperz->zb_depthclearvalue = + r300_depth_clear_value(fb->zsbuf->format, depth, stencil); + r300_mark_atom_dirty(r300, &r300->zmask_clear); buffers &= ~PIPE_CLEAR_DEPTHSTENCIL; } - if (zstex->hiz_mem[fb->zsbuf->u.tex.level]) + if (r300_hiz_clear_allowed(r300)) { + r300->hiz_clear_value = r300_hiz_clear_value(depth); r300_mark_atom_dirty(r300, &r300->hiz_clear); - - /* XXX Change this to r300_mark_atom_dirty(r300, &r300->hyperz_state); - * once hiz offset is constant. */ - r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG); + } } /* Enable CBZB clear. */ @@ -240,14 +250,14 @@ static void r300_clear(struct pipe_context* pipe, fb->nr_cbufs, buffers, rgba, depth, stencil); r300_blitter_end(r300); - } else if (r300->zmask_clear.dirty) { + } else if (r300->zmask_clear.dirty || r300->hiz_clear.dirty) { /* Just clear zmask and hiz now, this does not use the standard draw * procedure. */ unsigned dwords; /* Calculate zmask_clear and hiz_clear atom sizes. */ r300_update_hyperz_state(r300); - dwords = r300->zmask_clear.size + + dwords = (r300->zmask_clear.dirty ? r300->zmask_clear.size : 0) + (r300->hiz_clear.dirty ? r300->hiz_clear.size : 0) + r300_get_num_cs_end_dwords(r300); @@ -257,9 +267,11 @@ static void r300_clear(struct pipe_context* pipe, } /* Emit clear packets. */ - r300_emit_zmask_clear(r300, r300->zmask_clear.size, - r300->zmask_clear.state); - r300->zmask_clear.dirty = FALSE; + if (r300->zmask_clear.dirty) { + r300_emit_zmask_clear(r300, r300->zmask_clear.size, + r300->zmask_clear.state); + r300->zmask_clear.dirty = FALSE; + } if (r300->hiz_clear.dirty) { r300_emit_hiz_clear(r300, r300->hiz_clear.size, r300->hiz_clear.state); @@ -279,9 +291,8 @@ static void r300_clear(struct pipe_context* pipe, /* Enable fastfill and/or hiz. * * If we cleared zmask/hiz, it's in use now. The Hyper-Z state update - * looks if zmask/hiz is in use and enables fastfill accordingly. */ - if (r300->zmask_in_use || - (zstex && zstex->hiz_in_use[fb->zsbuf->u.tex.level])) { + * looks if zmask/hiz is in use and programs hardware accordingly. */ + if (r300->zmask_in_use || r300->hiz_in_use) { r300_mark_atom_dirty(r300, &r300->hyperz_state); } } @@ -295,7 +306,7 @@ static void r300_clear_render_target(struct pipe_context *pipe, { struct r300_context *r300 = r300_context(pipe); - r300->zmask_locked = TRUE; + r300->hyperz_locked = TRUE; r300_mark_atom_dirty(r300, &r300->hyperz_state); r300_blitter_begin(r300, R300_CLEAR_SURFACE); @@ -303,7 +314,7 @@ static void r300_clear_render_target(struct pipe_context *pipe, dstx, dsty, width, height); r300_blitter_end(r300); - r300->zmask_locked = FALSE; + r300->hyperz_locked = FALSE; r300_mark_atom_dirty(r300, &r300->hyperz_state); } @@ -320,11 +331,11 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe, struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - if (r300->zmask_in_use && !r300->zmask_locked) { + if (r300->zmask_in_use && !r300->hyperz_locked) { if (fb->zsbuf->texture == dst->texture) { r300_decompress_zmask(r300); } else { - r300->zmask_locked = TRUE; + r300->hyperz_locked = TRUE; r300_mark_atom_dirty(r300, &r300->hyperz_state); } } @@ -334,8 +345,8 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe, dstx, dsty, width, height); r300_blitter_end(r300); - if (r300->zmask_locked) { - r300->zmask_locked = FALSE; + if (r300->hyperz_locked) { + r300->hyperz_locked = FALSE; r300_mark_atom_dirty(r300, &r300->hyperz_state); } } @@ -345,7 +356,7 @@ void r300_decompress_zmask(struct r300_context *r300) struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - if (!r300->zmask_in_use || r300->zmask_locked) + if (!r300->zmask_in_use || r300->hyperz_locked) return; r300->zmask_decompress = TRUE; @@ -420,12 +431,12 @@ static void r300_resource_copy_region(struct pipe_context *pipe, util_format_description(dst->format); struct pipe_box box; - if (r300->zmask_in_use && !r300->zmask_locked) { + if (r300->zmask_in_use && !r300->hyperz_locked) { if (fb->zsbuf->texture == src || fb->zsbuf->texture == dst) { r300_decompress_zmask(r300); } else { - r300->zmask_locked = TRUE; + r300->hyperz_locked = TRUE; r300_mark_atom_dirty(r300, &r300->hyperz_state); } } @@ -463,7 +474,8 @@ static void r300_resource_copy_region(struct pipe_context *pipe, } /* Handle compressed formats. */ - if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { + if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC || + desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { switch (util_format_get_blocksize(old_dst.format)) { case 8: /* 1 pixel = 4 bits, @@ -502,8 +514,8 @@ static void r300_resource_copy_region(struct pipe_context *pipe, if (old_dst.format != new_dst.format) r300_resource_set_properties(pipe->screen, dst, 0, &old_dst); - if (r300->zmask_locked) { - r300->zmask_locked = FALSE; + if (r300->hyperz_locked) { + r300->hyperz_locked = FALSE; r300_mark_atom_dirty(r300, &r300->hyperz_state); } } diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index 0be161fa07a..68943d561ba 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -90,8 +90,6 @@ struct r300_capabilities { boolean high_second_pipe; /* DXTC texture swizzling. */ boolean dxtc_swizzle; - /* Index bias (AKA index offset). */ - boolean index_bias_supported; }; /* Enumerations for legibility and telling which card we're running on. */ diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index d422ffe03f8..166d965aa5b 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -30,7 +30,6 @@ #include "r300_cb.h" #include "r300_context.h" #include "r300_emit.h" -#include "r300_hyperz.h" #include "r300_screen.h" #include "r300_screen_buffer.h" #include "r300_winsys.h" @@ -170,7 +169,6 @@ static boolean r300_setup_atoms(struct r300_context* r300) boolean is_rv350 = r300->screen->caps.is_rv350; boolean is_r500 = r300->screen->caps.is_r500; boolean has_tcl = r300->screen->caps.has_tcl; - boolean drm_2_3_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); boolean drm_2_6_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_6_0); boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); boolean has_hiz_ram = r300->screen->caps.hiz_ram > 0; @@ -203,11 +201,11 @@ static boolean r300_setup_atoms(struct r300_context* r300) /* SC. */ R300_INIT_ATOM(scissor_state, 3); /* GB, FG, GA, SU, SC, RB3D. */ - R300_INIT_ATOM(invariant_state, 18 + (is_rv350 ? 4 : 0)); + R300_INIT_ATOM(invariant_state, 18 + (is_rv350 ? 4 : 0) + (is_r500 ? 4 : 0)); /* VAP. */ R300_INIT_ATOM(viewport_state, 9); R300_INIT_ATOM(pvs_flush, 2); - R300_INIT_ATOM(vap_invariant_state, 9); + R300_INIT_ATOM(vap_invariant_state, is_r500 ? 11 : 9); R300_INIT_ATOM(vertex_stream_state, 0); R300_INIT_ATOM(vs_state, 0); R300_INIT_ATOM(vs_constants, 0); @@ -216,7 +214,7 @@ static boolean r300_setup_atoms(struct r300_context* r300) R300_INIT_ATOM(rs_block_state, 0); R300_INIT_ATOM(rs_state, 0); /* SC, US. */ - R300_INIT_ATOM(fb_state_pipelined, 5 + (drm_2_3_0 ? 3 : 0)); + R300_INIT_ATOM(fb_state_pipelined, 8); /* US. */ R300_INIT_ATOM(fs, 0); R300_INIT_ATOM(fs_rc_constant_state, 0); @@ -227,7 +225,7 @@ static boolean r300_setup_atoms(struct r300_context* r300) if (can_hyperz) { /* HiZ Clear */ if (has_hiz_ram) - R300_INIT_ATOM(hiz_clear, 0); + R300_INIT_ATOM(hiz_clear, 4); /* zmask clear */ R300_INIT_ATOM(zmask_clear, 4); } @@ -331,7 +329,7 @@ static void r300_init_states(struct pipe_context *pipe) /* Initialize the VAP invariant state. */ { - BEGIN_CB(vap_invariant->cb, 9); + BEGIN_CB(vap_invariant->cb, r300->vap_invariant_state.size); OUT_CB_REG(VAP_PVS_VTX_TIMEOUT_REG, 0xffff); OUT_CB_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4); OUT_CB_32F(1.0); @@ -339,6 +337,10 @@ static void r300_init_states(struct pipe_context *pipe) OUT_CB_32F(1.0); OUT_CB_32F(1.0); OUT_CB_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO); + + if (r300->screen->caps.is_r500) { + OUT_CB_REG(R500_VAP_TEX_TO_COLOR_CNTL, 0); + } END_CB; } @@ -359,6 +361,11 @@ static void r300_init_states(struct pipe_context *pipe) OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101); OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE); } + + if (r300->screen->caps.is_r500) { + OUT_CB_REG(R500_GA_COLOR_CONTROL_PS3, 0); + OUT_CB_REG(R500_SU_TEX_WRAP_PS3, 0); + } END_CB; } @@ -447,16 +454,10 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, /* Render functions must be initialized after blitter. */ r300_init_render_functions(r300); + r300_init_states(&r300->context); rws->cs_set_flush(r300->cs, r300_flush_cb, r300); - /* setup hyper-z mm */ - if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) - if (!r300_hyperz_init_mm(r300)) - goto fail; - - r300_init_states(&r300->context); - /* The KIL opcode needs the first texture unit to be enabled * on r3xx-r4xx. In order to calm down the CS checker, we bind this * dummy texture there. */ @@ -507,10 +508,10 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, } /* Print driver info. */ -#ifdef NDEBUG - if (DBG_ON(r300, DBG_INFO)) { -#else +#ifdef DEBUG { +#else + if (DBG_ON(r300, DBG_INFO)) { #endif fprintf(stderr, "r300: DRM version: %d.%d.%d, Name: %s, ID: 0x%04x, GB: %d, Z: %d\n" @@ -526,7 +527,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, rws->get_value(rws, R300_VID_GART_SIZE) >> 20, rws->get_value(rws, R300_VID_VRAM_SIZE) >> 20, rws->get_value(rws, R300_CAN_AACOMPRESS) ? "YES" : "NO", - rws->get_value(rws, R300_CAN_HYPERZ) ? "YES" : "NO", + rws->get_value(rws, R300_CAN_HYPERZ) && + r300->screen->caps.zmask_ram ? "YES" : "NO", rws->get_value(rws, R300_CAN_HYPERZ) && r300->screen->caps.hiz_ram ? "YES" : "NO"); } diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index e9c7d7bf63f..6f2aab69ab1 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -102,7 +102,6 @@ struct r300_dsa_state { }; struct r300_hyperz_state { - int current_func; /* -1 after a clear before first op */ int flush; /* This is actually a command buffer with named dwords. */ uint32_t cb_flush_begin; @@ -220,11 +219,11 @@ struct r300_vertex_stream_state { }; struct r300_invariant_state { - uint32_t cb[22]; + uint32_t cb[26]; }; struct r300_vap_invariant_state { - uint32_t cb[9]; + uint32_t cb[11]; }; struct r300_viewport_state { @@ -295,6 +294,8 @@ struct r300_surface { uint32_t offset; /* COLOROFFSET or DEPTHOFFSET. */ uint32_t pitch; /* COLORPITCH or DEPTHPITCH. */ + uint32_t pitch_zmask; /* ZMASK_PITCH */ + uint32_t pitch_hiz; /* HIZ_PITCH */ uint32_t format; /* US_OUT_FMT or ZB_FORMAT. */ /* Parameters dedicated to the CBZB clear. */ @@ -363,8 +364,12 @@ struct r300_texture_desc { /* Zbuffer compression info for each miplevel. */ boolean zcomp8x8[R300_MAX_TEXTURE_LEVELS]; - /* If zero, then disable compression. */ + /* If zero, then disable Z compression/HiZ. */ unsigned zmask_dwords[R300_MAX_TEXTURE_LEVELS]; + unsigned hiz_dwords[R300_MAX_TEXTURE_LEVELS]; + /* Zmask/HiZ strides for each miplevel. */ + unsigned zmask_stride_in_pixels[R300_MAX_TEXTURE_LEVELS]; + unsigned hiz_stride_in_pixels[R300_MAX_TEXTURE_LEVELS]; }; struct r300_resource @@ -390,10 +395,6 @@ struct r300_resource /* Where the texture starts in the buffer. */ unsigned tex_offset; - /* HiZ memory allocations. */ - struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS]; - boolean hiz_in_use[R300_MAX_TEXTURE_LEVELS]; - /* This is the level tiling flags were last time set for. * It's used to prevent redundant tiling-flags changes from happening.*/ unsigned surface_level; @@ -412,6 +413,21 @@ struct r300_vertex_element_state { struct r300_vertex_stream_state vertex_stream; }; +enum r300_hiz_func { + HIZ_FUNC_NONE, + + /* The function, when determined, is set in stone + * until the next HiZ clear. */ + + /* MAX is written to the HiZ buffer. + * Used for LESS, LEQUAL. */ + HIZ_FUNC_MAX, + + /* MIN is written to the HiZ buffer. + * Used for GREATER, GEQUAL. */ + HIZ_FUNC_MIN, +}; + struct r300_context { /* Parent class */ struct pipe_context context; @@ -545,22 +561,25 @@ struct r300_context { int sprite_coord_enable; /* Whether two-sided color selection is enabled (AKA light_twoside). */ boolean two_sided_color; - + /* Whether fast color clear is enabled. */ boolean cbzb_clear; /* Whether ZMASK is enabled. */ boolean zmask_in_use; /* Whether ZMASK is being decompressed. */ boolean zmask_decompress; - /* Whether ZMASK is locked, i.e. should be disabled and cannot be taken over. */ - boolean zmask_locked; + /* Whether ZMASK/HIZ is locked, i.e. should be disabled and cannot be taken over. */ + boolean hyperz_locked; /* The zbuffer the ZMASK of which is locked. */ struct pipe_surface *locked_zbuffer; + /* Whether HIZ is enabled. */ + boolean hiz_in_use; + /* HiZ function. Can be either MIN or MAX. */ + enum r300_hiz_func hiz_func; + /* HiZ clear value. */ + uint32_t hiz_clear_value; void *dsa_decompress_zmask; - /* two mem block managers for hiz/zmask ram space */ - struct mem_block *hiz_mm; - struct u_vbuf_mgr *vbuf_mgr; struct util_slab_mempool pool_transfers; @@ -644,6 +663,9 @@ void r300_decompress_zmask(struct r300_context *r300); void r300_decompress_zmask_locked_unsafe(struct r300_context *r300); void r300_decompress_zmask_locked(struct r300_context *r300); +/* r300_hyperz.c */ +void r300_update_hyperz_state(struct r300_context* r300); + /* r300_query.c */ void r300_resume_query(struct r300_context *r300, struct r300_query *query); diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index e2e4719ec82..24c82a3efd2 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -425,27 +425,12 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_RELOC(surf); if (can_hyperz) { - uint32_t surf_pitch; - struct r300_resource *tex; - int level = surf->base.u.tex.level; - tex = r300_resource(surf->base.texture); - - surf_pitch = surf->pitch & R300_DEPTHPITCH_MASK; - /* HiZ RAM. */ - if (r300->screen->caps.hiz_ram) { - if (tex->hiz_mem[level]) { - OUT_CS_REG(R300_ZB_HIZ_OFFSET, tex->hiz_mem[level]->ofs << 2); - OUT_CS_REG(R300_ZB_HIZ_PITCH, surf_pitch); - } else { - OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); - OUT_CS_REG(R300_ZB_HIZ_PITCH, 0); - } - } - + OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); + OUT_CS_REG(R300_ZB_HIZ_PITCH, surf->pitch_hiz); /* Z Mask RAM. (compressed zbuffer) */ OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); - OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch); + OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf->pitch_zmask); } } @@ -484,6 +469,7 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300, struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)r300->fb_state.state; unsigned i, num_cbufs = fb->nr_cbufs; + unsigned mspos0, mspos1; CS_LOCALS(r300); /* If we use the multiwrite feature, the colorbuffers 2,3,4 must be @@ -507,38 +493,36 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300, /* Multisampling. Depends on framebuffer sample count. * These are pipelined regs and as such cannot be moved * to the AA state. */ - if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { - unsigned mspos0 = 0x66666666; - unsigned mspos1 = 0x6666666; - - if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) { - /* Subsample placement. These may not be optimal. */ - switch (fb->cbufs[0]->texture->nr_samples) { - case 2: - mspos0 = 0x33996633; - mspos1 = 0x6666663; - break; - case 3: - mspos0 = 0x33936933; - mspos1 = 0x6666663; - break; - case 4: - mspos0 = 0x33939933; - mspos1 = 0x3966663; - break; - case 6: - mspos0 = 0x22a2aa22; - mspos1 = 0x2a65672; - break; - default: - debug_printf("r300: Bad number of multisamples!\n"); - } - } + mspos0 = 0x66666666; + mspos1 = 0x6666666; - OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); - OUT_CS(mspos0); - OUT_CS(mspos1); + if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) { + /* Subsample placement. These may not be optimal. */ + switch (fb->cbufs[0]->texture->nr_samples) { + case 2: + mspos0 = 0x33996633; + mspos1 = 0x6666663; + break; + case 3: + mspos0 = 0x33936933; + mspos1 = 0x6666663; + break; + case 4: + mspos0 = 0x33939933; + mspos1 = 0x3966663; + break; + case 6: + mspos0 = 0x22a2aa22; + mspos1 = 0x2a65672; + break; + default: + debug_printf("r300: Bad number of multisamples!\n"); + } } + + OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); + OUT_CS(mspos0); + OUT_CS(mspos1); END_CS; } @@ -1039,56 +1023,26 @@ void r300_emit_viewport_state(struct r300_context* r300, END_CS; } -static void r300_emit_hiz_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val) -{ - CS_LOCALS(r300); - BEGIN_CS(4); - OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_HIZ, 2); - OUT_CS(start); - OUT_CS(count); - OUT_CS(val); - END_CS; -} - -#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) - void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state) { struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - struct r300_hyperz_state *z = - (struct r300_hyperz_state*)r300->hyperz_state.state; - struct r300_screen* r300screen = r300->screen; - uint32_t stride, offset = 0, height, offset_shift; struct r300_resource* tex; - int i; + CS_LOCALS(r300); tex = r300_resource(fb->zsbuf->texture); - offset = tex->hiz_mem[fb->zsbuf->u.tex.level]->ofs; - stride = tex->tex.stride_in_pixels[fb->zsbuf->u.tex.level]; - - /* convert from pixels to 4x4 blocks */ - stride = ALIGN_DIVUP(stride, 4); - - stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes); - /* there are 4 blocks per dwords */ - stride = ALIGN_DIVUP(stride, 4); - - height = ALIGN_DIVUP(fb->zsbuf->height, 4); - - offset_shift = 2; - offset_shift += (r300screen->caps.num_frag_pipes / 2); - - for (i = 0; i < height; i++) { - offset = i * stride; - offset <<= offset_shift; - r300_emit_hiz_line_clear(r300, offset, stride, 0xffffffff); - } - z->current_func = -1; + BEGIN_CS(size); + OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_HIZ, 2); + OUT_CS(0); + OUT_CS(tex->tex.hiz_dwords[fb->zsbuf->u.tex.level]); + OUT_CS(r300->hiz_clear_value); + END_CS; /* Mark the current zbuffer's hiz ram as in use. */ - tex->hiz_in_use[fb->zsbuf->u.tex.level] = TRUE; + r300->hiz_in_use = TRUE; + r300->hiz_func = HIZ_FUNC_NONE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); } void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state) @@ -1236,7 +1190,7 @@ unsigned r300_get_num_cs_end_dwords(struct r300_context *r300) /* Emitted in flush. */ dwords += 26; /* emit_query_end */ dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */ - if (r300->screen->caps.index_bias_supported) + if (r300->screen->caps.is_r500) dwords += 2; return dwords; diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index c77cc08539d..9c41a1383ce 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -57,7 +57,7 @@ static void r300_flush(struct pipe_context* pipe, if (r300->dirty_hw) { r300_emit_hyperz_end(r300); r300_emit_query_end(r300); - if (r300->screen->caps.index_bias_supported) + if (r300->screen->caps.is_r500) r500_emit_index_bias(r300, 0); r300->flush_counter++; diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index 873e0209d42..ecaadf4af8e 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -22,7 +22,6 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r300_context.h" -#include "r300_hyperz.h" #include "r300_reg.h" #include "r300_fs.h" #include "r300_winsys.h" @@ -41,58 +40,74 @@ /* The HyperZ setup */ /*****************************************************************************/ -static bool r300_get_sc_hz_max(struct r300_context *r300) +static enum r300_hiz_func r300_get_hiz_func(struct r300_context *r300) { - struct r300_dsa_state *dsa_state = r300->dsa_state.state; - int func = dsa_state->z_stencil_control & R300_ZS_MASK; - int ret = R300_SC_HYPERZ_MIN; + struct r300_dsa_state *dsa = r300->dsa_state.state; - if (func >= R300_ZS_GEQUAL && func <= R300_ZS_ALWAYS) - ret = R300_SC_HYPERZ_MAX; - return ret; + if (!dsa->dsa.depth.enabled || !dsa->dsa.depth.writemask) + return HIZ_FUNC_NONE; + + switch (dsa->dsa.depth.func) { + case PIPE_FUNC_NEVER: + case PIPE_FUNC_EQUAL: + case PIPE_FUNC_NOTEQUAL: + case PIPE_FUNC_ALWAYS: + return HIZ_FUNC_NONE; + + case PIPE_FUNC_LESS: + case PIPE_FUNC_LEQUAL: + return HIZ_FUNC_MAX; + + case PIPE_FUNC_GREATER: + case PIPE_FUNC_GEQUAL: + return HIZ_FUNC_MIN; + + default: + assert(0); + return HIZ_FUNC_NONE; + } } -static bool r300_zfunc_same_direction(int func1, int func2) +/* Return what's used for the depth test (either minimum or maximum). */ +static unsigned r300_get_sc_hz_max(struct r300_context *r300) { - /* func1 is less/lessthan */ - if ((func1 == R300_ZS_LESS || func1 == R300_ZS_LEQUAL) && - (func2 == R300_ZS_EQUAL || func2 == R300_ZS_GEQUAL || - func2 == R300_ZS_GREATER)) - return FALSE; - - /* func1 is greater/greaterthan */ - if ((func1 == R300_ZS_GEQUAL || func1 == R300_ZS_GREATER) && - (func2 == R300_ZS_LESS || func2 == R300_ZS_LEQUAL)) - return FALSE; + struct r300_dsa_state *dsa = r300->dsa_state.state; + unsigned func = dsa->dsa.depth.func; - return TRUE; + return func >= PIPE_FUNC_GREATER ? R300_SC_HYPERZ_MAX : R300_SC_HYPERZ_MIN; } -static int r300_get_hiz_min(struct r300_context *r300) +static boolean r300_is_hiz_func_valid(struct r300_context *r300) { - struct r300_dsa_state *dsa_state = r300->dsa_state.state; - int func = dsa_state->z_stencil_control & R300_ZS_MASK; - int ret = R300_HIZ_MIN; + struct r300_dsa_state *dsa = r300->dsa_state.state; + unsigned func = dsa->dsa.depth.func; + + if (r300->hiz_func == HIZ_FUNC_NONE) + return TRUE; + + /* func1 is less/lessthan */ + if (r300->hiz_func == HIZ_FUNC_MAX && + (func == PIPE_FUNC_GEQUAL || func == PIPE_FUNC_GREATER)) + return FALSE; - if (func == R300_ZS_LESS || func == R300_ZS_LEQUAL) - ret = R300_HIZ_MAX; - return ret; + /* func1 is greater/greaterthan */ + if (r300->hiz_func == HIZ_FUNC_MIN && + (func == PIPE_FUNC_LESS || func == PIPE_FUNC_LEQUAL)) + return FALSE; + + return TRUE; } static boolean r300_dsa_stencil_op_not_keep(struct pipe_stencil_state *s) { - if (s->enabled && (s->fail_op != PIPE_STENCIL_OP_KEEP || - s->zfail_op != PIPE_STENCIL_OP_KEEP)) - return TRUE; - return FALSE; + return s->enabled && (s->fail_op != PIPE_STENCIL_OP_KEEP || + s->zfail_op != PIPE_STENCIL_OP_KEEP); } static boolean r300_can_hiz(struct r300_context *r300) { - struct r300_dsa_state *dsa_state = r300->dsa_state.state; - struct pipe_depth_stencil_alpha_state *dsa = &dsa_state->dsa; - struct r300_screen* r300screen = r300->screen; - struct r300_hyperz_state *z = r300->hyperz_state.state; + struct r300_dsa_state *dsa = r300->dsa_state.state; + struct r300_screen *r300screen = r300->screen; /* shader writes depth - no HiZ */ if (r300_fragment_shader_writes_depth(r300_fs(r300))) /* (5) */ @@ -100,34 +115,21 @@ static boolean r300_can_hiz(struct r300_context *r300) if (r300->query_current) return FALSE; + /* if stencil fail/zfail op is not KEEP */ - if (r300_dsa_stencil_op_not_keep(&dsa->stencil[0]) || - r300_dsa_stencil_op_not_keep(&dsa->stencil[1])) + if (r300_dsa_stencil_op_not_keep(&dsa->dsa.stencil[0]) || + r300_dsa_stencil_op_not_keep(&dsa->dsa.stencil[1])) return FALSE; - if (dsa->depth.enabled) { + if (dsa->dsa.depth.enabled) { /* if depth func is EQUAL pre-r500 */ - if (dsa->depth.func == PIPE_FUNC_EQUAL && !r300screen->caps.is_r500) + if (dsa->dsa.depth.func == PIPE_FUNC_EQUAL && !r300screen->caps.is_r500) return FALSE; + /* if depth func is NOTEQUAL */ - if (dsa->depth.func == PIPE_FUNC_NOTEQUAL) + if (dsa->dsa.depth.func == PIPE_FUNC_NOTEQUAL) return FALSE; } - /* depth comparison function - if just cleared save and return okay */ - if (z->current_func == -1) { - int func = dsa_state->z_stencil_control & R300_ZS_MASK; - if (func != 0 && func != 7) - z->current_func = dsa_state->z_stencil_control & R300_ZS_MASK; - } else { - /* simple don't change */ - if (!r300_zfunc_same_direction(z->current_func, - (dsa_state->z_stencil_control & R300_ZS_MASK))) { - DBG(r300, DBG_HYPERZ, - "z func changed direction - disabling hyper-z %d -> %d\n", - z->current_func, dsa_state->z_stencil_control); - return FALSE; - } - } return TRUE; } @@ -139,7 +141,6 @@ static void r300_update_hyperz(struct r300_context* r300) (struct pipe_framebuffer_state*)r300->fb_state.state; struct r300_resource *zstex = fb->zsbuf ? r300_resource(fb->zsbuf->texture) : NULL; - boolean hiz_in_use = FALSE; z->gb_z_peq_config = 0; z->zb_bw_cntl = 0; @@ -151,16 +152,12 @@ static void r300_update_hyperz(struct r300_context* r300) return; } - if (!zstex) + if (!zstex || + !r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) return; - if (!r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) - return; - - hiz_in_use = zstex->hiz_in_use[fb->zsbuf->u.tex.level]; - /* Zbuffer compression. */ - if (r300->zmask_in_use && !r300->zmask_locked) { + if (r300->zmask_in_use && !r300->hyperz_locked) { z->zb_bw_cntl |= R300_FAST_FILL_ENABLE | /*R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE |*/ R300_RD_COMP_ENABLE; @@ -174,16 +171,28 @@ static void r300_update_hyperz(struct r300_context* r300) z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8; } - if (hiz_in_use && r300_can_hiz(r300)) { - z->zb_bw_cntl |= R300_HIZ_ENABLE | - r300_get_hiz_min(r300); - - z->sc_hyperz |= R300_SC_HYPERZ_ENABLE | - r300_get_sc_hz_max(r300); + /* HiZ. */ + if (r300->hiz_in_use && !r300->hyperz_locked) { + /* Set the HiZ function if needed. */ + if (r300->hiz_func == HIZ_FUNC_NONE) { + r300->hiz_func = r300_get_hiz_func(r300); + } - if (r300->screen->caps.is_r500) { - z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3 | - R500_HIZ_EQUAL_REJECT_ENABLE; + /* If the depth function is inverted, HiZ must be disabled. */ + if (!r300_is_hiz_func_valid(r300)) { + r300->hiz_in_use = FALSE; + } else if (r300_can_hiz(r300)) { + /* Setup the HiZ bits. */ + z->zb_bw_cntl |= + R300_HIZ_ENABLE | + (r300->hiz_func == HIZ_FUNC_MIN ? R300_HIZ_MIN : R300_HIZ_MAX); + + z->sc_hyperz |= R300_SC_HYPERZ_ENABLE | + r300_get_sc_hz_max(r300); + + if (r300->screen->caps.is_r500) { + z->zb_bw_cntl |= R500_HIZ_EQUAL_REJECT_ENABLE; + } } } @@ -282,18 +291,6 @@ static void r300_update_ztop(struct r300_context* r300) r300_mark_atom_dirty(r300, &r300->ztop_state); } -#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) - -static void r300_update_hiz_clear(struct r300_context *r300) -{ - struct pipe_framebuffer_state *fb = - (struct pipe_framebuffer_state*)r300->fb_state.state; - uint32_t height; - - height = ALIGN_DIVUP(fb->zsbuf->height, 4); - r300->hiz_clear.size = height * 4; -} - void r300_update_hyperz_state(struct r300_context* r300) { r300_update_ztop(r300); @@ -301,51 +298,4 @@ void r300_update_hyperz_state(struct r300_context* r300) if (r300->hyperz_state.dirty) { r300_update_hyperz(r300); } - - if (r300->hiz_clear.dirty) { - r300_update_hiz_clear(r300); - } -} - -void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf) -{ - struct r300_resource *tex; - uint32_t zsize, ndw; - int level = surf->base.u.tex.level; - - tex = r300_resource(surf->base.texture); - - if (tex->hiz_mem[level]) - return; - - zsize = tex->tex.layer_size_in_bytes[level]; - zsize /= util_format_get_blocksize(tex->b.b.b.format); - ndw = ALIGN_DIVUP(zsize, 64); - - tex->hiz_mem[level] = u_mmAllocMem(r300->hiz_mm, ndw, 0, 0); -} - -boolean r300_hyperz_init_mm(struct r300_context *r300) -{ - struct r300_screen* r300screen = r300->screen; - int frag_pipes = r300screen->caps.num_frag_pipes; - - if (r300screen->caps.hiz_ram) { - r300->hiz_mm = u_mmInit(0, r300screen->caps.hiz_ram * frag_pipes); - if (!r300->hiz_mm) { - return FALSE; - } - } - - return TRUE; -} - -void r300_hyperz_destroy_mm(struct r300_context *r300) -{ - struct r300_screen* r300screen = r300->screen; - - if (r300screen->caps.hiz_ram) { - u_mmDestroy(r300->hiz_mm); - r300->hiz_mm = NULL; - } } diff --git a/src/gallium/drivers/r300/r300_hyperz.h b/src/gallium/drivers/r300/r300_hyperz.h deleted file mode 100644 index d4c8e7c60a9..00000000000 --- a/src/gallium/drivers/r300/r300_hyperz.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright 2010 Marek Olšák <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#ifndef R300_HYPERZ_H -#define R300_HYPERZ_H - -struct r300_context; - -void r300_update_hyperz_state(struct r300_context* r300); - -void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf); - -boolean r300_hyperz_init_mm(struct r300_context *r300); -void r300_hyperz_destroy_mm(struct r300_context *r300); - -#endif diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 1d93dab2ca2..bb30b1ab0be 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -467,6 +467,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * * See bug #9871. http://bugs.freedesktop.org/attachment.cgi?id=10672&action=view */ +#define R500_VAP_TEX_TO_COLOR_CNTL 0x2218 + #define R300_VAP_CLIP_CNTL 0x221C # define R300_VAP_UCP_ENABLE_0 (1 << 0) # define R300_VAP_UCP_ENABLE_1 (1 << 1) @@ -857,6 +859,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R500_TX_DIRECTION_HORIZONTAL (0<<27) # define R500_TX_DIRECTION_VERITCAL (1<<27) +#define R500_SU_TEX_WRAP_PS3 0x4114 + /* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */ #define R300_GA_POINT_S0 0x4200 diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 2ead8667bda..0ec4a225865 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -205,7 +205,7 @@ static boolean r300_reserve_cs_dwords(struct r300_context *r300, if (first_draw) { cs_dwords += r300_get_num_dirty_dwords(r300); - if (r300->screen->caps.index_bias_supported) + if (r300->screen->caps.is_r500) cs_dwords += 2; /* emit_index_offset */ if (emit_vertex_arrays) @@ -257,7 +257,7 @@ static boolean r300_emit_states(struct r300_context *r300, } r300_emit_dirty_state(r300); - if (r300->screen->caps.index_bias_supported) { + if (r300->screen->caps.is_r500) { if (r300->screen->caps.has_tcl) r500_emit_index_bias(r300, index_bias); else @@ -557,7 +557,7 @@ static void r300_draw_elements_immediate(struct r300_context *r300, OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | r300_translate_primitive(mode)); - if (indexBias && !r300->screen->caps.index_bias_supported) { + if (indexBias && !r300->screen->caps.is_r500) { for (i = 0; i < count-1; i += 2) OUT_CS(((ptr1[i+1] + indexBias) << 16) | (ptr1[i] + indexBias)); @@ -581,7 +581,7 @@ static void r300_draw_elements_immediate(struct r300_context *r300, OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | r300_translate_primitive(mode)); - if (indexBias && !r300->screen->caps.index_bias_supported) { + if (indexBias && !r300->screen->caps.is_r500) { for (i = 0; i < count-1; i += 2) OUT_CS(((ptr2[i+1] + indexBias) << 16) | (ptr2[i] + indexBias)); @@ -601,7 +601,7 @@ static void r300_draw_elements_immediate(struct r300_context *r300, R300_VAP_VF_CNTL__INDEX_SIZE_32bit | r300_translate_primitive(mode)); - if (indexBias && !r300->screen->caps.index_bias_supported) { + if (indexBias && !r300->screen->caps.is_r500) { for (i = 0; i < count; i++) OUT_CS(ptr4[i] + indexBias); } else { @@ -620,13 +620,12 @@ static void r300_draw_elements(struct r300_context *r300, int indexBias, unsigned indexSize = r300->index_buffer.index_size; struct pipe_resource* orgIndexBuffer = indexBuffer; boolean alt_num_verts = r300->screen->caps.is_r500 && - count > 65536 && - r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); + count > 65536; unsigned short_count; int buffer_offset = 0, index_offset = 0; /* for index bias emulation */ uint16_t indices3[3]; - if (indexBias && !r300->screen->caps.index_bias_supported) { + if (indexBias && !r300->screen->caps.is_r500) { r300_split_index_bias(r300, indexBias, &buffer_offset, &index_offset); } @@ -702,8 +701,7 @@ static void r300_draw_arrays(struct r300_context *r300, unsigned mode, unsigned start, unsigned count) { boolean alt_num_verts = r300->screen->caps.is_r500 && - count > 65536 && - r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); + count > 65536; unsigned short_count; /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */ @@ -748,6 +746,8 @@ static void r300_draw_vbo(struct pipe_context* pipe, return; } + r300_update_derived_state(r300); + /* Start the vbuf manager and update buffers if needed. */ u_vbuf_mgr_draw_begin(r300->vbuf_mgr, info, &buffers_updated, &uploader_flushed); @@ -756,8 +756,6 @@ static void r300_draw_vbo(struct pipe_context* pipe, } /* Draw. */ - r300_update_derived_state(r300); - if (indexed) { if (count <= 8 && r300_resource(r300->index_buffer.buffer)->b.user_ptr) { diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 77a9c6ad86f..52d0247fbfd 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -456,10 +456,6 @@ struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws) if (SCREEN_DBG_ON(r300screen, DBG_NO_HIZ)) r300screen->caps.hiz_ram = 0; - r300screen->caps.index_bias_supported = - r300screen->caps.is_r500 && - rws->get_value(rws, R300_VID_DRM_2_3_0); - pipe_mutex_init(r300screen->num_contexts_mutex); util_slab_create(&r300screen->pool_buffers, diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 09f18b3e624..b810f4081c8 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -45,7 +45,6 @@ #include "r300_texture.h" #include "r300_vs.h" #include "r300_winsys.h" -#include "r300_hyperz.h" /* r300_state: Functions used to intialize state context by translating * Gallium state objects into semi-native r300 state objects. */ @@ -707,7 +706,7 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, else if (state->zsbuf) { r300->fb_state.size += 10; if (can_hyperz) - r300->fb_state.size += r300->screen->caps.hiz_ram ? 8 : 4; + r300->fb_state.size += 8; } /* The size of the rest of atoms stays the same. */ @@ -720,7 +719,6 @@ r300_set_framebuffer_state(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; struct pipe_framebuffer_state *old_state = r300->fb_state.state; - boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); unsigned max_width, max_height, i; uint32_t zbuffer_bpp = 0; @@ -738,28 +736,30 @@ r300_set_framebuffer_state(struct pipe_context* pipe, return; } - if (old_state->zsbuf && r300->zmask_in_use && !r300->zmask_locked) { + if (old_state->zsbuf && r300->zmask_in_use && !r300->hyperz_locked) { /* There is a zmask in use, what are we gonna do? */ if (state->zsbuf) { if (!pipe_surface_equal(old_state->zsbuf, state->zsbuf)) { /* Decompress the currently bound zbuffer before we bind another one. */ r300_decompress_zmask(r300); + r300->hiz_in_use = FALSE; } } else { /* We don't bind another zbuffer, so lock the current one. */ - r300->zmask_locked = TRUE; + r300->hyperz_locked = TRUE; pipe_surface_reference(&r300->locked_zbuffer, old_state->zsbuf); } - } else if (r300->zmask_locked && r300->locked_zbuffer) { + } else if (r300->hyperz_locked && r300->locked_zbuffer) { /* We have a locked zbuffer now, what are we gonna do? */ if (state->zsbuf) { if (!pipe_surface_equal(r300->locked_zbuffer, state->zsbuf)) { /* We are binding some other zbuffer, so decompress the locked one, * it gets unlocked automatically. */ r300_decompress_zmask_locked_unsafe(r300); + r300->hiz_in_use = FALSE; } else { /* We are binding the locked zbuffer again, so unlock it. */ - r300->zmask_locked = FALSE; + r300->hyperz_locked = FALSE; } } } @@ -778,7 +778,7 @@ r300_set_framebuffer_state(struct pipe_context* pipe, util_copy_framebuffer_state(r300->fb_state.state, state); - if (!r300->zmask_locked) { + if (!r300->hyperz_locked) { pipe_surface_reference(&r300->locked_zbuffer, NULL); } @@ -794,20 +794,6 @@ r300_set_framebuffer_state(struct pipe_context* pipe, break; } - /* Setup Hyper-Z. */ - if (can_hyperz) { - struct r300_surface *zs_surf = r300_surface(state->zsbuf); - struct r300_resource *tex = r300_resource(zs_surf->base.texture); - int level = zs_surf->base.u.tex.level; - - /* work out whether we can support hiz on this buffer */ - r300_hiz_alloc_block(r300, zs_surf); - - DBG(r300, DBG_HYPERZ, - "hyper-z features: hiz: %d @ %08x\n", tex->hiz_mem[level] ? 1 : 0, - tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef); - } - /* Polygon offset depends on the zbuffer bit depth. */ if (r300->zbuffer_bpp != zbuffer_bpp) { r300->zbuffer_bpp = zbuffer_bpp; @@ -818,27 +804,25 @@ r300_set_framebuffer_state(struct pipe_context* pipe, } /* Set up AA config. */ - if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { - if (state->nr_cbufs && state->cbufs[0]->texture->nr_samples > 1) { - aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE; - - switch (state->cbufs[0]->texture->nr_samples) { - case 2: - aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2; - break; - case 3: - aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3; - break; - case 4: - aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4; - break; - case 6: - aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6; - break; - } - } else { - aa->aa_config = 0; + if (state->nr_cbufs && state->cbufs[0]->texture->nr_samples > 1) { + aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE; + + switch (state->cbufs[0]->texture->nr_samples) { + case 2: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2; + break; + case 3: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3; + break; + case 4: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4; + break; + case 6: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6; + break; } + } else { + aa->aa_config = 0; } if (DBG_ON(r300, DBG_FB)) { diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 003fe9a58cd..a1e116f4b61 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -29,7 +29,6 @@ #include "r300_context.h" #include "r300_fs.h" -#include "r300_hyperz.h" #include "r300_screen.h" #include "r300_shader_semantics.h" #include "r300_state_inlines.h" @@ -642,8 +641,25 @@ static uint32_t r300_get_border_color(enum pipe_format format, /* Compressed formats. */ if (util_format_is_compressed(format)) { - util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_UNORM, &uc); - return uc.ui; + switch (format) { + case PIPE_FORMAT_RGTC1_SNORM: + case PIPE_FORMAT_RGTC1_UNORM: + /* Add 1/32 to round the border color instead of truncating. */ + /* The Y component is used for the border color. */ + border_swizzled[1] = border_swizzled[2] + 1.0f/32; + util_pack_color(border_swizzled, PIPE_FORMAT_B4G4R4A4_UNORM, &uc); + return uc.ui; + case PIPE_FORMAT_RGTC2_SNORM: + border_swizzled[0] = border_swizzled[2]; + util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_SNORM, &uc); + return uc.ui; + case PIPE_FORMAT_RGTC2_UNORM: + util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); + return uc.ui; + default: + util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_UNORM, &uc); + return uc.ui; + } } switch (desc->channel[0].size) { @@ -937,7 +953,7 @@ static void r300_decompress_depth_textures(struct r300_context *r300) state->sampler_state_count); unsigned i; - if (!r300->zmask_locked || !r300->locked_zbuffer) { + if (!r300->hyperz_locked || !r300->locked_zbuffer) { return; } diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index b97c45ac198..86ad0b8b8e0 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -171,8 +171,16 @@ uint32_t r300_translate_texformat(enum pipe_format format, } } - result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view, - util_format_is_compressed(format) && dxtc_swizzle); + if (util_format_is_compressed(format) && + dxtc_swizzle && + format != PIPE_FORMAT_RGTC2_UNORM && + format != PIPE_FORMAT_RGTC2_SNORM) { + result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view, + dxtc_swizzle); + } else { + result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view, + FALSE); + } /* S3TC formats. */ if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { @@ -197,41 +205,36 @@ uint32_t r300_translate_texformat(enum pipe_format format, } } - /* Add sign. */ - for (i = 0; i < desc->nr_channels; i++) { - if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { - result |= sign_bit[i]; - } - } - - /* This is truly a special format. - * It stores R8G8 and B is computed using sqrt(1 - R^2 - G^2) - * in the sampler unit. Also known as D3DFMT_CxV8U8. */ - if (format == PIPE_FORMAT_R8G8Bx_SNORM) { - return R300_TX_FORMAT_CxV8U8 | result; - } - /* RGTC formats. */ if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { switch (format) { case PIPE_FORMAT_RGTC1_SNORM: - result |= sign_bit[0]; + result |= sign_bit[1]; case PIPE_FORMAT_RGTC1_UNORM: - result &= ~(0xfff << 9); /* mask off swizzle */ - result |= R300_TX_FORMAT_Y << R300_TX_FORMAT_R_SHIFT; return R500_TX_FORMAT_ATI1N | result; case PIPE_FORMAT_RGTC2_SNORM: - result |= sign_bit[0] | sign_bit[1]; + result |= sign_bit[2] | sign_bit[3]; case PIPE_FORMAT_RGTC2_UNORM: - result &= ~(0xfff << 9); /* mask off swizzle */ - result |= R300_TX_FORMAT_Y << R300_TX_FORMAT_R_SHIFT | - R300_TX_FORMAT_X << R300_TX_FORMAT_G_SHIFT; return R400_TX_FORMAT_ATI2N | result; default: return ~0; /* Unsupported/unknown. */ } } + /* This is truly a special format. + * It stores R8G8 and B is computed using sqrt(1 - R^2 - G^2) + * in the sampler unit. Also known as D3DFMT_CxV8U8. */ + if (format == PIPE_FORMAT_R8G8Bx_SNORM) { + return R300_TX_FORMAT_CxV8U8 | result; + } + + /* Add sign. */ + for (i = 0; i < desc->nr_channels; i++) { + if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { + result |= sign_bit[i]; + } + } + /* See whether the components are of the same size. */ for (i = 1; i < desc->nr_channels; i++) { uniform = uniform && desc->channel[0].size == desc->channel[i].size; @@ -676,6 +679,8 @@ static void r300_texture_setup_fb_state(struct r300_surface *surf) R300_DEPTHMACROTILE(tex->tex.macrotile[level]) | R300_DEPTHMICROTILE(tex->tex.microtile); surf->format = r300_translate_zsformat(surf->base.format); + surf->pitch_zmask = tex->tex.zmask_stride_in_pixels[level]; + surf->pitch_hiz = tex->tex.hiz_stride_in_pixels[level]; } else { surf->pitch = tex->tex.stride_in_pixels[level] | @@ -713,14 +718,8 @@ static void r300_texture_destroy(struct pipe_screen *screen, struct pipe_resource* texture) { struct r300_resource* tex = (struct r300_resource*)texture; - int i; r300_winsys_bo_reference(&tex->buf, NULL); - for (i = 0; i < R300_MAX_TEXTURE_LEVELS; i++) { - if (tex->hiz_mem[i]) - u_mmFreeMem(tex->hiz_mem[i]); - } - FREE(tex); } @@ -868,8 +867,7 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen, break; case 2: - if (rws->get_value(rws, R300_VID_DRM_2_1_0)) - microtile = R300_BUFFER_SQUARETILED; + microtile = R300_BUFFER_SQUARETILED; break; } } diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index 2cfeec7d751..3846fb8b6b3 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -207,29 +207,6 @@ static unsigned r300_texture_get_nblocksy(struct r300_resource *tex, return util_format_get_nblocksy(tex->b.b.b.format, height); } -static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen, - struct r300_resource *tex) -{ - /* The kernels <= 2.6.34-rc4 compute the size of mipmapped 3D textures - * incorrectly. This is a workaround to prevent CS from being rejected. */ - - unsigned i, size; - - if (!screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) && - tex->b.b.b.target == PIPE_TEXTURE_3D && - tex->b.b.b.last_level > 0) { - size = 0; - - for (i = 0; i <= tex->b.b.b.last_level; i++) { - size += tex->tex.stride_in_bytes[i] * - r300_texture_get_nblocksy(tex, i, FALSE); - } - - size *= tex->tex.depth0; - tex->tex.size_in_bytes = size; - } -} - /* Get a width in pixels from a stride in bytes. */ static unsigned stride_to_width(enum pipe_format format, unsigned stride_in_bytes) @@ -334,12 +311,17 @@ static void r300_setup_cbzb_flags(struct r300_screen *rscreen, tex->tex.cbzb_allowed[i] = first_level_valid && tex->tex.macrotile[i]; } -#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) +static unsigned r300_pixels_to_dwords(unsigned stride, + unsigned height, + unsigned xblock, unsigned yblock) +{ + return (align(stride, xblock) * align(height, yblock)) / (xblock * yblock); +} -static void r300_setup_zmask_flags(struct r300_screen *screen, - struct r300_resource *tex) +static void r300_setup_hyperz_properties(struct r300_screen *screen, + struct r300_resource *tex) { - /* The tile size of 1 DWORD is: + /* The tile size of 1 DWORD in ZMASK RAM is: * * GPU Pipes 4x4 mode 8x8 mode * ------------------------------------------ @@ -348,8 +330,31 @@ static void r300_setup_zmask_flags(struct r300_screen *screen, * RV530 1P/2Z 32x16 64x32 * 1P/1Z 16x16 32x32 */ - static unsigned num_blocks_x_per_dw[4] = {4, 8, 12, 8}; - static unsigned num_blocks_y_per_dw[4] = {4, 4, 4, 8}; + static unsigned zmask_blocks_x_per_dw[4] = {4, 8, 12, 8}; + static unsigned zmask_blocks_y_per_dw[4] = {4, 4, 4, 8}; + + /* In HIZ RAM, one dword is always 8x8 pixels (each byte is 4x4 pixels), + * but the blocks have very weird ordering. + * + * With 2 pipes and an image of size 8xY, where Y >= 1, + * clearing 4 dwords clears blocks like this: + * + * 01012323 + * + * where numbers correspond to dword indices. The blocks are interleaved + * in the X direction, so the alignment must be 4x1 blocks (32x8 pixels). + * + * With 4 pipes and an image of size 8xY, where Y >= 4, + * clearing 8 dwords clears blocks like this: + * 01012323 + * 45456767 + * 01012323 + * 45456767 + * where numbers correspond to dword indices. The blocks are interleaved + * in both directions, so the alignment must be 4x4 blocks (32x32 pixels) + */ + static unsigned hiz_align_x[4] = {8, 32, 48, 32}; + static unsigned hiz_align_y[4] = {8, 8, 8, 32}; if (util_format_is_depth_or_stencil(tex->b.b.b.format) && util_format_get_blocksizebits(tex->b.b.b.format) == 32 && @@ -363,30 +368,49 @@ static void r300_setup_zmask_flags(struct r300_screen *screen, } for (i = 0; i <= tex->b.b.b.last_level; i++) { - unsigned numdw, compsize; + unsigned zcomp_numdw, zcompsize, hiz_numdw, stride, height; + + stride = align(tex->tex.stride_in_pixels[i], 16); + height = u_minify(tex->b.b.b.height0, i); /* The 8x8 compression mode needs macrotiling. */ - compsize = screen->caps.z_compress == R300_ZCOMP_8X8 && + zcompsize = screen->caps.z_compress == R300_ZCOMP_8X8 && tex->tex.macrotile[i] && tex->b.b.b.nr_samples <= 1 ? 8 : 4; - /* Get the zbuffer size (with the aligned width and height). */ - numdw = align(tex->tex.stride_in_pixels[i], - num_blocks_x_per_dw[pipes-1] * compsize) * - align(u_minify(tex->b.b.b.height0, i), - num_blocks_y_per_dw[pipes-1] * compsize); + /* Get the ZMASK buffer size in dwords. */ + zcomp_numdw = r300_pixels_to_dwords(stride, height, + zmask_blocks_x_per_dw[pipes-1] * zcompsize, + zmask_blocks_y_per_dw[pipes-1] * zcompsize); - /* Convert pixels -> dwords. */ - numdw = ALIGN_DIVUP(numdw, num_blocks_x_per_dw[pipes-1] * compsize * - num_blocks_y_per_dw[pipes-1] * compsize); + /* Check whether we have enough ZMASK memory. */ + if (util_format_get_blocksizebits(tex->b.b.b.format) == 32 && + zcomp_numdw <= screen->caps.zmask_ram * pipes) { + tex->tex.zmask_dwords[i] = zcomp_numdw; + tex->tex.zcomp8x8[i] = zcompsize == 8; - /* Check that we have enough ZMASK memory. */ - if (numdw <= screen->caps.zmask_ram * pipes) { - tex->tex.zmask_dwords[i] = numdw; - tex->tex.zcomp8x8[i] = compsize == 8; + tex->tex.zmask_stride_in_pixels[i] = + align(stride, zmask_blocks_x_per_dw[pipes-1] * zcompsize); } else { tex->tex.zmask_dwords[i] = 0; tex->tex.zcomp8x8[i] = FALSE; + tex->tex.zmask_stride_in_pixels[i] = 0; + } + + /* Now setup HIZ. */ + stride = align(stride, hiz_align_x[pipes-1]); + height = align(height, hiz_align_y[pipes-1]); + + /* Get the HIZ buffer size in dwords. */ + hiz_numdw = (stride * height) / (8*8 * pipes); + + /* Check whether we have enough HIZ memory. */ + if (hiz_numdw <= screen->caps.hiz_ram * pipes) { + tex->tex.hiz_dwords[i] = hiz_numdw; + tex->tex.hiz_stride_in_pixels[i] = stride; + } else { + tex->tex.hiz_dwords[i] = 0; + tex->tex.hiz_stride_in_pixels[i] = 0; } } } @@ -395,7 +419,6 @@ static void r300_setup_zmask_flags(struct r300_screen *screen, static void r300_setup_tiling(struct r300_screen *screen, struct r300_resource *tex) { - struct r300_winsys_screen *rws = screen->rws; enum pipe_format format = tex->b.b.b.format; boolean rv350_mode = screen->caps.family >= CHIP_FAMILY_R350; boolean is_zb = util_format_is_depth_or_stencil(format); @@ -422,9 +445,7 @@ static void r300_setup_tiling(struct r300_screen *screen, break; case 2: - if (rws->get_value(rws, R300_VID_DRM_2_1_0)) { - tex->tex.microtile = R300_BUFFER_SQUARETILED; - } + tex->tex.microtile = R300_BUFFER_SQUARETILED; break; } @@ -494,8 +515,7 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen, r300_setup_miptree(rscreen, tex, FALSE); } - r300_texture_3d_fix_mipmapping(rscreen, tex); - r300_setup_zmask_flags(rscreen, tex); + r300_setup_hyperz_properties(rscreen, tex); if (tex->buf_size) { /* Make sure the buffer we got is large enough. */ diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index d5c73585c81..c0b66899f8b 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -60,10 +60,8 @@ enum r300_value_id { R300_VID_DRM_PATCHLEVEL, /* These should probably go away: */ - R300_VID_DRM_2_1_0, /* Square tiling. */ - R300_VID_DRM_2_3_0, /* R500 VAP regs, MSPOS regs, fixed tex3D size checking */ R300_VID_DRM_2_6_0, /* Hyper-Z, GB_Z_PEQ_CONFIG on rv350->r4xx, R500 FG_ALPHA_VALUE */ - R300_VID_DRM_2_8_0, /* R500 US_FORMAT regs, R500 ARGB2101010 colorbuffer, CMask */ + R300_VID_DRM_2_8_0, /* R500 US_FORMAT regs, R500 ARGB2101010 colorbuffer, CMask, R16F/RG16F */ R300_CAN_HYPERZ, /* ZMask + HiZ */ R300_CAN_AACOMPRESS, /* CMask */ |