summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/r300
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/r300')
-rw-r--r--src/gallium/drivers/r300/Makefile6
-rw-r--r--src/gallium/drivers/r300/SConscript5
-rw-r--r--src/gallium/drivers/r300/r300_blit.c403
-rw-r--r--src/gallium/drivers/r300/r300_blit.h48
-rw-r--r--src/gallium/drivers/r300/r300_cb.h139
-rw-r--r--src/gallium/drivers/r300/r300_chipset.c47
-rw-r--r--src/gallium/drivers/r300/r300_chipset.h12
-rw-r--r--src/gallium/drivers/r300/r300_context.c463
-rw-r--r--src/gallium/drivers/r300/r300_context.h474
-rw-r--r--src/gallium/drivers/r300/r300_cs.h202
-rw-r--r--src/gallium/drivers/r300/r300_debug.c151
-rw-r--r--src/gallium/drivers/r300/r300_defines.h16
-rw-r--r--src/gallium/drivers/r300/r300_emit.c996
-rw-r--r--src/gallium/drivers/r300/r300_emit.h39
-rw-r--r--src/gallium/drivers/r300/r300_flush.c35
-rw-r--r--src/gallium/drivers/r300/r300_flush.h28
-rw-r--r--src/gallium/drivers/r300/r300_fs.c251
-rw-r--r--src/gallium/drivers/r300/r300_fs.h3
-rw-r--r--src/gallium/drivers/r300/r300_hyperz.c337
-rw-r--r--src/gallium/drivers/r300/r300_hyperz.h5
-rw-r--r--src/gallium/drivers/r300/r300_public.h9
-rw-r--r--src/gallium/drivers/r300/r300_query.c197
-rw-r--r--src/gallium/drivers/r300/r300_query.h35
-rw-r--r--src/gallium/drivers/r300/r300_reg.h93
-rw-r--r--src/gallium/drivers/r300/r300_render.c1109
-rw-r--r--src/gallium/drivers/r300/r300_render.h97
-rw-r--r--src/gallium/drivers/r300/r300_render_stencilref.c129
-rw-r--r--src/gallium/drivers/r300/r300_render_translate.c218
-rw-r--r--src/gallium/drivers/r300/r300_resource.c10
-rw-r--r--src/gallium/drivers/r300/r300_screen.c247
-rw-r--r--src/gallium/drivers/r300/r300_screen.h54
-rw-r--r--src/gallium/drivers/r300/r300_screen_buffer.c199
-rw-r--r--src/gallium/drivers/r300/r300_screen_buffer.h41
-rw-r--r--src/gallium/drivers/r300/r300_shader_semantics.h2
-rw-r--r--src/gallium/drivers/r300/r300_state.c1074
-rw-r--r--src/gallium/drivers/r300/r300_state.h30
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.c534
-rw-r--r--src/gallium/drivers/r300/r300_state_inlines.h49
-rw-r--r--src/gallium/drivers/r300/r300_state_invariant.c121
-rw-r--r--src/gallium/drivers/r300/r300_state_invariant.h31
-rw-r--r--src/gallium/drivers/r300/r300_texture.c826
-rw-r--r--src/gallium/drivers/r300/r300_texture.h27
-rw-r--r--src/gallium/drivers/r300/r300_texture_desc.c495
-rw-r--r--src/gallium/drivers/r300/r300_texture_desc.h58
-rw-r--r--src/gallium/drivers/r300/r300_tgsi_to_rc.c14
-rw-r--r--src/gallium/drivers/r300/r300_transfer.c173
-rw-r--r--src/gallium/drivers/r300/r300_transfer.h2
-rw-r--r--src/gallium/drivers/r300/r300_vs.c182
-rw-r--r--src/gallium/drivers/r300/r300_vs.h18
-rw-r--r--src/gallium/drivers/r300/r300_vs_draw.c358
-rw-r--r--src/gallium/drivers/r300/r300_winsys.h338
51 files changed, 6883 insertions, 3547 deletions
diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile
index d3cd6bef96e..66d900ebb5f 100644
--- a/src/gallium/drivers/r300/Makefile
+++ b/src/gallium/drivers/r300/Makefile
@@ -14,14 +14,17 @@ C_SOURCES = \
r300_hyperz.c \
r300_query.c \
r300_render.c \
+ r300_render_stencilref.c \
+ r300_render_translate.c \
r300_resource.c \
r300_screen.c \
r300_screen_buffer.c \
r300_state.c \
r300_state_derived.c \
- r300_state_invariant.c \
r300_vs.c \
+ r300_vs_draw.c \
r300_texture.c \
+ r300_texture_desc.c \
r300_tgsi_to_rc.c \
r300_transfer.c
@@ -36,5 +39,6 @@ EXTRA_OBJECTS = \
include ../../Makefile.template
+.PHONY: $(COMPILER_ARCHIVE)
$(COMPILER_ARCHIVE):
$(MAKE) -C $(TOP)/src/mesa/drivers/dri/r300/compiler
diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript
index 3921085d76a..bf023daaa56 100644
--- a/src/gallium/drivers/r300/SConscript
+++ b/src/gallium/drivers/r300/SConscript
@@ -24,14 +24,17 @@ r300 = env.ConvenienceLibrary(
'r300_hyperz.c',
'r300_query.c',
'r300_render.c',
+ 'r300_render_stencilref.c',
+ 'r300_render_translate.c',
'r300_resource.c',
'r300_screen.c',
'r300_screen_buffer.c',
'r300_state.c',
'r300_state_derived.c',
- 'r300_state_invariant.c',
'r300_vs.c',
+ 'r300_vs_draw.c',
'r300_texture.c',
+ 'r300_texture_desc.c',
'r300_tgsi_to_rc.c',
'r300_transfer.c',
] + r300compiler) + r300compiler
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
index 928ad300eee..91a374a5838 100644
--- a/src/gallium/drivers/r300/r300_blit.c
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -20,14 +20,32 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
-#include "r300_blit.h"
#include "r300_context.h"
+#include "r300_emit.h"
+#include "r300_hyperz.h"
#include "r300_texture.h"
+#include "r300_winsys.h"
#include "util/u_format.h"
+#include "util/u_pack_color.h"
-static void r300_blitter_save_states(struct r300_context* r300)
+enum r300_blitter_op /* bitmask */
{
+ R300_CLEAR = 1,
+ R300_CLEAR_SURFACE = 2,
+ R300_COPY = 4
+};
+
+static void r300_blitter_begin(struct r300_context* r300, enum r300_blitter_op op)
+{
+ if (r300->query_current) {
+ r300->blitter_saved_query = r300->query_current;
+ r300_stop_query(r300);
+ }
+
+ /* Yeah we have to save all those states to ensure the blitter operation
+ * is really transparent. The states will be restored by the blitter once
+ * copying is done. */
util_blitter_save_blend(r300->blitter, r300->blend_state.state);
util_blitter_save_depth_stencil_alpha(r300->blitter, r300->dsa_state.state);
util_blitter_save_stencil_ref(r300->blitter, &(r300->stencil_ref));
@@ -35,109 +53,315 @@ static void r300_blitter_save_states(struct r300_context* r300)
util_blitter_save_fragment_shader(r300->blitter, r300->fs.state);
util_blitter_save_vertex_shader(r300->blitter, r300->vs_state.state);
util_blitter_save_viewport(r300->blitter, &r300->viewport);
- util_blitter_save_clip(r300->blitter, &r300->clip);
+ util_blitter_save_clip(r300->blitter, (struct pipe_clip_state*)r300->clip_state.state);
util_blitter_save_vertex_elements(r300->blitter, r300->velems);
- /* XXX this crashes the driver
util_blitter_save_vertex_buffers(r300->blitter, r300->vertex_buffer_count,
- r300->vertex_buffer); */
+ r300->vertex_buffer);
+
+ if (op & (R300_CLEAR_SURFACE | R300_COPY))
+ util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state);
+
+ if (op & R300_COPY) {
+ struct r300_textures_state* state =
+ (struct r300_textures_state*)r300->textures_state.state;
+
+ util_blitter_save_fragment_sampler_states(
+ r300->blitter, state->sampler_state_count,
+ (void**)state->sampler_states);
+
+ util_blitter_save_fragment_sampler_views(
+ r300->blitter, state->sampler_view_count,
+ (struct pipe_sampler_view**)state->sampler_views);
+ }
+}
+
+static void r300_blitter_end(struct r300_context *r300)
+{
+ if (r300->blitter_saved_query) {
+ r300_resume_query(r300, r300->blitter_saved_query);
+ r300->blitter_saved_query = NULL;
+ }
+}
+
+static uint32_t r300_depth_clear_cb_value(enum pipe_format format,
+ const float* rgba)
+{
+ union util_color uc;
+ util_pack_color(rgba, format, &uc);
+
+ if (util_format_get_blocksizebits(format) == 32)
+ return uc.ui;
+ else
+ return uc.us | (uc.us << 16);
+}
+
+static boolean r300_cbzb_clear_allowed(struct r300_context *r300,
+ unsigned clear_buffers)
+{
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+
+ /* Only color clear allowed, and only one colorbuffer. */
+ if (clear_buffers != PIPE_CLEAR_COLOR || fb->nr_cbufs != 1)
+ return FALSE;
+
+ return r300_surface(fb->cbufs[0])->cbzb_allowed;
+}
+
+static uint32_t r300_depth_clear_value(enum pipe_format format,
+ double depth, unsigned stencil)
+{
+ switch (format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ return util_pack_z(format, depth);
+
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+ return util_pack_z_stencil(format, depth, stencil);
+
+ default:
+ assert(0);
+ return 0;
+ }
}
/* Clear currently bound buffers. */
-void r300_clear(struct pipe_context* pipe,
- unsigned buffers,
- const float* rgba,
- double depth,
- unsigned stencil)
+static void r300_clear(struct pipe_context* pipe,
+ unsigned buffers,
+ const float* rgba,
+ double depth,
+ unsigned stencil)
{
- /* XXX Implement fastfill.
+ /* My notes about fastfill:
+ *
+ * 1) Only the zbuffer is cleared.
+ *
+ * 2) The zbuffer must be micro-tiled and whole microtiles must be
+ * written. If microtiling is disabled, it locks up.
*
- * If fastfill is enabled, a few facts should be considered:
+ * 3) There is Z Mask RAM which contains a compressed zbuffer and
+ * it interacts with fastfill. We should figure out how to use it
+ * to get more performance.
+ * This is what we know about the Z Mask:
*
- * 1) Zbuffer must be micro-tiled and whole microtiles must be
- * written.
+ * Each dword of the Z Mask contains compression information
+ * for 16 4x4 pixel blocks, that is 2 bits for each block.
+ * On chips with 2 Z pipes, every other dword maps to a different
+ * pipe.
*
- * 2) ZB_DEPTHCLEARVALUE is used to clear a zbuffer and Z Mask must be
- * equal to 0.
+ * 4) ZB_DEPTHCLEARVALUE is used to clear the zbuffer and the Z Mask must
+ * be equal to 0. (clear the Z Mask RAM with zeros)
*
- * 3) For 16-bit integer buffering, compression causes a hung with one or
+ * 5) For 16-bit zbuffer, compression causes a hung with one or
* two samples and should not be used.
*
- * 4) Fastfill must not be used if reading of compressed Z data is disabled
+ * 6) FORCE_COMPRESSED_STENCIL_VALUE should be enabled for stencil clears
+ * to avoid needless decompression.
+ *
+ * 7) Fastfill must not be used if reading of compressed Z data is disabled
* and writing of compressed Z data is enabled (RD/WR_COMP_ENABLE),
* i.e. it cannot be used to compress the zbuffer.
- * (what the hell does that mean and how does it fit in clearing
- * the buffers?)
+ *
+ * 8) ZB_CB_CLEAR does not interact with fastfill in any way.
*
* - Marek
*/
struct r300_context* r300 = r300_context(pipe);
- struct pipe_framebuffer_state* fb =
+ struct pipe_framebuffer_state *fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
+ struct r300_hyperz_state *hyperz =
+ (struct r300_hyperz_state*)r300->hyperz_state.state;
+ struct r300_texture *zstex =
+ fb->zsbuf ? r300_texture(fb->zsbuf->texture) : NULL;
+ uint32_t width = fb->width;
+ uint32_t height = fb->height;
+ boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ);
+ uint32_t hyperz_dcv = hyperz->zb_depthclearvalue;
+
+ /* Enable fast Z clear.
+ * The zbuffer must be in micro-tiled mode, otherwise it locks up. */
+ if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && has_hyperz) {
+ hyperz_dcv = hyperz->zb_depthclearvalue =
+ r300_depth_clear_value(fb->zsbuf->format, depth, stencil);
+
+ r300_mark_fb_state_dirty(r300, R300_CHANGED_ZCLEAR_FLAG);
+ if (zstex->zmask_mem[fb->zsbuf->level]) {
+ r300->zmask_clear.dirty = TRUE;
+ buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
+ }
+ if (zstex->hiz_mem[fb->zsbuf->level])
+ r300->hiz_clear.dirty = TRUE;
+ }
+
+ /* Enable CBZB clear. */
+ if (r300_cbzb_clear_allowed(r300, buffers)) {
+ struct r300_surface *surf = r300_surface(fb->cbufs[0]);
- r300_blitter_save_states(r300);
+ hyperz->zb_depthclearvalue =
+ r300_depth_clear_cb_value(surf->base.format, rgba);
- util_blitter_clear(r300->blitter,
- fb->width,
- fb->height,
- fb->nr_cbufs,
- buffers, rgba, depth, stencil);
+ width = surf->cbzb_width;
+ height = surf->cbzb_height;
+
+ r300->cbzb_clear = TRUE;
+ r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG);
+ }
+
+ /* Clear. */
+ if (buffers) {
+ /* Clear using the blitter. */
+ r300_blitter_begin(r300, R300_CLEAR);
+ util_blitter_clear(r300->blitter,
+ width,
+ height,
+ fb->nr_cbufs,
+ buffers, rgba, depth, stencil);
+ r300_blitter_end(r300);
+ } else if (r300->zmask_clear.dirty) {
+ /* Just clear zmask and hiz now, this does not use a standard draw
+ * procedure. */
+ unsigned dwords;
+
+ /* Calculate zmask_clear and hiz_clear atom sizes. */
+ r300_update_hyperz_state(r300);
+ dwords = r300->zmask_clear.size +
+ (r300->hiz_clear.dirty ? r300->hiz_clear.size : 0) +
+ r300_get_num_cs_end_dwords(r300);
+
+ /* Reserve CS space. */
+ if (dwords > (r300->cs->ndw - r300->cs->cdw)) {
+ r300->context.flush(&r300->context, 0, NULL);
+ }
+
+ /* Emit clear packets. */
+ r300_emit_zmask_clear(r300, r300->zmask_clear.size,
+ r300->zmask_clear.state);
+ r300->zmask_clear.dirty = FALSE;
+ if (r300->hiz_clear.dirty) {
+ r300_emit_hiz_clear(r300, r300->hiz_clear.size,
+ r300->hiz_clear.state);
+ r300->hiz_clear.dirty = FALSE;
+ }
+ } else {
+ assert(0);
+ }
+
+ /* Disable CBZB clear. */
+ if (r300->cbzb_clear) {
+ r300->cbzb_clear = FALSE;
+ hyperz->zb_depthclearvalue = hyperz_dcv;
+ r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG);
+ }
+
+ /* Enable fastfill and/or hiz.
+ *
+ * If we cleared zmask/hiz, it's in use now. The Hyper-Z state update
+ * looks if zmask/hiz is in use and enables fastfill accordingly. */
+ if (zstex &&
+ (zstex->zmask_in_use[fb->zsbuf->level] ||
+ zstex->hiz_in_use[fb->zsbuf->level])) {
+ r300->hyperz_state.dirty = TRUE;
+ }
}
-/* Copy a block of pixels from one surface to another using HW. */
-static void r300_hw_copy(struct pipe_context* pipe,
- struct pipe_surface* dst,
- unsigned dstx, unsigned dsty,
- struct pipe_surface* src,
- unsigned srcx, unsigned srcy,
- unsigned width, unsigned height)
+/* Clear a region of a color surface to a constant value. */
+static void r300_clear_render_target(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ const float *rgba,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height)
{
- struct r300_context* r300 = r300_context(pipe);
- struct r300_textures_state* state =
- (struct r300_textures_state*)r300->textures_state.state;
+ struct r300_context *r300 = r300_context(pipe);
- /* Yeah we have to save all those states to ensure this blitter operation
- * is really transparent. The states will be restored by the blitter once
- * copying is done. */
- r300_blitter_save_states(r300);
- util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state);
+ r300_blitter_begin(r300, R300_CLEAR_SURFACE);
+ util_blitter_clear_render_target(r300->blitter, dst, rgba,
+ dstx, dsty, width, height);
+ r300_blitter_end(r300);
+}
+
+/* Clear a region of a depth stencil surface. */
+static void r300_clear_depth_stencil(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ unsigned clear_flags,
+ double depth,
+ unsigned stencil,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height)
+{
+ struct r300_context *r300 = r300_context(pipe);
+
+ r300_blitter_begin(r300, R300_CLEAR_SURFACE);
+ util_blitter_clear_depth_stencil(r300->blitter, dst, clear_flags, depth, stencil,
+ dstx, dsty, width, height);
+ r300_blitter_end(r300);
+}
+
+/* Flush a depth stencil buffer. */
+void r300_flush_depth_stencil(struct pipe_context *pipe,
+ struct pipe_resource *dst,
+ struct pipe_subresource subdst,
+ unsigned zslice)
+{
+ struct r300_context *r300 = r300_context(pipe);
+ struct pipe_surface *dstsurf;
+ struct r300_texture *tex = r300_texture(dst);
- util_blitter_save_fragment_sampler_states(
- r300->blitter, state->sampler_state_count,
- (void**)state->sampler_states);
+ if (!tex->zmask_mem[subdst.level])
+ return;
+ if (!tex->zmask_in_use[subdst.level])
+ return;
- util_blitter_save_fragment_sampler_views(
- r300->blitter, state->sampler_view_count,
- (struct pipe_sampler_view**)state->sampler_views);
+ dstsurf = pipe->screen->get_tex_surface(pipe->screen, dst,
+ subdst.face, subdst.level, zslice,
+ PIPE_BIND_DEPTH_STENCIL);
+ r300->z_decomp_rd = TRUE;
+ r300_blitter_begin(r300, R300_CLEAR_SURFACE);
+ util_blitter_flush_depth_stencil(r300->blitter, dstsurf);
+ r300_blitter_end(r300);
+ r300->z_decomp_rd = FALSE;
- /* Do a copy */
- util_blitter_copy(r300->blitter,
- dst, dstx, dsty, src, srcx, srcy, width, height, TRUE);
+ tex->zmask_in_use[subdst.level] = FALSE;
}
-/* Copy a block of pixels from one surface to another. */
-void r300_surface_copy(struct pipe_context* pipe,
- struct pipe_surface* dst,
- unsigned dstx, unsigned dsty,
- struct pipe_surface* src,
- unsigned srcx, unsigned srcy,
- unsigned width, unsigned height)
+/* Copy a block of pixels from one surface to another using HW. */
+static void r300_hw_copy_region(struct pipe_context* pipe,
+ struct pipe_resource *dst,
+ struct pipe_subresource subdst,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src,
+ struct pipe_subresource subsrc,
+ unsigned srcx, unsigned srcy, unsigned srcz,
+ unsigned width, unsigned height)
{
- enum pipe_format old_format = dst->texture->format;
- enum pipe_format new_format = old_format;
+ struct r300_context* r300 = r300_context(pipe);
- if (dst->texture->format != src->texture->format) {
- debug_printf("r300: Implementation error: Format mismatch in %s\n"
- " : src: %s dst: %s\n", __FUNCTION__,
- util_format_name(src->texture->format),
- util_format_name(dst->texture->format));
- debug_assert(0);
- }
+ r300_blitter_begin(r300, R300_COPY);
+ util_blitter_copy_region(r300->blitter, dst, subdst, dstx, dsty, dstz,
+ src, subsrc, srcx, srcy, srcz, width, height,
+ TRUE);
+ r300_blitter_end(r300);
+}
+/* Copy a block of pixels from one surface to another. */
+static void r300_resource_copy_region(struct pipe_context *pipe,
+ struct pipe_resource *dst,
+ struct pipe_subresource subdst,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src,
+ struct pipe_subresource subsrc,
+ unsigned srcx, unsigned srcy, unsigned srcz,
+ unsigned width, unsigned height)
+{
+ enum pipe_format old_format = dst->format;
+ enum pipe_format new_format = old_format;
+ boolean is_depth;
if (!pipe->screen->is_format_supported(pipe->screen,
- old_format, src->texture->target,
+ old_format, src->target,
+ src->nr_samples,
PIPE_BIND_RENDER_TARGET |
- PIPE_BIND_SAMPLER_VIEW, 0)) {
+ PIPE_BIND_SAMPLER_VIEW, 0) &&
+ util_format_is_plain(old_format)) {
switch (util_format_get_blocksize(old_format)) {
case 1:
new_format = PIPE_FORMAT_I8_UNORM;
@@ -154,45 +378,36 @@ void r300_surface_copy(struct pipe_context* pipe,
default:
debug_printf("r300: surface_copy: Unhandled format: %s. Falling back to software.\n"
"r300: surface_copy: Software fallback doesn't work for tiled textures.\n",
- util_format_name(old_format));
+ util_format_short_name(old_format));
}
}
+ is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0;
+ if (is_depth) {
+ r300_flush_depth_stencil(pipe, src, subsrc, srcz);
+ }
if (old_format != new_format) {
- dst->format = new_format;
- src->format = new_format;
-
r300_texture_reinterpret_format(pipe->screen,
- dst->texture, new_format);
+ dst, new_format);
r300_texture_reinterpret_format(pipe->screen,
- src->texture, new_format);
+ src, new_format);
}
- r300_hw_copy(pipe, dst, dstx, dsty, src, srcx, srcy, width, height);
+ r300_hw_copy_region(pipe, dst, subdst, dstx, dsty, dstz,
+ src, subsrc, srcx, srcy, srcz, width, height);
if (old_format != new_format) {
- dst->format = old_format;
- src->format = old_format;
-
r300_texture_reinterpret_format(pipe->screen,
- dst->texture, old_format);
+ dst, old_format);
r300_texture_reinterpret_format(pipe->screen,
- src->texture, old_format);
+ src, old_format);
}
}
-/* Fill a region of a surface with a constant value. */
-void r300_surface_fill(struct pipe_context* pipe,
- struct pipe_surface* dst,
- unsigned dstx, unsigned dsty,
- unsigned width, unsigned height,
- unsigned value)
+void r300_init_blit_functions(struct r300_context *r300)
{
- struct r300_context* r300 = r300_context(pipe);
-
- r300_blitter_save_states(r300);
- util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state);
-
- util_blitter_fill(r300->blitter,
- dst, dstx, dsty, width, height, value);
+ r300->context.clear = r300_clear;
+ r300->context.clear_render_target = r300_clear_render_target;
+ r300->context.clear_depth_stencil = r300_clear_depth_stencil;
+ r300->context.resource_copy_region = r300_resource_copy_region;
}
diff --git a/src/gallium/drivers/r300/r300_blit.h b/src/gallium/drivers/r300/r300_blit.h
deleted file mode 100644
index 029e4f98e7d..00000000000
--- a/src/gallium/drivers/r300/r300_blit.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright 2008 Marek Olšák <[email protected]>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#ifndef R300_BLIT_H
-#define R300_BLIT_H
-
-struct pipe_context;
-struct pipe_surface;
-
-void r300_clear(struct pipe_context* pipe,
- unsigned buffers,
- const float* rgba,
- double depth,
- unsigned stencil);
-
-void r300_surface_copy(struct pipe_context* pipe,
- struct pipe_surface* dst,
- unsigned dstx, unsigned dsty,
- struct pipe_surface* src,
- unsigned srcx, unsigned srcy,
- unsigned width, unsigned height);
-
-void r300_surface_fill(struct pipe_context* pipe,
- struct pipe_surface* dst,
- unsigned dstx, unsigned dsty,
- unsigned width, unsigned height,
- unsigned value);
-
-#endif /* R300_BLIT_H */
diff --git a/src/gallium/drivers/r300/r300_cb.h b/src/gallium/drivers/r300/r300_cb.h
new file mode 100644
index 00000000000..9d3d4fc1b19
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_cb.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright 2008 Corbin Simpson <[email protected]>
+ * Copyright 2010 Marek Olšák <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+/**
+ * This file contains macros for building command buffers in memory.
+ *
+ * Use NEW_CB for buffers with a varying size and it will also allocate
+ * the buffer.
+ * Use BEGIN_CB for arrays with a static size.
+ *
+ * Example:
+ *
+ * uint32_t cb[3];
+ * CB_LOCALS;
+ *
+ * BEGIN_CB(cb, 3);
+ * OUT_CB_REG_SEQ(R500_RB3D_CONSTANT_COLOR_AR, 2);
+ * OUT_CB(blend_color_red_alpha);
+ * OUT_CB(blend_color_green_blue);
+ * END_CB;
+ *
+ * And later:
+ *
+ * CS_LOCALS;
+ * WRITE_CS_TABLE(cb, 3);
+ *
+ * Or using a little slower variant:
+ *
+ * CS_LOCALS;
+ * BEGIN_CS(cb, 3);
+ * OUT_CS_TABLE(cb, 3);
+ * END_CS;
+ */
+
+#ifndef R300_CB_H
+#define R300_CB_H
+
+#include "r300_reg.h"
+
+/* Yes, I know macros are ugly. However, they are much prettier than the code
+ * that they neatly hide away, and don't have the cost of function setup, so
+ * we're going to use them. */
+
+#ifdef DEBUG
+#define CB_DEBUG(x) x
+#else
+#define CB_DEBUG(x)
+#endif
+
+
+/**
+ * Command buffer setup.
+ */
+
+#define CB_LOCALS \
+ CB_DEBUG(int cs_count = 0;) \
+ uint32_t *cs_ptr = NULL; \
+ CB_DEBUG((void) cs_count;) (void) cs_ptr;
+
+#define NEW_CB(ptr, size) do { \
+ assert(sizeof(*ptr) == sizeof(uint32_t)); \
+ cs_ptr = (ptr) = (uint32_t*)malloc((size) * sizeof(uint32_t)); \
+ CB_DEBUG(cs_count = size;) \
+} while (0)
+
+#define BEGIN_CB(ptr, size) do { \
+ assert(sizeof(*ptr) == sizeof(uint32_t)); \
+ cs_ptr = ptr; \
+ CB_DEBUG(cs_count = size;) \
+} while (0)
+
+#define END_CB do { \
+ CB_DEBUG(if (cs_count != 0) \
+ debug_printf("r300: Warning: cs_count off by %d at (%s, %s:%i)\n", \
+ cs_count, __FUNCTION__, __FILE__, __LINE__);) \
+} while (0)
+
+
+/**
+ * Storing pure DWORDs.
+ */
+
+#define OUT_CB(value) do { \
+ *cs_ptr = (value); \
+ cs_ptr++; \
+ CB_DEBUG(cs_count--;) \
+} while (0)
+
+#define OUT_CB_TABLE(values, count) do { \
+ memcpy(cs_ptr, values, count * sizeof(uint32_t)); \
+ cs_ptr += count; \
+ CB_DEBUG(cs_count -= count;) \
+} while (0)
+
+#define OUT_CB_32F(value) \
+ OUT_CB(fui(value));
+
+#define OUT_CB_REG(register, value) do { \
+ assert(register); \
+ OUT_CB(CP_PACKET0(register, 0)); \
+ OUT_CB(value); \
+} while (0)
+
+/* Note: This expects count to be the number of registers,
+ * not the actual packet0 count! */
+#define OUT_CB_REG_SEQ(register, count) do { \
+ assert(register); \
+ OUT_CB(CP_PACKET0(register, (count) - 1)); \
+} while (0)
+
+#define OUT_CB_ONE_REG(register, count) do { \
+ assert(register); \
+ OUT_CB(CP_PACKET0(register, (count) - 1) | RADEON_ONE_REG_WR); \
+} while (0)
+
+#define OUT_CB_PKT3(op, count) \
+ OUT_CB(CP_PACKET3(op, count))
+
+#endif /* R300_CB_H */
diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c
index e6dca66d4a0..48c24092114 100644
--- a/src/gallium/drivers/r300/r300_chipset.c
+++ b/src/gallium/drivers/r300/r300_chipset.c
@@ -36,6 +36,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->num_vert_fpus = 2;
caps->num_tex_units = 16;
caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? FALSE : TRUE;
+ caps->hiz_ram = 0;
caps->is_r400 = FALSE;
caps->is_r500 = FALSE;
caps->high_second_pipe = FALSE;
@@ -48,6 +49,8 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->family = CHIP_FAMILY_R300;
caps->high_second_pipe = TRUE;
caps->num_vert_fpus = 4;
+ caps->hiz_ram = R300_HIZ_LIMIT;
+ caps->zmask_ram = PIPE_ZMASK_SIZE;
break;
case 0x4145:
@@ -60,6 +63,8 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->family = CHIP_FAMILY_R300;
caps->high_second_pipe = TRUE;
caps->num_vert_fpus = 4;
+ caps->hiz_ram = R300_HIZ_LIMIT;
+ caps->zmask_ram = PIPE_ZMASK_SIZE;
break;
case 0x4150:
@@ -77,6 +82,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
case 0x4E56:
caps->family = CHIP_FAMILY_RV350;
caps->high_second_pipe = TRUE;
+ caps->zmask_ram = RV3xx_ZMASK_SIZE;
break;
case 0x4148:
@@ -89,12 +95,16 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->family = CHIP_FAMILY_R350;
caps->high_second_pipe = TRUE;
caps->num_vert_fpus = 4;
+ caps->hiz_ram = R300_HIZ_LIMIT;
+ caps->zmask_ram = PIPE_ZMASK_SIZE;
break;
case 0x4E4A:
caps->family = CHIP_FAMILY_R360;
caps->high_second_pipe = TRUE;
caps->num_vert_fpus = 4;
+ caps->hiz_ram = R300_HIZ_LIMIT;
+ caps->zmask_ram = PIPE_ZMASK_SIZE;
break;
case 0x5460:
@@ -107,6 +117,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
case 0x5B65:
caps->family = CHIP_FAMILY_RV370;
caps->high_second_pipe = TRUE;
+ caps->zmask_ram = RV3xx_ZMASK_SIZE;
break;
case 0x3150:
@@ -117,6 +128,8 @@ void r300_parse_chipset(struct r300_capabilities* caps)
case 0x3E54:
caps->family = CHIP_FAMILY_RV380;
caps->high_second_pipe = TRUE;
+ caps->hiz_ram = R300_HIZ_LIMIT;
+ caps->zmask_ram = RV3xx_ZMASK_SIZE;
break;
case 0x4A48:
@@ -132,6 +145,8 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->family = CHIP_FAMILY_R420;
caps->num_vert_fpus = 6;
caps->is_r400 = TRUE;
+ caps->hiz_ram = R300_HIZ_LIMIT;
+ caps->zmask_ram = PIPE_ZMASK_SIZE;
break;
case 0x5548:
@@ -146,6 +161,8 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->family = CHIP_FAMILY_R423;
caps->num_vert_fpus = 6;
caps->is_r400 = TRUE;
+ caps->hiz_ram = R300_HIZ_LIMIT;
+ caps->zmask_ram = PIPE_ZMASK_SIZE;
break;
case 0x554C:
@@ -158,6 +175,8 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->family = CHIP_FAMILY_R430;
caps->num_vert_fpus = 6;
caps->is_r400 = TRUE;
+ caps->hiz_ram = R300_HIZ_LIMIT;
+ caps->zmask_ram = PIPE_ZMASK_SIZE;
break;
case 0x5D4C:
@@ -169,6 +188,8 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->family = CHIP_FAMILY_R480;
caps->num_vert_fpus = 6;
caps->is_r400 = TRUE;
+ caps->hiz_ram = R300_HIZ_LIMIT;
+ caps->zmask_ram = PIPE_ZMASK_SIZE;
break;
case 0x4B48:
@@ -179,6 +200,8 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->family = CHIP_FAMILY_R481;
caps->num_vert_fpus = 6;
caps->is_r400 = TRUE;
+ caps->hiz_ram = R300_HIZ_LIMIT;
+ caps->zmask_ram = PIPE_ZMASK_SIZE;
break;
case 0x5E4C:
@@ -196,30 +219,36 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->family = CHIP_FAMILY_RV410;
caps->num_vert_fpus = 6;
caps->is_r400 = TRUE;
+ caps->hiz_ram = R300_HIZ_LIMIT;
+ caps->zmask_ram = PIPE_ZMASK_SIZE;
break;
case 0x5954:
case 0x5955:
caps->family = CHIP_FAMILY_RS480;
caps->has_tcl = FALSE;
+ caps->zmask_ram = RV3xx_ZMASK_SIZE;
break;
case 0x5974:
case 0x5975:
caps->family = CHIP_FAMILY_RS482;
caps->has_tcl = FALSE;
+ caps->zmask_ram = RV3xx_ZMASK_SIZE;
break;
case 0x5A41:
case 0x5A42:
caps->family = CHIP_FAMILY_RS400;
caps->has_tcl = FALSE;
+ caps->zmask_ram = RV3xx_ZMASK_SIZE;
break;
case 0x5A61:
case 0x5A62:
caps->family = CHIP_FAMILY_RC410;
caps->has_tcl = FALSE;
+ caps->zmask_ram = RV3xx_ZMASK_SIZE;
break;
case 0x791E:
@@ -227,6 +256,8 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->family = CHIP_FAMILY_RS690;
caps->has_tcl = FALSE;
caps->is_r400 = TRUE;
+ caps->hiz_ram = R300_HIZ_LIMIT;
+ caps->zmask_ram = PIPE_ZMASK_SIZE;
break;
case 0x793F:
@@ -235,6 +266,8 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->family = CHIP_FAMILY_RS600;
caps->has_tcl = FALSE;
caps->is_r400 = TRUE;
+ caps->hiz_ram = R300_HIZ_LIMIT;
+ caps->zmask_ram = PIPE_ZMASK_SIZE;
break;
case 0x796C:
@@ -244,6 +277,8 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->family = CHIP_FAMILY_RS740;
caps->has_tcl = FALSE;
caps->is_r400 = TRUE;
+ caps->hiz_ram = R300_HIZ_LIMIT;
+ caps->zmask_ram = PIPE_ZMASK_SIZE;
break;
case 0x7100:
@@ -263,6 +298,8 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->family = CHIP_FAMILY_R520;
caps->num_vert_fpus = 8;
caps->is_r500 = TRUE;
+ caps->hiz_ram = R300_HIZ_LIMIT;
+ caps->zmask_ram = PIPE_ZMASK_SIZE;
break;
case 0x7140:
@@ -306,6 +343,8 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->family = CHIP_FAMILY_RV515;
caps->num_vert_fpus = 2;
caps->is_r500 = TRUE;
+ caps->hiz_ram = R300_HIZ_LIMIT;
+ caps->zmask_ram = PIPE_ZMASK_SIZE;
break;
case 0x71C0:
@@ -327,6 +366,8 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->family = CHIP_FAMILY_RV530;
caps->num_vert_fpus = 5;
caps->is_r500 = TRUE;
+ /*caps->hiz_ram = RV530_HIZ_LIMIT;*/
+ caps->zmask_ram = PIPE_ZMASK_SIZE;
break;
case 0x7240:
@@ -347,12 +388,16 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->family = CHIP_FAMILY_R580;
caps->num_vert_fpus = 8;
caps->is_r500 = TRUE;
+ caps->hiz_ram = RV530_HIZ_LIMIT;
+ caps->zmask_ram = PIPE_ZMASK_SIZE;
break;
case 0x7280:
caps->family = CHIP_FAMILY_RV570;
caps->num_vert_fpus = 8;
caps->is_r500 = TRUE;
+ caps->hiz_ram = RV530_HIZ_LIMIT;
+ caps->zmask_ram = PIPE_ZMASK_SIZE;
break;
case 0x7281:
@@ -369,6 +414,8 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->family = CHIP_FAMILY_RV560;
caps->num_vert_fpus = 8;
caps->is_r500 = TRUE;
+ caps->hiz_ram = RV530_HIZ_LIMIT;
+ caps->zmask_ram = PIPE_ZMASK_SIZE;
break;
default:
diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h
index ab649c38573..e7ca642b4f3 100644
--- a/src/gallium/drivers/r300/r300_chipset.h
+++ b/src/gallium/drivers/r300/r300_chipset.h
@@ -25,6 +25,14 @@
#include "pipe/p_compiler.h"
+/* these are sizes in dwords */
+#define R300_HIZ_LIMIT 10240
+#define RV530_HIZ_LIMIT 15360
+
+/* rv3xx have only one pipe */
+#define PIPE_ZMASK_SIZE 4096
+#define RV3xx_ZMASK_SIZE 5120
+
/* Structure containing all the possible information about a specific Radeon
* in the R3xx, R4xx, and R5xx families. */
struct r300_capabilities {
@@ -42,6 +50,10 @@ struct r300_capabilities {
unsigned num_tex_units;
/* Whether or not TCL is physically present */
boolean has_tcl;
+ /* Some chipsets do not have HiZ RAM - other have varying amounts . */
+ int hiz_ram;
+ /* some chipsets have zmask ram per pipe some don't */
+ int zmask_ram;
/* Whether or not this is RV350 or newer, including all r400 and r500
* chipsets. The differences compared to the oldest r300 chips are:
* - Blend LTE/GTE thresholds
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 4f721ebb599..624dadd07d7 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -23,30 +23,86 @@
#include "draw/draw_context.h"
#include "util/u_memory.h"
+#include "util/u_sampler.h"
#include "util/u_simple_list.h"
#include "util/u_upload_mgr.h"
-#include "r300_blit.h"
+#include "r300_cb.h"
#include "r300_context.h"
#include "r300_emit.h"
-#include "r300_flush.h"
-#include "r300_query.h"
-#include "r300_render.h"
+#include "r300_hyperz.h"
#include "r300_screen.h"
#include "r300_screen_buffer.h"
-#include "r300_state_invariant.h"
#include "r300_winsys.h"
#include <inttypes.h>
+static void r300_update_num_contexts(struct r300_screen *r300screen,
+ int diff)
+{
+ if (diff > 0) {
+ p_atomic_inc(&r300screen->num_contexts);
+
+ if (r300screen->num_contexts > 1)
+ util_mempool_set_thread_safety(&r300screen->pool_buffers,
+ UTIL_MEMPOOL_MULTITHREADED);
+ } else {
+ p_atomic_dec(&r300screen->num_contexts);
+
+ if (r300screen->num_contexts <= 1)
+ util_mempool_set_thread_safety(&r300screen->pool_buffers,
+ UTIL_MEMPOOL_SINGLETHREADED);
+ }
+}
+
+static void r300_release_referenced_objects(struct r300_context *r300)
+{
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+ struct r300_textures_state *textures =
+ (struct r300_textures_state*)r300->textures_state.state;
+ struct r300_query *query, *temp;
+ unsigned i;
+
+ /* Framebuffer state. */
+ util_unreference_framebuffer_state(fb);
+
+ /* Textures. */
+ for (i = 0; i < textures->sampler_view_count; i++)
+ pipe_sampler_view_reference(
+ (struct pipe_sampler_view**)&textures->sampler_views[i], NULL);
+
+ /* The special dummy texture for texkill. */
+ if (r300->texkill_sampler) {
+ pipe_sampler_view_reference(
+ (struct pipe_sampler_view**)&r300->texkill_sampler,
+ NULL);
+ }
+
+ /* The SWTCL VBO. */
+ pipe_resource_reference(&r300->vbo, NULL);
+
+ /* Vertex buffers. */
+ for (i = 0; i < r300->vertex_buffer_count; i++) {
+ pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL);
+ }
+
+ /* If there are any queries pending or not destroyed, remove them now. */
+ foreach_s(query, temp, &r300->query_list) {
+ remove_from_list(query);
+ FREE(query);
+ }
+}
+
static void r300_destroy_context(struct pipe_context* context)
{
struct r300_context* r300 = r300_context(context);
- struct r300_query* query, * temp;
struct r300_atom *atom;
- util_blitter_destroy(r300->blitter);
- draw_destroy(r300->draw);
+ if (r300->blitter)
+ util_blitter_destroy(r300->blitter);
+ if (r300->draw)
+ draw_destroy(r300->draw);
/* Print stats, if enabled. */
if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) {
@@ -58,33 +114,53 @@ static void r300_destroy_context(struct pipe_context* context)
}
}
- /* Free the OQ BO. */
- context->screen->resource_destroy(context->screen, r300->oqbo);
-
- /* If there are any queries pending or not destroyed, remove them now. */
- foreach_s(query, temp, &r300->query_list) {
- remove_from_list(query);
- FREE(query);
+ if (r300->upload_vb)
+ u_upload_destroy(r300->upload_vb);
+ if (r300->upload_ib)
+ u_upload_destroy(r300->upload_ib);
+
+ if (r300->tran.translate_cache)
+ translate_cache_destroy(r300->tran.translate_cache);
+
+ /* XXX: This function assumes r300->query_list was initialized */
+ r300_release_referenced_objects(r300);
+
+ if (r300->zmask_mm)
+ r300_hyperz_destroy_mm(r300);
+
+ if (r300->cs)
+ r300->rws->cs_destroy(r300->cs);
+
+ /* XXX: No way to tell if this was initialized or not? */
+ util_mempool_destroy(&r300->pool_transfers);
+
+ r300_update_num_contexts(r300->screen, -1);
+
+ /* Free the structs allocated in r300_setup_atoms() */
+ if (r300->aa_state.state) {
+ FREE(r300->aa_state.state);
+ FREE(r300->blend_color_state.state);
+ FREE(r300->clip_state.state);
+ FREE(r300->fb_state.state);
+ FREE(r300->gpu_flush.state);
+ FREE(r300->hyperz_state.state);
+ FREE(r300->invariant_state.state);
+ FREE(r300->rs_block_state.state);
+ FREE(r300->scissor_state.state);
+ FREE(r300->textures_state.state);
+ FREE(r300->vap_invariant_state.state);
+ FREE(r300->viewport_state.state);
+ FREE(r300->ztop_state.state);
+ FREE(r300->fs_constants.state);
+ FREE(r300->vs_constants.state);
+ if (!r300->screen->caps.has_tcl) {
+ FREE(r300->vertex_stream_state.state);
+ }
}
-
- u_upload_destroy(r300->upload_vb);
- u_upload_destroy(r300->upload_ib);
-
- FREE(r300->blend_color_state.state);
- FREE(r300->clip_state.state);
- FREE(r300->fb_state.state);
- FREE(r300->rs_block_state.state);
- FREE(r300->scissor_state.state);
- FREE(r300->textures_state.state);
- FREE(r300->vap_output_state.state);
- FREE(r300->viewport_state.state);
- FREE(r300->ztop_state.state);
- FREE(r300->fs_constants.state);
- FREE(r300->vs_constants.state);
FREE(r300);
}
-static void r300_flush_cb(void *data)
+void r300_flush_cb(void *data)
{
struct r300_context* const cs_context_copy = data;
@@ -92,17 +168,24 @@ static void r300_flush_cb(void *data)
}
#define R300_INIT_ATOM(atomname, atomsize) \
+ do { \
r300->atomname.name = #atomname; \
r300->atomname.state = NULL; \
r300->atomname.size = atomsize; \
r300->atomname.emit = r300_emit_##atomname; \
r300->atomname.dirty = FALSE; \
- insert_at_tail(&r300->atom_list, &r300->atomname);
+ insert_at_tail(&r300->atom_list, &r300->atomname); \
+ } while (0)
static void r300_setup_atoms(struct r300_context* r300)
{
+ boolean is_rv350 = r300->screen->caps.is_rv350;
boolean is_r500 = r300->screen->caps.is_r500;
boolean has_tcl = r300->screen->caps.has_tcl;
+ boolean drm_2_3_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0);
+ boolean drm_2_6_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_6_0);
+ boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ);
+ boolean has_hiz_ram = r300->screen->caps.hiz_ram > 0;
/* Create the actual atom list.
*
@@ -110,50 +193,206 @@ static void r300_setup_atoms(struct r300_context* r300)
* can affect performance and conformance if not handled with care.
*
* Some atoms never change size, others change every emit - those have
- * the size of 0 here. */
+ * the size of 0 here.
+ *
+ * NOTE: The framebuffer state is split into these atoms:
+ * - gpu_flush (unpipelined regs)
+ * - aa_state (unpipelined regs)
+ * - fb_state (unpipelined regs)
+ * - hyperz_state (unpipelined regs followed by pipelined ones)
+ * - fb_state_pipelined (pipelined regs)
+ * The motivation behind this is to be able to emit a strict
+ * subset of the regs, and to have reasonable register ordering. */
make_empty_list(&r300->atom_list);
- R300_INIT_ATOM(invariant_state, 71);
- R300_INIT_ATOM(query_start, 4);
+ /* SC, GB (unpipelined), RB3D (unpipelined), ZB (unpipelined). */
+ R300_INIT_ATOM(gpu_flush, 9);
+ R300_INIT_ATOM(aa_state, 4);
+ R300_INIT_ATOM(fb_state, 0);
+ R300_INIT_ATOM(hyperz_state, is_r500 || (is_rv350 && drm_2_6_0) ? 10 : 8);
+ /* ZB (unpipelined), SC. */
R300_INIT_ATOM(ztop_state, 2);
+ /* ZB, FG. */
+ R300_INIT_ATOM(dsa_state, is_r500 ? 8 : 6);
+ /* RB3D. */
R300_INIT_ATOM(blend_state, 8);
R300_INIT_ATOM(blend_color_state, is_r500 ? 3 : 2);
- R300_INIT_ATOM(clip_state, has_tcl ? 5 + (6 * 4) : 2);
- R300_INIT_ATOM(dsa_state, is_r500 ? 8 : 6);
- R300_INIT_ATOM(fb_state, 0);
- R300_INIT_ATOM(rs_state, 0);
+ /* SC. */
R300_INIT_ATOM(scissor_state, 3);
+ /* GB, FG, GA, SU, SC, RB3D. */
+ R300_INIT_ATOM(invariant_state, 16 + (is_rv350 ? 4 : 0));
+ /* VAP. */
R300_INIT_ATOM(viewport_state, 9);
- R300_INIT_ATOM(rs_block_state, 0);
- R300_INIT_ATOM(vertex_stream_state, 0);
- R300_INIT_ATOM(vap_output_state, 6);
R300_INIT_ATOM(pvs_flush, 2);
+ R300_INIT_ATOM(vap_invariant_state, 9);
+ R300_INIT_ATOM(vertex_stream_state, 0);
R300_INIT_ATOM(vs_state, 0);
R300_INIT_ATOM(vs_constants, 0);
- R300_INIT_ATOM(texture_cache_inval, 2);
- R300_INIT_ATOM(textures_state, 0);
+ R300_INIT_ATOM(clip_state, has_tcl ? 5 + (6 * 4) : 2);
+ /* VAP, RS, GA, GB, SU, SC. */
+ R300_INIT_ATOM(rs_block_state, 0);
+ R300_INIT_ATOM(rs_state, 0);
+ /* SC, US. */
+ R300_INIT_ATOM(fb_state_pipelined, 5 + (drm_2_3_0 ? 3 : 0));
+ /* US. */
R300_INIT_ATOM(fs, 0);
R300_INIT_ATOM(fs_rc_constant_state, 0);
R300_INIT_ATOM(fs_constants, 0);
+ /* TX. */
+ R300_INIT_ATOM(texture_cache_inval, 2);
+ R300_INIT_ATOM(textures_state, 0);
+ if (has_hyperz) {
+ /* HiZ Clear */
+ if (has_hiz_ram)
+ R300_INIT_ATOM(hiz_clear, 0);
+ /* zmask clear */
+ R300_INIT_ATOM(zmask_clear, 0);
+ }
+ /* ZB (unpipelined), SU. */
+ R300_INIT_ATOM(query_start, 4);
/* Replace emission functions for r500. */
- if (r300->screen->caps.is_r500) {
+ if (is_r500) {
r300->fs.emit = r500_emit_fs;
r300->fs_rc_constant_state.emit = r500_emit_fs_rc_constant_state;
r300->fs_constants.emit = r500_emit_fs_constants;
}
/* Some non-CSO atoms need explicit space to store the state locally. */
+ r300->aa_state.state = CALLOC_STRUCT(r300_aa_state);
r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state);
- r300->clip_state.state = CALLOC_STRUCT(pipe_clip_state);
+ r300->clip_state.state = CALLOC_STRUCT(r300_clip_state);
r300->fb_state.state = CALLOC_STRUCT(pipe_framebuffer_state);
+ r300->gpu_flush.state = CALLOC_STRUCT(pipe_framebuffer_state);
+ r300->hyperz_state.state = CALLOC_STRUCT(r300_hyperz_state);
+ r300->invariant_state.state = CALLOC_STRUCT(r300_invariant_state);
r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block);
r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state);
r300->textures_state.state = CALLOC_STRUCT(r300_textures_state);
- r300->vap_output_state.state = CALLOC_STRUCT(r300_vap_output_state);
+ r300->vap_invariant_state.state = CALLOC_STRUCT(r300_vap_invariant_state);
r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state);
r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state);
r300->fs_constants.state = CALLOC_STRUCT(r300_constant_buffer);
r300->vs_constants.state = CALLOC_STRUCT(r300_constant_buffer);
+ if (!r300->screen->caps.has_tcl) {
+ r300->vertex_stream_state.state = CALLOC_STRUCT(r300_vertex_stream_state);
+ }
+
+ /* Some non-CSO atoms don't use the state pointer. */
+ r300->fb_state_pipelined.allow_null_state = TRUE;
+ r300->fs_rc_constant_state.allow_null_state = TRUE;
+ r300->pvs_flush.allow_null_state = TRUE;
+ r300->query_start.allow_null_state = TRUE;
+ r300->texture_cache_inval.allow_null_state = TRUE;
+
+ /* Some states must be marked as dirty here to properly set up
+ * hardware in the first command stream. */
+ r300->invariant_state.dirty = TRUE;
+ r300->pvs_flush.dirty = TRUE;
+ r300->vap_invariant_state.dirty = TRUE;
+ r300->texture_cache_inval.dirty = TRUE;
+ r300->textures_state.dirty = TRUE;
+}
+
+/* Not every state tracker calls every driver function before the first draw
+ * call and we must initialize the command buffers somehow. */
+static void r300_init_states(struct pipe_context *pipe)
+{
+ struct r300_context *r300 = r300_context(pipe);
+ struct pipe_blend_color bc = {{0}};
+ struct pipe_clip_state cs = {{{0}}};
+ struct pipe_scissor_state ss = {0};
+ struct r300_clip_state *clip =
+ (struct r300_clip_state*)r300->clip_state.state;
+ struct r300_gpu_flush *gpuflush =
+ (struct r300_gpu_flush*)r300->gpu_flush.state;
+ struct r300_vap_invariant_state *vap_invariant =
+ (struct r300_vap_invariant_state*)r300->vap_invariant_state.state;
+ struct r300_invariant_state *invariant =
+ (struct r300_invariant_state*)r300->invariant_state.state;
+
+ CB_LOCALS;
+
+ pipe->set_blend_color(pipe, &bc);
+ pipe->set_scissor_state(pipe, &ss);
+
+ /* Initialize the clip state. */
+ if (r300_context(pipe)->screen->caps.has_tcl) {
+ pipe->set_clip_state(pipe, &cs);
+ } else {
+ BEGIN_CB(clip->cb, 2);
+ OUT_CB_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE);
+ END_CB;
+ }
+
+ /* Initialize the GPU flush. */
+ {
+ BEGIN_CB(gpuflush->cb_flush_clean, 6);
+
+ /* Flush and free renderbuffer caches. */
+ OUT_CB_REG(R300_RB3D_DSTCACHE_CTLSTAT,
+ R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
+ R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
+ OUT_CB_REG(R300_ZB_ZCACHE_CTLSTAT,
+ R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
+ R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
+
+ /* Wait until the GPU is idle.
+ * This fixes random pixels sometimes appearing probably caused
+ * by incomplete rendering. */
+ OUT_CB_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
+ END_CB;
+ }
+
+ /* Initialize the VAP invariant state. */
+ {
+ BEGIN_CB(vap_invariant->cb, 9);
+ OUT_CB_REG(VAP_PVS_VTX_TIMEOUT_REG, 0xffff);
+ OUT_CB_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4);
+ OUT_CB_32F(1.0);
+ OUT_CB_32F(1.0);
+ OUT_CB_32F(1.0);
+ OUT_CB_32F(1.0);
+ OUT_CB_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO);
+ END_CB;
+ }
+
+ /* Initialize the invariant state. */
+ {
+ BEGIN_CB(invariant->cb, r300->invariant_state.size);
+ OUT_CB_REG(R300_GB_SELECT, 0);
+ OUT_CB_REG(R300_FG_FOG_BLEND, 0);
+ OUT_CB_REG(R300_GA_ROUND_MODE, 1);
+ OUT_CB_REG(R300_GA_OFFSET, 0);
+ OUT_CB_REG(R300_SU_TEX_WRAP, 0);
+ OUT_CB_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF);
+ OUT_CB_REG(R300_SU_DEPTH_OFFSET, 0);
+ OUT_CB_REG(R300_SC_EDGERULE, 0x2DA49525);
+
+ if (r300->screen->caps.is_rv350) {
+ OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101);
+ OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE);
+ }
+ END_CB;
+ }
+
+ /* Initialize the hyperz state. */
+ {
+ struct r300_hyperz_state *hyperz =
+ (struct r300_hyperz_state*)r300->hyperz_state.state;
+ BEGIN_CB(&hyperz->cb_flush_begin, r300->hyperz_state.size);
+ OUT_CB_REG(R300_ZB_ZCACHE_CTLSTAT,
+ R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE);
+ OUT_CB_REG(R300_ZB_BW_CNTL, 0);
+ OUT_CB_REG(R300_ZB_DEPTHCLEARVALUE, 0);
+ OUT_CB_REG(R300_SC_HYPERZ, R300_SC_HYPERZ_ADJ_2);
+
+ if (r300->screen->caps.is_r500 ||
+ (r300->screen->caps.is_rv350 &&
+ r300->rws->get_value(r300->rws, R300_VID_DRM_2_6_0))) {
+ OUT_CB_REG(R300_GB_Z_PEQ_CONFIG, 0);
+ }
+ END_CB;
+ }
}
struct pipe_context* r300_create_context(struct pipe_screen* screen,
@@ -166,6 +405,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
if (!r300)
return NULL;
+ r300_update_num_contexts(r300screen, 1);
+
r300->rws = rws;
r300->screen = r300screen;
@@ -175,77 +416,121 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
r300->context.destroy = r300_destroy_context;
- r300->context.clear = r300_clear;
- r300->context.surface_copy = r300_surface_copy;
- r300->context.surface_fill = r300_surface_fill;
-
- if (r300screen->caps.has_tcl) {
- r300->context.draw_arrays = r300_draw_arrays;
- r300->context.draw_elements = r300_draw_elements;
- r300->context.draw_range_elements = r300_draw_range_elements;
-
- if (r300screen->caps.is_r500) {
- r300->emit_draw_arrays_immediate = r500_emit_draw_arrays_immediate;
- r300->emit_draw_arrays = r500_emit_draw_arrays;
- r300->emit_draw_elements = r500_emit_draw_elements;
- } else {
- r300->emit_draw_arrays_immediate = r300_emit_draw_arrays_immediate;
- r300->emit_draw_arrays = r300_emit_draw_arrays;
- r300->emit_draw_elements = r300_emit_draw_elements;
- }
- } else {
- r300->context.draw_arrays = r300_swtcl_draw_arrays;
- r300->context.draw_elements = r300_draw_elements;
- r300->context.draw_range_elements = r300_swtcl_draw_range_elements;
+ make_empty_list(&r300->query_list);
+
+ util_mempool_create(&r300->pool_transfers,
+ sizeof(struct pipe_transfer), 64,
+ UTIL_MEMPOOL_SINGLETHREADED);
+
+ r300->cs = rws->cs_create(rws);
+ if (r300->cs == NULL)
+ goto fail;
+ if (!r300screen->caps.has_tcl) {
/* Create a Draw. This is used for SW TCL. */
r300->draw = draw_create(&r300->context);
/* Enable our renderer. */
draw_set_rasterize_stage(r300->draw, r300_draw_stage(r300));
- /* Enable Draw's clipping. */
- draw_set_driver_clipping(r300->draw, FALSE);
- /* Force Draw to never do viewport transform, since we can do
- * transform in hardware, always. */
- draw_set_viewport_state(r300->draw, &r300_viewport_identity);
+ /* Disable converting points/lines to triangles. */
+ draw_wide_line_threshold(r300->draw, 10000000.f);
+ draw_wide_point_threshold(r300->draw, 10000000.f);
}
r300_setup_atoms(r300);
- /* Open up the OQ BO. */
- r300->oqbo = pipe_buffer_create(screen,
- R300_BIND_OQBO, 4096);
- make_empty_list(&r300->query_list);
-
+ r300_init_blit_functions(r300);
r300_init_flush_functions(r300);
r300_init_query_functions(r300);
r300_init_state_functions(r300);
r300_init_resource_functions(r300);
- r300->invariant_state.dirty = TRUE;
+ r300->blitter = util_blitter_create(&r300->context);
+ if (r300->blitter == NULL)
+ goto fail;
- rws->set_flush_cb(r300->rws, r300_flush_cb, r300);
- r300->dirty_hw++;
+ /* Render functions must be initialized after blitter. */
+ r300_init_render_functions(r300);
- r300->blitter = util_blitter_create(&r300->context);
+ rws->cs_set_flush(r300->cs, r300_flush_cb, r300);
+
+ /* setup hyper-z mm */
+ if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
+ if (!r300_hyperz_init_mm(r300))
+ goto fail;
r300->upload_ib = u_upload_create(&r300->context,
32 * 1024, 16,
PIPE_BIND_INDEX_BUFFER);
if (r300->upload_ib == NULL)
- goto no_upload_ib;
+ goto fail;
r300->upload_vb = u_upload_create(&r300->context,
128 * 1024, 16,
PIPE_BIND_VERTEX_BUFFER);
if (r300->upload_vb == NULL)
- goto no_upload_vb;
+ goto fail;
+
+ r300->tran.translate_cache = translate_cache_create();
+ if (r300->tran.translate_cache == NULL)
+ goto fail;
+
+ r300_init_states(&r300->context);
+
+ /* The KIL opcode needs the first texture unit to be enabled
+ * on r3xx-r4xx. In order to calm down the CS checker, we bind this
+ * dummy texture there. */
+ if (!r300->screen->caps.is_r500) {
+ struct pipe_resource *tex;
+ struct pipe_resource rtempl = {{0}};
+ struct pipe_sampler_view vtempl = {{0}};
+
+ rtempl.target = PIPE_TEXTURE_2D;
+ rtempl.format = PIPE_FORMAT_I8_UNORM;
+ rtempl.bind = PIPE_BIND_SAMPLER_VIEW;
+ rtempl.width0 = 1;
+ rtempl.height0 = 1;
+ rtempl.depth0 = 1;
+ tex = screen->resource_create(screen, &rtempl);
+
+ u_sampler_view_default_template(&vtempl, tex, tex->format);
+
+ r300->texkill_sampler = (struct r300_sampler_view*)
+ r300->context.create_sampler_view(&r300->context, tex, &vtempl);
+
+ pipe_resource_reference(&tex, NULL);
+ }
return &r300->context;
- no_upload_ib:
- u_upload_destroy(r300->upload_ib);
- no_upload_vb:
- FREE(r300);
+ fail:
+ r300_destroy_context(&r300->context);
return NULL;
}
+
+void r300_finish(struct r300_context *r300)
+{
+ struct pipe_framebuffer_state *fb;
+ unsigned i;
+
+ /* This is a preliminary implementation of glFinish.
+ *
+ * The ideal implementation should use something like EmitIrqLocked and
+ * WaitIrq, or better, real fences.
+ */
+ if (r300->fb_state.state) {
+ fb = r300->fb_state.state;
+
+ for (i = 0; i < fb->nr_cbufs; i++) {
+ if (fb->cbufs[i]->texture) {
+ r300->rws->buffer_wait(r300->rws,
+ r300_texture(fb->cbufs[i]->texture)->buffer);
+ return;
+ }
+ }
+ if (fb->zsbuf && fb->zsbuf->texture) {
+ r300->rws->buffer_wait(r300->rws,
+ r300_texture(fb->zsbuf->texture)->buffer);
+ }
+ }
+}
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index b6e20591eda..b59bc002610 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -31,14 +31,16 @@
#include "util/u_inlines.h"
#include "util/u_transfer.h"
+#include "translate/translate_cache.h"
+
#include "r300_defines.h"
#include "r300_screen.h"
struct u_upload_mgr;
struct r300_context;
-
struct r300_fragment_shader;
struct r300_vertex_shader;
+struct r300_stencilref_context;
struct r300_atom {
/* List pointers. */
@@ -55,70 +57,103 @@ struct r300_atom {
unsigned size;
/* Whether this atom should be emitted. */
boolean dirty;
+ /* Whether this atom may be emitted with state == NULL. */
+ boolean allow_null_state;
+};
+
+struct r300_aa_state {
+ struct r300_surface *dest;
+
+ uint32_t aa_config;
+ uint32_t aaresolve_ctl;
};
struct r300_blend_state {
- uint32_t blend_control; /* R300_RB3D_CBLEND: 0x4e04 */
- uint32_t alpha_blend_control; /* R300_RB3D_ABLEND: 0x4e08 */
- uint32_t color_channel_mask; /* R300_RB3D_COLOR_CHANNEL_MASK: 0x4e0c */
- uint32_t rop; /* R300_RB3D_ROPCNTL: 0x4e18 */
- uint32_t dither; /* R300_RB3D_DITHER_CTL: 0x4e50 */
+ uint32_t cb[8];
+ uint32_t cb_no_readwrite[8];
};
struct r300_blend_color_state {
- /* RV515 and earlier */
- uint32_t blend_color; /* R300_RB3D_BLEND_COLOR: 0x4e10 */
- /* R520 and newer */
- uint32_t blend_color_red_alpha; /* R500_RB3D_CONSTANT_COLOR_AR: 0x4ef8 */
- uint32_t blend_color_green_blue; /* R500_RB3D_CONSTANT_COLOR_GB: 0x4efc */
+ uint32_t cb[3];
+};
+
+struct r300_clip_state {
+ struct pipe_clip_state clip;
+
+ uint32_t cb[29];
};
struct r300_dsa_state {
+ struct pipe_depth_stencil_alpha_state dsa;
+
+ /* This is actually a command buffer with named dwords. */
+ uint32_t cb_begin;
uint32_t alpha_function; /* R300_FG_ALPHA_FUNC: 0x4bd4 */
- uint32_t alpha_reference; /* R500_FG_ALPHA_VALUE: 0x4be0 */
+ uint32_t cb_reg_seq;
uint32_t z_buffer_control; /* R300_ZB_CNTL: 0x4f00 */
uint32_t z_stencil_control; /* R300_ZB_ZSTENCILCNTL: 0x4f04 */
uint32_t stencil_ref_mask; /* R300_ZB_STENCILREFMASK: 0x4f08 */
+ uint32_t cb_reg;
uint32_t stencil_ref_bf; /* R500_ZB_STENCILREFMASK_BF: 0x4fd4 */
+ /* The second command buffer disables zbuffer reads and writes. */
+ uint32_t cb_no_readwrite[8];
+
/* Whether a two-sided stencil is enabled. */
boolean two_sided;
/* Whether a fallback should be used for a two-sided stencil ref value. */
- boolean stencil_ref_bf_fallback;
+ boolean two_sided_stencil_ref;
};
+struct r300_hyperz_state {
+ int current_func; /* -1 after a clear before first op */
+ int flush;
+ /* This is actually a command buffer with named dwords. */
+ uint32_t cb_flush_begin;
+ uint32_t zb_zcache_ctlstat; /* R300_ZB_CACHE_CNTL */
+ uint32_t cb_begin;
+ uint32_t zb_bw_cntl; /* R300_ZB_BW_CNTL */
+ uint32_t cb_reg1;
+ uint32_t zb_depthclearvalue; /* R300_ZB_DEPTHCLEARVALUE */
+ uint32_t cb_reg2;
+ uint32_t sc_hyperz; /* R300_SC_HYPERZ */
+ uint32_t cb_reg3;
+ uint32_t gb_z_peq_config; /* R300_GB_Z_PEQ_CONFIG: 0x4028 */
+};
+
+struct r300_gpu_flush {
+ uint32_t cb_flush_clean[6];
+};
+
+#define RS_STATE_MAIN_SIZE 23
+
struct r300_rs_state {
- /* Draw-specific rasterizer state */
+ /* Original rasterizer state. */
struct pipe_rasterizer_state rs;
+ /* Draw-specific rasterizer state. */
+ struct pipe_rasterizer_state rs_draw;
+
+ /* Command buffers. */
+ uint32_t cb_main[RS_STATE_MAIN_SIZE];
+ uint32_t cb_poly_offset_zb16[5];
+ uint32_t cb_poly_offset_zb24[5];
+
+ /* The index to cb_main where the cull_mode register value resides. */
+ unsigned cull_mode_index;
- uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */
- uint32_t antialiasing_config; /* R300_GB_AA_CONFIG: 0x4020 */
- uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */
- uint32_t line_control; /* R300_GA_LINE_CNTL: 0x4234 */
- float depth_scale; /* R300_SU_POLY_OFFSET_FRONT_SCALE: 0x42a4 */
- /* R300_SU_POLY_OFFSET_BACK_SCALE: 0x42ac */
- float depth_offset; /* R300_SU_POLY_OFFSET_FRONT_OFFSET: 0x42a8 */
- /* R300_SU_POLY_OFFSET_BACK_OFFSET: 0x42b0 */
- uint32_t polygon_offset_enable; /* R300_SU_POLY_OFFSET_ENABLE: 0x42b4 */
- uint32_t cull_mode; /* R300_SU_CULL_MODE: 0x42b8 */
- uint32_t line_stipple_config; /* R300_GA_LINE_STIPPLE_CONFIG: 0x4328 */
- uint32_t line_stipple_value; /* R300_GA_LINE_STIPPLE_VALUE: 0x4260 */
+ /* Whether polygon offset is enabled. */
+ boolean polygon_offset_enable;
+
+ /* This is emitted in the draw function. */
uint32_t color_control; /* R300_GA_COLOR_CONTROL: 0x4278 */
- uint32_t polygon_mode; /* R300_GA_POLY_MODE: 0x4288 */
- uint32_t clip_rule; /* R300_SC_CLIP_RULE: 0x43D0 */
-
- /* Specifies top of Raster pipe specific enable controls,
- * i.e. texture coordinates stuffing for points, lines, triangles */
- uint32_t stuffing_enable; /* R300_GB_ENABLE: 0x4008 */
-
- /* Point sprites texture coordinates, 0: lower left, 1: upper right */
- float point_texcoord_left; /* R300_GA_POINT_S0: 0x4200 */
- float point_texcoord_bottom; /* R300_GA_POINT_T0: 0x4204 */
- float point_texcoord_right; /* R300_GA_POINT_S1: 0x4208 */
- float point_texcoord_top; /* R300_GA_POINT_T1: 0x420c */
};
struct r300_rs_block {
+ uint32_t vap_vtx_state_cntl; /* R300_VAP_VTX_STATE_CNTL: 0x2180 */
+ uint32_t vap_vsm_vtx_assm; /* R300_VAP_VSM_VTX_ASSM: 0x2184 */
+ uint32_t vap_out_vtx_fmt[2]; /* R300_VAP_OUTPUT_VTX_FMT_[0-1]: 0x2090 */
+ uint32_t gb_enable;
+
uint32_t ip[8]; /* R300_RS_IP_[0-7], R500_RS_IP_[0-7] */
uint32_t count; /* R300_RS_COUNT */
uint32_t inst_count; /* R300_RS_INST_COUNT */
@@ -130,7 +165,6 @@ struct r300_sampler_state {
uint32_t filter0; /* R300_TX_FILTER0: 0x4400 */
uint32_t filter1; /* R300_TX_FILTER1: 0x4440 */
- uint32_t border_color; /* R300_TX_BORDER_COLOR: 0x45c0 */
/* Min/max LOD must be clamped to [0, last_level], thus
* it's dependent on a currently bound texture */
@@ -147,19 +181,28 @@ struct r300_texture_format_state {
struct r300_sampler_view {
struct pipe_sampler_view base;
+ /* Swizzles in the UTIL_FORMAT_SWIZZLE_* representation,
+ * derived from base. */
+ unsigned char swizzle[4];
+
/* Copy of r300_texture::texture_format_state with format-specific bits
* added. */
struct r300_texture_format_state format;
+
+ /* The texture cache region for this texture. */
+ uint32_t texcache_region;
};
struct r300_texture_fb_state {
- /* Colorbuffer. */
- uint32_t colorpitch[R300_MAX_TEXTURE_LEVELS]; /* R300_RB3D_COLORPITCH[0-3]*/
- uint32_t us_out_fmt; /* R300_US_OUT_FMT[0-3] */
+ uint32_t pitch[R300_MAX_TEXTURE_LEVELS]; /* COLORPITCH or DEPTHPITCH. */
+ uint32_t format; /* US_OUT_FMT or R300_ZB_FORMAT */
+};
- /* Zbuffer. */
- uint32_t depthpitch[R300_MAX_TEXTURE_LEVELS]; /* R300_RB3D_DEPTHPITCH */
- uint32_t zb_format; /* R300_ZB_FORMAT */
+struct r300_texture_sampler_state {
+ struct r300_texture_format_state format;
+ uint32_t filter0; /* R300_TX_FILTER0: 0x4400 */
+ uint32_t filter1; /* R300_TX_FILTER1: 0x4440 */
+ uint32_t border_color; /* R300_TX_BORDER_COLOR: 0x45c0 */
};
struct r300_textures_state {
@@ -173,12 +216,7 @@ struct r300_textures_state {
/* This is the merge of the texture and sampler states. */
unsigned count;
uint32_t tx_enable; /* R300_TX_ENABLE: 0x4101 */
- struct r300_texture_sampler_state {
- struct r300_texture_format_state format;
- uint32_t filter0; /* R300_TX_FILTER0: 0x4400 */
- uint32_t filter1; /* R300_TX_FILTER1: 0x4440 */
- uint32_t border_color; /* R300_TX_BORDER_COLOR: 0x45c0 */
- } regs[16];
+ struct r300_texture_sampler_state regs[16];
};
struct r300_vertex_stream_state {
@@ -190,10 +228,12 @@ struct r300_vertex_stream_state {
unsigned count;
};
-struct r300_vap_output_state {
- uint32_t vap_vtx_state_cntl; /* R300_VAP_VTX_STATE_CNTL: 0x2180 */
- uint32_t vap_vsm_vtx_assm; /* R300_VAP_VSM_VTX_ASSM: 0x2184 */
- uint32_t vap_out_vtx_fmt[2]; /* R300_VAP_OUTPUT_VTX_FMT_[0-1]: 0x2090 */
+struct r300_invariant_state {
+ uint32_t cb[20];
+};
+
+struct r300_vap_invariant_state {
+ uint32_t cb[9];
};
struct r300_viewport_state {
@@ -215,9 +255,9 @@ struct r300_ztop_state {
struct r300_constant_buffer {
/* Buffer of constants */
- float constants[256][4];
- /* Total number of constants */
- unsigned count;
+ uint32_t *ptr;
+ /* Remapping table. */
+ unsigned *remap_table;
};
/* Query object.
@@ -230,38 +270,102 @@ struct r300_constant_buffer {
struct r300_query {
/* The kind of query. Currently only OQ is supported. */
unsigned type;
- /* Whether this query is currently active. Only active queries will
- * get emitted into the command stream, and only active queries get
- * tallied. */
- boolean active;
- /* The current count of this query. Required to be at least 32 bits. */
- unsigned int count;
- /* The offset of this query into the query buffer, in bytes. */
- unsigned offset;
+ /* The number of pipes where query results are stored. */
+ unsigned num_pipes;
+ /* How many results have been written, in dwords. It's incremented
+ * after end_query and flush. */
+ unsigned num_results;
/* if we've flushed the query */
boolean flushed;
/* if begin has been emitted */
boolean begin_emitted;
+
+ /* The buffer where query results are stored. */
+ struct r300_winsys_buffer *buffer;
+ /* The size of the buffer. */
+ unsigned buffer_size;
+ /* The domain of the buffer. */
+ enum r300_buffer_domain domain;
+
/* Linked list members. */
struct r300_query* prev;
struct r300_query* next;
};
-struct r300_texture {
- /* Parent class */
+/* Fence object.
+ *
+ * This is a fake fence. Instead of syncing with the fence, we sync
+ * with the context, which is inefficient but compliant.
+ *
+ * This is not a subclass of pipe_fence_handle because pipe_fence_handle is
+ * never actually fully defined. So, rather than have it as a member, and do
+ * subclass-style casting, we treat pipe_fence_handle as an opaque, and just
+ * trust that our state tracker does not ever mess up fence objects.
+ */
+struct r300_fence {
+ struct pipe_reference reference;
+ struct r300_context *ctx;
+ boolean signalled;
+};
+
+struct r300_surface {
+ struct pipe_surface base;
+
+ /* Winsys buffer backing the texture. */
+ struct r300_winsys_buffer *buffer;
+
+ enum r300_buffer_domain domain;
+
+ uint32_t offset; /* COLOROFFSET or DEPTHOFFSET. */
+ uint32_t pitch; /* COLORPITCH or DEPTHPITCH. */
+ uint32_t format; /* US_OUT_FMT or ZB_FORMAT. */
+
+ /* Parameters dedicated to the CBZB clear. */
+ uint32_t cbzb_width; /* Aligned width. */
+ uint32_t cbzb_height; /* Half of the height. */
+ uint32_t cbzb_midpoint_offset; /* DEPTHOFFSET. */
+ uint32_t cbzb_pitch; /* DEPTHPITCH. */
+ uint32_t cbzb_format; /* ZB_FORMAT. */
+
+ /* Whether the CBZB clear is allowed on the surface. */
+ boolean cbzb_allowed;
+
+};
+
+struct r300_texture_desc {
+ /* Parent class. */
struct u_resource b;
+ /* Width, height, and depth.
+ * Most of the time, these are equal to pipe_texture::width0, height0,
+ * and depth0. However, NPOT 3D textures must have dimensions aligned
+ * to POT, and this is the only case when these variables differ from
+ * pipe_texture. */
+ unsigned width0, height0, depth0;
+
+ /* Buffer tiling.
+ * Macrotiling is specified per-level because small mipmaps cannot
+ * be macrotiled. */
+ enum r300_buffer_tiling microtile;
+ enum r300_buffer_tiling macrotile[R300_MAX_TEXTURE_LEVELS];
+
/* Offsets into the buffer. */
- unsigned offset[R300_MAX_TEXTURE_LEVELS];
+ unsigned offset_in_bytes[R300_MAX_TEXTURE_LEVELS];
- /* A pitch for each mip-level */
- unsigned pitch[R300_MAX_TEXTURE_LEVELS];
+ /* Strides for each mip-level. */
+ unsigned stride_in_pixels[R300_MAX_TEXTURE_LEVELS];
+ unsigned stride_in_bytes[R300_MAX_TEXTURE_LEVELS];
- /* Size of one zslice or face based on the texture target */
- unsigned layer_size[R300_MAX_TEXTURE_LEVELS];
+ /* Size of one zslice or face or 2D image based on the texture target. */
+ unsigned layer_size_in_bytes[R300_MAX_TEXTURE_LEVELS];
- /* Whether the mipmap level is macrotiled. */
- enum r300_buffer_tiling mip_macrotile[R300_MAX_TEXTURE_LEVELS];
+ /* Total size of this texture, in bytes,
+ * derived from the texture properties. */
+ unsigned size_in_bytes;
+
+ /* Total size of the buffer backing this texture, in bytes.
+ * It must be >= size. */
+ unsigned buffer_size_in_bytes;
/**
* If non-zero, override the natural texture layout with
@@ -271,16 +375,24 @@ struct r300_texture {
*
* \sa r300_texture_get_stride
*/
- unsigned stride_override;
+ unsigned stride_in_bytes_override;
- /* Total size of this texture, in bytes. */
- unsigned size;
+ /* Whether this texture has non-power-of-two dimensions.
+ * It can be either a regular texture or a rectangle one. */
+ boolean is_npot;
- /* Whether this texture has non-power-of-two dimensions
- * or a user-specified pitch.
- * It can be either a regular texture or a rectangle one.
- */
- boolean uses_pitch;
+ /* This flag says that hardware must use the stride for addressing
+ * instead of the width. */
+ boolean uses_stride_addressing;
+
+ /* Whether CBZB fast color clear is allowed on the miplevel. */
+ boolean cbzb_allowed[R300_MAX_TEXTURE_LEVELS];
+};
+
+struct r300_texture {
+ struct r300_texture_desc desc;
+
+ enum r300_buffer_domain domain;
/* Pipe buffer backing this texture. */
struct r300_winsys_buffer *buffer;
@@ -291,72 +403,95 @@ struct r300_texture {
/* All bits should be filled in. */
struct r300_texture_fb_state fb_state;
- /* Buffer tiling */
- enum r300_buffer_tiling microtile, macrotile;
-};
+ /* hyper-z memory allocs */
+ struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS];
+ struct mem_block *zmask_mem[R300_MAX_TEXTURE_LEVELS];
+ boolean zmask_in_use[R300_MAX_TEXTURE_LEVELS];
+ boolean hiz_in_use[R300_MAX_TEXTURE_LEVELS];
-struct r300_vertex_info {
- /* Parent class */
- struct vertex_info vinfo;
-
- /* R300_VAP_PROG_STREAK_CNTL_[0-7] */
- uint32_t vap_prog_stream_cntl[8];
- /* R300_VAP_PROG_STREAK_CNTL_EXT_[0-7] */
- uint32_t vap_prog_stream_cntl_ext[8];
+ /* This is the level tiling flags were last time set for.
+ * It's used to prevent redundant tiling-flags changes from happening.*/
+ unsigned surface_level;
};
struct r300_vertex_element_state {
unsigned count;
struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
+ /* If (velem[i].src_format != hw_format[i]), the vertex buffer
+ * referenced by this vertex element cannot be used for rendering and
+ * its vertex data must be translated to hw_format[i]. */
+ enum pipe_format hw_format[PIPE_MAX_ATTRIBS];
+ unsigned hw_format_size[PIPE_MAX_ATTRIBS];
+
+ /* The size of the vertex, in dwords. */
+ unsigned vertex_size_dwords;
+
+ /* This might mean two things:
+ * - src_format != hw_format, as discussed above.
+ * - src_offset % 4 != 0. */
+ boolean incompatible_layout;
+
struct r300_vertex_stream_state vertex_stream;
};
-extern struct pipe_viewport_state r300_viewport_identity;
+struct r300_translate_context {
+ /* Translate cache for incompatible vertex offset/stride/format fallback. */
+ struct translate_cache *translate_cache;
+
+ /* The vertex buffer slot containing the translated buffer. */
+ unsigned vb_slot;
+
+ /* Saved and new vertex element state. */
+ void *saved_velems, *new_velems;
+};
struct r300_context {
/* Parent class */
struct pipe_context context;
- /* Emission of drawing packets. */
- void (*emit_draw_arrays_immediate)(
- struct r300_context *r300,
- unsigned mode, unsigned start, unsigned count);
-
- void (*emit_draw_arrays)(
- struct r300_context *r300,
- unsigned mode, unsigned count);
-
- void (*emit_draw_elements)(
- struct r300_context *r300, struct pipe_resource* indexBuffer,
- unsigned indexSize, int indexBias,
- unsigned minIndex, unsigned maxIndex,
- unsigned mode, unsigned start, unsigned count);
-
-
/* The interface to the windowing system, etc. */
struct r300_winsys_screen *rws;
+ /* The command stream. */
+ struct r300_winsys_cs *cs;
/* Screen. */
struct r300_screen *screen;
+
/* Draw module. Used mostly for SW TCL. */
struct draw_context* draw;
+ /* Vertex buffer for SW TCL. */
+ struct pipe_resource* vbo;
+ /* Offset and size into the SW TCL VBO. */
+ size_t draw_vbo_offset;
+ size_t draw_vbo_size;
+ /* Whether the VBO must not be flushed. */
+ boolean draw_vbo_locked;
+ boolean draw_first_emitted;
+
/* Accelerated blit support. */
struct blitter_context* blitter;
+ /* Stencil two-sided reference value fallback. */
+ struct r300_stencilref_context *stencilref_fallback;
+ /* For translating vertex buffers having incompatible vertex layout. */
+ struct r300_translate_context tran;
- /* Vertex buffer for rendering. */
- struct pipe_resource* vbo;
- /* Offset into the VBO. */
- size_t vbo_offset;
+ /* The KIL opcode needs the first texture unit to be enabled
+ * on r3xx-r4xx. In order to calm down the CS checker, we bind this
+ * dummy texture there. */
+ struct r300_sampler_view *texkill_sampler;
- /* Occlusion query buffer. */
- struct pipe_resource* oqbo;
- /* Query list. */
+ /* The currently active query. */
struct r300_query *query_current;
+ /* The saved query for blitter operations. */
+ struct r300_query *blitter_saved_query;
+ /* Query list. */
struct r300_query query_list;
/* Various CSO state objects. */
/* Beginning of atom list. */
struct r300_atom atom_list;
+ /* Anti-aliasing (MSAA) state. */
+ struct r300_atom aa_state;
/* Blend state. */
struct r300_atom blend_state;
/* Blend color state. */
@@ -373,11 +508,15 @@ struct r300_context {
struct r300_atom fs_constants;
/* Framebuffer state. */
struct r300_atom fb_state;
+ /* Framebuffer state (pipelined regs). */
+ struct r300_atom fb_state_pipelined;
+ /* HyperZ state (various SC/ZB bits). */
+ struct r300_atom hyperz_state;
/* Occlusion query. */
struct r300_atom query_start;
/* Rasterizer state. */
struct r300_atom rs_state;
- /* RS block state. */
+ /* RS block state + VAP (vertex shader) output mapping state. */
struct r300_atom rs_block_state;
/* Scissor state. */
struct r300_atom scissor_state;
@@ -385,8 +524,6 @@ struct r300_context {
struct r300_atom textures_state;
/* Vertex stream formatting state. */
struct r300_atom vertex_stream_state;
- /* VAP (vertex shader) output mapping state. */
- struct r300_atom vap_output_state;
/* Vertex shader. */
struct r300_atom vs_state;
/* Vertex shader constant buffer. */
@@ -397,8 +534,16 @@ struct r300_context {
struct r300_atom ztop_state;
/* PVS flush. */
struct r300_atom pvs_flush;
+ /* VAP invariant state. */
+ struct r300_atom vap_invariant_state;
/* Texture cache invalidate. */
struct r300_atom texture_cache_inval;
+ /* GPU flush. */
+ struct r300_atom gpu_flush;
+ /* HiZ clear */
+ struct r300_atom hiz_clear;
+ /* zmask clear */
+ struct r300_atom zmask_clear;
/* Invariant state. This must be emitted to get the engine started. */
struct r300_atom invariant_state;
@@ -411,15 +556,17 @@ struct r300_context {
struct r300_vertex_element_state *velems;
bool any_user_vbs;
+ struct pipe_index_buffer index_buffer;
+
/* Vertex info for Draw. */
struct vertex_info vertex_info;
struct pipe_stencil_ref stencil_ref;
-
- struct pipe_clip_state clip;
-
struct pipe_viewport_state viewport;
+ /* Stream locations for SWTCL. */
+ int stream_loc_notcl[16];
+
/* Flag indicating whether or not the HW is dirty. */
uint32_t dirty_hw;
/* Whether polygon offset is enabled. */
@@ -428,21 +575,43 @@ struct r300_context {
uint32_t zbuffer_bpp;
/* Whether rendering is conditional and should be skipped. */
boolean skip_rendering;
- /* Whether the two-sided stencil ref value is different for front and
- * back faces, and fallback should be used for r3xx-r4xx. */
- boolean stencil_ref_bf_fallback;
/* Point sprites texcoord index, 1 bit per texcoord */
int sprite_coord_enable;
+ /* Whether two-sided color selection is enabled (AKA light_twoside). */
+ boolean two_sided_color;
+ /* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */
+ boolean incompatible_vb_layout;
+#define R300_Z_COMPRESS_44 1
+#define RV350_Z_COMPRESS_88 2
+ int z_compression;
+ boolean cbzb_clear;
+ boolean z_decomp_rd;
+
+ /* two mem block managers for hiz/zmask ram space */
+ struct mem_block *hiz_mm;
+ struct mem_block *zmask_mm;
/* upload managers */
struct u_upload_mgr *upload_vb;
struct u_upload_mgr *upload_ib;
+ struct util_mempool pool_transfers;
+
/* Stat counter. */
uint64_t flush_counter;
};
-/* Convenience cast wrapper. */
+/* Convenience cast wrappers. */
+static INLINE struct r300_query* r300_query(struct pipe_query* q)
+{
+ return (struct r300_query*)q;
+}
+
+static INLINE struct r300_surface* r300_surface(struct pipe_surface* surf)
+{
+ return (struct r300_surface*)surf;
+}
+
static INLINE struct r300_texture* r300_texture(struct pipe_resource* tex)
{
return (struct r300_texture*)tex;
@@ -461,11 +630,60 @@ static INLINE struct r300_fragment_shader *r300_fs(struct r300_context *r300)
struct pipe_context* r300_create_context(struct pipe_screen* screen,
void *priv);
+void r300_finish(struct r300_context *r300);
+void r300_flush_cb(void *data);
+
/* Context initialization. */
struct draw_stage* r300_draw_stage(struct r300_context* r300);
+void r300_init_blit_functions(struct r300_context *r300);
+void r300_init_flush_functions(struct r300_context* r300);
+void r300_init_query_functions(struct r300_context* r300);
+void r300_init_render_functions(struct r300_context *r300);
void r300_init_state_functions(struct r300_context* r300);
void r300_init_resource_functions(struct r300_context* r300);
+/* r300_blit.c */
+void r300_flush_depth_stencil(struct pipe_context *pipe,
+ struct pipe_resource *dst,
+ struct pipe_subresource subdst,
+ unsigned zslice);
+
+/* r300_query.c */
+void r300_resume_query(struct r300_context *r300,
+ struct r300_query *query);
+void r300_stop_query(struct r300_context *r300);
+
+/* r300_render_translate.c */
+void r300_begin_vertex_translate(struct r300_context *r300);
+void r300_end_vertex_translate(struct r300_context *r300);
+void r300_translate_index_buffer(struct r300_context *r300,
+ struct pipe_resource **index_buffer,
+ unsigned *index_size, unsigned index_offset,
+ unsigned *start, unsigned count);
+
+/* r300_render_stencilref.c */
+void r300_plug_in_stencil_ref_fallback(struct r300_context *r300);
+
+/* r300_render.c */
+void r300_draw_flush_vbuf(struct r300_context *r300);
+boolean r500_index_bias_supported(struct r300_context *r300);
+void r500_emit_index_bias(struct r300_context *r300, int index_bias);
+
+/* r300_state.c */
+enum r300_fb_state_change {
+ R300_CHANGED_FB_STATE = 0,
+ R300_CHANGED_CBZB_FLAG,
+ R300_CHANGED_ZCLEAR_FLAG
+};
+
+void r300_mark_fb_state_dirty(struct r300_context *r300,
+ enum r300_fb_state_change change);
+void r300_mark_fs_code_dirty(struct r300_context *r300);
+
+/* r300_debug.c */
+void r500_dump_rs_block(struct r300_rs_block *rs);
+
+
static INLINE boolean CTX_DBG_ON(struct r300_context * ctx, unsigned flags)
{
return SCREEN_DBG_ON(ctx->screen, flags);
diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h
index 996a4f491e7..c194d6a1b08 100644
--- a/src/gallium/drivers/r300/r300_cs.h
+++ b/src/gallium/drivers/r300/r300_cs.h
@@ -20,176 +20,124 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**
+ * This file contains macros for immediate command submission.
+ */
+
#ifndef R300_CS_H
#define R300_CS_H
-#include "util/u_math.h"
-
#include "r300_reg.h"
+#include "r300_context.h"
#include "r300_winsys.h"
/* Yes, I know macros are ugly. However, they are much prettier than the code
* that they neatly hide away, and don't have the cost of function setup,so
* we're going to use them. */
-#define MAX_CS_SIZE 64 * 1024 / 4
-
-#define VERY_VERBOSE_CS 1
-#define VERY_VERBOSE_REGISTERS 1
-
-/* XXX stolen from radeon_drm.h */
-#define RADEON_GEM_DOMAIN_CPU 0x1
-#define RADEON_GEM_DOMAIN_GTT 0x2
-#define RADEON_GEM_DOMAIN_VRAM 0x4
+#ifdef DEBUG
+#define CS_DEBUG(x) x
+#else
+#define CS_DEBUG(x)
+#endif
-/* XXX stolen from radeon_reg.h */
-#define RADEON_CP_PACKET0 0x0
-
-#define CP_PACKET0(register, count) \
- (RADEON_CP_PACKET0 | ((count) << 16) | ((register) >> 2))
+/**
+ * Command submission setup.
+ */
#define CS_LOCALS(context) \
- struct r300_context* const cs_context_copy = (context); \
- struct r300_winsys_screen *cs_winsys = cs_context_copy->rws; \
- int cs_count = 0; (void) cs_count;
-
-#define CHECK_CS(size) \
- assert(cs_winsys->check_cs(cs_winsys, (size)))
+ struct r300_winsys_cs *cs_copy = (context)->cs; \
+ struct r300_winsys_screen *cs_winsys = (context)->rws; \
+ int cs_count = 0; (void) cs_count; (void) cs_winsys;
#define BEGIN_CS(size) do { \
- CHECK_CS(size); \
- if (VERY_VERBOSE_CS) { \
- DBG(cs_context_copy, DBG_CS, "r300: BEGIN_CS, count %d, in %s (%s:%d)\n", \
- size, __FUNCTION__, __FILE__, __LINE__); \
- } \
- cs_winsys->begin_cs(cs_winsys, (size), \
- __FILE__, __FUNCTION__, __LINE__); \
- cs_count = size; \
+ assert(size <= (cs_copy->ndw - cs_copy->cdw)); \
+ CS_DEBUG(cs_count = size;) \
} while (0)
-#define OUT_CS(value) do { \
- if (VERY_VERBOSE_CS || VERY_VERBOSE_REGISTERS) { \
- DBG(cs_context_copy, DBG_CS, "r300: writing %08x\n", value); \
- } \
- cs_winsys->write_cs_dword(cs_winsys, (value)); \
- cs_count--; \
+#ifdef DEBUG
+#define END_CS do { \
+ if (cs_count != 0) \
+ debug_printf("r300: Warning: cs_count off by %d at (%s, %s:%i)\n", \
+ cs_count, __FUNCTION__, __FILE__, __LINE__); \
+ cs_count = 0; \
} while (0)
+#else
+#define END_CS
+#endif
+
-#define OUT_CS_32F(value) do { \
- if (VERY_VERBOSE_CS || VERY_VERBOSE_REGISTERS) { \
- DBG(cs_context_copy, DBG_CS, "r300: writing %f\n", value); \
- } \
- cs_winsys->write_cs_dword(cs_winsys, fui(value)); \
- cs_count--; \
+/**
+ * Writing pure DWORDs.
+ */
+
+#define OUT_CS(value) do { \
+ cs_copy->ptr[cs_copy->cdw++] = (value); \
+ CS_DEBUG(cs_count--;) \
} while (0)
+#define OUT_CS_32F(value) \
+ OUT_CS(fui(value))
+
#define OUT_CS_REG(register, value) do { \
- if (VERY_VERBOSE_REGISTERS) \
- DBG(cs_context_copy, DBG_CS, "r300: writing 0x%08X to register 0x%04X\n", \
- value, register); \
- assert(register); \
- cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0(register, 0)); \
- cs_winsys->write_cs_dword(cs_winsys, value); \
- cs_count -= 2; \
+ OUT_CS(CP_PACKET0(register, 0)); \
+ OUT_CS(value); \
} while (0)
/* Note: This expects count to be the number of registers,
* not the actual packet0 count! */
-#define OUT_CS_REG_SEQ(register, count) do { \
- if (VERY_VERBOSE_REGISTERS) \
- DBG(cs_context_copy, DBG_CS, "r300: writing register sequence of %d to 0x%04X\n", \
- count, register); \
- assert(register); \
- cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0((register), ((count) - 1))); \
- cs_count--; \
-} while (0)
-
-#define OUT_CS_TABLE(values, count) do { \
- if (VERY_VERBOSE_REGISTERS) \
- DBG(cs_context_copy, DBG_CS, "r300: writing table of %d dwords\n", count); \
- cs_winsys->write_cs_table(cs_winsys, values, count); \
- cs_count -= count; \
-} while (0)
+#define OUT_CS_REG_SEQ(register, count) \
+ OUT_CS(CP_PACKET0((register), ((count) - 1)))
-#define OUT_CS_BUF_RELOC(bo, offset, rd, wd, flags) do { \
- DBG(cs_context_copy, DBG_CS, "r300: writing relocation for buffer %p, offset %d, " \
- "domains (%d, %d, %d)\n", \
- bo, offset, rd, wd, flags); \
- assert(bo); \
- cs_winsys->write_cs_dword(cs_winsys, offset); \
- r300_buffer_write_reloc(cs_winsys, r300_buffer(bo), rd, wd, flags); \
- cs_count -= 3; \
-} while (0)
+#define OUT_CS_ONE_REG(register, count) \
+ OUT_CS(CP_PACKET0((register), ((count) - 1)) | RADEON_ONE_REG_WR)
+#define OUT_CS_PKT3(op, count) \
+ OUT_CS(CP_PACKET3(op, count))
-#define OUT_CS_TEX_RELOC(tex, offset, rd, wd, flags) do { \
- DBG(cs_context_copy, DBG_CS, "r300: writing relocation for texture %p, offset %d, " \
- "domains (%d, %d, %d)\n", \
- tex, offset, rd, wd, flags); \
- assert(tex); \
- cs_winsys->write_cs_dword(cs_winsys, offset); \
- r300_texture_write_reloc(cs_winsys, tex, rd, wd, flags); \
- cs_count -= 3; \
+#define OUT_CS_TABLE(values, count) do { \
+ memcpy(cs_copy->ptr + cs_copy->cdw, values, count * 4); \
+ cs_copy->cdw += count; \
+ CS_DEBUG(cs_count -= count;) \
} while (0)
-#define OUT_CS_BUF_RELOC_NO_OFFSET(bo, rd, wd, flags) do { \
- DBG(cs_context_copy, DBG_CS, "r300: writing relocation for buffer %p, " \
- "domains (%d, %d, %d)\n", \
- bo, rd, wd, flags); \
+/**
+ * Writing relocations.
+ */
+
+#define OUT_CS_RELOC(bo, offset, rd, wd) do { \
assert(bo); \
- r300_buffer_write_reloc(cs_winsys, r300_buffer(bo), rd, wd, flags); \
- cs_count -= 2; \
+ OUT_CS(offset); \
+ cs_winsys->cs_write_reloc(cs_copy, bo, rd, wd); \
+ CS_DEBUG(cs_count -= 2;) \
} while (0)
-#define END_CS do { \
- if (VERY_VERBOSE_CS) { \
- DBG(cs_context_copy, DBG_CS, "r300: END_CS in %s (%s:%d)\n", __FUNCTION__, \
- __FILE__, __LINE__); \
- } \
- if (cs_count != 0) \
- debug_printf("r300: Warning: cs_count off by %d\n", cs_count); \
- cs_winsys->end_cs(cs_winsys, __FILE__, __FUNCTION__, __LINE__); \
+#define OUT_CS_BUF_RELOC(bo, offset, rd, wd) do { \
+ assert(bo); \
+ OUT_CS_RELOC(r300_buffer(bo)->buf, offset, rd, wd); \
} while (0)
-#define FLUSH_CS do { \
- if (VERY_VERBOSE_CS) { \
- DBG(cs_context_copy, DBG_CS, "r300: FLUSH_CS in %s (%s:%d)\n\n", __FUNCTION__, \
- __FILE__, __LINE__); \
- } \
- if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) { \
- r300->flush_counter++; \
- } \
- cs_winsys->flush_cs(cs_winsys); \
+#define OUT_CS_TEX_RELOC(tex, offset, rd, wd) do { \
+ assert(tex); \
+ OUT_CS_RELOC(tex->buffer, offset, rd, wd); \
} while (0)
-#define RADEON_ONE_REG_WR (1 << 15)
-
-#define OUT_CS_ONE_REG(register, count) do { \
- if (VERY_VERBOSE_REGISTERS) \
- DBG(cs_context_copy, DBG_CS, "r300: writing data sequence of %d to 0x%04X\n", \
- count, register); \
- assert(register); \
- cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0((register), ((count) - 1)) | RADEON_ONE_REG_WR); \
- cs_count--; \
+#define OUT_CS_BUF_RELOC_NO_OFFSET(bo, rd, wd) do { \
+ assert(bo); \
+ cs_winsys->cs_write_reloc(cs_copy, r300_buffer(bo)->buf, rd, wd); \
+ CS_DEBUG(cs_count -= 2;) \
} while (0)
-#define CP_PACKET3(op, count) \
- (RADEON_CP_PACKET3 | (op) | ((count) << 16))
-#define OUT_CS_PKT3(op, count) do { \
- cs_winsys->write_cs_dword(cs_winsys, CP_PACKET3(op, count)); \
- cs_count--; \
-} while (0)
+/**
+ * Command buffer emission.
+ */
-#define OUT_CS_INDEX_RELOC(bo, offset, count, rd, wd, flags) do { \
- DBG(cs_context_copy, DBG_CS, "r300: writing relocation for index buffer %p," \
- "offset %d\n", bo, offset); \
- assert(bo); \
- cs_winsys->write_cs_dword(cs_winsys, offset); \
- cs_winsys->write_cs_dword(cs_winsys, count); \
- cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \
- cs_count -= 4; \
+#define WRITE_CS_TABLE(values, count) do { \
+ CS_DEBUG(assert(cs_count == 0);) \
+ memcpy(cs_copy->ptr + cs_copy->cdw, (values), (count) * 4); \
+ cs_copy->cdw += (count); \
} while (0)
#endif /* R300_CS_H */
diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c
index aed0ffeeccb..145a7985da3 100644
--- a/src/gallium/drivers/r300/r300_debug.c
+++ b/src/gallium/drivers/r300/r300_debug.c
@@ -22,76 +22,119 @@
#include "r300_context.h"
-#include <stdio.h>
+#include "util/u_debug.h"
-struct debug_option {
- const char * name;
- unsigned flag;
- const char * description;
-};
+#include <stdio.h>
-static struct debug_option debug_options[] = {
- { "help", DBG_HELP, "Helpful meta-information about the driver" },
- { "fp", DBG_FP, "Fragment program handling (for debugging)" },
- { "vp", DBG_VP, "Vertex program handling (for debugging)" },
- { "cs", DBG_CS, "Command submissions (for debugging)" },
- { "draw", DBG_DRAW, "Draw and emit (for debugging)" },
- { "tex", DBG_TEX, "Textures (for debugging)" },
- { "fall", DBG_FALL, "Fallbacks (for debugging)" },
- { "anisohq", DBG_ANISOHQ, "High quality anisotropic filtering (for benchmarking)" },
- { "notiling", DBG_NO_TILING, "Disable tiling (for benchmarking)" },
- { "noimmd", DBG_NO_IMMD, "Disable immediate mode (for benchmarking)" },
- { "stats", DBG_STATS, "Gather statistics (for lulz)" },
-
- { "all", ~0, "Convenience option that enables all debug flags" },
+static const struct debug_named_value debug_options[] = {
+ { "fp", DBG_FP, "Log fragment program compilation" },
+ { "vp", DBG_VP, "Log vertex program compilation" },
+ { "draw", DBG_DRAW, "Log draw calls" },
+ { "swtcl", DBG_SWTCL, "Log SWTCL-specific info" },
+ { "rsblock", DBG_RS_BLOCK, "Log rasterizer registers" },
+ { "psc", DBG_PSC, "Log vertex stream registers" },
+ { "tex", DBG_TEX, "Log basic info about textures" },
+ { "texalloc", DBG_TEXALLOC, "Log texture mipmap tree info" },
+ { "fall", DBG_FALL, "Log fallbacks" },
+ { "rs", DBG_RS, "Log rasterizer" },
+ { "fb", DBG_FB, "Log framebuffer" },
+ { "cbzb", DBG_CBZB, "Log fast color clear info" },
+ { "stats", DBG_STATS, "Log emission statistics" },
+ { "hyperz", DBG_HYPERZ, "Log HyperZ info" },
+ { "scissor", DBG_SCISSOR, "Log scissor info" },
+ { "fakeocc", DBG_FAKE_OCC, "Use fake occlusion queries" },
+ { "anisohq", DBG_ANISOHQ, "Use high quality anisotropic filtering" },
+ { "notiling", DBG_NO_TILING, "Disable tiling" },
+ { "noimmd", DBG_NO_IMMD, "Disable immediate mode" },
+ { "noopt", DBG_NO_OPT, "Disable shader optimizations" },
+ { "nocbzb", DBG_NO_CBZB, "Disable fast color clear" },
/* must be last */
- { 0, 0, 0 }
+ DEBUG_NAMED_VALUE_END
};
void r300_init_debug(struct r300_screen * screen)
{
- const char * options = debug_get_option("RADEON_DEBUG", 0);
- boolean printhint = FALSE;
- size_t length;
- struct debug_option * opt;
-
- if (options) {
- while(*options) {
- if (*options == ' ' || *options == ',') {
- options++;
- continue;
- }
+ screen->debug = debug_get_flags_option("RADEON_DEBUG", debug_options, 0);
+}
- length = strcspn(options, " ,");
+void r500_dump_rs_block(struct r300_rs_block *rs)
+{
+ unsigned count, ip, it_count, ic_count, i, j;
+ unsigned tex_ptr;
+ unsigned col_ptr, col_fmt;
- for(opt = debug_options; opt->name; ++opt) {
- if (!strncmp(options, opt->name, length)) {
- screen->debug |= opt->flag;
- break;
- }
- }
+ count = rs->inst_count & 0xf;
+ count++;
- if (!opt->name) {
- fprintf(stderr, "Unknown debug option: %s\n", options);
- printhint = TRUE;
- }
+ it_count = rs->count & 0x7f;
+ ic_count = (rs->count >> 7) & 0xf;
+
+ fprintf(stderr, "RS Block: %d texcoords (linear), %d colors (perspective)\n",
+ it_count, ic_count);
+ fprintf(stderr, "%d instructions\n", count);
+
+ for (i = 0; i < count; i++) {
+ if (rs->inst[i] & 0x10) {
+ ip = rs->inst[i] & 0xf;
+ fprintf(stderr, "texture: ip %d to psf %d\n",
+ ip, (rs->inst[i] >> 5) & 0x7f);
+
+ tex_ptr = rs->ip[ip] & 0xffffff;
+ fprintf(stderr, " : ");
- options += length;
+ j = 3;
+ do {
+ if ((tex_ptr & 0x3f) == 63) {
+ fprintf(stderr, "1.0");
+ } else if ((tex_ptr & 0x3f) == 62) {
+ fprintf(stderr, "0.0");
+ } else {
+ fprintf(stderr, "[%d]", tex_ptr & 0x3f);
+ }
+ } while (j-- && fprintf(stderr, "/"));
+ fprintf(stderr, "\n");
}
- if (!screen->debug)
- printhint = TRUE;
- }
+ if (rs->inst[i] & 0x10000) {
+ ip = (rs->inst[i] >> 12) & 0xf;
+ fprintf(stderr, "color: ip %d to psf %d\n",
+ ip, (rs->inst[i] >> 18) & 0x7f);
- if (printhint || screen->debug & DBG_HELP) {
- fprintf(stderr, "You can enable debug output by setting "
- "the RADEON_DEBUG environment variable\n"
- "to a comma-separated list of debug options. "
- "Available options are:\n");
+ col_ptr = (rs->ip[ip] >> 24) & 0x7;
+ col_fmt = (rs->ip[ip] >> 27) & 0xf;
+ fprintf(stderr, " : offset %d ", col_ptr);
- for(opt = debug_options; opt->name; ++opt) {
- fprintf(stderr, " %s: %s\n", opt->name, opt->description);
+ switch (col_fmt) {
+ case 0:
+ fprintf(stderr, "(R/G/B/A)");
+ break;
+ case 1:
+ fprintf(stderr, "(R/G/B/0)");
+ break;
+ case 2:
+ fprintf(stderr, "(R/G/B/1)");
+ break;
+ case 4:
+ fprintf(stderr, "(0/0/0/A)");
+ break;
+ case 5:
+ fprintf(stderr, "(0/0/0/0)");
+ break;
+ case 6:
+ fprintf(stderr, "(0/0/0/1)");
+ break;
+ case 8:
+ fprintf(stderr, "(1/1/1/A)");
+ break;
+ case 9:
+ fprintf(stderr, "(1/1/1/0)");
+ break;
+ case 10:
+ fprintf(stderr, "(1/1/1/1)");
+ break;
+ }
+ fprintf(stderr, "\n");
}
}
}
diff --git a/src/gallium/drivers/r300/r300_defines.h b/src/gallium/drivers/r300/r300_defines.h
index 98ee3c1edec..896aeef395d 100644
--- a/src/gallium/drivers/r300/r300_defines.h
+++ b/src/gallium/drivers/r300/r300_defines.h
@@ -30,17 +30,21 @@
#define R300_RESOURCE_FLAG_TRANSFER PIPE_RESOURCE_FLAG_DRV_PRIV
-/* XXX: this is just a bandaid on larger problems in
- * r300_screen_buffer.h which doesn't seem to be fully ported to
- * gallium-resources.
- */
-#define R300_BIND_OQBO (1<<21)
+#define R300_INVALID_FORMAT 0xffff
/* Tiling flags. */
enum r300_buffer_tiling {
R300_BUFFER_LINEAR = 0,
R300_BUFFER_TILED,
- R300_BUFFER_SQUARETILED
+ R300_BUFFER_SQUARETILED,
+
+ R300_BUFFER_UNKNOWN,
+ R300_BUFFER_SELECT_LAYOUT = R300_BUFFER_UNKNOWN
+};
+
+enum r300_buffer_domain { /* bitfield */
+ R300_DOMAIN_GTT = 1,
+ R300_DOMAIN_VRAM = 2
};
#endif
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 92b7517b8d6..3a1085d2dc5 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -25,6 +25,7 @@
#include "util/u_format.h"
#include "util/u_math.h"
+#include "util/u_mm.h"
#include "util/u_simple_list.h"
#include "r300_context.h"
@@ -43,21 +44,11 @@ void r300_emit_blend_state(struct r300_context* r300,
(struct pipe_framebuffer_state*)r300->fb_state.state;
CS_LOCALS(r300);
- BEGIN_CS(size);
- OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop);
- OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 3);
if (fb->nr_cbufs) {
- OUT_CS(blend->blend_control);
- OUT_CS(blend->alpha_blend_control);
- OUT_CS(blend->color_channel_mask);
+ WRITE_CS_TABLE(blend->cb, size);
} else {
- OUT_CS(0);
- OUT_CS(0);
- OUT_CS(0);
- /* XXX also disable fastfill here once it's supported */
+ WRITE_CS_TABLE(blend->cb_no_readwrite, size);
}
- OUT_CS_REG(R300_RB3D_DITHER_CTL, blend->dither);
- END_CS;
}
void r300_emit_blend_color_state(struct r300_context* r300,
@@ -66,40 +57,16 @@ void r300_emit_blend_color_state(struct r300_context* r300,
struct r300_blend_color_state* bc = (struct r300_blend_color_state*)state;
CS_LOCALS(r300);
- if (r300->screen->caps.is_r500) {
- BEGIN_CS(size);
- OUT_CS_REG_SEQ(R500_RB3D_CONSTANT_COLOR_AR, 2);
- OUT_CS(bc->blend_color_red_alpha);
- OUT_CS(bc->blend_color_green_blue);
- END_CS;
- } else {
- BEGIN_CS(size);
- OUT_CS_REG(R300_RB3D_BLEND_COLOR, bc->blend_color);
- END_CS;
- }
+ WRITE_CS_TABLE(bc->cb, size);
}
void r300_emit_clip_state(struct r300_context* r300,
unsigned size, void* state)
{
- struct pipe_clip_state* clip = (struct pipe_clip_state*)state;
+ struct r300_clip_state* clip = (struct r300_clip_state*)state;
CS_LOCALS(r300);
- if (r300->screen->caps.has_tcl) {
- BEGIN_CS(size);
- OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG,
- (r300->screen->caps.is_r500 ?
- R500_PVS_UCP_START : R300_PVS_UCP_START));
- OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, 6 * 4);
- OUT_CS_TABLE(clip->ucp, 6 * 4);
- OUT_CS_REG(R300_VAP_CLIP_CNTL, ((1 << clip->nr) - 1) |
- R300_PS_UCP_MODE_CLIP_AS_TRIFAN);
- END_CS;
- } else {
- BEGIN_CS(size);
- OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE);
- END_CS;
- }
+ WRITE_CS_TABLE(clip->cb, size);
}
void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state)
@@ -107,37 +74,22 @@ void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state)
struct r300_dsa_state* dsa = (struct r300_dsa_state*)state;
struct pipe_framebuffer_state* fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
- struct pipe_stencil_ref stencil_ref = r300->stencil_ref;
CS_LOCALS(r300);
- BEGIN_CS(size);
- OUT_CS_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function);
- OUT_CS_REG_SEQ(R300_ZB_CNTL, 3);
-
if (fb->zsbuf) {
- OUT_CS(dsa->z_buffer_control);
- OUT_CS(dsa->z_stencil_control);
+ WRITE_CS_TABLE(&dsa->cb_begin, size);
} else {
- OUT_CS(0);
- OUT_CS(0);
- }
-
- OUT_CS(dsa->stencil_ref_mask | stencil_ref.ref_value[0]);
-
- if (r300->screen->caps.is_r500) {
- OUT_CS_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf | stencil_ref.ref_value[1]);
+ WRITE_CS_TABLE(dsa->cb_no_readwrite, size);
}
- END_CS;
}
static const float * get_rc_constant_state(
struct r300_context * r300,
struct rc_constant * constant)
{
- struct r300_viewport_state* viewport = r300->viewport_state.state;
struct r300_textures_state* texstate = r300->textures_state.state;
static float vec[4] = { 0.0, 0.0, 0.0, 1.0 };
- struct pipe_resource *tex;
+ struct r300_texture *tex;
assert(constant->Type == RC_CONSTANT_STATE);
@@ -145,21 +97,29 @@ static const float * get_rc_constant_state(
/* Factor for converting rectangle coords to
* normalized coords. Should only show up on non-r500. */
case RC_STATE_R300_TEXRECT_FACTOR:
- tex = texstate->sampler_views[constant->u.State[1]]->base.texture;
- vec[0] = 1.0 / tex->width0;
- vec[1] = 1.0 / tex->height0;
+ tex = r300_texture(texstate->sampler_views[constant->u.State[1]]->base.texture);
+ vec[0] = 1.0 / tex->desc.width0;
+ vec[1] = 1.0 / tex->desc.height0;
+ break;
+
+ case RC_STATE_R300_TEXSCALE_FACTOR:
+ tex = r300_texture(texstate->sampler_views[constant->u.State[1]]->base.texture);
+ /* Add a small number to the texture size to work around rounding errors in hw. */
+ vec[0] = tex->desc.b.b.width0 / (tex->desc.width0 + 0.001f);
+ vec[1] = tex->desc.b.b.height0 / (tex->desc.height0 + 0.001f);
+ vec[2] = tex->desc.b.b.depth0 / (tex->desc.depth0 + 0.001f);
break;
case RC_STATE_R300_VIEWPORT_SCALE:
- vec[0] = viewport->xscale;
- vec[1] = viewport->yscale;
- vec[2] = viewport->zscale;
+ vec[0] = r300->viewport.scale[0];
+ vec[1] = r300->viewport.scale[1];
+ vec[2] = r300->viewport.scale[2];
break;
case RC_STATE_R300_VIEWPORT_OFFSET:
- vec[0] = viewport->xoffset;
- vec[1] = viewport->yoffset;
- vec[2] = viewport->zoffset;
+ vec[0] = r300->viewport.translate[0];
+ vec[1] = r300->viewport.translate[1];
+ vec[2] = r300->viewport.translate[2];
break;
default:
@@ -176,7 +136,7 @@ static const float * get_rc_constant_state(
/* Convert a normal single-precision float into the 7.16 format
* used by the R300 fragment shader.
*/
-static uint32_t pack_float24(float f)
+uint32_t pack_float24(float f)
{
union {
float fl;
@@ -207,85 +167,20 @@ static uint32_t pack_float24(float f)
return float24;
}
-unsigned r300_get_fs_atom_size(struct r300_context *r300)
-{
- struct r300_fragment_shader *fs = r300_fs(r300);
- unsigned imm_count = fs->shader->immediates_count;
- struct r300_fragment_program_code *code = &fs->shader->code.code.r300;
-
- return 19 +
- code->alu.length * 4 +
- (code->tex.length ? (1 + code->tex.length) : 0) +
- (imm_count ? imm_count * 5 : 0);
-}
-
void r300_emit_fs(struct r300_context* r300, unsigned size, void *state)
{
struct r300_fragment_shader *fs = r300_fs(r300);
- struct rX00_fragment_program_code* generic_code = &fs->shader->code;
- struct r300_fragment_program_code * code = &generic_code->code.r300;
- unsigned i;
- unsigned imm_count = fs->shader->immediates_count;
- unsigned imm_first = fs->shader->externals_count;
- unsigned imm_end = generic_code->constants.Count;
- struct rc_constant *constants = generic_code->constants.Constants;
CS_LOCALS(r300);
- BEGIN_CS(size);
- OUT_CS_REG(R300_US_CONFIG, code->config);
- OUT_CS_REG(R300_US_PIXSIZE, code->pixsize);
- OUT_CS_REG(R300_US_CODE_OFFSET, code->code_offset);
-
- OUT_CS_REG_SEQ(R300_US_CODE_ADDR_0, 4);
- OUT_CS_TABLE(code->code_addr, 4);
-
- OUT_CS_REG_SEQ(R300_US_ALU_RGB_INST_0, code->alu.length);
- for (i = 0; i < code->alu.length; i++)
- OUT_CS(code->alu.inst[i].rgb_inst);
-
- OUT_CS_REG_SEQ(R300_US_ALU_RGB_ADDR_0, code->alu.length);
- for (i = 0; i < code->alu.length; i++)
- OUT_CS(code->alu.inst[i].rgb_addr);
-
- OUT_CS_REG_SEQ(R300_US_ALU_ALPHA_INST_0, code->alu.length);
- for (i = 0; i < code->alu.length; i++)
- OUT_CS(code->alu.inst[i].alpha_inst);
-
- OUT_CS_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
- for (i = 0; i < code->alu.length; i++)
- OUT_CS(code->alu.inst[i].alpha_addr);
-
- if (code->tex.length) {
- OUT_CS_REG_SEQ(R300_US_TEX_INST_0, code->tex.length);
- OUT_CS_TABLE(code->tex.inst, code->tex.length);
- }
-
- /* Emit immediates. */
- if (imm_count) {
- for(i = imm_first; i < imm_end; ++i) {
- if (constants[i].Type == RC_CONSTANT_IMMEDIATE) {
- const float *data = constants[i].u.Immediate;
-
- OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X + i * 16, 4);
- OUT_CS(pack_float24(data[0]));
- OUT_CS(pack_float24(data[1]));
- OUT_CS(pack_float24(data[2]));
- OUT_CS(pack_float24(data[3]));
- }
- }
- }
-
- OUT_CS_REG(R300_FG_DEPTH_SRC, fs->shader->fg_depth_src);
- OUT_CS_REG(R300_US_W_FMT, fs->shader->us_out_w);
- END_CS;
+ WRITE_CS_TABLE(fs->shader->cb_code, fs->shader->cb_code_size);
}
void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *state)
{
struct r300_fragment_shader *fs = r300_fs(r300);
- struct rc_constant_list *constants = &fs->shader->code.constants;
struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state;
- unsigned i, count = fs->shader->externals_count;
+ unsigned count = fs->shader->externals_count;
+ unsigned i, j;
CS_LOCALS(r300);
if (count == 0)
@@ -293,15 +188,18 @@ void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *stat
BEGIN_CS(size);
OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, count * 4);
- for(i = 0; i < count; ++i) {
- const float *data;
- assert(constants->Constants[i].Type == RC_CONSTANT_EXTERNAL);
- data = buf->constants[i];
- OUT_CS(pack_float24(data[0]));
- OUT_CS(pack_float24(data[1]));
- OUT_CS(pack_float24(data[2]));
- OUT_CS(pack_float24(data[3]));
+ if (buf->remap_table){
+ for (i = 0; i < count; i++) {
+ float *data = (float*)&buf->ptr[buf->remap_table[i]*4];
+ for (j = 0; j < 4; j++)
+ OUT_CS(pack_float24(data[j]));
+ }
+ } else {
+ for (i = 0; i < count; i++)
+ for (j = 0; j < 4; j++)
+ OUT_CS(pack_float24(*(float*)&buf->ptr[i*4+j]));
}
+
END_CS;
}
@@ -313,6 +211,7 @@ void r300_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo
unsigned count = fs->shader->rc_state_count;
unsigned first = fs->shader->externals_count;
unsigned end = constants->Count;
+ unsigned j;
CS_LOCALS(r300);
if (count == 0)
@@ -325,84 +224,26 @@ void r300_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo
get_rc_constant_state(r300, &constants->Constants[i]);
OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X + i * 16, 4);
- OUT_CS(pack_float24(data[0]));
- OUT_CS(pack_float24(data[1]));
- OUT_CS(pack_float24(data[2]));
- OUT_CS(pack_float24(data[3]));
+ for (j = 0; j < 4; j++)
+ OUT_CS(pack_float24(data[j]));
}
}
END_CS;
}
-unsigned r500_get_fs_atom_size(struct r300_context *r300)
-{
- struct r300_fragment_shader *fs = r300_fs(r300);
- unsigned imm_count = fs->shader->immediates_count;
- struct r500_fragment_program_code *code = &fs->shader->code.code.r500;
-
- return 17 +
- ((code->inst_end + 1) * 6) +
- (imm_count ? imm_count * 7 : 0);
-}
-
void r500_emit_fs(struct r300_context* r300, unsigned size, void *state)
{
struct r300_fragment_shader *fs = r300_fs(r300);
- struct rX00_fragment_program_code* generic_code = &fs->shader->code;
- struct r500_fragment_program_code * code = &generic_code->code.r500;
- unsigned i;
- unsigned imm_count = fs->shader->immediates_count;
- unsigned imm_first = fs->shader->externals_count;
- unsigned imm_end = generic_code->constants.Count;
- struct rc_constant *constants = generic_code->constants.Constants;
CS_LOCALS(r300);
- BEGIN_CS(size);
- OUT_CS_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
- OUT_CS_REG(R500_US_PIXSIZE, code->max_temp_idx);
- OUT_CS_REG(R500_US_CODE_RANGE,
- R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end));
- OUT_CS_REG(R500_US_CODE_OFFSET, 0);
- OUT_CS_REG(R500_US_CODE_ADDR,
- R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(code->inst_end));
-
- OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_INSTR);
- OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, (code->inst_end + 1) * 6);
- for (i = 0; i <= code->inst_end; i++) {
- OUT_CS(code->inst[i].inst0);
- OUT_CS(code->inst[i].inst1);
- OUT_CS(code->inst[i].inst2);
- OUT_CS(code->inst[i].inst3);
- OUT_CS(code->inst[i].inst4);
- OUT_CS(code->inst[i].inst5);
- }
-
- /* Emit immediates. */
- if (imm_count) {
- for(i = imm_first; i < imm_end; ++i) {
- if (constants[i].Type == RC_CONSTANT_IMMEDIATE) {
- const float *data = constants[i].u.Immediate;
-
- OUT_CS_REG(R500_GA_US_VECTOR_INDEX,
- R500_GA_US_VECTOR_INDEX_TYPE_CONST |
- (i & R500_GA_US_VECTOR_INDEX_MASK));
- OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, 4);
- OUT_CS_TABLE(data, 4);
- }
- }
- }
-
- OUT_CS_REG(R300_FG_DEPTH_SRC, fs->shader->fg_depth_src);
- OUT_CS_REG(R300_US_W_FMT, fs->shader->us_out_w);
- END_CS;
+ WRITE_CS_TABLE(fs->shader->cb_code, fs->shader->cb_code_size);
}
void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *state)
{
struct r300_fragment_shader *fs = r300_fs(r300);
- struct rc_constant_list *constants = &fs->shader->code.constants;
struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state;
- unsigned i, count = fs->shader->externals_count;
+ unsigned count = fs->shader->externals_count;
CS_LOCALS(r300);
if (count == 0)
@@ -411,10 +252,14 @@ void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *stat
BEGIN_CS(size);
OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST);
OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, count * 4);
- for(i = 0; i < count; ++i) {
- assert(constants->Constants[i].Type == RC_CONSTANT_EXTERNAL);
+ if (buf->remap_table){
+ for (unsigned i = 0; i < count; i++) {
+ uint32_t *data = &buf->ptr[buf->remap_table[i]*4];
+ OUT_CS_TABLE(data, 4);
+ }
+ } else {
+ OUT_CS_TABLE(buf->ptr, count * 4);
}
- OUT_CS_TABLE(buf->constants, count * 4);
END_CS;
}
@@ -447,86 +292,236 @@ void r500_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo
END_CS;
}
+void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state)
+{
+ struct r300_gpu_flush *gpuflush = (struct r300_gpu_flush*)state;
+ struct pipe_framebuffer_state* fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+ uint32_t height = fb->height;
+ uint32_t width = fb->width;
+ CS_LOCALS(r300);
+
+ if (r300->cbzb_clear) {
+ struct r300_surface *surf = r300_surface(fb->cbufs[0]);
+
+ height = surf->cbzb_height;
+ width = surf->cbzb_width;
+ }
+
+ DBG(r300, DBG_SCISSOR,
+ "r300: Scissor width: %i, height: %i, CBZB clear: %s\n",
+ width, height, r300->cbzb_clear ? "YES" : "NO");
+
+ BEGIN_CS(size);
+
+ /* Set up scissors.
+ * By writing to the SC registers, SC & US assert idle. */
+ OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2);
+ if (r300->screen->caps.is_r500) {
+ OUT_CS(0);
+ OUT_CS(((width - 1) << R300_SCISSORS_X_SHIFT) |
+ ((height - 1) << R300_SCISSORS_Y_SHIFT));
+ } else {
+ OUT_CS((1440 << R300_SCISSORS_X_SHIFT) |
+ (1440 << R300_SCISSORS_Y_SHIFT));
+ OUT_CS(((width + 1440-1) << R300_SCISSORS_X_SHIFT) |
+ ((height + 1440-1) << R300_SCISSORS_Y_SHIFT));
+ }
+
+ /* Flush CB & ZB caches and wait until the 3D engine is idle and clean. */
+ OUT_CS_TABLE(gpuflush->cb_flush_clean, 6);
+ END_CS;
+}
+
+void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state)
+{
+ struct r300_aa_state *aa = (struct r300_aa_state*)state;
+ CS_LOCALS(r300);
+
+ BEGIN_CS(size);
+ OUT_CS_REG(R300_GB_AA_CONFIG, aa->aa_config);
+
+ if (aa->dest) {
+ OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_OFFSET, 1);
+ OUT_CS_RELOC(aa->dest->buffer, aa->dest->offset, 0, aa->dest->domain);
+
+ OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_PITCH, 1);
+ OUT_CS_RELOC(aa->dest->buffer, aa->dest->pitch, 0, aa->dest->domain);
+ }
+
+ OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, aa->aaresolve_ctl);
+ END_CS;
+}
+
void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
{
struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state;
- struct r300_texture* tex;
- struct pipe_surface* surf;
- int i;
+ struct r300_surface* surf;
+ unsigned i;
+ boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ);
CS_LOCALS(r300);
BEGIN_CS(size);
- /* Flush and free renderbuffer caches. */
- OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT,
- R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
- R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
- OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT,
- R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
- R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
-
- /* Set the number of colorbuffers. */
- if (fb->nr_cbufs > 1) {
- if (r300->screen->caps.is_r500) {
- OUT_CS_REG(R300_RB3D_CCTL,
- R300_RB3D_CCTL_NUM_MULTIWRITES(fb->nr_cbufs) |
- R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE);
- } else {
- OUT_CS_REG(R300_RB3D_CCTL,
- R300_RB3D_CCTL_NUM_MULTIWRITES(fb->nr_cbufs));
- }
+ /* NUM_MULTIWRITES replicates COLOR[0] to all colorbuffers, which is not
+ * what we usually want. */
+ if (r300->screen->caps.is_r500) {
+ OUT_CS_REG(R300_RB3D_CCTL,
+ R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE);
} else {
- OUT_CS_REG(R300_RB3D_CCTL, 0x0);
+ OUT_CS_REG(R300_RB3D_CCTL, 0);
}
/* Set up colorbuffers. */
for (i = 0; i < fb->nr_cbufs; i++) {
- surf = fb->cbufs[i];
- tex = r300_texture(surf->texture);
- assert(tex && tex->buffer && "cbuf is marked, but NULL!");
+ surf = r300_surface(fb->cbufs[i]);
OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1);
- OUT_CS_TEX_RELOC(tex, surf->offset, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ OUT_CS_RELOC(surf->buffer, surf->offset, 0, surf->domain);
OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1);
- OUT_CS_TEX_RELOC(tex, tex->fb_state.colorpitch[surf->level],
- 0, RADEON_GEM_DOMAIN_VRAM, 0);
-
- OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), tex->fb_state.us_out_fmt);
- }
- for (; i < 4; i++) {
- OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), R300_US_OUT_FMT_UNUSED);
+ OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain);
}
- /* Set up a zbuffer. */
- if (fb->zsbuf) {
- surf = fb->zsbuf;
- tex = r300_texture(surf->texture);
- assert(tex && tex->buffer && "zsbuf is marked, but NULL!");
+ /* Set up the ZB part of the CBZB clear. */
+ if (r300->cbzb_clear) {
+ surf = r300_surface(fb->cbufs[0]);
+
+ OUT_CS_REG(R300_ZB_FORMAT, surf->cbzb_format);
OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1);
- OUT_CS_TEX_RELOC(tex, surf->offset, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ OUT_CS_RELOC(surf->buffer, surf->cbzb_midpoint_offset, 0, surf->domain);
+
+ OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1);
+ OUT_CS_RELOC(surf->buffer, surf->cbzb_pitch, 0, surf->domain);
+
+ DBG(r300, DBG_CBZB,
+ "CBZB clearing cbuf %08x %08x\n", surf->cbzb_format,
+ surf->cbzb_pitch);
+ }
+ /* Set up a zbuffer. */
+ else if (fb->zsbuf) {
+ surf = r300_surface(fb->zsbuf);
- OUT_CS_REG(R300_ZB_FORMAT, tex->fb_state.zb_format);
+ OUT_CS_REG(R300_ZB_FORMAT, surf->format);
+
+ OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1);
+ OUT_CS_RELOC(surf->buffer, surf->offset, 0, surf->domain);
OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1);
- OUT_CS_TEX_RELOC(tex, tex->fb_state.depthpitch[surf->level],
- 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain);
+
+ if (has_hyperz) {
+ uint32_t surf_pitch;
+ struct r300_texture *tex;
+ int level = surf->base.level;
+ tex = r300_texture(surf->base.texture);
+
+ surf_pitch = surf->pitch & R300_DEPTHPITCH_MASK;
+ /* HiZ RAM. */
+ if (r300->screen->caps.hiz_ram) {
+ if (tex->hiz_mem[level]) {
+ OUT_CS_REG(R300_ZB_HIZ_OFFSET, tex->hiz_mem[level]->ofs << 2);
+ OUT_CS_REG(R300_ZB_HIZ_PITCH, surf_pitch);
+ } else {
+ OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0);
+ OUT_CS_REG(R300_ZB_HIZ_PITCH, 0);
+ }
+ }
+ /* Z Mask RAM. (compressed zbuffer) */
+ if (tex->zmask_mem[level]) {
+ OUT_CS_REG(R300_ZB_ZMASK_OFFSET, tex->zmask_mem[level]->ofs << 2);
+ OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch);
+ } else {
+ OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0);
+ OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0);
+ }
+ }
}
- OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2);
- if (r300->screen->caps.is_r500) {
- OUT_CS(0);
- OUT_CS(((fb->width - 1) << R300_SCISSORS_X_SHIFT) |
- ((fb->height - 1) << R300_SCISSORS_Y_SHIFT));
- } else {
- OUT_CS((1440 << R300_SCISSORS_X_SHIFT) |
- (1440 << R300_SCISSORS_Y_SHIFT));
- OUT_CS(((fb->width + 1440-1) << R300_SCISSORS_X_SHIFT) |
- ((fb->height + 1440-1) << R300_SCISSORS_Y_SHIFT));
+ END_CS;
+}
+
+void r300_emit_hyperz_state(struct r300_context *r300,
+ unsigned size, void *state)
+{
+ struct r300_hyperz_state *z = state;
+ CS_LOCALS(r300);
+ if (z->flush)
+ WRITE_CS_TABLE(&z->cb_flush_begin, size);
+ else
+ WRITE_CS_TABLE(&z->cb_begin, size - 2);
+}
+
+void r300_emit_hyperz_end(struct r300_context *r300)
+{
+ struct r300_hyperz_state z =
+ *(struct r300_hyperz_state*)r300->hyperz_state.state;
+
+ z.flush = 1;
+ z.zb_bw_cntl = 0;
+ z.zb_depthclearvalue = 0;
+ z.sc_hyperz = R300_SC_HYPERZ_ADJ_2;
+ z.gb_z_peq_config = 0;
+
+ r300_emit_hyperz_state(r300, r300->hyperz_state.size, &z);
+}
+
+void r300_emit_fb_state_pipelined(struct r300_context *r300,
+ unsigned size, void *state)
+{
+ struct pipe_framebuffer_state* fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+ unsigned i;
+ CS_LOCALS(r300);
+
+ BEGIN_CS(size);
+
+ /* Colorbuffer format in the US block.
+ * (must be written after unpipelined regs) */
+ OUT_CS_REG_SEQ(R300_US_OUT_FMT_0, 4);
+ for (i = 0; i < fb->nr_cbufs; i++) {
+ OUT_CS(r300_surface(fb->cbufs[i])->format);
+ }
+ for (; i < 4; i++) {
+ OUT_CS(R300_US_OUT_FMT_UNUSED);
+ }
+
+ /* Multisampling. Depends on framebuffer sample count.
+ * These are pipelined regs and as such cannot be moved
+ * to the AA state. */
+ if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) {
+ unsigned mspos0 = 0x66666666;
+ unsigned mspos1 = 0x6666666;
+
+ if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) {
+ /* Subsample placement. These may not be optimal. */
+ switch (fb->cbufs[0]->texture->nr_samples) {
+ case 2:
+ mspos0 = 0x33996633;
+ mspos1 = 0x6666663;
+ break;
+ case 3:
+ mspos0 = 0x33936933;
+ mspos1 = 0x6666663;
+ break;
+ case 4:
+ mspos0 = 0x33939933;
+ mspos1 = 0x3966663;
+ break;
+ case 6:
+ mspos0 = 0x22a2aa22;
+ mspos1 = 0x2a65672;
+ break;
+ default:
+ debug_printf("r300: Bad number of multisamples!\n");
+ }
+ }
+
+ OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2);
+ OUT_CS(mspos0);
+ OUT_CS(mspos1);
}
- OUT_CS_REG(R300_GA_POINT_MINMAX,
- (MAX2(fb->width, fb->height) * 6) << R300_GA_POINT_MINMAX_MAX_SHIFT);
END_CS;
}
@@ -547,13 +542,14 @@ void r300_emit_query_start(struct r300_context *r300, unsigned size, void*state)
OUT_CS_REG(R300_ZB_ZPASS_DATA, 0);
END_CS;
query->begin_emitted = TRUE;
+ query->flushed = FALSE;
}
-
-static void r300_emit_query_finish(struct r300_context *r300,
- struct r300_query *query)
+static void r300_emit_query_end_frag_pipes(struct r300_context *r300,
+ struct r300_query *query)
{
struct r300_capabilities* caps = &r300->screen->caps;
+ struct r300_winsys_buffer *buf = r300->query_current->buffer;
CS_LOCALS(r300);
assert(caps->num_frag_pipes);
@@ -572,28 +568,28 @@ static void r300_emit_query_finish(struct r300_context *r300,
/* pipe 3 only */
OUT_CS_REG(R300_SU_REG_DEST, 1 << 3);
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 3),
- 0, RADEON_GEM_DOMAIN_GTT, 0);
+ OUT_CS_RELOC(buf, (query->num_results + 3) * 4,
+ 0, query->domain);
case 3:
/* pipe 2 only */
OUT_CS_REG(R300_SU_REG_DEST, 1 << 2);
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 2),
- 0, RADEON_GEM_DOMAIN_GTT, 0);
+ OUT_CS_RELOC(buf, (query->num_results + 2) * 4,
+ 0, query->domain);
case 2:
/* pipe 1 only */
/* As mentioned above, accomodate RV380 and older. */
OUT_CS_REG(R300_SU_REG_DEST,
1 << (caps->high_second_pipe ? 3 : 1));
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 1),
- 0, RADEON_GEM_DOMAIN_GTT, 0);
+ OUT_CS_RELOC(buf, (query->num_results + 1) * 4,
+ 0, query->domain);
case 1:
/* pipe 0 only */
OUT_CS_REG(R300_SU_REG_DEST, 1 << 0);
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 0),
- 0, RADEON_GEM_DOMAIN_GTT, 0);
+ OUT_CS_RELOC(buf, (query->num_results + 0) * 4,
+ 0, query->domain);
break;
default:
fprintf(stderr, "r300: Implementation error: Chipset reports %d"
@@ -606,31 +602,33 @@ static void r300_emit_query_finish(struct r300_context *r300,
END_CS;
}
-static void rv530_emit_query_single(struct r300_context *r300,
- struct r300_query *query)
+static void rv530_emit_query_end_single_z(struct r300_context *r300,
+ struct r300_query *query)
{
+ struct r300_winsys_buffer *buf = r300->query_current->buffer;
CS_LOCALS(r300);
BEGIN_CS(8);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0);
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_BUF_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0);
+ OUT_CS_RELOC(buf, query->num_results * 4, 0, query->domain);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
END_CS;
}
-static void rv530_emit_query_double(struct r300_context *r300,
- struct r300_query *query)
+static void rv530_emit_query_end_double_z(struct r300_context *r300,
+ struct r300_query *query)
{
+ struct r300_winsys_buffer *buf = r300->query_current->buffer;
CS_LOCALS(r300);
BEGIN_CS(14);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0);
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_BUF_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0);
+ OUT_CS_RELOC(buf, (query->num_results + 0) * 4, 0, query->domain);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1);
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_BUF_RELOC(r300->oqbo, query->offset + sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0);
+ OUT_CS_RELOC(buf, (query->num_results + 1) * 4, 0, query->domain);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
END_CS;
}
@@ -648,60 +646,43 @@ void r300_emit_query_end(struct r300_context* r300)
if (caps->family == CHIP_FAMILY_RV530) {
if (caps->num_z_pipes == 2)
- rv530_emit_query_double(r300, query);
+ rv530_emit_query_end_double_z(r300, query);
else
- rv530_emit_query_single(r300, query);
+ rv530_emit_query_end_single_z(r300, query);
} else
- r300_emit_query_finish(r300, query);
+ r300_emit_query_end_frag_pipes(r300, query);
+
+ query->begin_emitted = FALSE;
+ query->num_results += query->num_pipes;
+
+ /* XXX grab all the results and reset the counter. */
+ if (query->num_results >= query->buffer_size / 4 - 4) {
+ query->num_results = (query->buffer_size / 4) / 2;
+ fprintf(stderr, "r300: Rewinding OQBO...\n");
+ }
+}
+
+void r300_emit_invariant_state(struct r300_context *r300,
+ unsigned size, void *state)
+{
+ CS_LOCALS(r300);
+ WRITE_CS_TABLE(state, size);
}
void r300_emit_rs_state(struct r300_context* r300, unsigned size, void* state)
{
- struct r300_rs_state* rs = (struct r300_rs_state*)state;
- float scale, offset;
+ struct r300_rs_state* rs = state;
CS_LOCALS(r300);
BEGIN_CS(size);
- OUT_CS_REG(R300_VAP_CNTL_STATUS, rs->vap_control_status);
-
- OUT_CS_REG(R300_GB_AA_CONFIG, rs->antialiasing_config);
-
- OUT_CS_REG(R300_GA_POINT_SIZE, rs->point_size);
- OUT_CS_REG(R300_GA_LINE_CNTL, rs->line_control);
-
+ OUT_CS_TABLE(rs->cb_main, RS_STATE_MAIN_SIZE);
if (rs->polygon_offset_enable) {
- scale = rs->depth_scale * 12;
- offset = rs->depth_offset;
-
- switch (r300->zbuffer_bpp) {
- case 16:
- offset *= 4;
- break;
- case 24:
- offset *= 2;
- break;
+ if (r300->zbuffer_bpp == 16) {
+ OUT_CS_TABLE(rs->cb_poly_offset_zb16, 5);
+ } else {
+ OUT_CS_TABLE(rs->cb_poly_offset_zb24, 5);
}
-
- OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
- OUT_CS_32F(scale);
- OUT_CS_32F(offset);
- OUT_CS_32F(scale);
- OUT_CS_32F(offset);
}
-
- OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_ENABLE, 2);
- OUT_CS(rs->polygon_offset_enable);
- OUT_CS(rs->cull_mode);
- OUT_CS_REG(R300_GA_LINE_STIPPLE_CONFIG, rs->line_stipple_config);
- OUT_CS_REG(R300_GA_LINE_STIPPLE_VALUE, rs->line_stipple_value);
- OUT_CS_REG(R300_GA_POLY_MODE, rs->polygon_mode);
- OUT_CS_REG(R300_SC_CLIP_RULE, rs->clip_rule);
- OUT_CS_REG(R300_GB_ENABLE, rs->stuffing_enable);
- OUT_CS_REG_SEQ(R300_GA_POINT_S0, 4);
- OUT_CS_32F(rs->point_texcoord_left);
- OUT_CS_32F(rs->point_texcoord_bottom);
- OUT_CS_32F(rs->point_texcoord_right);
- OUT_CS_32F(rs->point_texcoord_top);
END_CS;
}
@@ -714,18 +695,37 @@ void r300_emit_rs_block_state(struct r300_context* r300,
unsigned count = (rs->inst_count & R300_RS_INST_COUNT_MASK) + 1;
CS_LOCALS(r300);
- DBG(r300, DBG_DRAW, "r300: RS emit:\n");
+ if (DBG_ON(r300, DBG_RS_BLOCK)) {
+ r500_dump_rs_block(rs);
+
+ fprintf(stderr, "r300: RS emit:\n");
+
+ for (i = 0; i < count; i++)
+ fprintf(stderr, " : ip %d: 0x%08x\n", i, rs->ip[i]);
+
+ for (i = 0; i < count; i++)
+ fprintf(stderr, " : inst %d: 0x%08x\n", i, rs->inst[i]);
+
+ fprintf(stderr, " : count: 0x%08x inst_count: 0x%08x\n",
+ rs->count, rs->inst_count);
+ }
BEGIN_CS(size);
+ OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2);
+ OUT_CS(rs->vap_vtx_state_cntl);
+ OUT_CS(rs->vap_vsm_vtx_assm);
+ OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2);
+ OUT_CS(rs->vap_out_vtx_fmt[0]);
+ OUT_CS(rs->vap_out_vtx_fmt[1]);
+ OUT_CS_REG_SEQ(R300_GB_ENABLE, 1);
+ OUT_CS(rs->gb_enable);
+
if (r300->screen->caps.is_r500) {
OUT_CS_REG_SEQ(R500_RS_IP_0, count);
} else {
OUT_CS_REG_SEQ(R300_RS_IP_0, count);
}
OUT_CS_TABLE(rs->ip, count);
- for (i = 0; i < count; i++) {
- DBG(r300, DBG_DRAW, " : ip %d: 0x%08x\n", i, rs->ip[i]);
- }
OUT_CS_REG_SEQ(R300_RS_COUNT, 2);
OUT_CS(rs->count);
@@ -737,13 +737,6 @@ void r300_emit_rs_block_state(struct r300_context* r300,
OUT_CS_REG_SEQ(R300_RS_INST_0, count);
}
OUT_CS_TABLE(rs->inst, count);
- for (i = 0; i < count; i++) {
- DBG(r300, DBG_DRAW, " : inst %d: 0x%08x\n", i, rs->inst[i]);
- }
-
- DBG(r300, DBG_DRAW, " : count: 0x%08x inst_count: 0x%08x\n",
- rs->count, rs->inst_count);
-
END_CS;
}
@@ -774,6 +767,7 @@ void r300_emit_textures_state(struct r300_context *r300,
{
struct r300_textures_state *allstate = (struct r300_textures_state*)state;
struct r300_texture_sampler_state *texstate;
+ struct r300_texture *tex;
unsigned i;
CS_LOCALS(r300);
@@ -783,6 +777,7 @@ void r300_emit_textures_state(struct r300_context *r300,
for (i = 0; i < allstate->count; i++) {
if ((1 << i) & allstate->tx_enable) {
texstate = &allstate->regs[i];
+ tex = r300_texture(allstate->sampler_views[i]->base.texture);
OUT_CS_REG(R300_TX_FILTER0_0 + (i * 4), texstate->filter0);
OUT_CS_REG(R300_TX_FILTER1_0 + (i * 4), texstate->filter1);
@@ -794,32 +789,24 @@ void r300_emit_textures_state(struct r300_context *r300,
OUT_CS_REG(R300_TX_FORMAT2_0 + (i * 4), texstate->format.format2);
OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (i * 4), 1);
- OUT_CS_TEX_RELOC(r300_texture(allstate->sampler_views[i]->base.texture),
- texstate->format.tile_config,
- RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ OUT_CS_TEX_RELOC(tex, texstate->format.tile_config, tex->domain,
+ 0);
}
}
END_CS;
}
-void r300_emit_aos(struct r300_context* r300, unsigned offset, boolean indexed)
+void r300_emit_aos(struct r300_context* r300, int offset, boolean indexed)
{
struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer;
struct pipe_vertex_element *velem = r300->velems->velem;
+ struct r300_buffer *buf;
int i;
+ unsigned *hw_format_size = r300->velems->hw_format_size;
unsigned size1, size2, aos_count = r300->velems->count;
unsigned packet_size = (aos_count * 3 + 1) / 2;
CS_LOCALS(r300);
- for (i = 0; i < aos_count; i++) {
- if ((vbuf[velem[i].vertex_buffer_index].buffer_offset + velem[i].src_offset) % 4 != 0) {
- /* XXX We must align the buffer. */
- assert(0);
- fprintf(stderr, "r300: Unaligned vertex buffer offsets aren't supported, aborting..\n");
- abort();
- }
- }
-
BEGIN_CS(2 + packet_size + aos_count * 2);
OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size);
OUT_CS(aos_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0));
@@ -827,8 +814,8 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset, boolean indexed)
for (i = 0; i < aos_count - 1; i += 2) {
vb1 = &vbuf[velem[i].vertex_buffer_index];
vb2 = &vbuf[velem[i+1].vertex_buffer_index];
- size1 = util_format_get_blocksize(velem[i].src_format);
- size2 = util_format_get_blocksize(velem[i+1].src_format);
+ size1 = hw_format_size[i];
+ size2 = hw_format_size[i+1];
OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) |
R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride));
@@ -838,24 +825,24 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset, boolean indexed)
if (aos_count & 1) {
vb1 = &vbuf[velem[i].vertex_buffer_index];
- size1 = util_format_get_blocksize(velem[i].src_format);
+ size1 = hw_format_size[i];
OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride));
OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride);
}
for (i = 0; i < aos_count; i++) {
- OUT_CS_BUF_RELOC_NO_OFFSET(vbuf[velem[i].vertex_buffer_index].buffer,
- RADEON_GEM_DOMAIN_GTT, 0, 0);
+ buf = r300_buffer(vbuf[velem[i].vertex_buffer_index].buffer);
+ OUT_CS_BUF_RELOC_NO_OFFSET(&buf->b.b, buf->domain, 0);
}
END_CS;
}
-void r300_emit_vertex_buffer(struct r300_context* r300)
+void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed)
{
CS_LOCALS(r300);
- DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, "
+ DBG(r300, DBG_SWTCL, "r300: Preparing vertex buffer %p for render, "
"vertex size %d\n", r300->vbo,
r300->vertex_info.size);
/* Set the pointer to our vertex buffer. The emitted values are this:
@@ -867,11 +854,11 @@ void r300_emit_vertex_buffer(struct r300_context* r300)
*/
BEGIN_CS(7);
OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3);
- OUT_CS(1);
+ OUT_CS(1 | (!indexed ? R300_VC_FORCE_PREFETCH : 0));
OUT_CS(r300->vertex_info.size |
(r300->vertex_info.size << 8));
- OUT_CS(r300->vbo_offset);
- OUT_CS_BUF_RELOC(r300->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0);
+ OUT_CS(r300->draw_vbo_offset);
+ OUT_CS_BUF_RELOC(r300->vbo, 0, r300_buffer(r300->vbo)->domain, 0);
END_CS;
}
@@ -883,50 +870,42 @@ void r300_emit_vertex_stream_state(struct r300_context* r300,
unsigned i;
CS_LOCALS(r300);
- DBG(r300, DBG_DRAW, "r300: PSC emit:\n");
+ if (DBG_ON(r300, DBG_PSC)) {
+ fprintf(stderr, "r300: PSC emit:\n");
+
+ for (i = 0; i < streams->count; i++) {
+ fprintf(stderr, " : prog_stream_cntl%d: 0x%08x\n", i,
+ streams->vap_prog_stream_cntl[i]);
+ }
+
+ for (i = 0; i < streams->count; i++) {
+ fprintf(stderr, " : prog_stream_cntl_ext%d: 0x%08x\n", i,
+ streams->vap_prog_stream_cntl_ext[i]);
+ }
+ }
BEGIN_CS(size);
OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, streams->count);
OUT_CS_TABLE(streams->vap_prog_stream_cntl, streams->count);
- for (i = 0; i < streams->count; i++) {
- DBG(r300, DBG_DRAW, " : prog_stream_cntl%d: 0x%08x\n", i,
- streams->vap_prog_stream_cntl[i]);
- }
OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, streams->count);
OUT_CS_TABLE(streams->vap_prog_stream_cntl_ext, streams->count);
- for (i = 0; i < streams->count; i++) {
- DBG(r300, DBG_DRAW, " : prog_stream_cntl_ext%d: 0x%08x\n", i,
- streams->vap_prog_stream_cntl_ext[i]);
- }
END_CS;
}
-void r300_emit_vap_output_state(struct r300_context* r300,
- unsigned size, void* state)
+void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state)
{
- struct r300_vap_output_state *vap_out_state =
- (struct r300_vap_output_state*)state;
CS_LOCALS(r300);
- DBG(r300, DBG_DRAW, "r300: VAP emit:\n");
-
BEGIN_CS(size);
- OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2);
- OUT_CS(vap_out_state->vap_vtx_state_cntl);
- OUT_CS(vap_out_state->vap_vsm_vtx_assm);
- OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2);
- OUT_CS(vap_out_state->vap_out_vtx_fmt[0]);
- OUT_CS(vap_out_state->vap_out_vtx_fmt[1]);
+ OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0);
END_CS;
}
-void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state)
+void r300_emit_vap_invariant_state(struct r300_context *r300,
+ unsigned size, void *state)
{
CS_LOCALS(r300);
-
- BEGIN_CS(size);
- OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0);
- END_CS;
+ WRITE_CS_TABLE(state, size);
}
void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state)
@@ -944,7 +923,7 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state)
unsigned pvs_num_slots = MIN3(vtx_mem_size / input_count,
vtx_mem_size / output_count, 10);
- unsigned pvs_num_controllers = MIN2(vtx_mem_size / temp_count, 6);
+ unsigned pvs_num_controllers = MIN2(vtx_mem_size / temp_count, 5);
unsigned imm_first = vs->externals_count;
unsigned imm_end = vs->code.constants.Count;
@@ -953,6 +932,7 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state)
CS_LOCALS(r300);
BEGIN_CS(size);
+
/* R300_VAP_PVS_CODE_CNTL_0
* R300_VAP_PVS_CONST_CNTL
* R300_VAP_PVS_CODE_CNTL_1
@@ -986,6 +966,22 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state)
OUT_CS_TABLE(data, 4);
}
}
+
+ /* Emit flow control instructions. */
+ if (code->num_fc_ops) {
+
+ OUT_CS_REG(R300_VAP_PVS_FLOW_CNTL_OPC, code->fc_ops);
+ if (r300screen->caps.is_r500) {
+ OUT_CS_REG_SEQ(R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0, code->num_fc_ops * 2);
+ OUT_CS_TABLE(code->fc_op_addrs.r500, code->num_fc_ops * 2);
+ } else {
+ OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_ADDRS_0, code->num_fc_ops);
+ OUT_CS_TABLE(code->fc_op_addrs.r300, code->num_fc_ops);
+ }
+ OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0, code->num_fc_ops);
+ OUT_CS_TABLE(code->fc_loop_index, code->num_fc_ops);
+ }
+
END_CS;
}
@@ -995,6 +991,7 @@ void r300_emit_vs_constants(struct r300_context* r300,
unsigned count =
((struct r300_vertex_shader*)r300->vs_state.state)->externals_count;
struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state;
+ unsigned i;
CS_LOCALS(r300);
if (!count)
@@ -1005,7 +1002,14 @@ void r300_emit_vs_constants(struct r300_context* r300,
(r300->screen->caps.is_r500 ?
R500_PVS_CONST_START : R300_PVS_CONST_START));
OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, count * 4);
- OUT_CS_TABLE(buf->constants, count * 4);
+ if (buf->remap_table){
+ for (i = 0; i < count; i++) {
+ uint32_t *data = &buf->ptr[buf->remap_table[i]*4];
+ OUT_CS_TABLE(data, 4);
+ }
+ } else {
+ OUT_CS_TABLE(buf->ptr, count * 4);
+ }
END_CS;
}
@@ -1015,16 +1019,116 @@ void r300_emit_viewport_state(struct r300_context* r300,
struct r300_viewport_state* viewport = (struct r300_viewport_state*)state;
CS_LOCALS(r300);
- BEGIN_CS(size);
- OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6);
- OUT_CS_32F(viewport->xscale);
- OUT_CS_32F(viewport->xoffset);
- OUT_CS_32F(viewport->yscale);
- OUT_CS_32F(viewport->yoffset);
- OUT_CS_32F(viewport->zscale);
- OUT_CS_32F(viewport->zoffset);
- OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control);
- END_CS;
+ BEGIN_CS(size);
+ OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6);
+ OUT_CS_TABLE(&viewport->xscale, 6);
+ OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control);
+ END_CS;
+}
+
+static void r300_emit_hiz_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val)
+{
+ CS_LOCALS(r300);
+ BEGIN_CS(4);
+ OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_HIZ, 2);
+ OUT_CS(start);
+ OUT_CS(count);
+ OUT_CS(val);
+ END_CS;
+}
+
+static void r300_emit_zmask_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val)
+{
+ CS_LOCALS(r300);
+ BEGIN_CS(4);
+ OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2);
+ OUT_CS(start);
+ OUT_CS(count);
+ OUT_CS(val);
+ END_CS;
+}
+
+#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
+
+void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state)
+{
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+ struct r300_hyperz_state *z =
+ (struct r300_hyperz_state*)r300->hyperz_state.state;
+ struct r300_screen* r300screen = r300->screen;
+ uint32_t stride, offset = 0, height, offset_shift;
+ struct r300_texture* tex;
+ int i;
+
+ tex = r300_texture(fb->zsbuf->texture);
+
+ offset = tex->hiz_mem[fb->zsbuf->level]->ofs;
+ stride = tex->desc.stride_in_pixels[fb->zsbuf->level];
+
+ /* convert from pixels to 4x4 blocks */
+ stride = ALIGN_DIVUP(stride, 4);
+
+ stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes);
+ /* there are 4 blocks per dwords */
+ stride = ALIGN_DIVUP(stride, 4);
+
+ height = ALIGN_DIVUP(fb->zsbuf->height, 4);
+
+ offset_shift = 2;
+ offset_shift += (r300screen->caps.num_frag_pipes / 2);
+
+ for (i = 0; i < height; i++) {
+ offset = i * stride;
+ offset <<= offset_shift;
+ r300_emit_hiz_line_clear(r300, offset, stride, 0xffffffff);
+ }
+ z->current_func = -1;
+
+ /* Mark the current zbuffer's hiz ram as in use. */
+ tex->hiz_in_use[fb->zsbuf->level] = TRUE;
+}
+
+void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state)
+{
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+ struct r300_screen* r300screen = r300->screen;
+ uint32_t stride, offset = 0;
+ struct r300_texture* tex;
+ uint32_t i, height;
+ int mult, offset_shift;
+
+ tex = r300_texture(fb->zsbuf->texture);
+ stride = tex->desc.stride_in_pixels[fb->zsbuf->level];
+
+ offset = tex->zmask_mem[fb->zsbuf->level]->ofs;
+
+ if (r300->z_compression == RV350_Z_COMPRESS_88)
+ mult = 8;
+ else
+ mult = 4;
+
+ height = ALIGN_DIVUP(fb->zsbuf->height, mult);
+
+ offset_shift = 4;
+ offset_shift += (r300screen->caps.num_frag_pipes / 2);
+ stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes);
+
+ /* okay have width in pixels - divide by block width */
+ stride = ALIGN_DIVUP(stride, mult);
+ /* have width in blocks - divide by number of fragment pipes screen width */
+ /* 16 blocks per dword */
+ stride = ALIGN_DIVUP(stride, 16);
+
+ for (i = 0; i < height; i++) {
+ offset = i * stride;
+ offset <<= offset_shift;
+ r300_emit_zmask_line_clear(r300, offset, stride, 0x0);//0xffffffff);
+ }
+
+ /* Mark the current zbuffer's zmask as in use. */
+ tex->zmask_in_use[fb->zsbuf->level] = TRUE;
}
void r300_emit_ztop_state(struct r300_context* r300,
@@ -1047,9 +1151,9 @@ void r300_emit_texture_cache_inval(struct r300_context* r300, unsigned size, voi
END_CS;
}
-void r300_emit_buffer_validate(struct r300_context *r300,
- boolean do_validate_vertex_buffers,
- struct pipe_resource *index_buffer)
+boolean r300_emit_buffer_validate(struct r300_context *r300,
+ boolean do_validate_vertex_buffers,
+ struct pipe_resource *index_buffer)
{
struct pipe_framebuffer_state* fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
@@ -1060,37 +1164,29 @@ void r300_emit_buffer_validate(struct r300_context *r300,
struct pipe_vertex_element *velem = r300->velems->velem;
struct pipe_resource *pbuf;
unsigned i;
- boolean invalid = FALSE;
/* upload buffers first */
- if (r300->any_user_vbs) {
+ if (r300->screen->caps.has_tcl && r300->any_user_vbs) {
r300_upload_user_buffers(r300);
r300->any_user_vbs = false;
}
/* Clean out BOs. */
- r300->rws->reset_bos(r300->rws);
+ r300->rws->cs_reset_buffers(r300->cs);
-validate:
/* Color buffers... */
for (i = 0; i < fb->nr_cbufs; i++) {
tex = r300_texture(fb->cbufs[i]->texture);
assert(tex && tex->buffer && "cbuf is marked, but NULL!");
- if (!r300_add_texture(r300->rws, tex,
- 0, RADEON_GEM_DOMAIN_VRAM)) {
- r300->context.flush(&r300->context, 0, NULL);
- goto validate;
- }
+ r300->rws->cs_add_buffer(r300->cs, tex->buffer, 0,
+ r300_surface(fb->cbufs[i])->domain);
}
/* ...depth buffer... */
if (fb->zsbuf) {
tex = r300_texture(fb->zsbuf->texture);
assert(tex && tex->buffer && "zsbuf is marked, but NULL!");
- if (!r300_add_texture(r300->rws, tex,
- 0, RADEON_GEM_DOMAIN_VRAM)) {
- r300->context.flush(&r300->context, 0, NULL);
- goto validate;
- }
+ r300->rws->cs_add_buffer(r300->cs, tex->buffer, 0,
+ r300_surface(fb->zsbuf)->domain);
}
/* ...textures... */
for (i = 0; i < texstate->count; i++) {
@@ -1099,58 +1195,35 @@ validate:
}
tex = r300_texture(texstate->sampler_views[i]->base.texture);
- if (!r300_add_texture(r300->rws, tex,
- RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0)) {
- r300->context.flush(&r300->context, 0, NULL);
- goto validate;
- }
+ r300->rws->cs_add_buffer(r300->cs, tex->buffer, tex->domain, 0);
}
/* ...occlusion query buffer... */
- if (r300->query_start.dirty) {
- if (!r300_add_buffer(r300->rws, r300->oqbo,
- 0, RADEON_GEM_DOMAIN_GTT)) {
- r300->context.flush(&r300->context, 0, NULL);
- goto validate;
- }
- }
+ if (r300->query_current)
+ r300->rws->cs_add_buffer(r300->cs, r300->query_current->buffer,
+ 0, r300->query_current->domain);
/* ...vertex buffer for SWTCL path... */
- if (r300->vbo) {
- if (!r300_add_buffer(r300->rws, r300->vbo,
- RADEON_GEM_DOMAIN_GTT, 0)) {
- r300->context.flush(&r300->context, 0, NULL);
- goto validate;
- }
- }
+ if (r300->vbo)
+ r300->rws->cs_add_buffer(r300->cs, r300_buffer(r300->vbo)->buf,
+ r300_buffer(r300->vbo)->domain, 0);
/* ...vertex buffers for HWTCL path... */
if (do_validate_vertex_buffers) {
for (i = 0; i < r300->velems->count; i++) {
pbuf = vbuf[velem[i].vertex_buffer_index].buffer;
- if (!r300_add_buffer(r300->rws, pbuf,
- RADEON_GEM_DOMAIN_GTT, 0)) {
- r300->context.flush(&r300->context, 0, NULL);
- goto validate;
- }
+ r300->rws->cs_add_buffer(r300->cs, r300_buffer(pbuf)->buf,
+ r300_buffer(pbuf)->domain, 0);
}
}
/* ...and index buffer for HWTCL path. */
- if (index_buffer) {
- if (!r300_add_buffer(r300->rws, index_buffer,
- RADEON_GEM_DOMAIN_GTT, 0)) {
- r300->context.flush(&r300->context, 0, NULL);
- goto validate;
- }
- }
- if (!r300->rws->validate(r300->rws)) {
- r300->context.flush(&r300->context, 0, NULL);
- if (invalid) {
- /* Well, hell. */
- fprintf(stderr, "r300: Stuck in validation loop, gonna quit now.\n");
- abort();
- }
- invalid = TRUE;
- goto validate;
+ if (index_buffer)
+ r300->rws->cs_add_buffer(r300->cs, r300_buffer(index_buffer)->buf,
+ r300_buffer(index_buffer)->domain, 0);
+
+ if (!r300->rws->cs_validate(r300->cs)) {
+ return FALSE;
}
+
+ return TRUE;
}
unsigned r300_get_num_dirty_dwords(struct r300_context *r300)
@@ -1164,18 +1237,28 @@ unsigned r300_get_num_dirty_dwords(struct r300_context *r300)
}
}
- /* emit_query_end is not atomized. */
- dwords += 26;
/* let's reserve some more, just in case */
dwords += 32;
return dwords;
}
+unsigned r300_get_num_cs_end_dwords(struct r300_context *r300)
+{
+ unsigned dwords = 0;
+
+ /* Emitted in flush. */
+ dwords += 26; /* emit_query_end */
+ dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */
+ if (r500_index_bias_supported(r300))
+ dwords += 2;
+
+ return dwords;
+}
+
/* Emit all dirty state. */
void r300_emit_dirty_state(struct r300_context* r300)
{
- struct r300_screen* r300screen = r300->screen;
struct r300_atom* atom;
foreach(atom, &r300->atom_list) {
@@ -1188,10 +1271,5 @@ void r300_emit_dirty_state(struct r300_context* r300)
}
}
- /* Emit the VBO for SWTCL. */
- if (!r300screen->caps.has_tcl) {
- r300_emit_vertex_buffer(r300);
- }
-
r300->dirty_hw++;
}
diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h
index c3eb195d4e7..278dbcb4c7c 100644
--- a/src/gallium/drivers/r300/r300_emit.h
+++ b/src/gallium/drivers/r300/r300_emit.h
@@ -29,7 +29,9 @@
struct rX00_fragment_program_code;
struct r300_vertex_program_code;
-void r300_emit_aos(struct r300_context* r300, unsigned offset, boolean indexed);
+uint32_t pack_float24(float f);
+
+void r300_emit_aos(struct r300_context* r300, int offset, boolean indexed);
void r300_emit_blend_state(struct r300_context* r300,
unsigned size, void* state);
@@ -43,7 +45,10 @@ void r300_emit_clip_state(struct r300_context* r300,
void r300_emit_dsa_state(struct r300_context* r300,
unsigned size, void* state);
-unsigned r300_get_fs_atom_size(struct r300_context *r300);
+void r300_emit_hyperz_state(struct r300_context *r300,
+ unsigned size, void *state);
+
+void r300_emit_hyperz_end(struct r300_context *r300);
void r300_emit_fs(struct r300_context* r300, unsigned size, void *state);
@@ -51,8 +56,6 @@ void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *stat
void r300_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, void *state);
-unsigned r500_get_fs_atom_size(struct r300_context *r300);
-
void r500_emit_fs(struct r300_context* r300, unsigned size, void *state);
void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *state);
@@ -61,6 +64,13 @@ void r500_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo
void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state);
+void r300_emit_fb_state_pipelined(struct r300_context *r300,
+ unsigned size, void *state);
+
+void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state);
+
+void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state);
+
void r300_emit_query_start(struct r300_context *r300, unsigned size, void *state);
void r300_emit_query_end(struct r300_context* r300);
@@ -76,14 +86,14 @@ void r300_emit_scissor_state(struct r300_context* r300,
void r300_emit_textures_state(struct r300_context *r300,
unsigned size, void *state);
-void r300_emit_vertex_buffer(struct r300_context* r300);
+void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed);
+
+void r300_emit_vap_invariant_state(struct r300_context *r300,
+ unsigned size, void *state);
void r300_emit_vertex_stream_state(struct r300_context* r300,
unsigned size, void* state);
-void r300_emit_vap_output_state(struct r300_context* r300,
- unsigned size, void* state);
-
void r300_emit_vs_constants(struct r300_context* r300,
unsigned size, void *state);
@@ -99,13 +109,20 @@ void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state);
void r300_emit_texture_cache_inval(struct r300_context* r300, unsigned size, void* state);
+void r300_emit_invariant_state(struct r300_context *r300,
+ unsigned size, void *state);
+
+void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state);
+void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state);
+
unsigned r300_get_num_dirty_dwords(struct r300_context *r300);
+unsigned r300_get_num_cs_end_dwords(struct r300_context *r300);
/* Emit all dirty state. */
void r300_emit_dirty_state(struct r300_context* r300);
-void r300_emit_buffer_validate(struct r300_context *r300,
- boolean do_validate_vertex_buffers,
- struct pipe_resource *index_buffer);
+boolean r300_emit_buffer_validate(struct r300_context *r300,
+ boolean do_validate_vertex_buffers,
+ struct pipe_resource *index_buffer);
#endif /* R300_EMIT_H */
diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
index e78c6a3624f..1afd27f0938 100644
--- a/src/gallium/drivers/r300/r300_flush.c
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -1,5 +1,6 @@
/*
* Copyright 2008 Corbin Simpson <[email protected]>
+ * Copyright 2010 Marek Olšák <[email protected]>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -24,11 +25,11 @@
#include "draw/draw_private.h"
#include "util/u_simple_list.h"
+#include "util/u_upload_mgr.h"
#include "r300_context.h"
#include "r300_cs.h"
#include "r300_emit.h"
-#include "r300_flush.h"
static void r300_flush(struct pipe_context* pipe,
unsigned flags,
@@ -37,26 +38,27 @@ static void r300_flush(struct pipe_context* pipe,
struct r300_context *r300 = r300_context(pipe);
struct r300_query *query;
struct r300_atom *atom;
+ struct r300_fence **rfence = (struct r300_fence**)fence;
- CS_LOCALS(r300);
- (void) cs_count;
- /* We probably need to flush Draw, but we may have been called from
- * within Draw. This feels kludgy, but it might be the best thing.
- *
- * Of course, the best thing is to kill Draw with fire. :3 */
- if (r300->draw && !r300->draw->flushing) {
- draw_flush(r300->draw);
- }
+ u_upload_flush(r300->upload_vb);
+ u_upload_flush(r300->upload_ib);
- r300_emit_query_end(r300);
+ if (r300->draw && !r300->draw_vbo_locked)
+ r300_draw_flush_vbuf(r300);
if (r300->dirty_hw) {
- FLUSH_CS;
+ r300_emit_hyperz_end(r300);
+ r300_emit_query_end(r300);
+ if (r500_index_bias_supported(r300))
+ r500_emit_index_bias(r300, 0);
+
+ r300->flush_counter++;
+ r300->rws->cs_flush(r300->cs);
r300->dirty_hw = 0;
/* New kitchen sink, baby. */
foreach(atom, &r300->atom_list) {
- if (atom->state) {
+ if (atom->state || atom->allow_null_state) {
atom->dirty = TRUE;
}
}
@@ -72,6 +74,13 @@ static void r300_flush(struct pipe_context* pipe,
foreach(query, &r300->query_list) {
query->flushed = TRUE;
}
+
+ /* Create a new fence. */
+ if (rfence) {
+ *rfence = CALLOC_STRUCT(r300_fence);
+ pipe_reference_init(&(*rfence)->reference, 1);
+ (*rfence)->ctx = r300;
+ }
}
void r300_init_flush_functions(struct r300_context* r300)
diff --git a/src/gallium/drivers/r300/r300_flush.h b/src/gallium/drivers/r300/r300_flush.h
deleted file mode 100644
index 0e9e6106bb7..00000000000
--- a/src/gallium/drivers/r300/r300_flush.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright 2008 Corbin Simpson <[email protected]>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#ifndef R300_FLUSH_H
-#define R300_FLUSH_H
-
-void r300_init_flush_functions(struct r300_context* r300);
-
-#endif /* R300_FLUSH_H */
diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index 88303f074cd..d9d4a9304df 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -28,7 +28,9 @@
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_ureg.h"
+#include "r300_cb.h"
#include "r300_context.h"
+#include "r300_emit.h"
#include "r300_screen.h"
#include "r300_fs.h"
#include "r300_reg.h"
@@ -70,6 +72,11 @@ void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,
fs_inputs->wpos = i;
break;
+ case TGSI_SEMANTIC_FACE:
+ assert(index == 0);
+ fs_inputs->face = i;
+ break;
+
default:
fprintf(stderr, "r300: FP: Unknown input semantic: %i\n",
info->input_semantic_name[i]);
@@ -118,6 +125,9 @@ static void allocate_hardware_inputs(
allocate(mydata, inputs->color[i], reg++);
}
}
+ if (inputs->face != ATTR_UNUSED) {
+ allocate(mydata, inputs->face, reg++);
+ }
for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
if (inputs->generic[i] != ATTR_UNUSED) {
allocate(mydata, inputs->generic[i], reg++);
@@ -137,18 +147,32 @@ static void get_external_state(
{
struct r300_textures_state *texstate = r300->textures_state.state;
unsigned i;
+ unsigned char *swizzle;
for (i = 0; i < texstate->sampler_state_count; i++) {
- struct r300_sampler_state* s = texstate->sampler_states[i];
+ struct r300_sampler_state *s = texstate->sampler_states[i];
+ struct r300_sampler_view *v = texstate->sampler_views[i];
+ struct r300_texture *t;
- if (!s) {
+ if (!s || !v) {
continue;
}
+ t = r300_texture(texstate->sampler_views[i]->base.texture);
+
if (s->state.compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
- /* XXX Gallium doesn't provide us with any information regarding
- * this mode, so we are screwed. I'm setting 0 = LUMINANCE. */
- state->unit[i].depth_texture_mode = 0;
+ state->unit[i].compare_mode_enabled = 1;
+
+ /* Pass depth texture swizzling to the compiler. */
+ if (texstate->sampler_views[i]) {
+ swizzle = texstate->sampler_views[i]->swizzle;
+
+ state->unit[i].depth_texture_swizzle =
+ RC_MAKE_SWIZZLE(swizzle[0], swizzle[1],
+ swizzle[2], swizzle[3]);
+ } else {
+ state->unit[i].depth_texture_swizzle = RC_SWIZZLE_XYZW;
+ }
/* Fortunately, no need to translate this. */
state->unit[i].texture_compare_func = s->state.compare_func;
@@ -156,35 +180,29 @@ static void get_external_state(
state->unit[i].non_normalized_coords = !s->state.normalized_coords;
- if (texstate->sampler_views[i]) {
- struct r300_texture *t;
- t = (struct r300_texture*)texstate->sampler_views[i]->base.texture;
-
- /* XXX this should probably take into account STR, not just S. */
- if (t->uses_pitch) {
- switch (s->state.wrap_s) {
- case PIPE_TEX_WRAP_REPEAT:
- state->unit[i].wrap_mode = RC_WRAP_REPEAT;
- state->unit[i].fake_npot = TRUE;
- break;
-
- case PIPE_TEX_WRAP_MIRROR_REPEAT:
- state->unit[i].wrap_mode = RC_WRAP_MIRRORED_REPEAT;
- state->unit[i].fake_npot = TRUE;
- break;
-
- case PIPE_TEX_WRAP_MIRROR_CLAMP:
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
- state->unit[i].wrap_mode = RC_WRAP_MIRRORED_CLAMP;
- state->unit[i].fake_npot = TRUE;
- break;
-
- default:
- state->unit[i].wrap_mode = RC_WRAP_NONE;
- break;
- }
+ /* XXX this should probably take into account STR, not just S. */
+ if (t->desc.is_npot) {
+ switch (s->state.wrap_s) {
+ case PIPE_TEX_WRAP_REPEAT:
+ state->unit[i].wrap_mode = RC_WRAP_REPEAT;
+ break;
+
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ state->unit[i].wrap_mode = RC_WRAP_MIRRORED_REPEAT;
+ break;
+
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ state->unit[i].wrap_mode = RC_WRAP_MIRRORED_CLAMP;
+ break;
+
+ default:
+ state->unit[i].wrap_mode = RC_WRAP_NONE;
}
+
+ if (t->desc.b.b.target == PIPE_TEXTURE_3D)
+ state->unit[i].clamp_and_scale_before_fetch = TRUE;
}
}
}
@@ -219,6 +237,128 @@ static void r300_dummy_fragment_shader(
ureg_destroy(ureg);
}
+static void r300_emit_fs_code_to_buffer(
+ struct r300_context *r300,
+ struct r300_fragment_shader_code *shader)
+{
+ struct rX00_fragment_program_code *generic_code = &shader->code;
+ unsigned imm_count = shader->immediates_count;
+ unsigned imm_first = shader->externals_count;
+ unsigned imm_end = generic_code->constants.Count;
+ struct rc_constant *constants = generic_code->constants.Constants;
+ unsigned i;
+ CB_LOCALS;
+
+ if (r300->screen->caps.is_r500) {
+ struct r500_fragment_program_code *code = &generic_code->code.r500;
+
+ shader->cb_code_size = 19 +
+ ((code->inst_end + 1) * 6) +
+ imm_count * 7 +
+ code->int_constant_count * 2;
+
+ NEW_CB(shader->cb_code, shader->cb_code_size);
+ OUT_CB_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
+ OUT_CB_REG(R500_US_PIXSIZE, code->max_temp_idx);
+ OUT_CB_REG(R500_US_FC_CTRL, code->us_fc_ctrl);
+ for(i = 0; i < code->int_constant_count; i++){
+ OUT_CB_REG(R500_US_FC_INT_CONST_0 + (i * 4),
+ code->int_constants[i]);
+ }
+ OUT_CB_REG(R500_US_CODE_RANGE,
+ R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end));
+ OUT_CB_REG(R500_US_CODE_OFFSET, 0);
+ OUT_CB_REG(R500_US_CODE_ADDR,
+ R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(code->inst_end));
+
+ OUT_CB_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_INSTR);
+ OUT_CB_ONE_REG(R500_GA_US_VECTOR_DATA, (code->inst_end + 1) * 6);
+ for (i = 0; i <= code->inst_end; i++) {
+ OUT_CB(code->inst[i].inst0);
+ OUT_CB(code->inst[i].inst1);
+ OUT_CB(code->inst[i].inst2);
+ OUT_CB(code->inst[i].inst3);
+ OUT_CB(code->inst[i].inst4);
+ OUT_CB(code->inst[i].inst5);
+ }
+
+ /* Emit immediates. */
+ if (imm_count) {
+ for(i = imm_first; i < imm_end; ++i) {
+ if (constants[i].Type == RC_CONSTANT_IMMEDIATE) {
+ const float *data = constants[i].u.Immediate;
+
+ OUT_CB_REG(R500_GA_US_VECTOR_INDEX,
+ R500_GA_US_VECTOR_INDEX_TYPE_CONST |
+ (i & R500_GA_US_VECTOR_INDEX_MASK));
+ OUT_CB_ONE_REG(R500_GA_US_VECTOR_DATA, 4);
+ OUT_CB_TABLE(data, 4);
+ }
+ }
+ }
+ } else { /* r300 */
+ struct r300_fragment_program_code *code = &generic_code->code.r300;
+
+ shader->cb_code_size = 19 +
+ (r300->screen->caps.is_r400 ? 2 : 0) +
+ code->alu.length * 4 +
+ (code->tex.length ? (1 + code->tex.length) : 0) +
+ imm_count * 5;
+
+ NEW_CB(shader->cb_code, shader->cb_code_size);
+
+ if (r300->screen->caps.is_r400)
+ OUT_CB_REG(R400_US_CODE_BANK, 0);
+
+ OUT_CB_REG(R300_US_CONFIG, code->config);
+ OUT_CB_REG(R300_US_PIXSIZE, code->pixsize);
+ OUT_CB_REG(R300_US_CODE_OFFSET, code->code_offset);
+
+ OUT_CB_REG_SEQ(R300_US_CODE_ADDR_0, 4);
+ OUT_CB_TABLE(code->code_addr, 4);
+
+ OUT_CB_REG_SEQ(R300_US_ALU_RGB_INST_0, code->alu.length);
+ for (i = 0; i < code->alu.length; i++)
+ OUT_CB(code->alu.inst[i].rgb_inst);
+
+ OUT_CB_REG_SEQ(R300_US_ALU_RGB_ADDR_0, code->alu.length);
+ for (i = 0; i < code->alu.length; i++)
+ OUT_CB(code->alu.inst[i].rgb_addr);
+
+ OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_INST_0, code->alu.length);
+ for (i = 0; i < code->alu.length; i++)
+ OUT_CB(code->alu.inst[i].alpha_inst);
+
+ OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
+ for (i = 0; i < code->alu.length; i++)
+ OUT_CB(code->alu.inst[i].alpha_addr);
+
+ if (code->tex.length) {
+ OUT_CB_REG_SEQ(R300_US_TEX_INST_0, code->tex.length);
+ OUT_CB_TABLE(code->tex.inst, code->tex.length);
+ }
+
+ /* Emit immediates. */
+ if (imm_count) {
+ for(i = imm_first; i < imm_end; ++i) {
+ if (constants[i].Type == RC_CONSTANT_IMMEDIATE) {
+ const float *data = constants[i].u.Immediate;
+
+ OUT_CB_REG_SEQ(R300_PFS_PARAM_0_X + i * 16, 4);
+ OUT_CB(pack_float24(data[0]));
+ OUT_CB(pack_float24(data[1]));
+ OUT_CB(pack_float24(data[2]));
+ OUT_CB(pack_float24(data[3]));
+ }
+ }
+ }
+ }
+
+ OUT_CB_REG(R300_FG_DEPTH_SRC, shader->fg_depth_src);
+ OUT_CB_REG(R300_US_W_FMT, shader->us_out_w);
+ END_CB;
+}
+
static void r300_translate_fragment_shader(
struct r300_context* r300,
struct r300_fragment_shader_code* shader,
@@ -226,13 +366,14 @@ static void r300_translate_fragment_shader(
{
struct r300_fragment_program_compiler compiler;
struct tgsi_to_rc ttr;
- int wpos;
+ int wpos, face;
unsigned i;
tgsi_scan_shader(tokens, &shader->info);
r300_shader_read_fs_inputs(&shader->info, &shader->inputs);
wpos = shader->inputs.wpos;
+ face = shader->inputs.face;
/* Setup the compiler. */
memset(&compiler, 0, sizeof(compiler));
@@ -241,15 +382,21 @@ static void r300_translate_fragment_shader(
compiler.code = &shader->code;
compiler.state = shader->compare_state;
- compiler.is_r500 = r300->screen->caps.is_r500;
- compiler.max_temp_regs = compiler.is_r500 ? 128 : 32;
+ compiler.Base.is_r500 = r300->screen->caps.is_r500;
+ compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT);
+ compiler.Base.has_half_swizzles = TRUE;
+ compiler.Base.has_presub = TRUE;
+ compiler.Base.max_temp_regs = compiler.Base.is_r500 ? 128 : 32;
+ compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32;
+ compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64;
+ compiler.Base.remove_unused_constants = TRUE;
compiler.AllocateHwInputs = &allocate_hardware_inputs;
compiler.UserData = &shader->inputs;
find_output_registers(&compiler, shader);
if (compiler.Base.Debug) {
- debug_printf("r300: Initial fragment program\n");
+ DBG(r300, DBG_FP, "r300: Initial fragment program\n");
tgsi_dump(tokens, 0);
}
@@ -272,17 +419,13 @@ static void r300_translate_fragment_shader(
rc_transform_fragment_wpos(&compiler.Base, wpos, wpos, TRUE);
}
+ if (face != ATTR_UNUSED) {
+ rc_transform_fragment_face(&compiler.Base, face);
+ }
+
/* Invoke the compiler */
r3xx_compile_fragment_program(&compiler);
- /* Shaders with zero instructions are invalid,
- * use the dummy shader instead. */
- if (shader->code.code.r500.inst_end == -1) {
- rc_destroy(&compiler.Base);
- r300_dummy_fragment_shader(r300, shader);
- return;
- }
-
if (compiler.Base.Error) {
fprintf(stderr, "r300 FP: Compiler Error:\n%sUsing a dummy shader"
" instead.\n", compiler.Base.ErrorMsg);
@@ -298,8 +441,21 @@ static void r300_translate_fragment_shader(
return;
}
+ /* Shaders with zero instructions are invalid,
+ * use the dummy shader instead. */
+ if (shader->code.code.r500.inst_end == -1) {
+ rc_destroy(&compiler.Base);
+ r300_dummy_fragment_shader(r300, shader);
+ return;
+ }
+
/* Initialize numbers of constants for each type. */
- shader->externals_count = ttr.immediate_offset;
+ shader->externals_count = 0;
+ for (i = 0;
+ i < shader->code.constants.Count &&
+ shader->code.constants.Constants[i].Type == RC_CONSTANT_EXTERNAL; i++) {
+ shader->externals_count = i+1;
+ }
shader->immediates_count = 0;
shader->rc_state_count = 0;
@@ -327,6 +483,9 @@ static void r300_translate_fragment_shader(
/* And, finally... */
rc_destroy(&compiler.Base);
+
+ /* Build the command buffer. */
+ r300_emit_fs_code_to_buffer(r300, shader);
}
boolean r300_pick_fragment_shader(struct r300_context* r300)
diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h
index 1cc43553595..51bfa88c5ef 100644
--- a/src/gallium/drivers/r300/r300_fs.h
+++ b/src/gallium/drivers/r300/r300_fs.h
@@ -50,6 +50,9 @@ struct r300_fragment_shader_code {
struct r300_fragment_program_external_state compare_state;
struct rX00_fragment_program_code code;
+ unsigned cb_code_size;
+ uint32_t *cb_code;
+
struct r300_fragment_shader_code* next;
};
diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c
index b41b6b1508d..eb5b0c36f8f 100644
--- a/src/gallium/drivers/r300/r300_hyperz.c
+++ b/src/gallium/drivers/r300/r300_hyperz.c
@@ -21,43 +21,220 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#include "r300_hyperz.h"
#include "r300_context.h"
+#include "r300_hyperz.h"
#include "r300_reg.h"
#include "r300_fs.h"
+#include "r300_winsys.h"
+
+#include "util/u_format.h"
+#include "util/u_mm.h"
+
+/*
+ HiZ rules - taken from various docs
+ 1. HiZ only works on depth values
+ 2. Cannot HiZ if stencil fail or zfail is !KEEP
+ 3. on R300/400, HiZ is disabled if depth test is EQUAL
+ 4. comparison changes without clears usually mean disabling HiZ
+*/
+/*****************************************************************************/
+/* The HyperZ setup */
+/*****************************************************************************/
+
+static bool r300_get_sc_hz_max(struct r300_context *r300)
+{
+ struct r300_dsa_state *dsa_state = r300->dsa_state.state;
+ int func = dsa_state->z_stencil_control & 0x7;
+ int ret = R300_SC_HYPERZ_MIN;
+
+ if (func >= 4 && func <= 7)
+ ret = R300_SC_HYPERZ_MAX;
+ return ret;
+}
+
+static bool r300_zfunc_same_direction(int func1, int func2)
+{
+ /* func1 is less/lessthan */
+ if (func1 == 1 || func1 == 2)
+ if (func2 == 3 || func2 == 4 || func2 == 5)
+ return FALSE;
+
+ if (func2 == 1 || func2 == 2)
+ if (func1 == 4 || func1 == 5)
+ return FALSE;
+ return TRUE;
+}
+
+static int r300_get_hiz_min(struct r300_context *r300)
+{
+ struct r300_dsa_state *dsa_state = r300->dsa_state.state;
+ int func = dsa_state->z_stencil_control & 0x7;
+ int ret = R300_HIZ_MIN;
+
+ if (func == 1 || func == 2)
+ ret = R300_HIZ_MAX;
+ return ret;
+}
+
+static boolean r300_dsa_stencil_op_not_keep(struct pipe_stencil_state *s)
+{
+ if (s->enabled && (s->fail_op != PIPE_STENCIL_OP_KEEP ||
+ s->zfail_op != PIPE_STENCIL_OP_KEEP))
+ return TRUE;
+ return FALSE;
+}
+
+static boolean r300_can_hiz(struct r300_context *r300)
+{
+ struct r300_dsa_state *dsa_state = r300->dsa_state.state;
+ struct pipe_depth_stencil_alpha_state *dsa = &dsa_state->dsa;
+ struct r300_screen* r300screen = r300->screen;
+ struct r300_hyperz_state *z = r300->hyperz_state.state;
+
+ /* shader writes depth - no HiZ */
+ if (r300_fragment_shader_writes_depth(r300_fs(r300))) /* (5) */
+ return FALSE;
+
+ if (r300->query_current)
+ return FALSE;
+ /* if stencil fail/zfail op is not KEEP */
+ if (r300_dsa_stencil_op_not_keep(&dsa->stencil[0]) ||
+ r300_dsa_stencil_op_not_keep(&dsa->stencil[1]))
+ return FALSE;
+
+ if (dsa->depth.enabled) {
+ /* if depth func is EQUAL pre-r500 */
+ if (dsa->depth.func == PIPE_FUNC_EQUAL && !r300screen->caps.is_r500)
+ return FALSE;
+ /* if depth func is NOTEQUAL */
+ if (dsa->depth.func == PIPE_FUNC_NOTEQUAL)
+ return FALSE;
+ }
+ /* depth comparison function - if just cleared save and return okay */
+ if (z->current_func == -1) {
+ int func = dsa_state->z_stencil_control & 0x7;
+ if (func != 0 && func != 7)
+ z->current_func = dsa_state->z_stencil_control & 0x7;
+ } else {
+ /* simple don't change */
+ if (!r300_zfunc_same_direction(z->current_func, (dsa_state->z_stencil_control & 0x7))) {
+ DBG(r300, DBG_HYPERZ, "z func changed direction - disabling hyper-z %d -> %d\n", z->current_func, dsa_state->z_stencil_control);
+ return FALSE;
+ }
+ }
+ return TRUE;
+}
+
+static void r300_update_hyperz(struct r300_context* r300)
+{
+ struct r300_hyperz_state *z =
+ (struct r300_hyperz_state*)r300->hyperz_state.state;
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+ struct r300_texture *zstex =
+ fb->zsbuf ? r300_texture(fb->zsbuf->texture) : NULL;
+ boolean zmask_in_use = FALSE;
+ boolean hiz_in_use = FALSE;
+
+ z->gb_z_peq_config = 0;
+ z->zb_bw_cntl = 0;
+ z->sc_hyperz = R300_SC_HYPERZ_ADJ_2;
+ z->flush = 0;
+
+ if (r300->cbzb_clear) {
+ z->zb_bw_cntl |= R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY;
+ return;
+ }
+
+ if (!zstex)
+ return;
+
+ if (!r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
+ return;
+
+ zmask_in_use = zstex->zmask_in_use[fb->zsbuf->level];
+ hiz_in_use = zstex->hiz_in_use[fb->zsbuf->level];
+
+ /* Z fastfill. */
+ if (zmask_in_use) {
+ z->zb_bw_cntl |= R300_FAST_FILL_ENABLE; /* | R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE;*/
+ }
+
+ /* Zbuffer compression. */
+ if (zmask_in_use && r300->z_compression) {
+ z->zb_bw_cntl |= R300_RD_COMP_ENABLE;
+ if (r300->z_decomp_rd == false)
+ z->zb_bw_cntl |= R300_WR_COMP_ENABLE;
+ }
+ /* RV350 and up optimizations. */
+ /* The section 10.4.9 in the docs is a lie. */
+ if (r300->z_compression == RV350_Z_COMPRESS_88)
+ z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8;
+
+ if (hiz_in_use) {
+ bool can_hiz = r300_can_hiz(r300);
+ if (can_hiz) {
+ z->zb_bw_cntl |= R300_HIZ_ENABLE;
+ z->sc_hyperz |= R300_SC_HYPERZ_ENABLE;
+ z->sc_hyperz |= r300_get_sc_hz_max(r300);
+ z->zb_bw_cntl |= r300_get_hiz_min(r300);
+ }
+ }
+
+ /* R500-specific features and optimizations. */
+ if (r300->screen->caps.is_r500) {
+ z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3;
+ z->zb_bw_cntl |=
+ R500_HIZ_EQUAL_REJECT_ENABLE |
+ R500_PEQ_PACKING_ENABLE |
+ R500_COVERED_PTR_MASKING_ENABLE;
+ }
+}
/*****************************************************************************/
/* The ZTOP state */
/*****************************************************************************/
-static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa)
+static boolean r300_dsa_writes_stencil(
+ struct pipe_stencil_state *s)
+{
+ return s->enabled && s->writemask &&
+ (s->fail_op != PIPE_STENCIL_OP_KEEP ||
+ s->zfail_op != PIPE_STENCIL_OP_KEEP ||
+ s->zpass_op != PIPE_STENCIL_OP_KEEP);
+}
+
+static boolean r300_dsa_writes_depth_stencil(
+ struct pipe_depth_stencil_alpha_state *dsa)
{
- /* We are interested only in the cases when a new depth or stencil value
- * can be written and changed. */
+ /* We are interested only in the cases when a depth or stencil value
+ * can be changed. */
+
+ if (dsa->depth.enabled && dsa->depth.writemask &&
+ dsa->depth.func != PIPE_FUNC_NEVER)
+ return TRUE;
- /* We might optionally check for [Z func: never] and inspect the stencil
- * state in a similar fashion, but it's not terribly important. */
- return (dsa->z_buffer_control & R300_Z_WRITE_ENABLE) ||
- (dsa->stencil_ref_mask & R300_STENCILWRITEMASK_MASK) ||
- ((dsa->z_buffer_control & R500_STENCIL_REFMASK_FRONT_BACK) &&
- (dsa->stencil_ref_bf & R300_STENCILWRITEMASK_MASK));
+ if (r300_dsa_writes_stencil(&dsa->stencil[0]) ||
+ r300_dsa_writes_stencil(&dsa->stencil[1]))
+ return TRUE;
+
+ return FALSE;
}
-static boolean r300_dsa_alpha_test_enabled(struct r300_dsa_state* dsa)
+static boolean r300_dsa_alpha_test_enabled(
+ struct pipe_depth_stencil_alpha_state *dsa)
{
/* We are interested only in the cases when alpha testing can kill
* a fragment. */
- uint32_t af = dsa->alpha_function;
- return (af & R300_FG_ALPHA_FUNC_ENABLE) &&
- (af & R300_FG_ALPHA_FUNC_ALWAYS) != R300_FG_ALPHA_FUNC_ALWAYS;
+ return dsa->alpha.enabled && dsa->alpha.func != PIPE_FUNC_ALWAYS;
}
static void r300_update_ztop(struct r300_context* r300)
{
struct r300_ztop_state* ztop_state =
(struct r300_ztop_state*)r300->ztop_state.state;
+ uint32_t old_ztop = ztop_state->z_buffer_top;
/* This is important enough that I felt it warranted a comment.
*
@@ -98,11 +275,139 @@ static void r300_update_ztop(struct r300_context* r300)
} else {
ztop_state->z_buffer_top = R300_ZTOP_ENABLE;
}
+ if (ztop_state->z_buffer_top != old_ztop)
+ r300->ztop_state.dirty = TRUE;
+}
+
+#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
+
+static void r300_update_hiz_clear(struct r300_context *r300)
+{
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+ uint32_t height;
+
+ height = ALIGN_DIVUP(fb->zsbuf->height, 4);
+ r300->hiz_clear.size = height * 4;
+}
+
+static void r300_update_zmask_clear(struct r300_context *r300)
+{
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+ uint32_t height;
+ int mult;
- r300->ztop_state.dirty = TRUE;
+ if (r300->z_compression == RV350_Z_COMPRESS_88)
+ mult = 8;
+ else
+ mult = 4;
+
+ height = ALIGN_DIVUP(fb->zsbuf->height, mult);
+
+ r300->zmask_clear.size = height * 4;
}
void r300_update_hyperz_state(struct r300_context* r300)
{
r300_update_ztop(r300);
+ if (r300->hyperz_state.dirty) {
+ r300_update_hyperz(r300);
+ }
+
+ if (r300->hiz_clear.dirty) {
+ r300_update_hiz_clear(r300);
+ }
+ if (r300->zmask_clear.dirty) {
+ r300_update_zmask_clear(r300);
+ }
+}
+
+void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf)
+{
+ struct r300_texture *tex;
+ uint32_t zsize, ndw;
+ int level = surf->base.level;
+
+ tex = r300_texture(surf->base.texture);
+
+ if (tex->hiz_mem[level])
+ return;
+
+ zsize = tex->desc.layer_size_in_bytes[level];
+ zsize /= util_format_get_blocksize(tex->desc.b.b.format);
+ ndw = ALIGN_DIVUP(zsize, 64);
+
+ tex->hiz_mem[level] = u_mmAllocMem(r300->hiz_mm, ndw, 0, 0);
+ return;
+}
+
+void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress)
+{
+ int bsize = 256;
+ uint32_t zsize, ndw;
+ int level = surf->base.level;
+ struct r300_texture *tex;
+
+ tex = r300_texture(surf->base.texture);
+
+ /* We currently don't handle decompression for 3D textures and cubemaps
+ * correctly. */
+ if (tex->desc.b.b.target != PIPE_TEXTURE_1D &&
+ tex->desc.b.b.target != PIPE_TEXTURE_2D &&
+ tex->desc.b.b.target != PIPE_TEXTURE_RECT)
+ return;
+
+ /* Cannot flush zmask of 16-bit zbuffers. */
+ if (util_format_get_blocksizebits(tex->desc.b.b.format) == 16)
+ return;
+
+ if (tex->zmask_mem[level])
+ return;
+
+ zsize = tex->desc.layer_size_in_bytes[level];
+ zsize /= util_format_get_blocksize(tex->desc.b.b.format);
+
+ /* each zmask dword represents 16 4x4 blocks - which is 256 pixels
+ or 16 8x8 depending on the gb peq flag = 1024 pixels */
+ if (compress == RV350_Z_COMPRESS_88)
+ bsize = 1024;
+
+ ndw = ALIGN_DIVUP(zsize, bsize);
+ tex->zmask_mem[level] = u_mmAllocMem(r300->zmask_mm, ndw, 0, 0);
+ return;
+}
+
+boolean r300_hyperz_init_mm(struct r300_context *r300)
+{
+ struct r300_screen* r300screen = r300->screen;
+ int frag_pipes = r300screen->caps.num_frag_pipes;
+
+ r300->zmask_mm = u_mmInit(0, r300screen->caps.zmask_ram * frag_pipes);
+ if (!r300->zmask_mm)
+ return FALSE;
+
+ if (r300screen->caps.hiz_ram) {
+ r300->hiz_mm = u_mmInit(0, r300screen->caps.hiz_ram * frag_pipes);
+ if (!r300->hiz_mm) {
+ u_mmDestroy(r300->zmask_mm);
+ r300->zmask_mm = NULL;
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+void r300_hyperz_destroy_mm(struct r300_context *r300)
+{
+ struct r300_screen* r300screen = r300->screen;
+
+ if (r300screen->caps.hiz_ram) {
+ u_mmDestroy(r300->hiz_mm);
+ r300->hiz_mm = NULL;
+ }
+
+ u_mmDestroy(r300->zmask_mm);
+ r300->zmask_mm = NULL;
}
diff --git a/src/gallium/drivers/r300/r300_hyperz.h b/src/gallium/drivers/r300/r300_hyperz.h
index 3df5053b896..30a23ec6493 100644
--- a/src/gallium/drivers/r300/r300_hyperz.h
+++ b/src/gallium/drivers/r300/r300_hyperz.h
@@ -27,4 +27,9 @@ struct r300_context;
void r300_update_hyperz_state(struct r300_context* r300);
+void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf);
+void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress);
+
+boolean r300_hyperz_init_mm(struct r300_context *r300);
+void r300_hyperz_destroy_mm(struct r300_context *r300);
#endif
diff --git a/src/gallium/drivers/r300/r300_public.h b/src/gallium/drivers/r300/r300_public.h
new file mode 100644
index 00000000000..8e7a963c55d
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_public.h
@@ -0,0 +1,9 @@
+
+#ifndef R300_PUBLIC_H
+#define R300_PUBLIC_H
+
+struct r300_winsys_screen;
+
+struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws);
+
+#endif
diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c
index 5c27796e894..5f34fcb2744 100644
--- a/src/gallium/drivers/r300/r300_query.c
+++ b/src/gallium/drivers/r300/r300_query.c
@@ -26,7 +26,7 @@
#include "r300_context.h"
#include "r300_screen.h"
#include "r300_emit.h"
-#include "r300_query.h"
+#include "r300_winsys.h"
#include <stdio.h>
@@ -35,31 +35,31 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe,
{
struct r300_context *r300 = r300_context(pipe);
struct r300_screen *r300screen = r300->screen;
- unsigned query_size;
- struct r300_query *q, *qptr;
+ struct r300_query *q;
+
+ if (query_type != PIPE_QUERY_OCCLUSION_COUNTER) {
+ return NULL;
+ }
q = CALLOC_STRUCT(r300_query);
+ if (!q)
+ return NULL;
q->type = query_type;
- assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER);
-
- q->active = FALSE;
+ q->domain = R300_DOMAIN_GTT;
+ q->buffer_size = 4096;
if (r300screen->caps.family == CHIP_FAMILY_RV530)
- query_size = r300screen->caps.num_z_pipes * sizeof(uint32_t);
+ q->num_pipes = r300screen->caps.num_z_pipes;
else
- query_size = r300screen->caps.num_frag_pipes * sizeof(uint32_t);
+ q->num_pipes = r300screen->caps.num_frag_pipes;
- if (!is_empty_list(&r300->query_list)) {
- qptr = last_elem(&r300->query_list);
- q->offset = qptr->offset + query_size;
- }
insert_at_tail(&r300->query_list, q);
- /* XXX */
- if (q->offset >= 4096) {
- q->offset = 0;
- }
+ /* Open up the occlusion query buffer. */
+ q->buffer = r300->rws->buffer_create(r300->rws, q->buffer_size, 4096,
+ PIPE_BIND_CUSTOM, PIPE_USAGE_STREAM,
+ q->domain);
return (struct pipe_query*)q;
}
@@ -67,92 +67,87 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe,
static void r300_destroy_query(struct pipe_context* pipe,
struct pipe_query* query)
{
- struct r300_query* q = (struct r300_query*)query;
+ struct r300_context *r300 = r300_context(pipe);
+ struct r300_query* q = r300_query(query);
+ r300->rws->buffer_reference(r300->rws, &q->buffer, NULL);
remove_from_list(q);
FREE(query);
}
+void r300_resume_query(struct r300_context *r300,
+ struct r300_query *query)
+{
+ r300->query_current = query;
+ r300->query_start.dirty = TRUE;
+}
+
static void r300_begin_query(struct pipe_context* pipe,
struct pipe_query* query)
{
- uint32_t value = ~0U;
struct r300_context* r300 = r300_context(pipe);
- struct r300_query* q = (struct r300_query*)query;
+ struct r300_query* q = r300_query(query);
- assert(r300->query_current == NULL);
+ if (r300->query_current != NULL) {
+ fprintf(stderr, "r300: begin_query: "
+ "Some other query has already been started.\n");
+ assert(0);
+ return;
+ }
- pipe_buffer_write(pipe,
- r300->oqbo,
- q->offset,
- sizeof value,
- &value);
+ q->num_results = 0;
+ r300_resume_query(r300, q);
+}
- q->flushed = FALSE;
- r300->query_current = q;
- r300->query_start.dirty = TRUE;
+void r300_stop_query(struct r300_context *r300)
+{
+ r300_emit_query_end(r300);
+ r300->query_current = NULL;
}
static void r300_end_query(struct pipe_context* pipe,
struct pipe_query* query)
{
struct r300_context* r300 = r300_context(pipe);
- struct r300_query* q = (struct r300_query*)query;
+ struct r300_query *q = r300_query(query);
- r300_emit_query_end(r300);
- q->begin_emitted = false;
- r300->query_current = NULL;
+ if (q != r300->query_current) {
+ fprintf(stderr, "r300: end_query: Got invalid query.\n");
+ assert(0);
+ return;
+ }
+
+ r300_stop_query(r300);
}
static boolean r300_get_query_result(struct pipe_context* pipe,
struct pipe_query* query,
boolean wait,
- uint64_t* result)
+ void* vresult)
{
struct r300_context* r300 = r300_context(pipe);
- struct r300_screen* r300screen = r300->screen;
- struct r300_query *q = (struct r300_query*)query;
- struct pipe_transfer *transfer;
- unsigned flags = PIPE_TRANSFER_READ;
- uint32_t* map;
- uint32_t temp = 0;
- unsigned i, num_results;
-
- if (q->flushed == FALSE)
+ struct r300_query *q = r300_query(query);
+ unsigned flags, i;
+ uint32_t temp, *map;
+ uint64_t *result = (uint64_t*)vresult;
+
+ if (!q->flushed)
pipe->flush(pipe, 0, NULL);
- if (!wait) {
- flags |= PIPE_TRANSFER_DONTBLOCK;
- }
- map = pipe_buffer_map(pipe, r300->oqbo, flags, &transfer);
+ flags = PIPE_TRANSFER_READ | (!wait ? PIPE_TRANSFER_DONTBLOCK : 0);
+
+ map = r300->rws->buffer_map(r300->rws, q->buffer, r300->cs, flags);
if (!map)
return FALSE;
- map += q->offset / 4;
- if (r300screen->caps.family == CHIP_FAMILY_RV530)
- num_results = r300screen->caps.num_z_pipes;
- else
- num_results = r300screen->caps.num_frag_pipes;
-
- for (i = 0; i < num_results; i++) {
- if (*map == ~0U) {
- /* Looks like our results aren't ready yet. */
- if (wait) {
- fprintf(stderr, "r300: Despite waiting, OQ results haven't "
- "come in yet.\n");
- }
- temp = ~0U;
- break;
- }
+ /* Sum up the results. */
+ temp = 0;
+ for (i = 0; i < q->num_results; i++) {
temp += *map;
map++;
}
- pipe_buffer_unmap(pipe, r300->oqbo, transfer);
- if (temp == ~0U) {
- /* Our results haven't been written yet... */
- return FALSE;
- }
+ r300->rws->buffer_unmap(r300->rws, q->buffer);
*result = temp;
return TRUE;
@@ -163,7 +158,7 @@ static void r300_render_condition(struct pipe_context *pipe,
uint mode)
{
struct r300_context *r300 = r300_context(pipe);
- uint64_t result;
+ uint64_t result = 0;
boolean wait;
if (query) {
@@ -172,19 +167,69 @@ static void r300_render_condition(struct pipe_context *pipe,
if (!r300_get_query_result(pipe, query, wait, &result)) {
r300->skip_rendering = FALSE;
+ } else {
+ r300->skip_rendering = result == 0;
}
-
- r300->skip_rendering = result == 0;
} else {
r300->skip_rendering = FALSE;
}
}
+/***************************************************************************
+ * Fake occlusion queries (for debugging)
+ ***************************************************************************/
+
+static unsigned r300_fake_query;
+
+static struct pipe_query *r300_fake_create_query(struct pipe_context *pipe,
+ unsigned query_type)
+{
+ return (struct pipe_query*)&r300_fake_query;
+}
+
+static void r300_fake_destroy_query(struct pipe_context* pipe,
+ struct pipe_query* query)
+{
+}
+
+static void r300_fake_begin_query(struct pipe_context* pipe,
+ struct pipe_query* query)
+{
+}
+
+static void r300_fake_end_query(struct pipe_context* pipe,
+ struct pipe_query* query)
+{
+}
+
+static boolean r300_fake_get_query_result(struct pipe_context* pipe,
+ struct pipe_query* query,
+ boolean wait, void* vresult)
+{
+ uint64_t *result = (uint64_t*)vresult;
+ *result = 1000000;
+ return TRUE;
+}
+
+static void r300_fake_render_condition(struct pipe_context *pipe,
+ struct pipe_query *query, uint mode)
+{
+}
+
void r300_init_query_functions(struct r300_context* r300) {
- r300->context.create_query = r300_create_query;
- r300->context.destroy_query = r300_destroy_query;
- r300->context.begin_query = r300_begin_query;
- r300->context.end_query = r300_end_query;
- r300->context.get_query_result = r300_get_query_result;
- r300->context.render_condition = r300_render_condition;
+ if (DBG_ON(r300, DBG_FAKE_OCC)) {
+ r300->context.create_query = r300_fake_create_query;
+ r300->context.destroy_query = r300_fake_destroy_query;
+ r300->context.begin_query = r300_fake_begin_query;
+ r300->context.end_query = r300_fake_end_query;
+ r300->context.get_query_result = r300_fake_get_query_result;
+ r300->context.render_condition = r300_fake_render_condition;
+ } else {
+ r300->context.create_query = r300_create_query;
+ r300->context.destroy_query = r300_destroy_query;
+ r300->context.begin_query = r300_begin_query;
+ r300->context.end_query = r300_end_query;
+ r300->context.get_query_result = r300_get_query_result;
+ r300->context.render_condition = r300_render_condition;
+ }
}
diff --git a/src/gallium/drivers/r300/r300_query.h b/src/gallium/drivers/r300/r300_query.h
deleted file mode 100644
index 48876da3123..00000000000
--- a/src/gallium/drivers/r300/r300_query.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright 2009 Corbin Simpson <[email protected]>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#ifndef R300_QUERY_H
-#define R300_QUERY_H
-
-struct r300_context;
-
-static INLINE struct r300_query* r300_query(struct pipe_query* q)
-{
- return (struct r300_query*)q;
-}
-
-void r300_init_query_functions(struct r300_context* r300);
-
-#endif /* R300_QUERY_H */
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index 675a9317f9c..6bea783f697 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -496,6 +496,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R300_VAP_GB_HORZ_CLIP_ADJ 0x2228
#define R300_VAP_GB_HORZ_DISC_ADJ 0x222c
+#define R300_VAP_PVS_FLOW_CNTL_ADDRS_0 0x2230
+#define R300_PVS_FC_ACT_ADRS(x) ((x) << 0)
+#define R300_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 8)
+#define R300_PVS_FC_LAST_INST(x) ((x) << 16)
+#define R300_PVS_FC_RTN_INST(x) ((x) << 24)
+
/* gap */
/* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between
@@ -514,6 +520,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_2288_R300 0x00750000 /* -- nh */
# define R300_2288_RV350 0x0000FFFF /* -- Vladimir */
+#define R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0 0x2290
+#define R300_PVS_FC_LOOP_INIT_VAL(x) ((x) << 0)
+#define R300_PVS_FC_LOOP_STEP_VAL(x) ((x) << 8)
+
/* gap */
/* Addresses are relative to the vertex program instruction area of the
@@ -548,6 +558,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R300_VAP_PVS_CODE_CNTL_1 0x22D8
# define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0
#define R300_VAP_PVS_FLOW_CNTL_OPC 0x22DC
+#define R300_VAP_PVS_FC_OPC_JUMP(x) (1 << (2 * (x)))
+#define R300_VAP_PVS_FC_OPC_LOOP(x) (2 << (2 * (x)))
+#define R300_VAP_PVS_FC_OPC_JSR(x) (3 << (2 * (x)))
/* The entire range from 0x2300 to 0x2AC inclusive seems to be used for
* immediate vertices
@@ -564,6 +577,14 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
/* write 0 to indicate end of packet? */
#define R300_VAP_VTX_END_OF_PKT 0x24AC
+#define R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0 0x2500
+#define R500_PVS_FC_ACT_ADRS(x) ((x) << 0)
+#define R500_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 16)
+
+#define R500_VAP_PVS_FLOW_CNTL_ADDRS_UW_0 0x2504
+#define R500_PVS_FC_LAST_INST(x) ((x) << 0)
+#define R500_PVS_FC_RTN_INST(x) ((x) << 16)
+
/* gap */
/* These are values from r300_reg/r300_reg.h - they are known to be correct
@@ -1575,6 +1596,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R500_TX_FORMAT_Y10X10 0x3
# define R500_TX_FORMAT_W10Z10Y10X10 0x4
# define R500_TX_FORMAT_ATI1N 0x5
+# define R500_TX_FORMAT_Y8X24 0x6
# define R300_TX_FORMAT_SIGNED_W (1 << 5)
@@ -1585,6 +1607,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_TX_FORMAT_3D (1 << 25)
# define R300_TX_FORMAT_CUBIC_MAP (2 << 25)
+# define R300_TX_FORMAT_TEX_COORD_TYPE_MASK (0x3 << 25)
/* alpha modes, convenience mostly */
/* if you have alpha, pick constant appropriate to the
@@ -1629,6 +1652,40 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_TX_FORMAT_GAMMA (1 << 21)
# define R300_TX_FORMAT_YUV_TO_RGB (1 << 22)
+# define R300_TX_CACHE(x) ((x) << 27)
+# define R300_TX_CACHE_WHOLE 0
+/* reserved */
+# define R300_TX_CACHE_HALF_0 2
+# define R300_TX_CACHE_HALF_1 3
+# define R300_TX_CACHE_FOURTH_0 4
+# define R300_TX_CACHE_FOURTH_1 5
+# define R300_TX_CACHE_FOURTH_2 6
+# define R300_TX_CACHE_FOURTH_3 7
+# define R300_TX_CACHE_EIGHTH_0 8
+# define R300_TX_CACHE_EIGHTH_1 9
+# define R300_TX_CACHE_EIGHTH_2 10
+# define R300_TX_CACHE_EIGHTH_3 11
+# define R300_TX_CACHE_EIGHTH_4 12
+# define R300_TX_CACHE_EIGHTH_5 13
+# define R300_TX_CACHE_EIGHTH_6 14
+# define R300_TX_CACHE_EIGHTH_7 15
+# define R300_TX_CACHE_SIXTEENTH_0 16
+# define R300_TX_CACHE_SIXTEENTH_1 17
+# define R300_TX_CACHE_SIXTEENTH_2 18
+# define R300_TX_CACHE_SIXTEENTH_3 19
+# define R300_TX_CACHE_SIXTEENTH_4 20
+# define R300_TX_CACHE_SIXTEENTH_5 21
+# define R300_TX_CACHE_SIXTEENTH_6 22
+# define R300_TX_CACHE_SIXTEENTH_7 23
+# define R300_TX_CACHE_SIXTEENTH_8 24
+# define R300_TX_CACHE_SIXTEENTH_9 25
+# define R300_TX_CACHE_SIXTEENTH_10 26
+# define R300_TX_CACHE_SIXTEENTH_11 27
+# define R300_TX_CACHE_SIXTEENTH_12 28
+# define R300_TX_CACHE_SIXTEENTH_13 29
+# define R300_TX_CACHE_SIXTEENTH_14 30
+# define R300_TX_CACHE_SIXTEENTH_15 31
+
#define R300_TX_FORMAT2_0 0x4500 /* obvious missing in gap */
# define R300_TX_PITCHMASK_SHIFT 0
# define R300_TX_PITCHMASK_MASK (2047 << 0)
@@ -2582,7 +2639,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_WR_COMP_DISABLE (0 << 4)
# define R300_WR_COMP_ENABLE (1 << 4)
# define R300_ZB_CB_CLEAR_RMW (0 << 5)
-# define R300_ZB_CB_CLEAR_CACHE_LINEAR (1 << 5)
+# define R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY (1 << 5)
# define R300_FORCE_COMPRESSED_STENCIL_VALUE_DISABLE (0 << 6)
# define R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE (1 << 6)
@@ -2638,6 +2695,24 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
/* Z Buffer Clear Value */
#define R300_ZB_DEPTHCLEARVALUE 0x4f28
+/* Z Mask RAM is a Z compression buffer.
+ * Each dword of the Z Mask contains compression info for 16 4x4 pixel blocks,
+ * that is 2 bits for each block.
+ * On chips with 2 Z pipes, every other dword maps to a different pipe.
+ */
+
+/* The dword offset into Z mask RAM (bits 18:4) */
+#define R300_ZB_ZMASK_OFFSET 0x4f30
+
+/* Z Mask Pitch. */
+#define R300_ZB_ZMASK_PITCH 0x4f34
+
+/* Access to Z Mask RAM in a manner similar to HiZ RAM.
+ * The indices are autoincrementing. */
+#define R300_ZB_ZMASK_WRINDEX 0x4f38
+#define R300_ZB_ZMASK_DWORD 0x4f3c
+#define R300_ZB_ZMASK_RDINDEX 0x4f40
+
/* Hierarchical Z Memory Offset */
#define R300_ZB_HIZ_OFFSET 0x4f44
@@ -3229,8 +3304,8 @@ enum {
# define R500_FC_B_OP0_NONE (0 << 24)
# define R500_FC_B_OP0_DECR (1 << 24)
# define R500_FC_B_OP0_INCR (2 << 24)
-# define R500_FC_B_OP1_DECR (0 << 26)
-# define R500_FC_B_OP1_NONE (1 << 26)
+# define R500_FC_B_OP1_NONE (0 << 26)
+# define R500_FC_B_OP1_DECR (1 << 26)
# define R500_FC_B_OP1_INCR (2 << 26)
# define R500_FC_IGNORE_UNCOVERED (1 << 28)
#define R500_US_FC_INT_CONST_0 0x4c00
@@ -3383,6 +3458,7 @@ enum {
# define R300_VBPNTR_SIZE1(x) (((x) >> 2) << 16)
# define R300_VBPNTR_STRIDE1(x) (((x) >> 2) << 24)
+#define R300_PACKET3_3D_CLEAR_ZMASK 0x00003200
#define R300_PACKET3_INDX_BUFFER 0x00003300
# define R300_INDX_BUFFER_DST_SHIFT 0
# define R300_INDX_BUFFER_SKIP_SHIFT 16
@@ -3436,9 +3512,18 @@ enum {
# define RADEON_WAIT_3D_IDLECLEAN (1 << 17)
# define RADEON_WAIT_HOST_IDLECLEAN (1 << 18)
+#define R200_3D_DRAW_IMMD_2 0xC0003500
+
+#define RADEON_CP_PACKET0 0x0 /* XXX stolen from radeon_reg.h */
#define RADEON_CP_PACKET3 0xC0000000
-#define R200_3D_DRAW_IMMD_2 0xC0003500
+#define RADEON_ONE_REG_WR (1 << 15)
+
+#define CP_PACKET0(register, count) \
+ (RADEON_CP_PACKET0 | ((count) << 16) | ((register) >> 2))
+
+#define CP_PACKET3(op, count) \
+ (RADEON_CP_PACKET3 | (op) | ((count) << 16))
#endif /* _R300_REG_H */
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 7c3a7902a49..2f00c878f5b 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -1,5 +1,6 @@
/*
* Copyright 2009 Corbin Simpson <[email protected]>
+ * Copyright 2010 Marek Olšák <[email protected]>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -38,9 +39,12 @@
#include "r300_screen_buffer.h"
#include "r300_emit.h"
#include "r300_reg.h"
-#include "r300_render.h"
#include "r300_state_derived.h"
+#include <limits.h>
+
+#define IMMD_DWORDS 32
+
static uint32_t r300_translate_primitive(unsigned prim)
{
switch (prim) {
@@ -114,16 +118,178 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300,
return color_control;
}
-/* Check if the requested number of dwords is available in the CS and
- * if not, flush. Return TRUE if the flush occured. */
-static boolean r300_reserve_cs_space(struct r300_context *r300,
- unsigned dwords)
+boolean r500_index_bias_supported(struct r300_context *r300)
+{
+ return r300->screen->caps.is_r500 &&
+ r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0);
+}
+
+void r500_emit_index_bias(struct r300_context *r300, int index_bias)
{
- if (!r300->rws->check_cs(r300->rws, dwords)) {
+ CS_LOCALS(r300);
+
+ BEGIN_CS(2);
+ OUT_CS_REG(R500_VAP_INDEX_OFFSET,
+ (index_bias & 0xFFFFFF) | (index_bias < 0 ? 1<<24 : 0));
+ END_CS;
+}
+
+/* This function splits the index bias value into two parts:
+ * - buffer_offset: the value that can be safely added to buffer offsets
+ * in r300_emit_aos (it must yield a positive offset when added to
+ * a vertex buffer offset)
+ * - index_offset: the value that must be manually subtracted from indices
+ * in an index buffer to achieve negative offsets. */
+static void r300_split_index_bias(struct r300_context *r300, int index_bias,
+ int *buffer_offset, int *index_offset)
+{
+ struct pipe_vertex_buffer *vb, *vbufs = r300->vertex_buffer;
+ struct pipe_vertex_element *velem = r300->velems->velem;
+ unsigned i, size;
+ int max_neg_bias;
+
+ if (index_bias < 0) {
+ /* See how large index bias we may subtract. We must be careful
+ * here because negative buffer offsets are not allowed
+ * by the DRM API. */
+ max_neg_bias = INT_MAX;
+ for (i = 0; i < r300->velems->count; i++) {
+ vb = &vbufs[velem[i].vertex_buffer_index];
+ size = (vb->buffer_offset + velem[i].src_offset) / vb->stride;
+ max_neg_bias = MIN2(max_neg_bias, size);
+ }
+
+ /* Now set the minimum allowed value. */
+ *buffer_offset = MAX2(-max_neg_bias, index_bias);
+ } else {
+ /* A positive index bias is OK. */
+ *buffer_offset = index_bias;
+ }
+
+ *index_offset = index_bias - *buffer_offset;
+}
+
+enum r300_prepare_flags {
+ PREP_FIRST_DRAW = (1 << 0), /* call emit_dirty_state and friends? */
+ PREP_VALIDATE_VBOS = (1 << 1), /* validate VBOs? */
+ PREP_EMIT_AOS = (1 << 2), /* call emit_aos? */
+ PREP_EMIT_AOS_SWTCL = (1 << 3), /* call emit_aos_swtcl? */
+ PREP_INDEXED = (1 << 4) /* is this draw_elements? */
+};
+
+/**
+ * Check if the requested number of dwords is available in the CS and
+ * if not, flush.
+ * \param r300 The context.
+ * \param flags See r300_prepare_flags.
+ * \param cs_dwords The number of dwords to reserve in CS.
+ * \return TRUE if the CS was flushed
+ */
+static boolean r300_reserve_cs_dwords(struct r300_context *r300,
+ enum r300_prepare_flags flags,
+ unsigned cs_dwords)
+{
+ boolean flushed = FALSE;
+ boolean first_draw = flags & PREP_FIRST_DRAW;
+ boolean emit_aos = flags & PREP_EMIT_AOS;
+ boolean emit_aos_swtcl = flags & PREP_EMIT_AOS_SWTCL;
+ boolean hw_index_bias = r500_index_bias_supported(r300);
+
+ /* Add dirty state, index offset, and AOS. */
+ if (first_draw) {
+ cs_dwords += r300_get_num_dirty_dwords(r300);
+
+ if (hw_index_bias)
+ cs_dwords += 2; /* emit_index_offset */
+
+ if (emit_aos)
+ cs_dwords += 55; /* emit_aos */
+
+ if (emit_aos_swtcl)
+ cs_dwords += 7; /* emit_aos_swtcl */
+ }
+
+ cs_dwords += r300_get_num_cs_end_dwords(r300);
+
+ /* Reserve requested CS space. */
+ if (cs_dwords > (r300->cs->ndw - r300->cs->cdw)) {
r300->context.flush(&r300->context, 0, NULL);
- return TRUE;
+ flushed = TRUE;
}
- return FALSE;
+
+ return flushed;
+}
+
+/**
+ * Validate buffers and emit dirty state.
+ * \param r300 The context.
+ * \param flags See r300_prepare_flags.
+ * \param index_buffer The index buffer to validate. The parameter may be NULL.
+ * \param aos_offset The offset passed to emit_aos.
+ * \param index_bias The index bias to emit.
+ * \return TRUE if rendering should be skipped
+ */
+static boolean r300_emit_states(struct r300_context *r300,
+ enum r300_prepare_flags flags,
+ struct pipe_resource *index_buffer,
+ int aos_offset,
+ int index_bias)
+{
+ boolean first_draw = flags & PREP_FIRST_DRAW;
+ boolean emit_aos = flags & PREP_EMIT_AOS;
+ boolean emit_aos_swtcl = flags & PREP_EMIT_AOS_SWTCL;
+ boolean indexed = flags & PREP_INDEXED;
+ boolean hw_index_bias = r500_index_bias_supported(r300);
+
+ /* Validate buffers and emit dirty state if needed. */
+ if (first_draw) {
+ if (!r300_emit_buffer_validate(r300, flags & PREP_VALIDATE_VBOS,
+ index_buffer)) {
+ fprintf(stderr, "r300: CS space validation failed. "
+ "(not enough memory?) Skipping rendering.\n");
+ return FALSE;
+ }
+
+ r300_emit_dirty_state(r300);
+ if (hw_index_bias) {
+ if (r300->screen->caps.has_tcl)
+ r500_emit_index_bias(r300, index_bias);
+ else
+ r500_emit_index_bias(r300, 0);
+ }
+
+ if (emit_aos)
+ r300_emit_aos(r300, aos_offset, indexed);
+
+ if (emit_aos_swtcl)
+ r300_emit_aos_swtcl(r300, indexed);
+ }
+
+ return TRUE;
+}
+
+/**
+ * Check if the requested number of dwords is available in the CS and
+ * if not, flush. Then validate buffers and emit dirty state.
+ * \param r300 The context.
+ * \param flags See r300_prepare_flags.
+ * \param index_buffer The index buffer to validate. The parameter may be NULL.
+ * \param cs_dwords The number of dwords to reserve in CS.
+ * \param aos_offset The offset passed to emit_aos.
+ * \param index_bias The index bias to emit.
+ * \return TRUE if rendering should be skipped
+ */
+static boolean r300_prepare_for_rendering(struct r300_context *r300,
+ enum r300_prepare_flags flags,
+ struct pipe_resource *index_buffer,
+ unsigned cs_dwords,
+ int aos_offset,
+ int index_bias)
+{
+ if (r300_reserve_cs_dwords(r300, flags, cs_dwords))
+ flags |= PREP_FIRST_DRAW;
+
+ return r300_emit_states(r300, flags, index_buffer, aos_offset, index_bias);
}
static boolean immd_is_good_idea(struct r300_context *r300,
@@ -135,13 +301,20 @@ static boolean immd_is_good_idea(struct r300_context *r300,
unsigned vertex_element_count = r300->velems->count;
unsigned i, vbi;
- if (count > 10 || DBG_ON(r300, DBG_NO_IMMD)) {
+ if (DBG_ON(r300, DBG_NO_IMMD)) {
+ return FALSE;
+ }
+
+ if (r300->draw) {
+ return FALSE;
+ }
+
+ if (count * r300->velems->vertex_size_dwords > IMMD_DWORDS) {
return FALSE;
}
/* We shouldn't map buffers referenced by CS, busy buffers,
* and ones placed in VRAM. */
- /* XXX Check for VRAM buffers. */
for (i = 0; i < vertex_element_count; i++) {
velem = &r300->velems->velem[i];
vbi = velem->vertex_buffer_index;
@@ -149,6 +322,10 @@ static boolean immd_is_good_idea(struct r300_context *r300,
if (!checked[vbi]) {
vbuf = &r300->vertex_buffer[vbi];
+ if (!(r300_buffer(vbuf->buffer)->domain & R300_DOMAIN_GTT)) {
+ return FALSE;
+ }
+
if (r300_buffer_is_referenced(&r300->context,
vbuf->buffer,
R300_REF_CS | R300_REF_HW)) {
@@ -162,85 +339,61 @@ static boolean immd_is_good_idea(struct r300_context *r300,
}
/*****************************************************************************
- * The emission of draw packets for r500. Older GPUs may use these functions *
- * after resolving fallback issues (e.g. stencil ref two-sided). *
+ * The HWTCL draw functions. *
****************************************************************************/
-static boolean r500_emit_index_offset(struct r300_context *r300, int indexBias)
-{
- CS_LOCALS(r300);
-
- if (r300->screen->caps.is_r500 &&
- r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) {
- BEGIN_CS(2);
- OUT_CS_REG(R500_VAP_INDEX_OFFSET,
- (indexBias & 0xFFFFFF) | (indexBias < 0 ? 1<<24 : 0));
- END_CS;
- } else {
- if (indexBias)
- return FALSE; /* Can't do anything :( */
- }
-
- return TRUE;
-}
-
-void r500_emit_draw_arrays_immediate(struct r300_context *r300,
- unsigned mode,
- unsigned start,
- unsigned count)
+static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
+ unsigned mode,
+ unsigned start,
+ unsigned count)
{
struct pipe_vertex_element* velem;
struct pipe_vertex_buffer* vbuf;
unsigned vertex_element_count = r300->velems->count;
- unsigned i, v, vbi, dw, elem_offset, dwords;
+ unsigned i, v, vbi;
/* Size of the vertex, in dwords. */
- unsigned vertex_size = 0;
+ unsigned vertex_size = r300->velems->vertex_size_dwords;
- /* Offsets of the attribute, in dwords, from the start of the vertex. */
- unsigned offset[PIPE_MAX_ATTRIBS];
+ /* The number of dwords for this draw operation. */
+ unsigned dwords = 9 + count * vertex_size;
/* Size of the vertex element, in dwords. */
unsigned size[PIPE_MAX_ATTRIBS];
/* Stride to the same attrib in the next vertex in the vertex buffer,
* in dwords. */
- unsigned stride[PIPE_MAX_ATTRIBS] = {0};
+ unsigned stride[PIPE_MAX_ATTRIBS];
/* Mapped vertex buffers. */
- uint32_t* map[PIPE_MAX_ATTRIBS] = {0};
- struct pipe_transfer* transfer[PIPE_MAX_ATTRIBS] = {NULL};
+ uint32_t* map[PIPE_MAX_ATTRIBS];
+ uint32_t* mapelem[PIPE_MAX_ATTRIBS];
+ struct pipe_transfer* transfer[PIPE_MAX_ATTRIBS] = {0};
CS_LOCALS(r300);
+ if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0))
+ return;
+
/* Calculate the vertex size, offsets, strides etc. and map the buffers. */
for (i = 0; i < vertex_element_count; i++) {
velem = &r300->velems->velem[i];
- offset[i] = velem->src_offset / 4;
- size[i] = util_format_get_blocksize(velem->src_format) / 4;
- vertex_size += size[i];
+ size[i] = r300->velems->hw_format_size[i] / 4;
vbi = velem->vertex_buffer_index;
+ vbuf = &r300->vertex_buffer[vbi];
+ stride[i] = vbuf->stride / 4;
/* Map the buffer. */
- if (!map[vbi]) {
- vbuf = &r300->vertex_buffer[vbi];
+ if (!transfer[vbi]) {
map[vbi] = (uint32_t*)pipe_buffer_map(&r300->context,
vbuf->buffer,
PIPE_TRANSFER_READ,
&transfer[vbi]);
- map[vbi] += vbuf->buffer_offset / 4;
- stride[vbi] = vbuf->stride / 4;
+ map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * start;
}
+ mapelem[i] = map[vbi] + (velem->src_offset / 4);
}
- dwords = 9 + count * vertex_size;
-
- r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + 2 + dwords);
- r300_emit_buffer_validate(r300, FALSE, NULL);
- r300_emit_dirty_state(r300);
-
- r500_emit_index_offset(r300, 0);
-
BEGIN_CS(dwords);
OUT_CS_REG(R300_GA_COLOR_CONTROL,
r300_provoking_vertex_fixes(r300, mode));
@@ -255,13 +408,7 @@ void r500_emit_draw_arrays_immediate(struct r300_context *r300,
/* Emit vertices. */
for (v = 0; v < count; v++) {
for (i = 0; i < vertex_element_count; i++) {
- velem = &r300->velems->velem[i];
- vbi = velem->vertex_buffer_index;
- elem_offset = offset[i] + stride[vbi] * (v + start);
-
- for (dw = 0; dw < size[i]; dw++) {
- OUT_CS(map[vbi][elem_offset + dw]);
- }
+ OUT_CS_TABLE(&mapelem[i][stride[i] * v], size[i]);
}
}
END_CS;
@@ -270,17 +417,17 @@ void r500_emit_draw_arrays_immediate(struct r300_context *r300,
for (i = 0; i < vertex_element_count; i++) {
vbi = r300->velems->velem[i].vertex_buffer_index;
- if (map[vbi]) {
+ if (transfer[vbi]) {
vbuf = &r300->vertex_buffer[vbi];
pipe_buffer_unmap(&r300->context, vbuf->buffer, transfer[vbi]);
- map[vbi] = NULL;
+ transfer[vbi] = NULL;
}
}
}
-void r500_emit_draw_arrays(struct r300_context *r300,
- unsigned mode,
- unsigned count)
+static void r300_emit_draw_arrays(struct r300_context *r300,
+ unsigned mode,
+ unsigned count)
{
boolean alt_num_verts = count > 65535;
CS_LOCALS(r300);
@@ -291,8 +438,6 @@ void r500_emit_draw_arrays(struct r300_context *r300,
return;
}
- r500_emit_index_offset(r300, 0);
-
BEGIN_CS(7 + (alt_num_verts ? 2 : 0));
if (alt_num_verts) {
OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count);
@@ -309,15 +454,14 @@ void r500_emit_draw_arrays(struct r300_context *r300,
END_CS;
}
-void r500_emit_draw_elements(struct r300_context *r300,
- struct pipe_resource* indexBuffer,
- unsigned indexSize,
- int indexBias,
- unsigned minIndex,
- unsigned maxIndex,
- unsigned mode,
- unsigned start,
- unsigned count)
+static void r300_emit_draw_elements(struct r300_context *r300,
+ struct pipe_resource* indexBuffer,
+ unsigned indexSize,
+ unsigned minIndex,
+ unsigned maxIndex,
+ unsigned mode,
+ unsigned start,
+ unsigned count)
{
uint32_t count_dwords;
uint32_t offset_dwords = indexSize * start / sizeof(uint32_t);
@@ -335,12 +479,6 @@ void r500_emit_draw_elements(struct r300_context *r300,
DBG(r300, DBG_DRAW, "r300: Indexbuf of %u indices, min %u max %u\n",
count, minIndex, maxIndex);
- if (!r500_emit_index_offset(r300, indexBias)) {
- fprintf(stderr, "r300: Got a non-zero index bias, "
- "refusing to render.\n");
- return;
- }
-
BEGIN_CS(13 + (alt_num_verts ? 2 : 0));
if (alt_num_verts) {
OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count);
@@ -374,189 +512,31 @@ void r500_emit_draw_elements(struct r300_context *r300,
(0 << R300_INDX_BUFFER_SKIP_SHIFT));
OUT_CS(offset_dwords << 2);
OUT_CS_BUF_RELOC(indexBuffer, count_dwords,
- RADEON_GEM_DOMAIN_GTT, 0, 0);
-
- END_CS;
-}
-
-/*****************************************************************************
- * The emission of draw packets for r300 which take care of the two-sided *
- * stencil ref fallback and call r500's functions. *
- ****************************************************************************/
-
-/* Set drawing for front faces. */
-static void r300_begin_stencil_ref_fallback(struct r300_context *r300)
-{
- struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state;
- CS_LOCALS(r300);
+ r300_buffer(indexBuffer)->domain, 0);
- BEGIN_CS(2);
- OUT_CS_REG(R300_SU_CULL_MODE, rs->cull_mode | R300_CULL_BACK);
- END_CS;
-}
-
-/* Set drawing for back faces. */
-static void r300_switch_stencil_ref_side(struct r300_context *r300)
-{
- struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state;
- struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state;
- CS_LOCALS(r300);
-
- BEGIN_CS(4);
- OUT_CS_REG(R300_SU_CULL_MODE, rs->cull_mode | R300_CULL_FRONT);
- OUT_CS_REG(R300_ZB_STENCILREFMASK,
- dsa->stencil_ref_bf | r300->stencil_ref.ref_value[1]);
- END_CS;
-}
-
-/* Restore the original state. */
-static void r300_end_stencil_ref_fallback(struct r300_context *r300)
-{
- struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state;
- struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state;
- CS_LOCALS(r300);
-
- BEGIN_CS(4);
- OUT_CS_REG(R300_SU_CULL_MODE, rs->cull_mode);
- OUT_CS_REG(R300_ZB_STENCILREFMASK,
- dsa->stencil_ref_mask | r300->stencil_ref.ref_value[0]);
END_CS;
}
-void r300_emit_draw_arrays_immediate(struct r300_context *r300,
+/* This is the fast-path drawing & emission for HW TCL. */
+static void r300_draw_range_elements(struct pipe_context* pipe,
+ struct pipe_resource* indexBuffer,
+ unsigned indexSize,
+ int indexBias,
+ unsigned minIndex,
+ unsigned maxIndex,
unsigned mode,
unsigned start,
unsigned count)
{
- if (!r300->stencil_ref_bf_fallback) {
- r500_emit_draw_arrays_immediate(r300, mode, start, count);
- } else {
- r300_begin_stencil_ref_fallback(r300);
- r500_emit_draw_arrays_immediate(r300, mode, start, count);
- r300_switch_stencil_ref_side(r300);
- r500_emit_draw_arrays_immediate(r300, mode, start, count);
- r300_end_stencil_ref_fallback(r300);
- }
-}
-
-void r300_emit_draw_arrays(struct r300_context *r300,
- unsigned mode,
- unsigned count)
-{
- if (!r300->stencil_ref_bf_fallback) {
- r500_emit_draw_arrays(r300, mode, count);
- } else {
- r300_begin_stencil_ref_fallback(r300);
- r500_emit_draw_arrays(r300, mode, count);
- r300_switch_stencil_ref_side(r300);
- r500_emit_draw_arrays(r300, mode, count);
- r300_end_stencil_ref_fallback(r300);
- }
-}
-
-void r300_emit_draw_elements(struct r300_context *r300,
- struct pipe_resource* indexBuffer,
- unsigned indexSize,
- int indexBias,
- unsigned minIndex,
- unsigned maxIndex,
- unsigned mode,
- unsigned start,
- unsigned count)
-{
- if (!r300->stencil_ref_bf_fallback) {
- r500_emit_draw_elements(r300, indexBuffer, indexSize, indexBias,
- minIndex, maxIndex, mode, start, count);
- } else {
- r300_begin_stencil_ref_fallback(r300);
- r500_emit_draw_elements(r300, indexBuffer, indexSize, indexBias,
- minIndex, maxIndex, mode, start, count);
- r300_switch_stencil_ref_side(r300);
- r500_emit_draw_elements(r300, indexBuffer, indexSize, indexBias,
- minIndex, maxIndex, mode, start, count);
- r300_end_stencil_ref_fallback(r300);
- }
-}
-
-static void r300_shorten_ubyte_elts(struct r300_context* r300,
- struct pipe_resource** elts,
- unsigned start,
- unsigned count)
-{
- struct pipe_context* context = &r300->context;
- struct pipe_screen* screen = r300->context.screen;
- struct pipe_resource* new_elts;
- unsigned char *in_map;
- unsigned short *out_map;
- struct pipe_transfer *src_transfer, *dst_transfer;
- unsigned i;
-
- new_elts = pipe_buffer_create(screen,
- PIPE_BIND_INDEX_BUFFER,
- 2 * count);
-
- in_map = pipe_buffer_map(context, *elts, PIPE_TRANSFER_READ, &src_transfer);
- out_map = pipe_buffer_map(context, new_elts, PIPE_TRANSFER_WRITE, &dst_transfer);
-
- in_map += start;
-
- for (i = 0; i < count; i++) {
- *out_map = (unsigned short)*in_map;
- in_map++;
- out_map++;
- }
-
- pipe_buffer_unmap(context, *elts, src_transfer);
- pipe_buffer_unmap(context, new_elts, dst_transfer);
-
- *elts = new_elts;
-}
-
-static void r300_align_ushort_elts(struct r300_context *r300,
- struct pipe_resource **elts,
- unsigned start, unsigned count)
-{
- struct pipe_context* context = &r300->context;
- struct pipe_transfer *in_transfer = NULL;
- struct pipe_transfer *out_transfer = NULL;
- struct pipe_resource* new_elts;
- unsigned short *in_map;
- unsigned short *out_map;
-
- new_elts = pipe_buffer_create(context->screen,
- PIPE_BIND_INDEX_BUFFER,
- 2 * count);
-
- in_map = pipe_buffer_map(context, *elts,
- PIPE_TRANSFER_READ, &in_transfer);
- out_map = pipe_buffer_map(context, new_elts,
- PIPE_TRANSFER_WRITE, &out_transfer);
-
- memcpy(out_map, in_map+start, 2 * count);
-
- pipe_buffer_unmap(context, *elts, in_transfer);
- pipe_buffer_unmap(context, new_elts, out_transfer);
-
- *elts = new_elts;
-}
-
-/* This is the fast-path drawing & emission for HW TCL. */
-void r300_draw_range_elements(struct pipe_context* pipe,
- struct pipe_resource* indexBuffer,
- unsigned indexSize,
- int indexBias,
- unsigned minIndex,
- unsigned maxIndex,
- unsigned mode,
- unsigned start,
- unsigned count)
-{
struct r300_context* r300 = r300_context(pipe);
struct pipe_resource* orgIndexBuffer = indexBuffer;
boolean alt_num_verts = r300->screen->caps.is_r500 &&
count > 65536 &&
r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0);
unsigned short_count;
+ int buffer_offset = 0, index_offset = 0; /* for index bias emulation */
+ boolean translate = FALSE;
+ unsigned new_offset;
if (r300->skip_rendering) {
return;
@@ -566,76 +546,78 @@ void r300_draw_range_elements(struct pipe_context* pipe,
return;
}
- if (indexSize == 1) {
- r300_shorten_ubyte_elts(r300, &indexBuffer, start, count);
- indexSize = 2;
- start = 0;
- } else if (indexSize == 2 && start % 2 != 0) {
- r300_align_ushort_elts(r300, &indexBuffer, start, count);
- start = 0;
+ /* Index buffer range checking. */
+ if ((start + count) * indexSize > indexBuffer->width0) {
+ fprintf(stderr, "r300: Invalid index buffer range. Skipping rendering.\n");
+ return;
}
+ /* Set up fallback for incompatible vertex layout if needed. */
+ if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) {
+ r300_begin_vertex_translate(r300);
+ translate = TRUE;
+ }
+
+ if (indexBias && !r500_index_bias_supported(r300)) {
+ r300_split_index_bias(r300, indexBias, &buffer_offset, &index_offset);
+ }
+
+ r300_translate_index_buffer(r300, &indexBuffer, &indexSize, index_offset,
+ &start, count);
+
r300_update_derived_state(r300);
+ r300_upload_index_buffer(r300, &indexBuffer, indexSize, start, count, &new_offset);
- r300_upload_index_buffer(r300, &indexBuffer, indexSize, start, count);
+ start = new_offset;
- /* 128 dwords for emit_aos and emit_draw_elements */
- r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + 128);
- r300_emit_buffer_validate(r300, TRUE, indexBuffer);
- r300_emit_dirty_state(r300);
- r300_emit_aos(r300, 0, TRUE);
+ /* 15 dwords for emit_draw_elements. Give up if the function fails. */
+ if (!r300_prepare_for_rendering(r300,
+ PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS |
+ PREP_INDEXED, indexBuffer, 15, buffer_offset, indexBias))
+ goto done;
- u_upload_flush(r300->upload_vb);
- u_upload_flush(r300->upload_ib);
if (alt_num_verts || count <= 65535) {
- r300->emit_draw_elements(r300, indexBuffer, indexSize, indexBias,
- minIndex, maxIndex, mode, start, count);
+ r300_emit_draw_elements(r300, indexBuffer, indexSize,
+ minIndex, maxIndex, mode, start, count);
} else {
do {
short_count = MIN2(count, 65534);
- r300->emit_draw_elements(r300, indexBuffer, indexSize, indexBias,
+ r300_emit_draw_elements(r300, indexBuffer, indexSize,
minIndex, maxIndex,
mode, start, short_count);
start += short_count;
count -= short_count;
- /* 16 spare dwords are enough for emit_draw_elements.
- * Also reserve some space for emit_query_end. */
- if (count && r300_reserve_cs_space(r300, 74)) {
- r300_emit_buffer_validate(r300, TRUE, indexBuffer);
- r300_emit_dirty_state(r300);
- r300_emit_aos(r300, 0, TRUE);
+ /* 15 dwords for emit_draw_elements */
+ if (count) {
+ if (!r300_prepare_for_rendering(r300,
+ PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED,
+ indexBuffer, 15, buffer_offset, indexBias))
+ goto done;
}
} while (count);
}
+done:
if (indexBuffer != orgIndexBuffer) {
pipe_resource_reference( &indexBuffer, NULL );
}
-}
-
-/* Simple helpers for context setup. Should probably be moved to util. */
-void r300_draw_elements(struct pipe_context* pipe,
- struct pipe_resource* indexBuffer,
- unsigned indexSize, int indexBias, unsigned mode,
- unsigned start, unsigned count)
-{
- struct r300_context *r300 = r300_context(pipe);
- pipe->draw_range_elements(pipe, indexBuffer, indexSize, indexBias,
- 0, r300->vertex_buffer_max_index,
- mode, start, count);
+ if (translate) {
+ r300_end_vertex_translate(r300);
+ }
}
-void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
- unsigned start, unsigned count)
+static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
+ unsigned start, unsigned count)
{
struct r300_context* r300 = r300_context(pipe);
boolean alt_num_verts = r300->screen->caps.is_r500 &&
count > 65536 &&
r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0);
unsigned short_count;
+ boolean translate = FALSE;
if (r300->skip_rendering) {
return;
@@ -645,135 +627,149 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
return;
}
+ /* Set up fallback for incompatible vertex layout if needed. */
+ if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) {
+ r300_begin_vertex_translate(r300);
+ translate = TRUE;
+ }
+
r300_update_derived_state(r300);
if (immd_is_good_idea(r300, count)) {
- r300->emit_draw_arrays_immediate(r300, mode, start, count);
+ r300_emit_draw_arrays_immediate(r300, mode, start, count);
} else {
- /* Make sure there are at least 128 spare dwords in the command buffer.
- * (most of it being consumed by emit_aos) */
- r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + 128);
- r300_emit_buffer_validate(r300, TRUE, NULL);
- r300_emit_dirty_state(r300);
+ /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */
+ if (!r300_prepare_for_rendering(r300,
+ PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS,
+ NULL, 9, start, 0))
+ goto done;
if (alt_num_verts || count <= 65535) {
- r300_emit_aos(r300, start, FALSE);
- r300->emit_draw_arrays(r300, mode, count);
+ r300_emit_draw_arrays(r300, mode, count);
} else {
do {
short_count = MIN2(count, 65535);
- r300_emit_aos(r300, start, FALSE);
- r300->emit_draw_arrays(r300, mode, short_count);
+ r300_emit_draw_arrays(r300, mode, short_count);
start += short_count;
count -= short_count;
- /* Again, we emit both AOS and draw_arrays so there should be
- * at least 128 spare dwords.
- * Also reserve some space for emit_query_end. */
- if (count && r300_reserve_cs_space(r300, 186)) {
- r300_emit_buffer_validate(r300, TRUE, NULL);
- r300_emit_dirty_state(r300);
+ /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */
+ if (count) {
+ if (!r300_prepare_for_rendering(r300,
+ PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9,
+ start, 0))
+ goto done;
}
} while (count);
}
- u_upload_flush(r300->upload_vb);
}
-}
-/****************************************************************************
- * The rest of this file is for SW TCL rendering only. Please be polite and *
- * keep these functions separated so that they are easier to locate. ~C. *
- ***************************************************************************/
+done:
+ if (translate) {
+ r300_end_vertex_translate(r300);
+ }
+}
-/* SW TCL arrays, using Draw. */
-void r300_swtcl_draw_arrays(struct pipe_context* pipe,
- unsigned mode,
- unsigned start,
- unsigned count)
+static void r300_draw_vbo(struct pipe_context* pipe,
+ const struct pipe_draw_info *info)
{
struct r300_context* r300 = r300_context(pipe);
- struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS];
- int i;
-
- if (r300->skip_rendering) {
- return;
- }
-
- if (!u_trim_pipe_prim(mode, &count)) {
- return;
- }
- for (i = 0; i < r300->vertex_buffer_count; i++) {
- void* buf = pipe_buffer_map(pipe,
- r300->vertex_buffer[i].buffer,
- PIPE_TRANSFER_READ,
- &vb_transfer[i]);
- draw_set_mapped_vertex_buffer(r300->draw, i, buf);
- }
+ if (!r300->velems->count || !r300->vertex_buffer_count)
+ return;
- draw_set_mapped_element_buffer(r300->draw, 0, 0, NULL);
+ if (info->indexed && r300->index_buffer.buffer) {
+ unsigned offset;
- draw_arrays(r300->draw, mode, start, count);
+ assert(r300->index_buffer.offset % r300->index_buffer.index_size == 0);
+ offset = r300->index_buffer.offset / r300->index_buffer.index_size;
- for (i = 0; i < r300->vertex_buffer_count; i++) {
- pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer,
- vb_transfer[i]);
- draw_set_mapped_vertex_buffer(r300->draw, i, NULL);
+ r300_draw_range_elements(pipe,
+ r300->index_buffer.buffer,
+ r300->index_buffer.index_size,
+ info->index_bias,
+ info->min_index,
+ info->max_index,
+ info->mode,
+ info->start + offset,
+ info->count);
+ }
+ else {
+ r300_draw_arrays(pipe,
+ info->mode,
+ info->start,
+ info->count);
}
}
+/****************************************************************************
+ * The rest of this file is for SW TCL rendering only. Please be polite and *
+ * keep these functions separated so that they are easier to locate. ~C. *
+ ***************************************************************************/
+
/* SW TCL elements, using Draw. */
-void r300_swtcl_draw_range_elements(struct pipe_context* pipe,
- struct pipe_resource* indexBuffer,
- unsigned indexSize,
- int indexBias,
- unsigned minIndex,
- unsigned maxIndex,
- unsigned mode,
- unsigned start,
- unsigned count)
+static void r300_swtcl_draw_vbo(struct pipe_context* pipe,
+ const struct pipe_draw_info *info)
{
struct r300_context* r300 = r300_context(pipe);
struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS];
- struct pipe_transfer *ib_transfer;
+ struct pipe_transfer *ib_transfer = NULL;
+ unsigned count = info->count;
int i;
- void* indices;
+ void *indices = NULL;
+ boolean indexed = info->indexed && r300->index_buffer.buffer;
if (r300->skip_rendering) {
return;
}
- if (!u_trim_pipe_prim(mode, &count)) {
+ if (!u_trim_pipe_prim(info->mode, &count)) {
return;
}
+ r300_update_derived_state(r300);
+
+ r300_reserve_cs_dwords(r300,
+ PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL |
+ (indexed ? PREP_INDEXED : 0),
+ indexed ? 256 : 6);
+
for (i = 0; i < r300->vertex_buffer_count; i++) {
- void* buf = pipe_buffer_map(pipe,
- r300->vertex_buffer[i].buffer,
- PIPE_TRANSFER_READ,
- &vb_transfer[i]);
- draw_set_mapped_vertex_buffer(r300->draw, i, buf);
+ if (r300->vertex_buffer[i].buffer) {
+ void *buf = pipe_buffer_map(pipe,
+ r300->vertex_buffer[i].buffer,
+ PIPE_TRANSFER_READ,
+ &vb_transfer[i]);
+ draw_set_mapped_vertex_buffer(r300->draw, i, buf);
+ }
+ }
+
+ if (indexed) {
+ indices = pipe_buffer_map(pipe, r300->index_buffer.buffer,
+ PIPE_TRANSFER_READ, &ib_transfer);
}
- indices = pipe_buffer_map(pipe, indexBuffer,
- PIPE_TRANSFER_READ, &ib_transfer);
- draw_set_mapped_element_buffer_range(r300->draw, indexSize, indexBias,
- minIndex, maxIndex, indices);
+ draw_set_mapped_index_buffer(r300->draw, indices);
- draw_arrays(r300->draw, mode, start, count);
+ r300->draw_vbo_locked = TRUE;
+ r300->draw_first_emitted = FALSE;
+ draw_vbo(r300->draw, info);
+ draw_flush(r300->draw);
+ r300->draw_vbo_locked = FALSE;
for (i = 0; i < r300->vertex_buffer_count; i++) {
- pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer,
- vb_transfer[i]);
- draw_set_mapped_vertex_buffer(r300->draw, i, NULL);
+ if (r300->vertex_buffer[i].buffer) {
+ pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer,
+ vb_transfer[i]);
+ draw_set_mapped_vertex_buffer(r300->draw, i, NULL);
+ }
}
- pipe_buffer_unmap(pipe, indexBuffer,
- ib_transfer);
- draw_set_mapped_element_buffer_range(r300->draw, 0, 0,
- start, start + count - 1,
- NULL);
+ if (indexed) {
+ pipe_buffer_unmap(pipe, r300->index_buffer.buffer, ib_transfer);
+ draw_set_mapped_index_buffer(r300->draw, NULL);
+ }
}
/* Object for rendering using Draw. */
@@ -790,13 +786,10 @@ struct r300_render {
unsigned hwprim;
/* VBO */
- struct pipe_resource* vbo;
- size_t vbo_size;
- size_t vbo_offset;
size_t vbo_max_used;
void * vbo_ptr;
- struct pipe_transfer *vbo_transfer;
+ struct pipe_transfer *vbo_transfer;
};
static INLINE struct r300_render*
@@ -811,8 +804,6 @@ r300_render_get_vertex_info(struct vbuf_render* render)
struct r300_render* r300render = r300_render(render);
struct r300_context* r300 = r300render->r300;
- r300_update_derived_state(r300);
-
return &r300->vertex_info;
}
@@ -825,33 +816,40 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render,
struct pipe_screen* screen = r300->context.screen;
size_t size = (size_t)vertex_size * (size_t)count;
- if (size + r300render->vbo_offset > r300render->vbo_size)
+ DBG(r300, DBG_DRAW, "r300: render_allocate_vertices (size: %d)\n", size);
+
+ if (size + r300->draw_vbo_offset > r300->draw_vbo_size)
{
- pipe_resource_reference(&r300->vbo, NULL);
- r300render->vbo = pipe_buffer_create(screen,
- PIPE_BIND_VERTEX_BUFFER,
- R300_MAX_DRAW_VBO_SIZE);
- r300render->vbo_offset = 0;
- r300render->vbo_size = R300_MAX_DRAW_VBO_SIZE;
+ pipe_resource_reference(&r300->vbo, NULL);
+ r300->vbo = pipe_buffer_create(screen,
+ PIPE_BIND_VERTEX_BUFFER,
+ R300_MAX_DRAW_VBO_SIZE);
+ r300->draw_vbo_offset = 0;
+ r300->draw_vbo_size = R300_MAX_DRAW_VBO_SIZE;
}
r300render->vertex_size = vertex_size;
- r300->vbo = r300render->vbo;
- r300->vbo_offset = r300render->vbo_offset;
- return (r300render->vbo) ? TRUE : FALSE;
+ return (r300->vbo) ? TRUE : FALSE;
}
static void* r300_render_map_vertices(struct vbuf_render* render)
{
struct r300_render* r300render = r300_render(render);
+ struct r300_context* r300 = r300render->r300;
+
+ assert(!r300render->vbo_transfer);
+
+ DBG(r300, DBG_DRAW, "r300: render_map_vertices\n");
r300render->vbo_ptr = pipe_buffer_map(&r300render->r300->context,
- r300render->vbo,
+ r300->vbo,
PIPE_TRANSFER_WRITE,
&r300render->vbo_transfer);
- return ((uint8_t*)r300render->vbo_ptr + r300render->vbo_offset);
+ assert(r300render->vbo_ptr);
+
+ return ((uint8_t*)r300render->vbo_ptr + r300->draw_vbo_offset);
}
static void r300_render_unmap_vertices(struct vbuf_render* render,
@@ -860,21 +858,27 @@ static void r300_render_unmap_vertices(struct vbuf_render* render,
{
struct r300_render* r300render = r300_render(render);
struct pipe_context* context = &r300render->r300->context;
- CS_LOCALS(r300render->r300);
- BEGIN_CS(2);
- OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, max);
- END_CS;
+ struct r300_context* r300 = r300render->r300;
+
+ assert(r300render->vbo_transfer);
+
+ DBG(r300, DBG_DRAW, "r300: render_unmap_vertices\n");
r300render->vbo_max_used = MAX2(r300render->vbo_max_used,
r300render->vertex_size * (max + 1));
- pipe_buffer_unmap(context, r300render->vbo, r300render->vbo_transfer);
+ pipe_buffer_unmap(context, r300->vbo, r300render->vbo_transfer);
+
+ r300render->vbo_transfer = NULL;
}
static void r300_render_release_vertices(struct vbuf_render* render)
{
struct r300_render* r300render = r300_render(render);
+ struct r300_context* r300 = r300render->r300;
- r300render->vbo_offset += r300render->vbo_max_used;
+ DBG(r300, DBG_DRAW, "r300: render_release_vertices\n");
+
+ r300->draw_vbo_offset += r300render->vbo_max_used;
r300render->vbo_max_used = 0;
}
@@ -889,92 +893,131 @@ static boolean r300_render_set_primitive(struct vbuf_render* render,
return TRUE;
}
-static void r500_render_draw_arrays(struct vbuf_render* render,
+static void r300_render_draw_arrays(struct vbuf_render* render,
unsigned start,
unsigned count)
{
struct r300_render* r300render = r300_render(render);
struct r300_context* r300 = r300render->r300;
+ uint8_t* ptr;
+ unsigned i;
+ unsigned dwords = 6;
CS_LOCALS(r300);
+ (void) i; (void) ptr;
- r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + 2);
- r300_emit_buffer_validate(r300, FALSE, NULL);
- r300_emit_dirty_state(r300);
+ DBG(r300, DBG_DRAW, "r300: render_draw_arrays (count: %d)\n", count);
- DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count);
+ if (r300->draw_first_emitted) {
+ if (!r300_prepare_for_rendering(r300,
+ PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL,
+ NULL, 6, 0, 0))
+ return;
+ } else {
+ if (!r300_emit_states(r300,
+ PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL,
+ NULL, 0, 0))
+ return;
+ }
- r500_emit_index_offset(r300, 0);
+ /* Uncomment to dump all VBOs rendered through this interface.
+ * Slow and noisy!
+ ptr = pipe_buffer_map(&r300render->r300->context,
+ r300render->vbo, PIPE_TRANSFER_READ,
+ &r300render->vbo_transfer);
- BEGIN_CS(2);
+ for (i = 0; i < count; i++) {
+ printf("r300: Vertex %d\n", i);
+ draw_dump_emitted_vertex(&r300->vertex_info, ptr);
+ ptr += r300->vertex_info.size * 4;
+ printf("\n");
+ }
+
+ pipe_buffer_unmap(&r300render->r300->context, r300render->vbo,
+ r300render->vbo_transfer);
+ */
+
+ BEGIN_CS(dwords);
+ OUT_CS_REG(R300_GA_COLOR_CONTROL,
+ r300_provoking_vertex_fixes(r300, r300render->prim));
+ OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1);
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) |
r300render->hwprim);
END_CS;
+
+ r300->draw_first_emitted = TRUE;
}
-static void r500_render_draw(struct vbuf_render* render,
- const ushort* indices,
- uint count)
+static void r300_render_draw_elements(struct vbuf_render* render,
+ const ushort* indices,
+ uint count)
{
struct r300_render* r300render = r300_render(render);
struct r300_context* r300 = r300render->r300;
int i;
- unsigned dwords = 2 + (count+1)/2;
+ unsigned end_cs_dwords;
+ unsigned max_index = (r300->draw_vbo_size - r300->draw_vbo_offset) /
+ (r300render->r300->vertex_info.size * 4) - 1;
+ unsigned short_count;
+ unsigned free_dwords;
CS_LOCALS(r300);
+ DBG(r300, DBG_DRAW, "r300: render_draw_elements (count: %d)\n", count);
- r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + dwords);
- r300_emit_buffer_validate(r300, FALSE, NULL);
- r300_emit_dirty_state(r300);
+ if (r300->draw_first_emitted) {
+ if (!r300_prepare_for_rendering(r300,
+ PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL | PREP_INDEXED,
+ NULL, 256, 0, 0))
+ return;
+ } else {
+ if (!r300_emit_states(r300,
+ PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL | PREP_INDEXED,
+ NULL, 0, 0))
+ return;
+ }
- r500_emit_index_offset(r300, 0);
+ /* Below we manage the CS space manually because there may be more
+ * indices than it can fit in CS. */
- BEGIN_CS(dwords);
- OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count+1)/2);
- OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
- r300render->hwprim);
- for (i = 0; i < count-1; i += 2) {
- OUT_CS(indices[i+1] << 16 | indices[i]);
- }
- if (count % 2) {
- OUT_CS(indices[count-1]);
- }
- END_CS;
-}
+ end_cs_dwords = r300_get_num_cs_end_dwords(r300);
-static void r300_render_draw_arrays(struct vbuf_render* render,
- unsigned start,
- unsigned count)
-{
- struct r300_context* r300 = r300_render(render)->r300;
+ while (count) {
+ free_dwords = r300->cs->ndw - r300->cs->cdw;
- if (!r300->stencil_ref_bf_fallback) {
- r500_render_draw_arrays(render, start, count);
- } else {
- r300_begin_stencil_ref_fallback(r300);
- r500_render_draw_arrays(render, start, count);
- r300_switch_stencil_ref_side(r300);
- r500_render_draw_arrays(render, start, count);
- r300_end_stencil_ref_fallback(r300);
- }
-}
+ short_count = MIN2(count, (free_dwords - end_cs_dwords - 6) * 2);
-static void r300_render_draw(struct vbuf_render* render,
- const ushort* indices,
- uint count)
-{
- struct r300_context* r300 = r300_render(render)->r300;
+ BEGIN_CS(6 + (short_count+1)/2);
+ OUT_CS_REG(R300_GA_COLOR_CONTROL,
+ r300_provoking_vertex_fixes(r300, r300render->prim));
+ OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, max_index);
+ OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (short_count+1)/2);
+ OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (short_count << 16) |
+ r300render->hwprim);
+ for (i = 0; i < short_count-1; i += 2) {
+ OUT_CS(indices[i+1] << 16 | indices[i]);
+ }
+ if (short_count % 2) {
+ OUT_CS(indices[short_count-1]);
+ }
+ END_CS;
- if (!r300->stencil_ref_bf_fallback) {
- r500_render_draw(render, indices, count);
- } else {
- r300_begin_stencil_ref_fallback(r300);
- r500_render_draw(render, indices, count);
- r300_switch_stencil_ref_side(r300);
- r500_render_draw(render, indices, count);
- r300_end_stencil_ref_fallback(r300);
+ /* OK now subtract the emitted indices and see if we need to emit
+ * another draw packet. */
+ indices += short_count;
+ count -= short_count;
+
+ if (count) {
+ if (!r300_prepare_for_rendering(r300,
+ PREP_EMIT_AOS_SWTCL | PREP_INDEXED,
+ NULL, 256, 0, 0))
+ return;
+
+ end_cs_dwords = r300_get_num_cs_end_dwords(r300);
+ }
}
+
+ r300->draw_first_emitted = TRUE;
}
static void r300_render_destroy(struct vbuf_render* render)
@@ -997,20 +1040,11 @@ static struct vbuf_render* r300_render_create(struct r300_context* r300)
r300render->base.map_vertices = r300_render_map_vertices;
r300render->base.unmap_vertices = r300_render_unmap_vertices;
r300render->base.set_primitive = r300_render_set_primitive;
- if (r300->screen->caps.is_r500) {
- r300render->base.draw = r500_render_draw;
- r300render->base.draw_arrays = r500_render_draw_arrays;
- } else {
- r300render->base.draw = r300_render_draw;
- r300render->base.draw_arrays = r300_render_draw_arrays;
- }
+ r300render->base.draw_elements = r300_render_draw_elements;
+ r300render->base.draw_arrays = r300_render_draw_arrays;
r300render->base.release_vertices = r300_render_release_vertices;
r300render->base.destroy = r300_render_destroy;
- r300render->vbo = NULL;
- r300render->vbo_size = 0;
- r300render->vbo_offset = 0;
-
return &r300render->base;
}
@@ -1036,3 +1070,154 @@ struct draw_stage* r300_draw_stage(struct r300_context* r300)
return stage;
}
+
+void r300_draw_flush_vbuf(struct r300_context *r300)
+{
+ pipe_resource_reference(&r300->vbo, NULL);
+ r300->draw_vbo_size = 0;
+}
+
+/****************************************************************************
+ * End of SW TCL functions *
+ ***************************************************************************/
+
+/* This functions is used to draw a rectangle for the blitter module.
+ *
+ * If we rendered a quad, the pixels on the main diagonal
+ * would be computed and stored twice, which makes the clear/copy codepaths
+ * somewhat inefficient. Instead we use a rectangular point sprite. */
+static void r300_blitter_draw_rectangle(struct blitter_context *blitter,
+ unsigned x1, unsigned y1,
+ unsigned x2, unsigned y2,
+ float depth,
+ enum blitter_attrib_type type,
+ const float attrib[4])
+{
+ struct r300_context *r300 = r300_context(util_blitter_get_pipe(blitter));
+ unsigned last_sprite_coord_enable = r300->sprite_coord_enable;
+ unsigned width = x2 - x1;
+ unsigned height = y2 - y1;
+ unsigned vertex_size =
+ type == UTIL_BLITTER_ATTRIB_COLOR || !r300->draw ? 8 : 4;
+ unsigned dwords = 13 + vertex_size +
+ (type == UTIL_BLITTER_ATTRIB_TEXCOORD ? 7 : 0);
+ const float zeros[4] = {0, 0, 0, 0};
+ CS_LOCALS(r300);
+
+ if (type == UTIL_BLITTER_ATTRIB_TEXCOORD)
+ r300->sprite_coord_enable = 1;
+
+ r300_update_derived_state(r300);
+
+ /* Mark some states we don't care about as non-dirty. */
+ r300->clip_state.dirty = FALSE;
+ r300->viewport_state.dirty = FALSE;
+
+ if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0))
+ goto done;
+
+ DBG(r300, DBG_DRAW, "r300: draw_rectangle\n");
+
+ BEGIN_CS(dwords);
+ /* Set up GA. */
+ OUT_CS_REG(R300_GA_POINT_SIZE, (height * 6) | ((width * 6) << 16));
+
+ if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) {
+ /* Set up the GA to generate texcoords. */
+ OUT_CS_REG(R300_GB_ENABLE, R300_GB_POINT_STUFF_ENABLE |
+ (R300_GB_TEX_STR << R300_GB_TEX0_SOURCE_SHIFT));
+ OUT_CS_REG_SEQ(R300_GA_POINT_S0, 4);
+ OUT_CS_32F(attrib[0]);
+ OUT_CS_32F(attrib[3]);
+ OUT_CS_32F(attrib[2]);
+ OUT_CS_32F(attrib[1]);
+ }
+
+ /* Set up VAP controls. */
+ OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE);
+ OUT_CS_REG(R300_VAP_VTE_CNTL, R300_VTX_XY_FMT | R300_VTX_Z_FMT);
+ OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size);
+ OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
+ OUT_CS(1);
+ OUT_CS(0);
+
+ /* Draw. */
+ OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, vertex_size);
+ OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (1 << 16) |
+ R300_VAP_VF_CNTL__PRIM_POINTS);
+
+ OUT_CS_32F(x1 + width * 0.5f);
+ OUT_CS_32F(y1 + height * 0.5f);
+ OUT_CS_32F(depth);
+ OUT_CS_32F(1);
+
+ if (vertex_size == 8) {
+ if (!attrib)
+ attrib = zeros;
+ OUT_CS_TABLE(attrib, 4);
+ }
+ END_CS;
+
+done:
+ /* Restore the state. */
+ r300->clip_state.dirty = TRUE;
+ r300->rs_state.dirty = TRUE;
+ r300->viewport_state.dirty = TRUE;
+
+ r300->sprite_coord_enable = last_sprite_coord_enable;
+}
+
+static void r300_resource_resolve(struct pipe_context* pipe,
+ struct pipe_resource* dest,
+ struct pipe_subresource subdest,
+ struct pipe_resource* src,
+ struct pipe_subresource subsrc)
+{
+ struct r300_context* r300 = r300_context(pipe);
+ struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state;
+ struct pipe_surface* srcsurf = src->screen->get_tex_surface(src->screen,
+ src, subsrc.face, subsrc.level, 0, 0);
+ float color[] = {0, 0, 0, 0};
+
+ DBG(r300, DBG_DRAW, "r300: Resolving resource...\n");
+
+ /* Enable AA resolve. */
+ aa->dest = r300_surface(
+ dest->screen->get_tex_surface(dest->screen, dest, subdest.face,
+ subdest.level, 0, 0));
+
+ aa->aaresolve_ctl =
+ R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE |
+ R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE;
+ r300->aa_state.size = 12;
+ r300->aa_state.dirty = TRUE;
+
+ /* Resolve the surface. */
+ r300->context.clear_render_target(pipe,
+ srcsurf, color, 0, 0, src->width0, src->height0);
+
+ /* Disable AA resolve. */
+ aa->aaresolve_ctl = 0;
+ r300->aa_state.size = 4;
+ r300->aa_state.dirty = TRUE;
+
+ pipe_surface_reference((struct pipe_surface**)&srcsurf, NULL);
+ pipe_surface_reference((struct pipe_surface**)&aa->dest, NULL);
+}
+
+void r300_init_render_functions(struct r300_context *r300)
+{
+ /* Set draw functions based on presence of HW TCL. */
+ if (r300->screen->caps.has_tcl) {
+ r300->context.draw_vbo = r300_draw_vbo;
+ } else {
+ r300->context.draw_vbo = r300_swtcl_draw_vbo;
+ }
+
+ r300->context.resource_resolve = r300_resource_resolve;
+ r300->blitter->draw_rectangle = r300_blitter_draw_rectangle;
+
+ /* Plug in the two-sided stencil reference value fallback if needed. */
+ if (!r300->screen->caps.is_r500)
+ r300_plug_in_stencil_ref_fallback(r300);
+}
diff --git a/src/gallium/drivers/r300/r300_render.h b/src/gallium/drivers/r300/r300_render.h
deleted file mode 100644
index 4e78914c1ba..00000000000
--- a/src/gallium/drivers/r300/r300_render.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright 2009 Corbin Simpson <[email protected]>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#ifndef R300_RENDER_H
-#define R300_RENDER_H
-
-void r500_emit_draw_arrays_immediate(struct r300_context *r300,
- unsigned mode,
- unsigned start,
- unsigned count);
-
-void r500_emit_draw_arrays(struct r300_context *r300,
- unsigned mode,
- unsigned count);
-
-void r500_emit_draw_elements(struct r300_context *r300,
- struct pipe_resource* indexBuffer,
- unsigned indexSize,
- int indexBias,
- unsigned minIndex,
- unsigned maxIndex,
- unsigned mode,
- unsigned start,
- unsigned count);
-
-void r300_emit_draw_arrays_immediate(struct r300_context *r300,
- unsigned mode,
- unsigned start,
- unsigned count);
-
-void r300_emit_draw_arrays(struct r300_context *r300,
- unsigned mode,
- unsigned count);
-
-void r300_emit_draw_elements(struct r300_context *r300,
- struct pipe_resource* indexBuffer,
- unsigned indexSize,
- int indexBias,
- unsigned minIndex,
- unsigned maxIndex,
- unsigned mode,
- unsigned start,
- unsigned count);
-
-void r300_draw_range_elements(struct pipe_context* pipe,
- struct pipe_resource* indexBuffer,
- unsigned indexSize,
- int indexBias,
- unsigned minIndex,
- unsigned maxIndex,
- unsigned mode,
- unsigned start,
- unsigned count);
-
-void r300_draw_elements(struct pipe_context* pipe,
- struct pipe_resource* indexBuffer,
- unsigned indexSize, int indexBias, unsigned mode,
- unsigned start, unsigned count);
-
-void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
- unsigned start, unsigned count);
-
-void r300_swtcl_draw_arrays(struct pipe_context* pipe,
- unsigned mode,
- unsigned start,
- unsigned count);
-
-void r300_swtcl_draw_range_elements(struct pipe_context* pipe,
- struct pipe_resource* indexBuffer,
- unsigned indexSize,
- int indexBias,
- unsigned minIndex,
- unsigned maxIndex,
- unsigned mode,
- unsigned start,
- unsigned count);
-
-#endif /* R300_RENDER_H */
diff --git a/src/gallium/drivers/r300/r300_render_stencilref.c b/src/gallium/drivers/r300/r300_render_stencilref.c
new file mode 100644
index 00000000000..1f035d64a28
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_render_stencilref.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright 2010 Marek Olšák <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+/**
+ * The two-sided stencil reference value fallback for r3xx-r4xx chips.
+ * These chips support two-sided stencil functions but they do not support
+ * a two-sided reference value.
+ *
+ * The functions below split every draw call which uses the two-sided
+ * reference value into two draw calls -- the first one renders front faces
+ * and the second renders back faces with the other reference value.
+ */
+
+#include "r300_context.h"
+#include "r300_reg.h"
+
+struct r300_stencilref_context {
+ void (*draw_vbo)(struct pipe_context *pipe,
+ const struct pipe_draw_info *info);
+
+ uint32_t rs_cull_mode;
+ uint32_t zb_stencilrefmask;
+ ubyte ref_value_front;
+};
+
+static boolean r300_stencilref_needed(struct r300_context *r300)
+{
+ struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state;
+
+ return dsa->two_sided_stencil_ref ||
+ (dsa->two_sided &&
+ r300->stencil_ref.ref_value[0] != r300->stencil_ref.ref_value[1]);
+}
+
+/* Set drawing for front faces. */
+static void r300_stencilref_begin(struct r300_context *r300)
+{
+ struct r300_stencilref_context *sr = r300->stencilref_fallback;
+ struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state;
+ struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state;
+
+ /* Save state. */
+ sr->rs_cull_mode = rs->cb_main[rs->cull_mode_index];
+ sr->zb_stencilrefmask = dsa->stencil_ref_mask;
+ sr->ref_value_front = r300->stencil_ref.ref_value[0];
+
+ /* We *cull* pixels, therefore no need to mask out the bits. */
+ rs->cb_main[rs->cull_mode_index] |= R300_CULL_BACK;
+
+ r300->rs_state.dirty = TRUE;
+}
+
+/* Set drawing for back faces. */
+static void r300_stencilref_switch_side(struct r300_context *r300)
+{
+ struct r300_stencilref_context *sr = r300->stencilref_fallback;
+ struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state;
+ struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state;
+
+ rs->cb_main[rs->cull_mode_index] = sr->rs_cull_mode | R300_CULL_FRONT;
+ dsa->stencil_ref_mask = dsa->stencil_ref_bf;
+ r300->stencil_ref.ref_value[0] = r300->stencil_ref.ref_value[1];
+
+ r300->rs_state.dirty = TRUE;
+ r300->dsa_state.dirty = TRUE;
+}
+
+/* Restore the original state. */
+static void r300_stencilref_end(struct r300_context *r300)
+{
+ struct r300_stencilref_context *sr = r300->stencilref_fallback;
+ struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state;
+ struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state;
+
+ /* Restore state. */
+ rs->cb_main[rs->cull_mode_index] = sr->rs_cull_mode;
+ dsa->stencil_ref_mask = sr->zb_stencilrefmask;
+ r300->stencil_ref.ref_value[0] = sr->ref_value_front;
+
+ r300->rs_state.dirty = TRUE;
+ r300->dsa_state.dirty = TRUE;
+}
+
+static void r300_stencilref_draw_vbo(struct pipe_context *pipe,
+ const struct pipe_draw_info *info)
+{
+ struct r300_context *r300 = r300_context(pipe);
+ struct r300_stencilref_context *sr = r300->stencilref_fallback;
+
+ if (!r300_stencilref_needed(r300)) {
+ sr->draw_vbo(pipe, info);
+ } else {
+ r300_stencilref_begin(r300);
+ sr->draw_vbo(pipe, info);
+ r300_stencilref_switch_side(r300);
+ sr->draw_vbo(pipe, info);
+ r300_stencilref_end(r300);
+ }
+}
+
+void r300_plug_in_stencil_ref_fallback(struct r300_context *r300)
+{
+ r300->stencilref_fallback = CALLOC_STRUCT(r300_stencilref_context);
+
+ /* Save original draw function. */
+ r300->stencilref_fallback->draw_vbo = r300->context.draw_vbo;
+
+ /* Override the draw function. */
+ r300->context.draw_vbo = r300_stencilref_draw_vbo;
+}
diff --git a/src/gallium/drivers/r300/r300_render_translate.c b/src/gallium/drivers/r300/r300_render_translate.c
new file mode 100644
index 00000000000..9247064508f
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_render_translate.c
@@ -0,0 +1,218 @@
+/*
+ * Copyright 2010 Marek Olšák <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+/**
+ * The functions below translate vertex and index buffers to the layout
+ * compatible with the hardware, so that all vertex and index fetches are
+ * DWORD-aligned and all used vertex and index formats are supported.
+ * For indices, an optional index offset is added to each index.
+ */
+
+#include "r300_context.h"
+#include "translate/translate.h"
+#include "util/u_index_modify.h"
+
+void r300_begin_vertex_translate(struct r300_context *r300)
+{
+ struct pipe_context *pipe = &r300->context;
+ struct translate_key key = {0};
+ struct translate_element *te;
+ unsigned tr_elem_index[PIPE_MAX_ATTRIBS] = {0};
+ struct translate *tr;
+ struct r300_vertex_element_state *ve = r300->velems;
+ boolean vb_translated[PIPE_MAX_ATTRIBS] = {0};
+ void *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map;
+ struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer;
+ struct pipe_resource *out_buffer;
+ unsigned i, num_verts;
+
+ /* Initialize the translate key, i.e. the recipe how vertices should be
+ * translated. */
+ for (i = 0; i < ve->count; i++) {
+ struct pipe_vertex_buffer *vb =
+ &r300->vertex_buffer[ve->velem[i].vertex_buffer_index];
+ enum pipe_format output_format = ve->hw_format[i];
+ unsigned output_format_size = ve->hw_format_size[i];
+
+ /* Check for support. */
+ if (ve->velem[i].src_format == ve->hw_format[i] &&
+ (vb->buffer_offset + ve->velem[i].src_offset) % 4 == 0 &&
+ vb->stride % 4 == 0) {
+ continue;
+ }
+
+ /* Workaround for translate: output floats instead of halfs. */
+ switch (output_format) {
+ case PIPE_FORMAT_R16_FLOAT:
+ output_format = PIPE_FORMAT_R32_FLOAT;
+ output_format_size = 4;
+ break;
+ case PIPE_FORMAT_R16G16_FLOAT:
+ output_format = PIPE_FORMAT_R32G32_FLOAT;
+ output_format_size = 8;
+ break;
+ case PIPE_FORMAT_R16G16B16_FLOAT:
+ output_format = PIPE_FORMAT_R32G32B32_FLOAT;
+ output_format_size = 12;
+ break;
+ case PIPE_FORMAT_R16G16B16A16_FLOAT:
+ output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ output_format_size = 16;
+ break;
+ default:;
+ }
+
+ /* Add this vertex element. */
+ te = &key.element[key.nr_elements];
+ /*te->type;
+ te->instance_divisor;*/
+ te->input_buffer = ve->velem[i].vertex_buffer_index;
+ te->input_format = ve->velem[i].src_format;
+ te->input_offset = vb->buffer_offset + ve->velem[i].src_offset;
+ te->output_format = output_format;
+ te->output_offset = key.output_stride;
+
+ key.output_stride += output_format_size;
+ vb_translated[ve->velem[i].vertex_buffer_index] = TRUE;
+ tr_elem_index[i] = key.nr_elements;
+ key.nr_elements++;
+ }
+
+ /* Get a translate object. */
+ tr = translate_cache_find(r300->tran.translate_cache, &key);
+
+ /* Map buffers we want to translate. */
+ for (i = 0; i < r300->vertex_buffer_count; i++) {
+ if (vb_translated[i]) {
+ struct pipe_vertex_buffer *vb = &r300->vertex_buffer[i];
+
+ vb_map[i] = pipe_buffer_map(pipe, vb->buffer,
+ PIPE_TRANSFER_READ, &vb_transfer[i]);
+
+ tr->set_buffer(tr, i, vb_map[i], vb->stride, vb->max_index);
+ }
+ }
+
+ /* Create and map the output buffer. */
+ num_verts = r300->vertex_buffer_max_index + 1;
+
+ out_buffer = pipe_buffer_create(&r300->screen->screen,
+ PIPE_BIND_VERTEX_BUFFER,
+ key.output_stride * num_verts);
+
+ out_map = pipe_buffer_map(pipe, out_buffer, PIPE_TRANSFER_WRITE,
+ &out_transfer);
+
+ /* Translate. */
+ tr->run(tr, 0, num_verts, 0, out_map);
+
+ /* Unmap all buffers. */
+ for (i = 0; i < r300->vertex_buffer_count; i++) {
+ if (vb_translated[i]) {
+ pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer,
+ vb_transfer[i]);
+ }
+ }
+
+ pipe_buffer_unmap(pipe, out_buffer, out_transfer);
+
+ /* Setup the new vertex buffer in the first free slot. */
+ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
+ struct pipe_vertex_buffer *vb = &r300->vertex_buffer[i];
+
+ if (!vb->buffer) {
+ pipe_resource_reference(&vb->buffer, out_buffer);
+ vb->buffer_offset = 0;
+ vb->max_index = num_verts - 1;
+ vb->stride = key.output_stride;
+ r300->tran.vb_slot = i;
+ break;
+ }
+ }
+
+ /* Save and replace vertex elements. */
+ {
+ struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS];
+
+ r300->tran.saved_velems = r300->velems;
+
+ for (i = 0; i < ve->count; i++) {
+ if (vb_translated[ve->velem[i].vertex_buffer_index]) {
+ te = &key.element[tr_elem_index[i]];
+ new_velems[i].instance_divisor = ve->velem[i].instance_divisor;
+ new_velems[i].src_format = te->output_format;
+ new_velems[i].src_offset = te->output_offset;
+ new_velems[i].vertex_buffer_index = r300->tran.vb_slot;
+ } else {
+ memcpy(&new_velems[i], &ve->velem[i],
+ sizeof(struct pipe_vertex_element));
+ }
+ }
+
+ r300->tran.new_velems =
+ pipe->create_vertex_elements_state(pipe, ve->count, new_velems);
+ pipe->bind_vertex_elements_state(pipe, r300->tran.new_velems);
+ }
+
+ pipe_resource_reference(&out_buffer, NULL);
+}
+
+void r300_end_vertex_translate(struct r300_context *r300)
+{
+ struct pipe_context *pipe = &r300->context;
+
+ /* Restore vertex elements. */
+ pipe->bind_vertex_elements_state(pipe, r300->tran.saved_velems);
+ pipe->delete_vertex_elements_state(pipe, r300->tran.new_velems);
+
+ /* Delete the now-unused VBO. */
+ pipe_resource_reference(&r300->vertex_buffer[r300->tran.vb_slot].buffer,
+ NULL);
+}
+
+void r300_translate_index_buffer(struct r300_context *r300,
+ struct pipe_resource **index_buffer,
+ unsigned *index_size, unsigned index_offset,
+ unsigned *start, unsigned count)
+{
+ switch (*index_size) {
+ case 1:
+ util_shorten_ubyte_elts(&r300->context, index_buffer, index_offset, *start, count);
+ *index_size = 2;
+ *start = 0;
+ break;
+
+ case 2:
+ if (*start % 2 != 0 || index_offset) {
+ util_rebuild_ushort_elts(&r300->context, index_buffer, index_offset, *start, count);
+ *start = 0;
+ }
+ break;
+
+ case 4:
+ if (index_offset) {
+ util_rebuild_uint_elts(&r300->context, index_buffer, index_offset, *start, count);
+ *start = 0;
+ }
+ break;
+ }
+}
diff --git a/src/gallium/drivers/r300/r300_resource.c b/src/gallium/drivers/r300/r300_resource.c
index 7ed47eaa605..f6f33028dc6 100644
--- a/src/gallium/drivers/r300/r300_resource.c
+++ b/src/gallium/drivers/r300/r300_resource.c
@@ -23,19 +23,9 @@
* Authors: Dave Airlie
*/
-#include <stdio.h>
-
-#include "util/u_inlines.h"
-#include "util/u_format.h"
-#include "util/u_memory.h"
-#include "util/u_upload_mgr.h"
-#include "util/u_math.h"
-
#include "r300_context.h"
#include "r300_texture.h"
-#include "r300_screen.h"
#include "r300_screen_buffer.h"
-#include "r300_winsys.h"
static struct pipe_resource *
r300_resource_create(struct pipe_screen *screen,
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 8fc1d5aa00e..7f41ff0e2ec 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -22,12 +22,15 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
#include "util/u_format.h"
+#include "util/u_format_s3tc.h"
#include "util/u_memory.h"
#include "r300_context.h"
#include "r300_texture.h"
#include "r300_screen_buffer.h"
+#include "r300_state_inlines.h"
#include "r300_winsys.h"
+#include "r300_public.h"
/* Return the identifier behind whom the brave coders responsible for this
* amalgamation of code, sweat, and duct tape, routinely obscure their names.
@@ -75,20 +78,19 @@ static const char* r300_get_name(struct pipe_screen* pscreen)
return chip_families[r300screen->caps.family];
}
-static int r300_get_param(struct pipe_screen* pscreen, int param)
+static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
{
struct r300_screen* r300screen = r300_screen(pscreen);
+ boolean is_r400 = r300screen->caps.is_r400;
+ boolean is_r500 = r300screen->caps.is_r500;
+
+ /* XXX extended shader capabilities of r400 unimplemented */
+ is_r400 = FALSE;
switch (param) {
- case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
- case PIPE_CAP_MAX_COMBINED_SAMPLERS:
- return r300screen->caps.num_tex_units;
+ /* Supported features (boolean caps). */
case PIPE_CAP_NPOT_TEXTURES:
- /* XXX enable now to get GL2.1 API,
- * figure out later how to emulate this */
- return 1;
case PIPE_CAP_TWO_SIDED_STENCIL:
- return 1;
case PIPE_CAP_GLSL:
/* I'll be frank. This is a lie.
*
@@ -105,53 +107,45 @@ static int r300_get_param(struct pipe_screen* pscreen, int param)
*
* ~ C.
*/
- return 1;
- case PIPE_CAP_DUAL_SOURCE_BLEND:
- return 0;
case PIPE_CAP_ANISOTROPIC_FILTER:
- return 1;
case PIPE_CAP_POINT_SPRITE:
- return 1;
- case PIPE_CAP_MAX_RENDER_TARGETS:
- return 4;
case PIPE_CAP_OCCLUSION_QUERY:
- return 1;
case PIPE_CAP_TEXTURE_SHADOW_MAP:
- return 1;
- case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
- case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
- case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
- if (r300screen->caps.is_r500) {
- /* 13 == 4096 */
- return 13;
- } else {
- /* 12 == 2048 */
- return 12;
- }
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
- return 1;
case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
- return 1;
- case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
- return 0;
- case PIPE_CAP_TGSI_CONT_SUPPORTED:
- return 0;
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+ case PIPE_CAP_TEXTURE_SWIZZLE:
return 1;
- case PIPE_CAP_SM3:
- if (r300screen->caps.is_r500) {
- return 1;
- } else {
- return 0;
- }
- case PIPE_CAP_MAX_CONST_BUFFERS:
- return 1;
- case PIPE_CAP_MAX_CONST_BUFFER_SIZE:
- return 256;
+
+ /* Unsupported features (boolean caps). */
+ case PIPE_CAP_TIMER_QUERY:
+ case PIPE_CAP_DUAL_SOURCE_BLEND:
case PIPE_CAP_INDEP_BLEND_ENABLE:
- return 0;
case PIPE_CAP_INDEP_BLEND_FUNC:
+ case PIPE_CAP_DEPTH_CLAMP: /* XXX implemented, but breaks Regnum Online */
+ case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
+ return 0;
+
+ /* Texturing. */
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+ return r300screen->caps.num_tex_units;
+ case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
return 0;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ /* 13 == 4096, 12 == 2048 */
+ return is_r500 ? 13 : 12;
+
+ /* Render targets. */
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 4;
+
+ /* General shader limits and features. */
+ case PIPE_CAP_SM3:
+ return is_r500 ? 1 : 0;
+ /* Fragment coordinate conventions. */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
return 1;
@@ -165,7 +159,89 @@ static int r300_get_param(struct pipe_screen* pscreen, int param)
}
}
-static float r300_get_paramf(struct pipe_screen* pscreen, int param)
+static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, enum pipe_shader_cap param)
+{
+ struct r300_screen* r300screen = r300_screen(pscreen);
+ boolean is_r400 = r300screen->caps.is_r400;
+ boolean is_r500 = r300screen->caps.is_r500;
+
+ /* XXX extended shader capabilities of r400 unimplemented */
+ is_r400 = FALSE;
+
+ switch (shader)
+ {
+ case PIPE_SHADER_FRAGMENT:
+ switch (param)
+ {
+ case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+ return is_r500 || is_r400 ? 512 : 96;
+ case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+ return is_r500 || is_r400 ? 512 : 64;
+ case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+ return is_r500 || is_r400 ? 512 : 32;
+ case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+ return is_r500 ? 511 : 4;
+ case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+ return is_r500 ? 64 : 0; /* Actually unlimited on r500. */
+ /* Fragment shader limits. */
+ case PIPE_SHADER_CAP_MAX_INPUTS:
+ /* 2 colors + 8 texcoords are always supported
+ * (minus fog and wpos).
+ *
+ * R500 has the ability to turn 3rd and 4th color into
+ * additional texcoords but there is no two-sided color
+ * selection then. However the facing bit can be used instead. */
+ return 10;
+ case PIPE_SHADER_CAP_MAX_CONSTS:
+ return is_r500 ? 256 : 32;
+ case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+ return 1;
+ case PIPE_SHADER_CAP_MAX_TEMPS:
+ return is_r500 ? 128 : is_r400 ? 64 : 32;
+ case PIPE_SHADER_CAP_MAX_ADDRS:
+ return 0;
+ case PIPE_SHADER_CAP_MAX_PREDS:
+ return is_r500 ? 1 : 0;
+ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+ return 1;
+ }
+ break;
+ case PIPE_SHADER_VERTEX:
+ switch (param)
+ {
+ case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+ return is_r500 ? 1024 : 256;
+ case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+ return 0;
+ case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+ return is_r500 ? 4 : 0; /* For loops; not sure about conditionals. */
+ case PIPE_SHADER_CAP_MAX_INPUTS:
+ return 16;
+ case PIPE_SHADER_CAP_MAX_CONSTS:
+ return 256;
+ case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+ return 1;
+ case PIPE_SHADER_CAP_MAX_TEMPS:
+ return 32;
+ case PIPE_SHADER_CAP_MAX_ADDRS:
+ return 1; /* XXX guessed */
+ case PIPE_SHADER_CAP_MAX_PREDS:
+ return is_r500 ? 4 : 0; /* XXX guessed. */
+ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+ return 1;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static float r300_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param)
{
struct r300_screen* r300screen = r300_screen(pscreen);
@@ -197,14 +273,13 @@ static float r300_get_paramf(struct pipe_screen* pscreen, int param)
static boolean r300_is_format_supported(struct pipe_screen* screen,
enum pipe_format format,
enum pipe_texture_target target,
+ unsigned sample_count,
unsigned usage,
unsigned geom_flags)
{
uint32_t retval = 0;
boolean is_r500 = r300_screen(screen)->caps.is_r500;
boolean is_r400 = r300_screen(screen)->caps.is_r400;
- boolean is_z24 = format == PIPE_FORMAT_X8Z24_UNORM ||
- format == PIPE_FORMAT_S8_USCALED_Z24_UNORM;
boolean is_color2101010 = format == PIPE_FORMAT_R10G10B10A2_UNORM ||
format == PIPE_FORMAT_R10G10B10X2_SNORM ||
format == PIPE_FORMAT_B10G10R10A2_UNORM ||
@@ -213,17 +288,35 @@ static boolean r300_is_format_supported(struct pipe_screen* screen,
format == PIPE_FORMAT_RGTC1_SNORM;
boolean is_ati2n = format == PIPE_FORMAT_RGTC2_UNORM ||
format == PIPE_FORMAT_RGTC2_SNORM;
+ boolean is_half_float = format == PIPE_FORMAT_R16_FLOAT ||
+ format == PIPE_FORMAT_R16G16_FLOAT ||
+ format == PIPE_FORMAT_R16G16B16_FLOAT ||
+ format == PIPE_FORMAT_R16G16B16A16_FLOAT;
- if (target >= PIPE_MAX_TEXTURE_TYPES) {
- fprintf(stderr, "r300: Implementation error: Received bogus texture "
- "target %d in %s\n", target, __FUNCTION__);
- return FALSE;
+ /* Check multisampling support. */
+ switch (sample_count) {
+ case 0:
+ case 1:
+ break;
+ case 2:
+ case 3:
+ case 4:
+ case 6:
+ return FALSE;
+#if 0
+ if (usage != PIPE_BIND_RENDER_TARGET ||
+ !util_format_is_rgba8_variant(
+ util_format_description(format))) {
+ return FALSE;
+ }
+#endif
+ break;
+ default:
+ return FALSE;
}
/* Check sampler format support. */
if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
- /* Z24 cannot be sampled from on non-r5xx. */
- (is_r500 || !is_z24) &&
/* ATI1N is r5xx-only. */
(is_r500 || !is_ati1n) &&
/* ATI2N is supported on r4xx-r5xx. */
@@ -253,6 +346,20 @@ static boolean r300_is_format_supported(struct pipe_screen* screen,
retval |= PIPE_BIND_DEPTH_STENCIL;
}
+ /* Check vertex buffer format support. */
+ if (usage & PIPE_BIND_VERTEX_BUFFER &&
+ /* Half float is supported on >= RV350. */
+ (is_r400 || is_r500 || !is_half_float) &&
+ r300_translate_vertex_data_type(format) != R300_INVALID_FORMAT) {
+ retval |= PIPE_BIND_VERTEX_BUFFER;
+ }
+
+ /* Transfers are always supported. */
+ if (usage & PIPE_BIND_TRANSFER_READ)
+ retval |= PIPE_BIND_TRANSFER_READ;
+ if (usage & PIPE_BIND_TRANSFER_WRITE)
+ retval |= PIPE_BIND_TRANSFER_WRITE;
+
return retval == usage;
}
@@ -261,6 +368,8 @@ static void r300_destroy_screen(struct pipe_screen* pscreen)
struct r300_screen* r300screen = r300_screen(pscreen);
struct r300_winsys_screen *rws = r300_winsys_screen(pscreen);
+ util_mempool_destroy(&r300screen->pool_buffers);
+
if (rws)
rws->destroy(rws);
@@ -271,23 +380,36 @@ static void r300_fence_reference(struct pipe_screen *screen,
struct pipe_fence_handle **ptr,
struct pipe_fence_handle *fence)
{
+ struct r300_fence **oldf = (struct r300_fence**)ptr;
+ struct r300_fence *newf = (struct r300_fence*)fence;
+
+ if (pipe_reference(&(*oldf)->reference, &newf->reference))
+ FREE(*oldf);
+
+ *ptr = fence;
}
static int r300_fence_signalled(struct pipe_screen *screen,
struct pipe_fence_handle *fence,
unsigned flags)
{
- return 0;
+ struct r300_fence *rfence = (struct r300_fence*)fence;
+
+ return rfence->signalled ? 0 : 1; /* 0 == success */
}
static int r300_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *fence,
unsigned flags)
{
- return 0;
+ struct r300_fence *rfence = (struct r300_fence*)fence;
+
+ r300_finish(rfence->ctx);
+ rfence->signalled = TRUE;
+ return 0; /* 0 == success */
}
-struct pipe_screen* r300_create_screen(struct r300_winsys_screen *rws)
+struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws)
{
struct r300_screen *r300screen = CALLOC_STRUCT(r300_screen);
@@ -303,12 +425,17 @@ struct pipe_screen* r300_create_screen(struct r300_winsys_screen *rws)
r300_init_debug(r300screen);
r300_parse_chipset(&r300screen->caps);
+ util_mempool_create(&r300screen->pool_buffers,
+ sizeof(struct r300_buffer), 64,
+ UTIL_MEMPOOL_SINGLETHREADED);
+
r300screen->rws = rws;
r300screen->screen.winsys = (struct pipe_winsys*)rws;
r300screen->screen.destroy = r300_destroy_screen;
r300screen->screen.get_name = r300_get_name;
r300screen->screen.get_vendor = r300_get_vendor;
r300screen->screen.get_param = r300_get_param;
+ r300screen->screen.get_shader_param = r300_get_shader_param;
r300screen->screen.get_paramf = r300_get_paramf;
r300screen->screen.is_format_supported = r300_is_format_supported;
r300screen->screen.context_create = r300_create_context;
@@ -319,11 +446,7 @@ struct pipe_screen* r300_create_screen(struct r300_winsys_screen *rws)
r300_init_screen_resource_functions(r300screen);
- return &r300screen->screen;
-}
+ util_format_s3tc_init();
-struct r300_winsys_screen *
-r300_winsys_screen(struct pipe_screen *screen)
-{
- return r300_screen(screen)->rws;
+ return &r300screen->screen;
}
diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h
index 70ea2fe8d92..dc2bc7e8279 100644
--- a/src/gallium/drivers/r300/r300_screen.h
+++ b/src/gallium/drivers/r300/r300_screen.h
@@ -28,8 +28,12 @@
#include "r300_chipset.h"
+#include "util/u_mempool.h"
+
#include <stdio.h>
+struct r300_winsys_screen;
+
struct r300_screen {
/* Parent class */
struct pipe_screen screen;
@@ -39,16 +43,28 @@ struct r300_screen {
/* Chipset capabilities */
struct r300_capabilities caps;
+ /* Memory pools. */
+ struct util_mempool pool_buffers;
+
/** Combination of DBG_xxx flags */
unsigned debug;
+
+ /* The number of created contexts to know whether we have multiple
+ * contexts or not. */
+ int num_contexts;
};
-/* Convenience cast wrapper. */
+/* Convenience cast wrappers. */
static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) {
return (struct r300_screen*)screen;
}
+static INLINE struct r300_winsys_screen *
+r300_winsys_screen(struct pipe_screen *screen) {
+ return r300_screen(screen)->rws;
+}
+
/* Debug functionality. */
/**
@@ -61,17 +77,31 @@ static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) {
* those changes.
*/
/*@{*/
-#define DBG_HELP 0x0000001
-#define DBG_FP 0x0000002
-#define DBG_VP 0x0000004
-#define DBG_CS 0x0000008
-#define DBG_DRAW 0x0000010
-#define DBG_TEX 0x0000020
-#define DBG_FALL 0x0000040
-#define DBG_ANISOHQ 0x0000080
-#define DBG_NO_TILING 0x0000100
-#define DBG_NO_IMMD 0x0000200
-#define DBG_STATS 0x0000400
+
+/* Logging. */
+#define DBG_PSC (1 << 0)
+#define DBG_FP (1 << 1)
+#define DBG_VP (1 << 2)
+#define DBG_SWTCL (1 << 3)
+#define DBG_DRAW (1 << 4)
+#define DBG_TEX (1 << 5)
+#define DBG_TEXALLOC (1 << 6)
+#define DBG_RS (1 << 7)
+#define DBG_FALL (1 << 8)
+#define DBG_FB (1 << 9)
+#define DBG_RS_BLOCK (1 << 10)
+#define DBG_CBZB (1 << 11)
+#define DBG_HYPERZ (1 << 12)
+#define DBG_SCISSOR (1 << 13)
+/* Features. */
+#define DBG_ANISOHQ (1 << 16)
+#define DBG_NO_TILING (1 << 17)
+#define DBG_NO_IMMD (1 << 18)
+#define DBG_FAKE_OCC (1 << 19)
+#define DBG_NO_OPT (1 << 20)
+#define DBG_NO_CBZB (1 << 21)
+/* Statistics. */
+#define DBG_STATS (1 << 24)
/*@}*/
static INLINE boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags)
diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c
index 3e2b5afe6f4..37a080ba48b 100644
--- a/src/gallium/drivers/r300/r300_screen_buffer.c
+++ b/src/gallium/drivers/r300/r300_screen_buffer.c
@@ -43,7 +43,7 @@ unsigned r300_buffer_is_referenced(struct pipe_context *context,
if (r300_buffer_is_user_buffer(buf))
return PIPE_UNREFERENCED;
- if (r300->rws->is_buffer_referenced(r300->rws, rbuf->buf, domain))
+ if (r300->rws->cs_is_buffer_referenced(r300->cs, rbuf->buf, domain))
return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
return PIPE_UNREFERENCED;
@@ -62,7 +62,8 @@ int r300_upload_index_buffer(struct r300_context *r300,
struct pipe_resource **index_buffer,
unsigned index_size,
unsigned start,
- unsigned count)
+ unsigned count,
+ unsigned *out_offset)
{
struct pipe_resource *upload_buffer = NULL;
unsigned index_offset = start * index_size;
@@ -79,7 +80,10 @@ int r300_upload_index_buffer(struct r300_context *r300,
goto done;
}
*index_buffer = upload_buffer;
- }
+ *out_offset = index_offset / index_size;
+ } else
+ *out_offset = start;
+
done:
// if (upload_buffer)
// pipe_resource_reference(&upload_buffer, NULL);
@@ -93,67 +97,85 @@ int r300_upload_user_buffers(struct r300_context *r300)
enum pipe_error ret = PIPE_OK;
int i, nr;
- nr = r300->vertex_buffer_count;
+ nr = r300->velems->count;
for (i = 0; i < nr; i++) {
- if (r300_buffer_is_user_buffer(r300->vertex_buffer[i].buffer)) {
- struct pipe_resource *upload_buffer = NULL;
- unsigned offset = 0; /*r300->vertex_buffer[i].buffer_offset * 4;*/
- unsigned size = r300->vertex_buffer[i].buffer->width0;
- unsigned upload_offset;
- ret = u_upload_buffer(r300->upload_vb,
- offset, size,
- r300->vertex_buffer[i].buffer,
- &upload_offset, &upload_buffer);
- if (ret)
- return ret;
-
- pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL);
- r300->vertex_buffer[i].buffer = upload_buffer;
- r300->vertex_buffer[i].buffer_offset = upload_offset;
- }
+ struct pipe_vertex_buffer *vb =
+ &r300->vertex_buffer[r300->velems->velem[i].vertex_buffer_index];
+
+ if (r300_buffer_is_user_buffer(vb->buffer)) {
+ struct pipe_resource *upload_buffer = NULL;
+ unsigned offset = 0; /*vb->buffer_offset * 4;*/
+ unsigned size = vb->buffer->width0;
+ unsigned upload_offset;
+ ret = u_upload_buffer(r300->upload_vb,
+ offset, size,
+ vb->buffer,
+ &upload_offset, &upload_buffer);
+ if (ret)
+ return ret;
+
+ pipe_resource_reference(&vb->buffer, NULL);
+ vb->buffer = upload_buffer;
+ vb->buffer_offset = upload_offset;
+ }
}
return ret;
}
-static struct r300_winsys_buffer *
-r300_winsys_buffer_create(struct r300_screen *r300screen,
- unsigned alignment,
- unsigned usage,
- unsigned size)
+static void r300_buffer_destroy(struct pipe_screen *screen,
+ struct pipe_resource *buf)
{
+ struct r300_screen *r300screen = r300_screen(screen);
+ struct r300_buffer *rbuf = r300_buffer(buf);
struct r300_winsys_screen *rws = r300screen->rws;
- struct r300_winsys_buffer *buf;
- buf = rws->buffer_create(rws, alignment, usage, size);
- return buf;
-}
+ if (rbuf->constant_buffer)
+ FREE(rbuf->constant_buffer);
-static void r300_winsys_buffer_destroy(struct r300_screen *r300screen,
- struct r300_buffer *rbuf)
-{
- struct r300_winsys_screen *rws = r300screen->rws;
+ if (rbuf->buf)
+ rws->buffer_reference(rws, &rbuf->buf, NULL);
- if (rbuf->buf) {
- rws->buffer_reference(rws, &rbuf->buf, NULL);
- rbuf->buf = NULL;
- }
+ util_mempool_free(&r300screen->pool_buffers, rbuf);
}
-static void r300_buffer_destroy(struct pipe_screen *screen,
- struct pipe_resource *buf)
+static struct pipe_transfer*
+r300_default_get_transfer(struct pipe_context *context,
+ struct pipe_resource *resource,
+ struct pipe_subresource sr,
+ unsigned usage,
+ const struct pipe_box *box)
{
- struct r300_screen *r300screen = r300_screen(screen);
- struct r300_buffer *rbuf = r300_buffer(buf);
+ struct r300_context *r300 = r300_context(context);
+ struct pipe_transfer *transfer =
+ util_mempool_malloc(&r300->pool_transfers);
+
+ transfer->resource = resource;
+ transfer->sr = sr;
+ transfer->usage = usage;
+ transfer->box = *box;
+ transfer->stride = 0;
+ transfer->slice_stride = 0;
+ transfer->data = NULL;
+
+ /* Note strides are zero, this is ok for buffers, but not for
+ * textures 2d & higher at least.
+ */
+ return transfer;
+}
- r300_winsys_buffer_destroy(r300screen, rbuf);
- FREE(rbuf);
+static void r300_default_transfer_destroy(struct pipe_context *pipe,
+ struct pipe_transfer *transfer)
+{
+ struct r300_context *r300 = r300_context(pipe);
+ util_mempool_free(&r300->pool_transfers, transfer);
}
static void *
r300_buffer_transfer_map( struct pipe_context *pipe,
struct pipe_transfer *transfer )
{
+ struct r300_context *r300 = r300_context(pipe);
struct r300_screen *r300screen = r300_screen(pipe->screen);
struct r300_winsys_screen *rws = r300screen->rws;
struct r300_buffer *rbuf = r300_buffer(transfer->resource);
@@ -163,10 +185,8 @@ r300_buffer_transfer_map( struct pipe_context *pipe,
if (rbuf->user_buffer)
return (uint8_t *) rbuf->user_buffer + transfer->box.x;
-
- if (rbuf->b.b.bind & PIPE_BIND_CONSTANT_BUFFER) {
- goto just_map;
- }
+ if (rbuf->constant_buffer)
+ return (uint8_t *) rbuf->constant_buffer + transfer->box.x;
/* check if the mapping is to a range we already flushed */
if (transfer->usage & PIPE_TRANSFER_DISCARD) {
@@ -180,16 +200,18 @@ r300_buffer_transfer_map( struct pipe_context *pipe,
rws->buffer_reference(rws, &rbuf->buf, NULL);
rbuf->num_ranges = 0;
- rbuf->buf = r300_winsys_buffer_create(r300screen,
- 16,
- rbuf->b.b.bind, /* XXX */
- rbuf->b.b.width0);
+ rbuf->buf =
+ r300screen->rws->buffer_create(r300screen->rws,
+ rbuf->b.b.width0, 16,
+ rbuf->b.b.bind,
+ rbuf->b.b.usage,
+ rbuf->domain);
break;
}
}
}
-just_map:
- map = rws->buffer_map(rws, rbuf->buf, transfer->usage);
+
+ map = rws->buffer_map(rws, rbuf->buf, r300->cs, transfer->usage);
if (map == NULL)
return NULL;
@@ -214,9 +236,8 @@ static void r300_buffer_transfer_flush_region( struct pipe_context *pipe,
if (rbuf->user_buffer)
return;
-
- if (rbuf->b.b.bind & PIPE_BIND_CONSTANT_BUFFER)
- return;
+ if (rbuf->constant_buffer)
+ return;
/* mark the range as used */
for(i = 0; i < rbuf->num_ranges; ++i) {
@@ -247,14 +268,14 @@ static void r300_buffer_transfer_unmap( struct pipe_context *pipe,
struct u_resource_vtbl r300_buffer_vtbl =
{
u_default_resource_get_handle, /* get_handle */
- r300_buffer_destroy, /* resource_destroy */
- r300_buffer_is_referenced_by_cs, /* is_buffer_referenced */
- u_default_get_transfer, /* get_transfer */
- u_default_transfer_destroy, /* transfer_destroy */
- r300_buffer_transfer_map, /* transfer_map */
+ r300_buffer_destroy, /* resource_destroy */
+ r300_buffer_is_referenced_by_cs, /* is_buffer_referenced */
+ r300_default_get_transfer, /* get_transfer */
+ r300_default_transfer_destroy, /* transfer_destroy */
+ r300_buffer_transfer_map, /* transfer_map */
r300_buffer_transfer_flush_region, /* transfer_flush_region */
- r300_buffer_transfer_unmap, /* transfer_unmap */
- u_default_transfer_inline_write /* transfer_inline_write */
+ r300_buffer_transfer_unmap, /* transfer_unmap */
+ u_default_transfer_inline_write /* transfer_inline_write */
};
struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
@@ -264,9 +285,7 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
struct r300_buffer *rbuf;
unsigned alignment = 16;
- rbuf = CALLOC_STRUCT(r300_buffer);
- if (!rbuf)
- goto error1;
+ rbuf = util_mempool_malloc(&r300screen->pool_buffers);
rbuf->magic = R300_BUFFER_MAGIC;
@@ -274,23 +293,30 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
rbuf->b.vtbl = &r300_buffer_vtbl;
pipe_reference_init(&rbuf->b.b.reference, 1);
rbuf->b.b.screen = screen;
+ rbuf->domain = R300_DOMAIN_GTT;
+ rbuf->num_ranges = 0;
+ rbuf->buf = NULL;
+ rbuf->constant_buffer = NULL;
+ rbuf->user_buffer = NULL;
+
+ /* Alloc constant buffers in RAM. */
+ if (templ->bind & PIPE_BIND_CONSTANT_BUFFER) {
+ rbuf->constant_buffer = MALLOC(templ->width0);
+ return &rbuf->b.b;
+ }
- if (rbuf->b.b.bind & R300_BIND_OQBO)
- alignment = 4096;
-
- rbuf->buf = r300_winsys_buffer_create(r300screen,
- alignment,
- rbuf->b.b.bind,
- rbuf->b.b.width0);
+ rbuf->buf =
+ r300screen->rws->buffer_create(r300screen->rws,
+ rbuf->b.b.width0, alignment,
+ rbuf->b.b.bind, rbuf->b.b.usage,
+ rbuf->domain);
- if (!rbuf->buf)
- goto error2;
+ if (!rbuf->buf) {
+ util_mempool_free(&r300screen->pool_buffers, rbuf);
+ return NULL;
+ }
return &rbuf->b.b;
-error2:
- FREE(rbuf);
-error1:
- return NULL;
}
struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen,
@@ -298,27 +324,28 @@ struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen,
unsigned bytes,
unsigned bind)
{
+ struct r300_screen *r300screen = r300_screen(screen);
struct r300_buffer *rbuf;
- rbuf = CALLOC_STRUCT(r300_buffer);
- if (!rbuf)
- goto no_rbuf;
+ rbuf = util_mempool_malloc(&r300screen->pool_buffers);
rbuf->magic = R300_BUFFER_MAGIC;
pipe_reference_init(&rbuf->b.b.reference, 1);
rbuf->b.vtbl = &r300_buffer_vtbl;
rbuf->b.b.screen = screen;
+ rbuf->b.b.target = PIPE_BUFFER;
rbuf->b.b.format = PIPE_FORMAT_R8_UNORM;
rbuf->b.b.usage = PIPE_USAGE_IMMUTABLE;
rbuf->b.b.bind = bind;
rbuf->b.b.width0 = bytes;
rbuf->b.b.height0 = 1;
rbuf->b.b.depth0 = 1;
-
+ rbuf->b.b.flags = 0;
+ rbuf->domain = R300_DOMAIN_GTT;
+ rbuf->num_ranges = 0;
+ rbuf->buf = NULL;
+ rbuf->constant_buffer = NULL;
rbuf->user_buffer = ptr;
return &rbuf->b.b;
-
-no_rbuf:
- return NULL;
}
diff --git a/src/gallium/drivers/r300/r300_screen_buffer.h b/src/gallium/drivers/r300/r300_screen_buffer.h
index 57f48229b2e..cafa9f96f20 100644
--- a/src/gallium/drivers/r300/r300_screen_buffer.h
+++ b/src/gallium/drivers/r300/r300_screen_buffer.h
@@ -52,7 +52,10 @@ struct r300_buffer
struct r300_winsys_buffer *buf;
+ enum r300_buffer_domain domain;
+
void *user_buffer;
+ void *constant_buffer;
struct r300_buffer_range ranges[R300_BUFFER_MAX_RANGES];
unsigned num_ranges;
};
@@ -65,7 +68,7 @@ int r300_upload_index_buffer(struct r300_context *r300,
struct pipe_resource **index_buffer,
unsigned index_size,
unsigned start,
- unsigned count);
+ unsigned count, unsigned *out_offset);
struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
const struct pipe_resource *templ);
@@ -95,40 +98,4 @@ static INLINE boolean r300_buffer_is_user_buffer(struct pipe_resource *buffer)
return r300_buffer(buffer)->user_buffer ? true : false;
}
-static INLINE boolean r300_add_buffer(struct r300_winsys_screen *rws,
- struct pipe_resource *buffer,
- int rd, int wr)
-{
- struct r300_buffer *buf = r300_buffer(buffer);
-
- if (!buf->buf)
- return true;
-
- return rws->add_buffer(rws, buf->buf, rd, wr);
-}
-
-static INLINE boolean r300_add_texture(struct r300_winsys_screen *rws,
- struct r300_texture *tex,
- int rd, int wr)
-{
- return rws->add_buffer(rws, tex->buffer, rd, wr);
-}
-
-static INLINE void r300_buffer_write_reloc(struct r300_winsys_screen *rws,
- struct r300_buffer *buf,
- uint32_t rd, uint32_t wd, uint32_t flags)
-{
- if (!buf->buf)
- return;
-
- rws->write_cs_reloc(rws, buf->buf, rd, wd, flags);
-}
-
-static INLINE void r300_texture_write_reloc(struct r300_winsys_screen *rws,
- struct r300_texture *texture,
- uint32_t rd, uint32_t wd, uint32_t flags)
-{
- rws->write_cs_reloc(rws, texture->buffer, rd, wd, flags);
-}
-
#endif
diff --git a/src/gallium/drivers/r300/r300_shader_semantics.h b/src/gallium/drivers/r300/r300_shader_semantics.h
index cb7a37033f3..4be23e64ce7 100644
--- a/src/gallium/drivers/r300/r300_shader_semantics.h
+++ b/src/gallium/drivers/r300/r300_shader_semantics.h
@@ -38,6 +38,7 @@ struct r300_shader_semantics {
int psize;
int color[ATTR_COLOR_COUNT];
int bcolor[ATTR_COLOR_COUNT];
+ int face;
int generic[ATTR_GENERIC_COUNT];
int fog;
int wpos;
@@ -50,6 +51,7 @@ static INLINE void r300_shader_semantics_reset(
info->pos = ATTR_UNUSED;
info->psize = ATTR_UNUSED;
+ info->face = ATTR_UNUSED;
info->fog = ATTR_UNUSED;
info->wpos = ATTR_UNUSED;
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index edd522288bd..f2479a994c8 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -23,7 +23,9 @@
#include "draw/draw_context.h"
+#include "util/u_framebuffer.h"
#include "util/u_math.h"
+#include "util/u_mm.h"
#include "util/u_memory.h"
#include "util/u_pack_color.h"
@@ -31,17 +33,18 @@
#include "pipe/p_config.h"
+#include "r300_cb.h"
#include "r300_context.h"
#include "r300_emit.h"
#include "r300_reg.h"
#include "r300_screen.h"
#include "r300_screen_buffer.h"
-#include "r300_state.h"
#include "r300_state_inlines.h"
#include "r300_fs.h"
#include "r300_texture.h"
#include "r300_vs.h"
#include "r300_winsys.h"
+#include "r300_hyperz.h"
/* r300_state: Functions used to intialize state context by translating
* Gallium state objects into semi-native r300 state objects. */
@@ -183,6 +186,12 @@ static void* r300_create_blend_state(struct pipe_context* pipe,
{
struct r300_screen* r300screen = r300_screen(pipe->screen);
struct r300_blend_state* blend = CALLOC_STRUCT(r300_blend_state);
+ uint32_t blend_control = 0; /* R300_RB3D_CBLEND: 0x4e04 */
+ uint32_t alpha_blend_control = 0; /* R300_RB3D_ABLEND: 0x4e08 */
+ uint32_t color_channel_mask = 0; /* R300_RB3D_COLOR_CHANNEL_MASK: 0x4e0c */
+ uint32_t rop = 0; /* R300_RB3D_ROPCNTL: 0x4e18 */
+ uint32_t dither = 0; /* R300_RB3D_DITHER_CTL: 0x4e50 */
+ CB_LOCALS;
if (state->rt[0].blend_enable)
{
@@ -196,7 +205,7 @@ static void* r300_create_blend_state(struct pipe_context* pipe,
/* despite the name, ALPHA_BLEND_ENABLE has nothing to do with alpha,
* this is just the crappy D3D naming */
- blend->blend_control = R300_ALPHA_BLEND_ENABLE |
+ blend_control = R300_ALPHA_BLEND_ENABLE |
r300_translate_blend_function(eqRGB) |
( r300_translate_blend_factor(srcRGB) << R300_SRC_BLEND_SHIFT) |
( r300_translate_blend_factor(dstRGB) << R300_DST_BLEND_SHIFT);
@@ -220,7 +229,7 @@ static void* r300_create_blend_state(struct pipe_context* pipe,
srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA ||
srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) {
/* Enable reading from the colorbuffer. */
- blend->blend_control |= R300_READ_ENABLE;
+ blend_control |= R300_READ_ENABLE;
if (r300screen->caps.is_r500) {
/* Optimization: Depending on incoming pixels, we can
@@ -233,7 +242,7 @@ static void* r300_create_blend_state(struct pipe_context* pipe,
(dstA == PIPE_BLENDFACTOR_SRC_COLOR ||
dstA == PIPE_BLENDFACTOR_SRC_ALPHA ||
dstA == PIPE_BLENDFACTOR_ZERO)) {
- blend->blend_control |= R500_SRC_ALPHA_0_NO_READ;
+ blend_control |= R500_SRC_ALPHA_0_NO_READ;
}
/* Disable reading if SRC_ALPHA == 1. */
@@ -242,7 +251,7 @@ static void* r300_create_blend_state(struct pipe_context* pipe,
(dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
dstA == PIPE_BLENDFACTOR_ZERO)) {
- blend->blend_control |= R500_SRC_ALPHA_1_NO_READ;
+ blend_control |= R500_SRC_ALPHA_1_NO_READ;
}
}
}
@@ -272,31 +281,31 @@ static void* r300_create_blend_state(struct pipe_context* pipe,
* pixels.
*/
if (blend_discard_if_src_alpha_0(srcRGB, srcA, dstRGB, dstA)) {
- blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0;
+ blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0;
} else if (blend_discard_if_src_alpha_1(srcRGB, srcA,
dstRGB, dstA)) {
- blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1;
+ blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1;
} else if (blend_discard_if_src_color_0(srcRGB, srcA,
dstRGB, dstA)) {
- blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_0;
+ blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_0;
} else if (blend_discard_if_src_color_1(srcRGB, srcA,
dstRGB, dstA)) {
- blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_1;
+ blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_1;
} else if (blend_discard_if_src_alpha_color_0(srcRGB, srcA,
dstRGB, dstA)) {
- blend->blend_control |=
+ blend_control |=
R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0;
} else if (blend_discard_if_src_alpha_color_1(srcRGB, srcA,
dstRGB, dstA)) {
- blend->blend_control |=
+ blend_control |=
R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1;
}
}
/* separate alpha */
if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
- blend->blend_control |= R300_SEPARATE_ALPHA_ENABLE;
- blend->alpha_blend_control =
+ blend_control |= R300_SEPARATE_ALPHA_ENABLE;
+ alpha_blend_control =
r300_translate_blend_function(eqA) |
(r300_translate_blend_factor(srcA) << R300_SRC_BLEND_SHIFT) |
(r300_translate_blend_factor(dstA) << R300_DST_BLEND_SHIFT);
@@ -305,21 +314,21 @@ static void* r300_create_blend_state(struct pipe_context* pipe,
/* PIPE_LOGICOP_* don't need to be translated, fortunately. */
if (state->logicop_enable) {
- blend->rop = R300_RB3D_ROPCNTL_ROP_ENABLE |
+ rop = R300_RB3D_ROPCNTL_ROP_ENABLE |
(state->logicop_func) << R300_RB3D_ROPCNTL_ROP_SHIFT;
}
/* Color channel masks for all MRTs. */
- blend->color_channel_mask = bgra_cmask(state->rt[0].colormask);
+ color_channel_mask = bgra_cmask(state->rt[0].colormask);
if (r300screen->caps.is_r500 && state->independent_blend_enable) {
if (state->rt[1].blend_enable) {
- blend->color_channel_mask |= bgra_cmask(state->rt[1].colormask) << 4;
+ color_channel_mask |= bgra_cmask(state->rt[1].colormask) << 4;
}
if (state->rt[2].blend_enable) {
- blend->color_channel_mask |= bgra_cmask(state->rt[2].colormask) << 8;
+ color_channel_mask |= bgra_cmask(state->rt[2].colormask) << 8;
}
if (state->rt[3].blend_enable) {
- blend->color_channel_mask |= bgra_cmask(state->rt[3].colormask) << 12;
+ color_channel_mask |= bgra_cmask(state->rt[3].colormask) << 12;
}
}
@@ -330,11 +339,31 @@ static void* r300_create_blend_state(struct pipe_context* pipe,
* This could be revisited if we ever get quality or conformance hints.
*
if (state->dither) {
- blend->dither = R300_RB3D_DITHER_CTL_DITHER_MODE_LUT |
+ dither = R300_RB3D_DITHER_CTL_DITHER_MODE_LUT |
R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT;
}
*/
+ /* Build a command buffer. */
+ BEGIN_CB(blend->cb, 8);
+ OUT_CB_REG(R300_RB3D_ROPCNTL, rop);
+ OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3);
+ OUT_CB(blend_control);
+ OUT_CB(alpha_blend_control);
+ OUT_CB(color_channel_mask);
+ OUT_CB_REG(R300_RB3D_DITHER_CTL, dither);
+ END_CB;
+
+ /* The same as above, but with no colorbuffer reads and writes. */
+ BEGIN_CB(blend->cb_no_readwrite, 8);
+ OUT_CB_REG(R300_RB3D_ROPCNTL, rop);
+ OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3);
+ OUT_CB(0);
+ OUT_CB(0);
+ OUT_CB(0);
+ OUT_CB_REG(R300_RB3D_DITHER_CTL, dither);
+ END_CB;
+
return (void*)blend;
}
@@ -368,20 +397,26 @@ static void r300_set_blend_color(struct pipe_context* pipe,
struct r300_context* r300 = r300_context(pipe);
struct r300_blend_color_state* state =
(struct r300_blend_color_state*)r300->blend_color_state.state;
- union util_color uc;
+ CB_LOCALS;
- util_pack_color(color->color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
- state->blend_color = uc.ui;
+ if (r300->screen->caps.is_r500) {
+ /* XXX if FP16 blending is enabled, we should use the FP16 format */
+ BEGIN_CB(state->cb, 3);
+ OUT_CB_REG_SEQ(R500_RB3D_CONSTANT_COLOR_AR, 2);
+ OUT_CB(float_to_fixed10(color->color[0]) |
+ (float_to_fixed10(color->color[3]) << 16));
+ OUT_CB(float_to_fixed10(color->color[2]) |
+ (float_to_fixed10(color->color[1]) << 16));
+ END_CB;
+ } else {
+ union util_color uc;
+ util_pack_color(color->color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
- /* XXX if FP16 blending is enabled, we should use the FP16 format */
- state->blend_color_red_alpha =
- float_to_fixed10(color->color[0]) |
- (float_to_fixed10(color->color[3]) << 16);
- state->blend_color_green_blue =
- float_to_fixed10(color->color[2]) |
- (float_to_fixed10(color->color[1]) << 16);
+ BEGIN_CB(state->cb, 2);
+ OUT_CB_REG(R300_RB3D_BLEND_COLOR, uc.ui);
+ END_CB;
+ }
- r300->blend_color_state.size = r300->screen->caps.is_r500 ? 3 : 2;
r300->blend_color_state.dirty = TRUE;
}
@@ -389,21 +424,41 @@ static void r300_set_clip_state(struct pipe_context* pipe,
const struct pipe_clip_state* state)
{
struct r300_context* r300 = r300_context(pipe);
+ struct r300_clip_state *clip =
+ (struct r300_clip_state*)r300->clip_state.state;
+ CB_LOCALS;
- r300->clip = *state;
+ clip->clip = *state;
if (r300->screen->caps.has_tcl) {
- memcpy(r300->clip_state.state, state, sizeof(struct pipe_clip_state));
- r300->clip_state.size = 29;
+ r300->clip_state.size = 2 + !!state->nr * 3 + state->nr * 4;
+
+ BEGIN_CB(clip->cb, r300->clip_state.size);
+ if (state->nr) {
+ OUT_CB_REG(R300_VAP_PVS_VECTOR_INDX_REG,
+ (r300->screen->caps.is_r500 ?
+ R500_PVS_UCP_START : R300_PVS_UCP_START));
+ OUT_CB_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, state->nr * 4);
+ OUT_CB_TABLE(state->ucp, state->nr * 4);
+ }
+ OUT_CB_REG(R300_VAP_CLIP_CNTL, ((1 << state->nr) - 1) |
+ R300_PS_UCP_MODE_CLIP_AS_TRIFAN |
+ (state->depth_clamp ? R300_CLIP_DISABLE : 0));
+ END_CB;
+
+ r300->clip_state.dirty = TRUE;
} else {
- draw_flush(r300->draw);
draw_set_clip_state(r300->draw, state);
- r300->clip_state.size = 2;
}
+}
- r300->clip_state.dirty = TRUE;
+static void
+r300_set_sample_mask(struct pipe_context *pipe,
+ unsigned sample_mask)
+{
}
+
/* Create a new depth, stencil, and alpha state based on the CSO dsa state.
*
* This contains the depth buffer, stencil buffer, alpha test, and such.
@@ -415,15 +470,18 @@ static void*
{
struct r300_capabilities *caps = &r300_screen(pipe->screen)->caps;
struct r300_dsa_state* dsa = CALLOC_STRUCT(r300_dsa_state);
+ CB_LOCALS;
+
+ dsa->dsa = *state;
+
+ /* Depth test setup. - separate write mask depth for decomp flush */
+ if (state->depth.writemask) {
+ dsa->z_buffer_control |= R300_Z_WRITE_ENABLE;
+ }
- /* Depth test setup. */
if (state->depth.enabled) {
dsa->z_buffer_control |= R300_Z_ENABLE;
- if (state->depth.writemask) {
- dsa->z_buffer_control |= R300_Z_WRITE_ENABLE;
- }
-
dsa->z_stencil_control |=
(r300_translate_depth_stencil_function(state->depth.func) <<
R300_Z_FUNC_SHIFT);
@@ -467,7 +525,7 @@ static void*
if (caps->is_r500) {
dsa->z_buffer_control |= R500_STENCIL_REFMASK_FRONT_BACK;
} else {
- dsa->stencil_ref_bf_fallback =
+ dsa->two_sided_stencil_ref =
(state->stencil[0].valuemask != state->stencil[1].valuemask ||
state->stencil[0].writemask != state->stencil[1].writemask);
}
@@ -487,21 +545,41 @@ static void*
dsa->alpha_function |= R500_FG_ALPHA_FUNC_8BIT;
}
+ BEGIN_CB(&dsa->cb_begin, 8);
+ OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function);
+ OUT_CB_REG_SEQ(R300_ZB_CNTL, 3);
+ OUT_CB(dsa->z_buffer_control);
+ OUT_CB(dsa->z_stencil_control);
+ OUT_CB(dsa->stencil_ref_mask);
+ OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf);
+ END_CB;
+
+ BEGIN_CB(dsa->cb_no_readwrite, 8);
+ OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function);
+ OUT_CB_REG_SEQ(R300_ZB_CNTL, 3);
+ OUT_CB(0);
+ OUT_CB(0);
+ OUT_CB(0);
+ OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, 0);
+ END_CB;
+
return (void*)dsa;
}
-static void r300_update_stencil_ref_fallback_status(struct r300_context *r300)
+static void r300_dsa_inject_stencilref(struct r300_context *r300)
{
- struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state;
+ struct r300_dsa_state *dsa =
+ (struct r300_dsa_state*)r300->dsa_state.state;
- if (r300->screen->caps.is_r500) {
+ if (!dsa)
return;
- }
- r300->stencil_ref_bf_fallback =
- dsa->stencil_ref_bf_fallback ||
- (dsa->two_sided &&
- r300->stencil_ref.ref_value[0] != r300->stencil_ref.ref_value[1]);
+ dsa->stencil_ref_mask =
+ (dsa->stencil_ref_mask & ~R300_STENCILREF_MASK) |
+ r300->stencil_ref.ref_value[0];
+ dsa->stencil_ref_bf =
+ (dsa->stencil_ref_bf & ~R300_STENCILREF_MASK) |
+ r300->stencil_ref.ref_value[1];
}
/* Bind DSA state. */
@@ -516,7 +594,8 @@ static void r300_bind_dsa_state(struct pipe_context* pipe,
UPDATE_STATE(state, r300->dsa_state);
- r300_update_stencil_ref_fallback_status(r300);
+ r300->hyperz_state.dirty = TRUE; /* Will be updated before the emission. */
+ r300_dsa_inject_stencilref(r300);
}
/* Free DSA state. */
@@ -532,72 +611,103 @@ static void r300_set_stencil_ref(struct pipe_context* pipe,
struct r300_context* r300 = r300_context(pipe);
r300->stencil_ref = *sr;
+
+ r300_dsa_inject_stencilref(r300);
r300->dsa_state.dirty = TRUE;
+}
+
+static void r300_tex_set_tiling_flags(struct r300_context *r300,
+ struct r300_texture *tex, unsigned level)
+{
+ /* Check if the macrotile flag needs to be changed.
+ * Skip changing the flags otherwise. */
+ if (tex->desc.macrotile[tex->surface_level] !=
+ tex->desc.macrotile[level]) {
+ /* Tiling determines how DRM treats the buffer data.
+ * We must flush CS when changing it if the buffer is referenced. */
+ if (r300->rws->cs_is_buffer_referenced(r300->cs,
+ tex->buffer, R300_REF_CS))
+ r300->context.flush(&r300->context, 0, NULL);
- r300_update_stencil_ref_fallback_status(r300);
+ r300->rws->buffer_set_tiling(r300->rws, tex->buffer,
+ tex->desc.microtile, tex->desc.macrotile[level],
+ tex->desc.stride_in_bytes[0]);
+
+ tex->surface_level = level;
+ }
}
/* This switcheroo is needed just because of goddamned MACRO_SWITCH. */
-static void r300_fb_update_tiling_flags(struct r300_context *r300,
- const struct pipe_framebuffer_state *old_state,
- const struct pipe_framebuffer_state *new_state)
+static void r300_fb_set_tiling_flags(struct r300_context *r300,
+ const struct pipe_framebuffer_state *state)
{
- struct r300_texture *tex;
- unsigned i, j, level;
+ unsigned i;
- /* Reset tiling flags for old surfaces to default values. */
- for (i = 0; i < old_state->nr_cbufs; i++) {
- for (j = 0; j < new_state->nr_cbufs; j++) {
- if (old_state->cbufs[i]->texture == new_state->cbufs[j]->texture) {
- break;
- }
- }
- /* If not binding the surface again... */
- if (j != new_state->nr_cbufs) {
- continue;
- }
+ /* Set tiling flags for new surfaces. */
+ for (i = 0; i < state->nr_cbufs; i++) {
+ r300_tex_set_tiling_flags(r300,
+ r300_texture(state->cbufs[i]->texture),
+ state->cbufs[i]->level);
+ }
+ if (state->zsbuf) {
+ r300_tex_set_tiling_flags(r300,
+ r300_texture(state->zsbuf->texture),
+ state->zsbuf->level);
+ }
+}
- tex = r300_texture(old_state->cbufs[i]->texture);
+static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index,
+ const char *binding)
+{
+ struct pipe_resource *tex = surf->texture;
+ struct r300_texture *rtex = r300_texture(tex);
- if (tex) {
- r300->rws->buffer_set_tiling(r300->rws, tex->buffer,
- tex->pitch[0],
- tex->microtile,
- tex->macrotile);
- }
- }
- if (old_state->zsbuf &&
- (!new_state->zsbuf ||
- old_state->zsbuf->texture != new_state->zsbuf->texture)) {
- tex = r300_texture(old_state->zsbuf->texture);
+ fprintf(stderr,
+ "r300: %s[%i] Dim: %ix%i, Offset: %i, ZSlice: %i, "
+ "Face: %i, Level: %i, Format: %s\n"
- if (tex) {
- r300->rws->buffer_set_tiling(r300->rws, tex->buffer,
- tex->pitch[0],
- tex->microtile,
- tex->macrotile);
- }
- }
+ "r300: TEX: Macro: %s, Micro: %s, Pitch: %i, "
+ "Dim: %ix%ix%i, LastLevel: %i, Format: %s\n",
- /* Set tiling flags for new surfaces. */
- for (i = 0; i < new_state->nr_cbufs; i++) {
- tex = r300_texture(new_state->cbufs[i]->texture);
- level = new_state->cbufs[i]->level;
+ binding, index, surf->width, surf->height, surf->offset,
+ surf->zslice, surf->face, surf->level,
+ util_format_short_name(surf->format),
- r300->rws->buffer_set_tiling(r300->rws, tex->buffer,
- tex->pitch[level],
- tex->microtile,
- tex->mip_macrotile[level]);
+ rtex->desc.macrotile[0] ? "YES" : " NO",
+ rtex->desc.microtile ? "YES" : " NO",
+ rtex->desc.stride_in_pixels[0],
+ tex->width0, tex->height0, tex->depth0,
+ tex->last_level, util_format_short_name(tex->format));
+}
+
+void r300_mark_fb_state_dirty(struct r300_context *r300,
+ enum r300_fb_state_change change)
+{
+ struct pipe_framebuffer_state *state = r300->fb_state.state;
+ boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ);
+
+ /* What is marked as dirty depends on the enum r300_fb_state_change. */
+ r300->gpu_flush.dirty = TRUE;
+ r300->fb_state.dirty = TRUE;
+ r300->hyperz_state.dirty = TRUE;
+
+ if (change == R300_CHANGED_FB_STATE) {
+ r300->aa_state.dirty = TRUE;
+ r300->fb_state_pipelined.dirty = TRUE;
}
- if (new_state->zsbuf) {
- tex = r300_texture(new_state->zsbuf->texture);
- level = new_state->zsbuf->level;
- r300->rws->buffer_set_tiling(r300->rws, tex->buffer,
- tex->pitch[level],
- tex->microtile,
- tex->mip_macrotile[level]);
+ /* Now compute the fb_state atom size. */
+ r300->fb_state.size = 2 + (8 * state->nr_cbufs);
+
+ if (r300->cbzb_clear)
+ r300->fb_state.size += 10;
+ else if (state->zsbuf) {
+ r300->fb_state.size += 10;
+ if (has_hyperz)
+ r300->fb_state.size += r300->screen->caps.hiz_ram ? 8 : 4;
}
+
+ /* The size of the rest of atoms stays the same. */
}
static void
@@ -605,15 +715,12 @@ static void
const struct pipe_framebuffer_state* state)
{
struct r300_context* r300 = r300_context(pipe);
+ struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state;
struct pipe_framebuffer_state *old_state = r300->fb_state.state;
- unsigned max_width, max_height;
+ boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ);
+ unsigned max_width, max_height, i;
uint32_t zbuffer_bpp = 0;
-
- if (state->nr_cbufs > 4) {
- fprintf(stderr, "r300: Implementation error: Too many MRTs in %s, "
- "refusing to bind framebuffer state!\n", __FUNCTION__);
- return;
- }
+ int blocksize;
if (r300->screen->caps.is_r500) {
max_width = max_height = 4096;
@@ -629,12 +736,6 @@ static void
return;
}
- if (r300->draw) {
- draw_flush(r300->draw);
- }
-
- r300->fb_state.dirty = TRUE;
-
/* If nr_cbufs is changed from zero to non-zero or vice versa... */
if (!!old_state->nr_cbufs != !!state->nr_cbufs) {
r300->blend_state.dirty = TRUE;
@@ -644,27 +745,91 @@ static void
r300->dsa_state.dirty = TRUE;
}
- r300_fb_update_tiling_flags(r300, r300->fb_state.state, state);
+ /* The tiling flags are dependent on the surface miplevel, unfortunately. */
+ r300_fb_set_tiling_flags(r300, state);
- memcpy(r300->fb_state.state, state, sizeof(struct pipe_framebuffer_state));
+ util_copy_framebuffer_state(r300->fb_state.state, state);
- r300->fb_state.size = (10 * state->nr_cbufs) + (2 * (4 - state->nr_cbufs)) +
- (state->zsbuf ? 10 : 0) + 11;
+ r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE);
- /* Polygon offset depends on the zbuffer bit depth. */
- if (state->zsbuf && r300->polygon_offset_enabled) {
- switch (util_format_get_blocksize(state->zsbuf->texture->format)) {
- case 2:
- zbuffer_bpp = 16;
- break;
- case 4:
- zbuffer_bpp = 24;
- break;
+ r300->z_compression = false;
+
+ if (state->zsbuf) {
+ blocksize = util_format_get_blocksize(state->zsbuf->texture->format);
+ switch (blocksize) {
+ case 2:
+ zbuffer_bpp = 16;
+ break;
+ case 4:
+ zbuffer_bpp = 24;
+ break;
+ }
+ if (has_hyperz) {
+ struct r300_surface *zs_surf = r300_surface(state->zsbuf);
+ struct r300_texture *tex;
+ int compress = r300->screen->caps.is_rv350 ? RV350_Z_COMPRESS_88 : R300_Z_COMPRESS_44;
+ int level = zs_surf->base.level;
+
+ tex = r300_texture(zs_surf->base.texture);
+
+ /* work out whether we can support hiz on this buffer */
+ r300_hiz_alloc_block(r300, zs_surf);
+
+ /* work out whether we can support zmask features on this buffer */
+ r300_zmask_alloc_block(r300, zs_surf, compress);
+
+ if (tex->zmask_mem[level]) {
+ /* compression causes hangs on 16-bit */
+ if (zbuffer_bpp == 24)
+ r300->z_compression = compress;
+ }
+ DBG(r300, DBG_HYPERZ,
+ "hyper-z features: hiz: %d @ %08x z-compression: %d z-fastfill: %d @ %08x\n", tex->hiz_mem[level] ? 1 : 0,
+ tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef,
+ r300->z_compression, tex->zmask_mem[level] ? 1 : 0,
+ tex->zmask_mem[level] ? tex->zmask_mem[level]->ofs : 0xdeadbeef);
}
+ /* Polygon offset depends on the zbuffer bit depth. */
if (r300->zbuffer_bpp != zbuffer_bpp) {
r300->zbuffer_bpp = zbuffer_bpp;
- r300->rs_state.dirty = TRUE;
+
+ if (r300->polygon_offset_enabled)
+ r300->rs_state.dirty = TRUE;
+ }
+ }
+
+ /* Set up AA config. */
+ if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) {
+ if (state->nr_cbufs && state->cbufs[0]->texture->nr_samples > 1) {
+ aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE;
+
+ switch (state->cbufs[0]->texture->nr_samples) {
+ case 2:
+ aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2;
+ break;
+ case 3:
+ aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3;
+ break;
+ case 4:
+ aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4;
+ break;
+ case 6:
+ aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6;
+ break;
+ }
+ } else {
+ aa->aa_config = 0;
+ }
+ }
+
+ if (DBG_ON(r300, DBG_FB)) {
+ fprintf(stderr, "r300: set_framebuffer_state:\n");
+ for (i = 0; i < state->nr_cbufs; i++) {
+ r300_print_fb_surf_info(state->cbufs[i], i, "CB");
+ }
+ if (state->zsbuf) {
+ r300_print_fb_surf_info(state->zsbuf, 0, "ZB");
}
}
}
@@ -691,16 +856,18 @@ void r300_mark_fs_code_dirty(struct r300_context *r300)
r300->fs.dirty = TRUE;
r300->fs_rc_constant_state.dirty = TRUE;
r300->fs_constants.dirty = TRUE;
+ r300->fs.size = fs->shader->cb_code_size;
if (r300->screen->caps.is_r500) {
- r300->fs.size = r500_get_fs_atom_size(r300);
r300->fs_rc_constant_state.size = fs->shader->rc_state_count * 7;
r300->fs_constants.size = fs->shader->externals_count * 4 + 3;
} else {
- r300->fs.size = r300_get_fs_atom_size(r300);
r300->fs_rc_constant_state.size = fs->shader->rc_state_count * 5;
r300->fs_constants.size = fs->shader->externals_count * 4 + 1;
}
+
+ ((struct r300_constant_buffer*)r300->fs_constants.state)->remap_table =
+ fs->shader->code.constants_remap_table;
}
/* Bind fragment shader state. */
@@ -719,10 +886,6 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader)
r300_mark_fs_code_dirty(r300);
r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */
-
- if (r300->vs_state.state && r300_vertex_shader_setup_wpos(r300)) {
- r300->vap_output_state.dirty = TRUE;
- }
}
/* Delete fragment shader state. */
@@ -735,6 +898,7 @@ static void r300_delete_fs_state(struct pipe_context* pipe, void* shader)
tmp = ptr;
ptr = ptr->next;
rc_constants_destroy(&tmp->code.constants);
+ FREE(tmp->cb_code);
FREE(tmp);
}
FREE((void*)fs->state.tokens);
@@ -758,91 +922,121 @@ static void* r300_create_rs_state(struct pipe_context* pipe,
const struct pipe_rasterizer_state* state)
{
struct r300_rs_state* rs = CALLOC_STRUCT(r300_rs_state);
- int i;
-
- /* Copy rasterizer state for Draw. */
+ float psiz;
+ uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */
+ uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */
+ uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */
+ uint32_t line_control; /* R300_GA_LINE_CNTL: 0x4234 */
+ uint32_t polygon_offset_enable; /* R300_SU_POLY_OFFSET_ENABLE: 0x42b4 */
+ uint32_t cull_mode; /* R300_SU_CULL_MODE: 0x42b8 */
+ uint32_t line_stipple_config; /* R300_GA_LINE_STIPPLE_CONFIG: 0x4328 */
+ uint32_t line_stipple_value; /* R300_GA_LINE_STIPPLE_VALUE: 0x4260 */
+ uint32_t polygon_mode; /* R300_GA_POLY_MODE: 0x4288 */
+ uint32_t clip_rule; /* R300_SC_CLIP_RULE: 0x43D0 */
+
+ /* Point sprites texture coordinates, 0: lower left, 1: upper right */
+ float point_texcoord_left = 0; /* R300_GA_POINT_S0: 0x4200 */
+ float point_texcoord_bottom = 0;/* R300_GA_POINT_T0: 0x4204 */
+ float point_texcoord_right = 1; /* R300_GA_POINT_S1: 0x4208 */
+ float point_texcoord_top = 0; /* R300_GA_POINT_T1: 0x420c */
+ CB_LOCALS;
+
+ /* Copy rasterizer state. */
rs->rs = *state;
+ rs->rs_draw = *state;
+
+ rs->rs.sprite_coord_enable = state->point_quad_rasterization *
+ state->sprite_coord_enable;
+
+ /* Override some states for Draw. */
+ rs->rs_draw.sprite_coord_enable = 0; /* We can do this in HW. */
#ifdef PIPE_ARCH_LITTLE_ENDIAN
- rs->vap_control_status = R300_VC_NO_SWAP;
+ vap_control_status = R300_VC_NO_SWAP;
#else
- rs->vap_control_status = R300_VC_32BIT_SWAP;
+ vap_control_status = R300_VC_32BIT_SWAP;
#endif
/* If no TCL engine is present, turn off the HW TCL. */
if (!r300_screen(pipe->screen)->caps.has_tcl) {
- rs->vap_control_status |= R300_VAP_TCL_BYPASS;
+ vap_control_status |= R300_VAP_TCL_BYPASS;
}
- rs->point_size = pack_float_16_6x(state->point_size) |
+ /* Point size width and height. */
+ point_size =
+ pack_float_16_6x(state->point_size) |
(pack_float_16_6x(state->point_size) << R300_POINTSIZE_X_SHIFT);
- rs->line_control = pack_float_16_6x(state->line_width) |
+ /* Point size clamping. */
+ if (state->point_size_per_vertex) {
+ /* Per-vertex point size.
+ * Clamp to [0, max FB size] */
+ psiz = pipe->screen->get_paramf(pipe->screen,
+ PIPE_CAP_MAX_POINT_WIDTH);
+ point_minmax =
+ pack_float_16_6x(psiz) << R300_GA_POINT_MINMAX_MAX_SHIFT;
+ } else {
+ /* We cannot disable the point-size vertex output,
+ * so clamp it. */
+ psiz = state->point_size;
+ point_minmax =
+ (pack_float_16_6x(psiz) << R300_GA_POINT_MINMAX_MIN_SHIFT) |
+ (pack_float_16_6x(psiz) << R300_GA_POINT_MINMAX_MAX_SHIFT);
+ }
+
+ /* Line control. */
+ line_control = pack_float_16_6x(state->line_width) |
R300_GA_LINE_CNTL_END_TYPE_COMP;
/* Enable polygon mode */
- if (state->fill_cw != PIPE_POLYGON_MODE_FILL ||
- state->fill_ccw != PIPE_POLYGON_MODE_FILL) {
- rs->polygon_mode = R300_GA_POLY_MODE_DUAL;
+ polygon_mode = 0;
+ if (state->fill_front != PIPE_POLYGON_MODE_FILL ||
+ state->fill_back != PIPE_POLYGON_MODE_FILL) {
+ polygon_mode = R300_GA_POLY_MODE_DUAL;
}
- /* Radeons don't think in "CW/CCW", they think in "front/back". */
- if (state->front_winding == PIPE_WINDING_CW) {
- rs->cull_mode = R300_FRONT_FACE_CW;
-
- /* Polygon offset */
- if (state->offset_cw) {
- rs->polygon_offset_enable |= R300_FRONT_ENABLE;
- }
- if (state->offset_ccw) {
- rs->polygon_offset_enable |= R300_BACK_ENABLE;
- }
-
- /* Polygon mode */
- if (rs->polygon_mode) {
- rs->polygon_mode |=
- r300_translate_polygon_mode_front(state->fill_cw);
- rs->polygon_mode |=
- r300_translate_polygon_mode_back(state->fill_ccw);
- }
- } else {
- rs->cull_mode = R300_FRONT_FACE_CCW;
-
- /* Polygon offset */
- if (state->offset_ccw) {
- rs->polygon_offset_enable |= R300_FRONT_ENABLE;
- }
- if (state->offset_cw) {
- rs->polygon_offset_enable |= R300_BACK_ENABLE;
- }
+ /* Front face */
+ if (state->front_ccw)
+ cull_mode = R300_FRONT_FACE_CCW;
+ else
+ cull_mode = R300_FRONT_FACE_CW;
- /* Polygon mode */
- if (rs->polygon_mode) {
- rs->polygon_mode |=
- r300_translate_polygon_mode_front(state->fill_ccw);
- rs->polygon_mode |=
- r300_translate_polygon_mode_back(state->fill_cw);
- }
+ /* Polygon offset */
+ polygon_offset_enable = 0;
+ if (util_get_offset(state, state->fill_front)) {
+ polygon_offset_enable |= R300_FRONT_ENABLE;
}
- if (state->front_winding & state->cull_mode) {
- rs->cull_mode |= R300_CULL_FRONT;
+ if (util_get_offset(state, state->fill_back)) {
+ polygon_offset_enable |= R300_BACK_ENABLE;
}
- if (~(state->front_winding) & state->cull_mode) {
- rs->cull_mode |= R300_CULL_BACK;
+
+ rs->polygon_offset_enable = polygon_offset_enable != 0;
+
+ /* Polygon mode */
+ if (polygon_mode) {
+ polygon_mode |=
+ r300_translate_polygon_mode_front(state->fill_front);
+ polygon_mode |=
+ r300_translate_polygon_mode_back(state->fill_back);
}
- if (rs->polygon_offset_enable) {
- rs->depth_offset = state->offset_units;
- rs->depth_scale = state->offset_scale;
+ if (state->cull_face & PIPE_FACE_FRONT) {
+ cull_mode |= R300_CULL_FRONT;
+ }
+ if (state->cull_face & PIPE_FACE_BACK) {
+ cull_mode |= R300_CULL_BACK;
}
if (state->line_stipple_enable) {
- rs->line_stipple_config =
+ line_stipple_config =
R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE |
(fui((float)state->line_stipple_factor) &
R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK);
/* XXX this might need to be scaled up */
- rs->line_stipple_value = state->line_stipple_pattern;
+ line_stipple_value = state->line_stipple_pattern;
+ } else {
+ line_stipple_config = 0;
+ line_stipple_value = 0;
}
if (state->flatshade) {
@@ -851,35 +1045,68 @@ static void* r300_create_rs_state(struct pipe_context* pipe,
rs->color_control = R300_SHADE_MODEL_SMOOTH;
}
- rs->clip_rule = state->scissor ? 0xAAAA : 0xFFFF;
+ clip_rule = state->scissor ? 0xAAAA : 0xFFFF;
- /* XXX Disable point sprites until we know what's wrong with them. */
- rs->rs.sprite_coord_enable = 0;
-
- /* Point sprites */
+ /* Point sprites coord mode */
if (rs->rs.sprite_coord_enable) {
- rs->stuffing_enable = R300_GB_POINT_STUFF_ENABLE;
- for (i = 0; i < 8; i++) {
- if (state->sprite_coord_enable & (1 << i))
- rs->stuffing_enable |=
- R300_GB_TEX_STR << (R300_GB_TEX0_SOURCE_SHIFT + (i*2));
- }
-
- rs->point_texcoord_left = 0.0f;
- rs->point_texcoord_right = 1.0f;
-
switch (state->sprite_coord_mode) {
case PIPE_SPRITE_COORD_UPPER_LEFT:
- rs->point_texcoord_top = 0.0f;
- rs->point_texcoord_bottom = 1.0f;
+ point_texcoord_top = 0.0f;
+ point_texcoord_bottom = 1.0f;
break;
case PIPE_SPRITE_COORD_LOWER_LEFT:
- rs->point_texcoord_top = 1.0f;
- rs->point_texcoord_bottom = 0.0f;
+ point_texcoord_top = 1.0f;
+ point_texcoord_bottom = 0.0f;
break;
}
}
+ /* Build the main command buffer. */
+ BEGIN_CB(rs->cb_main, RS_STATE_MAIN_SIZE);
+ OUT_CB_REG(R300_VAP_CNTL_STATUS, vap_control_status);
+ OUT_CB_REG(R300_GA_POINT_SIZE, point_size);
+ OUT_CB_REG_SEQ(R300_GA_POINT_MINMAX, 2);
+ OUT_CB(point_minmax);
+ OUT_CB(line_control);
+ OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_ENABLE, 2);
+ OUT_CB(polygon_offset_enable);
+ rs->cull_mode_index = 9;
+ OUT_CB(cull_mode);
+ OUT_CB_REG(R300_GA_LINE_STIPPLE_CONFIG, line_stipple_config);
+ OUT_CB_REG(R300_GA_LINE_STIPPLE_VALUE, line_stipple_value);
+ OUT_CB_REG(R300_GA_POLY_MODE, polygon_mode);
+ OUT_CB_REG(R300_SC_CLIP_RULE, clip_rule);
+ OUT_CB_REG_SEQ(R300_GA_POINT_S0, 4);
+ OUT_CB_32F(point_texcoord_left);
+ OUT_CB_32F(point_texcoord_bottom);
+ OUT_CB_32F(point_texcoord_right);
+ OUT_CB_32F(point_texcoord_top);
+ END_CB;
+
+ /* Build the two command buffers for polygon offset setup. */
+ if (polygon_offset_enable) {
+ float scale = state->offset_scale * 12;
+ float offset = state->offset_units * 4;
+
+ BEGIN_CB(rs->cb_poly_offset_zb16, 5);
+ OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
+ OUT_CB_32F(scale);
+ OUT_CB_32F(offset);
+ OUT_CB_32F(scale);
+ OUT_CB_32F(offset);
+ END_CB;
+
+ offset = state->offset_units * 2;
+
+ BEGIN_CB(rs->cb_poly_offset_zb24, 5);
+ OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
+ OUT_CB_32F(scale);
+ OUT_CB_32F(offset);
+ OUT_CB_32F(scale);
+ OUT_CB_32F(offset);
+ END_CB;
+ }
+
return (void*)rs;
}
@@ -889,24 +1116,27 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state)
struct r300_context* r300 = r300_context(pipe);
struct r300_rs_state* rs = (struct r300_rs_state*)state;
int last_sprite_coord_enable = r300->sprite_coord_enable;
+ boolean last_two_sided_color = r300->two_sided_color;
- if (r300->draw) {
- draw_flush(r300->draw);
- draw_set_rasterizer_state(r300->draw, &rs->rs, state);
+ if (r300->draw && rs) {
+ draw_set_rasterizer_state(r300->draw, &rs->rs_draw, state);
}
if (rs) {
- r300->polygon_offset_enabled = rs->rs.offset_cw || rs->rs.offset_ccw;
+ r300->polygon_offset_enabled = rs->polygon_offset_enable;
r300->sprite_coord_enable = rs->rs.sprite_coord_enable;
+ r300->two_sided_color = rs->rs.light_twoside;
} else {
r300->polygon_offset_enabled = FALSE;
r300->sprite_coord_enable = 0;
+ r300->two_sided_color = FALSE;
}
UPDATE_STATE(state, r300->rs_state);
- r300->rs_state.size = 26 + (r300->polygon_offset_enabled ? 5 : 0);
+ r300->rs_state.size = RS_STATE_MAIN_SIZE + (r300->polygon_offset_enabled ? 5 : 0);
- if (last_sprite_coord_enable != r300->sprite_coord_enable) {
+ if (last_sprite_coord_enable != r300->sprite_coord_enable ||
+ last_two_sided_color != r300->two_sided_color) {
r300->rs_block_state.dirty = TRUE;
}
}
@@ -925,14 +1155,37 @@ static void*
struct r300_sampler_state* sampler = CALLOC_STRUCT(r300_sampler_state);
boolean is_r500 = r300->screen->caps.is_r500;
int lod_bias;
- union util_color uc;
sampler->state = *state;
+ /* r300 doesn't handle CLAMP and MIRROR_CLAMP correctly when either MAG
+ * or MIN filter is NEAREST. Since texwrap produces same results
+ * for CLAMP and CLAMP_TO_EDGE, we use them instead. */
+ if (sampler->state.min_img_filter == PIPE_TEX_FILTER_NEAREST ||
+ sampler->state.mag_img_filter == PIPE_TEX_FILTER_NEAREST) {
+ /* Wrap S. */
+ if (sampler->state.wrap_s == PIPE_TEX_WRAP_CLAMP)
+ sampler->state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ else if (sampler->state.wrap_s == PIPE_TEX_WRAP_MIRROR_CLAMP)
+ sampler->state.wrap_s = PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE;
+
+ /* Wrap T. */
+ if (sampler->state.wrap_t == PIPE_TEX_WRAP_CLAMP)
+ sampler->state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ else if (sampler->state.wrap_t == PIPE_TEX_WRAP_MIRROR_CLAMP)
+ sampler->state.wrap_t = PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE;
+
+ /* Wrap R. */
+ if (sampler->state.wrap_r == PIPE_TEX_WRAP_CLAMP)
+ sampler->state.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ else if (sampler->state.wrap_r == PIPE_TEX_WRAP_MIRROR_CLAMP)
+ sampler->state.wrap_r = PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE;
+ }
+
sampler->filter0 |=
- (r300_translate_wrap(state->wrap_s) << R300_TX_WRAP_S_SHIFT) |
- (r300_translate_wrap(state->wrap_t) << R300_TX_WRAP_T_SHIFT) |
- (r300_translate_wrap(state->wrap_r) << R300_TX_WRAP_R_SHIFT);
+ (r300_translate_wrap(sampler->state.wrap_s) << R300_TX_WRAP_S_SHIFT) |
+ (r300_translate_wrap(sampler->state.wrap_t) << R300_TX_WRAP_T_SHIFT) |
+ (r300_translate_wrap(sampler->state.wrap_r) << R300_TX_WRAP_R_SHIFT);
sampler->filter0 |= r300_translate_tex_filters(state->min_img_filter,
state->mag_img_filter,
@@ -943,12 +1196,12 @@ static void*
/* Unfortunately, r300-r500 don't support floating-point mipmap lods. */
/* We must pass these to the merge function to clamp them properly. */
- sampler->min_lod = MAX2((unsigned)state->min_lod, 0);
- sampler->max_lod = MAX2((unsigned)ceilf(state->max_lod), 0);
+ sampler->min_lod = (unsigned)MAX2(state->min_lod, 0);
+ sampler->max_lod = (unsigned)MAX2(ceilf(state->max_lod), 0);
- lod_bias = CLAMP((int)(state->lod_bias * 32), -(1 << 9), (1 << 9) - 1);
+ lod_bias = CLAMP((int)(state->lod_bias * 32 + 1), -(1 << 9), (1 << 9) - 1);
- sampler->filter1 |= lod_bias << R300_LOD_BIAS_SHIFT;
+ sampler->filter1 |= (lod_bias << R300_LOD_BIAS_SHIFT) & R300_LOD_BIAS_MASK;
/* This is very high quality anisotropic filtering for R5xx.
* It's good for benchmarking the performance of texturing but
@@ -958,9 +1211,6 @@ static void*
sampler->filter1 |= r500_anisotropy(state->max_anisotropy);
}
- util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
- sampler->border_color = uc.ui;
-
/* R500-specific fixups and optimizations */
if (r300->screen->caps.is_r500) {
sampler->filter1 |= R500_BORDER_FIX;
@@ -999,6 +1249,31 @@ static void r300_delete_sampler_state(struct pipe_context* pipe, void* state)
FREE(state);
}
+static uint32_t r300_assign_texture_cache_region(unsigned index, unsigned num)
+{
+ /* This looks like a hack, but I believe it's suppose to work like
+ * that. To illustrate how this works, let's assume you have 5 textures.
+ * From docs, 5 and the successive numbers are:
+ *
+ * FOURTH_1 = 5
+ * FOURTH_2 = 6
+ * FOURTH_3 = 7
+ * EIGHTH_0 = 8
+ * EIGHTH_1 = 9
+ *
+ * First 3 textures will get 3/4 of size of the cache, divived evenly
+ * between them. The last 1/4 of the cache must be divided between
+ * the last 2 textures, each will therefore get 1/8 of the cache.
+ * Why not just to use "5 + texture_index" ?
+ *
+ * This simple trick works for all "num" <= 16.
+ */
+ if (num <= 1)
+ return R300_TX_CACHE(R300_TX_CACHE_WHOLE);
+ else
+ return R300_TX_CACHE(num + index);
+}
+
static void r300_set_fragment_sampler_views(struct pipe_context* pipe,
unsigned count,
struct pipe_sampler_view** views)
@@ -1007,7 +1282,7 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe,
struct r300_textures_state* state =
(struct r300_textures_state*)r300->textures_state.state;
struct r300_texture *texture;
- unsigned i;
+ unsigned i, real_num_views = 0, view_index = 0;
unsigned tex_units = r300->screen->caps.num_tex_units;
boolean dirty_tex = FALSE;
@@ -1015,6 +1290,12 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe,
return;
}
+ /* Calculate the real number of views. */
+ for (i = 0; i < count; i++) {
+ if (views[i])
+ real_num_views++;
+ }
+
for (i = 0; i < count; i++) {
if (&state->sampler_views[i]->base != views[i]) {
pipe_sampler_view_reference(
@@ -1031,9 +1312,13 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe,
/* Set the texrect factor in the fragment shader.
* Needed for RECT and NPOT fallback. */
texture = r300_texture(views[i]->texture);
- if (texture->uses_pitch) {
+ if (texture->desc.is_npot) {
r300->fs_rc_constant_state.dirty = TRUE;
}
+
+ state->sampler_views[i]->texcache_region =
+ r300_assign_texture_cache_region(view_index, real_num_views);
+ view_index++;
}
}
@@ -1061,7 +1346,7 @@ r300_create_sampler_view(struct pipe_context *pipe,
{
struct r300_sampler_view *view = CALLOC_STRUCT(r300_sampler_view);
struct r300_texture *tex = r300_texture(texture);
- unsigned char swizzle[4];
+ boolean is_r500 = r300_screen(pipe->screen)->caps.is_r500;
if (view) {
view->base = *templ;
@@ -1070,17 +1355,16 @@ r300_create_sampler_view(struct pipe_context *pipe,
view->base.texture = NULL;
pipe_resource_reference(&view->base.texture, texture);
- swizzle[0] = templ->swizzle_r;
- swizzle[1] = templ->swizzle_g;
- swizzle[2] = templ->swizzle_b;
- swizzle[3] = templ->swizzle_a;
+ view->swizzle[0] = templ->swizzle_r;
+ view->swizzle[1] = templ->swizzle_g;
+ view->swizzle[2] = templ->swizzle_b;
+ view->swizzle[3] = templ->swizzle_a;
- /* XXX Enable swizzles when they become supported. Now we get RGBA
- * everywhere. And do testing! */
view->format = tex->tx_format;
view->format.format1 |= r300_translate_texformat(templ->format,
- 0); /*swizzle);*/
- if (r300_screen(pipe->screen)->caps.is_r500) {
+ view->swizzle,
+ is_r500);
+ if (is_r500) {
view->format.format2 |= r500_tx_format_msb_bit(templ->format);
}
}
@@ -1116,6 +1400,12 @@ static void r300_set_viewport_state(struct pipe_context* pipe,
r300->viewport = *state;
+ if (r300->draw) {
+ draw_set_viewport_state(r300->draw, state);
+ viewport->vte_control = R300_VTX_XY_FMT | R300_VTX_Z_FMT;
+ return;
+ }
+
/* Do the transform in HW. */
viewport->vte_control = R300_VTX_W0_FMT;
@@ -1165,48 +1455,60 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe,
return;
}
- /* Check if the stride is aligned to the size of DWORD. */
- for (i = 0; i < count; i++) {
- if (buffers[i].buffer) {
- if (buffers[i].stride % 4 != 0) {
- // XXX Shouldn't we align the buffer?
- fprintf(stderr, "r300: set_vertex_buffers: "
- "Unaligned buffer stride %i isn't supported.\n",
- buffers[i].stride);
- abort();
+ if (r300->screen->caps.has_tcl) {
+ /* HW TCL. */
+ r300->incompatible_vb_layout = FALSE;
+
+ /* Check if the strides and offsets are aligned to the size of DWORD. */
+ for (i = 0; i < count; i++) {
+ if (buffers[i].buffer) {
+ if (buffers[i].stride % 4 != 0 ||
+ buffers[i].buffer_offset % 4 != 0) {
+ r300->incompatible_vb_layout = TRUE;
+ break;
+ }
}
}
- }
- for (i = 0; i < count; i++) {
- /* Why, yes, I AM casting away constness. How did you know? */
- vbo = (struct pipe_vertex_buffer*)&buffers[i];
+ for (i = 0; i < count; i++) {
+ /* Why, yes, I AM casting away constness. How did you know? */
+ vbo = (struct pipe_vertex_buffer*)&buffers[i];
- /* Reference our buffer. */
- pipe_resource_reference(&r300->vertex_buffer[i].buffer, vbo->buffer);
+ /* Skip NULL buffers */
+ if (!buffers[i].buffer) {
+ continue;
+ }
- /* Skip NULL buffers */
- if (!buffers[i].buffer) {
- continue;
- }
+ if (r300_buffer_is_user_buffer(vbo->buffer)) {
+ any_user_buffer = TRUE;
+ }
- if (r300_buffer_is_user_buffer(vbo->buffer)) {
- any_user_buffer = TRUE;
- }
+ if (vbo->max_index == ~0) {
+ /* if no VBO stride then only one vertex value so max index is 1 */
+ /* should think about converting to VS constants like svga does */
+ if (!vbo->stride)
+ vbo->max_index = 1;
+ else
+ vbo->max_index =
+ (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride;
+ }
- if (vbo->max_index == ~0) {
- /* if no VBO stride then only one vertex value so max index is 1 */
- /* should think about converting to VS constants like svga does */
- if (!vbo->stride)
- vbo->max_index = 1;
- else
- vbo->max_index =
- (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride;
+ max_index = MIN2(vbo->max_index, max_index);
}
- max_index = MIN2(vbo->max_index, max_index);
+ r300->any_user_vbs = any_user_buffer;
+ r300->vertex_buffer_max_index = max_index;
+
+ } else {
+ /* SW TCL. */
+ draw_set_vertex_buffers(r300->draw, count, buffers);
}
+ /* Common code. */
+ for (i = 0; i < count; i++) {
+ /* Reference our buffer. */
+ pipe_resource_reference(&r300->vertex_buffer[i].buffer, buffers[i].buffer);
+ }
for (; i < r300->vertex_buffer_count; i++) {
/* Dereference any old buffers. */
pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL);
@@ -1214,18 +1516,32 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe,
memcpy(r300->vertex_buffer, buffers,
sizeof(struct pipe_vertex_buffer) * count);
-
r300->vertex_buffer_count = count;
- r300->vertex_buffer_max_index = max_index;
- r300->any_user_vbs = any_user_buffer;
+}
- if (r300->draw) {
- draw_flush(r300->draw);
- draw_set_vertex_buffers(r300->draw, count, buffers);
+static void r300_set_index_buffer(struct pipe_context* pipe,
+ const struct pipe_index_buffer *ib)
+{
+ struct r300_context* r300 = r300_context(pipe);
+
+ if (ib) {
+ pipe_resource_reference(&r300->index_buffer.buffer, ib->buffer);
+ memcpy(&r300->index_buffer, ib, sizeof(r300->index_buffer));
+ }
+ else {
+ pipe_resource_reference(&r300->index_buffer.buffer, NULL);
+ memset(&r300->index_buffer, 0, sizeof(r300->index_buffer));
+ }
+
+ if (r300->screen->caps.has_tcl) {
+ /* TODO make this more like a state */
+ }
+ else {
+ draw_set_index_buffer(r300->draw, ib);
}
}
-/* Update the PSC tables. */
+/* Initialize the PSC tables. */
static void r300_vertex_psc(struct r300_vertex_element_state *velems)
{
struct r300_vertex_stream_state *vstream = &velems->vertex_stream;
@@ -1243,10 +1559,17 @@ static void r300_vertex_psc(struct r300_vertex_element_state *velems)
* so PSC should just route stuff based on the vertex elements,
* and not on attrib information. */
for (i = 0; i < velems->count; i++) {
- format = velems->velem[i].src_format;
+ format = velems->hw_format[i];
+
+ type = r300_translate_vertex_data_type(format);
+ if (type == R300_INVALID_FORMAT) {
+ fprintf(stderr, "r300: Bad vertex format %s.\n",
+ util_format_short_name(format));
+ assert(0);
+ abort();
+ }
- type = r300_translate_vertex_data_type(format) |
- (i << R300_DST_VEC_LOC_SHIFT);
+ type |= i << R300_DST_VEC_LOC_SHIFT;
swizzle = r300_translate_vertex_data_swizzle(format);
if (i & 1) {
@@ -1268,12 +1591,15 @@ static void r300_vertex_psc(struct r300_vertex_element_state *velems)
vstream->count = (i >> 1) + 1;
}
+#define FORMAT_REPLACE(what, withwhat) \
+ case PIPE_FORMAT_##what: *format = PIPE_FORMAT_##withwhat; break
+
static void* r300_create_vertex_elements_state(struct pipe_context* pipe,
unsigned count,
const struct pipe_vertex_element* attribs)
{
struct r300_vertex_element_state *velems;
- unsigned i, size;
+ unsigned i;
enum pipe_format *format;
assert(count <= PIPE_MAX_ATTRIBS);
@@ -1283,88 +1609,69 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe,
memcpy(velems->velem, attribs, sizeof(struct pipe_vertex_element) * count);
if (r300_screen(pipe->screen)->caps.has_tcl) {
- r300_vertex_psc(velems);
-
- /* Check if the format is aligned to the size of DWORD.
- * We only care about the blocksizes of the formats since
- * swizzles are already set up. */
+ /* Set the best hw format in case the original format is not
+ * supported by hw. */
for (i = 0; i < count; i++) {
- format = &velems->velem[i].src_format;
+ velems->hw_format[i] = velems->velem[i].src_format;
+ format = &velems->hw_format[i];
- /* Replace some formats with their aligned counterparts,
- * this is OK because we check for aligned strides too. */
+ /* This is basically the list of unsupported formats.
+ * For now we don't care about the alignment, that's going to
+ * be sorted out after the PSC setup. */
switch (*format) {
- /* Align to RGBA8. */
- case PIPE_FORMAT_R8_UNORM:
- case PIPE_FORMAT_R8G8_UNORM:
- case PIPE_FORMAT_R8G8B8_UNORM:
- *format = PIPE_FORMAT_R8G8B8A8_UNORM;
- continue;
- case PIPE_FORMAT_R8_SNORM:
- case PIPE_FORMAT_R8G8_SNORM:
- case PIPE_FORMAT_R8G8B8_SNORM:
- *format = PIPE_FORMAT_R8G8B8A8_SNORM;
- continue;
- case PIPE_FORMAT_R8_USCALED:
- case PIPE_FORMAT_R8G8_USCALED:
- case PIPE_FORMAT_R8G8B8_USCALED:
- *format = PIPE_FORMAT_R8G8B8A8_USCALED;
- continue;
- case PIPE_FORMAT_R8_SSCALED:
- case PIPE_FORMAT_R8G8_SSCALED:
- case PIPE_FORMAT_R8G8B8_SSCALED:
- *format = PIPE_FORMAT_R8G8B8A8_SSCALED;
- continue;
-
- /* Align to RG16. */
- case PIPE_FORMAT_R16_UNORM:
- *format = PIPE_FORMAT_R16G16_UNORM;
- continue;
- case PIPE_FORMAT_R16_SNORM:
- *format = PIPE_FORMAT_R16G16_SNORM;
- continue;
- case PIPE_FORMAT_R16_USCALED:
- *format = PIPE_FORMAT_R16G16_USCALED;
- continue;
- case PIPE_FORMAT_R16_SSCALED:
- *format = PIPE_FORMAT_R16G16_SSCALED;
- continue;
- case PIPE_FORMAT_R16_FLOAT:
- *format = PIPE_FORMAT_R16G16_FLOAT;
- continue;
-
- /* Align to RGBA16. */
- case PIPE_FORMAT_R16G16B16_UNORM:
- *format = PIPE_FORMAT_R16G16B16A16_UNORM;
- continue;
- case PIPE_FORMAT_R16G16B16_SNORM:
- *format = PIPE_FORMAT_R16G16B16A16_SNORM;
- continue;
- case PIPE_FORMAT_R16G16B16_USCALED:
- *format = PIPE_FORMAT_R16G16B16A16_USCALED;
- continue;
- case PIPE_FORMAT_R16G16B16_SSCALED:
- *format = PIPE_FORMAT_R16G16B16A16_SSCALED;
- continue;
- case PIPE_FORMAT_R16G16B16_FLOAT:
- *format = PIPE_FORMAT_R16G16B16A16_FLOAT;
- continue;
+ FORMAT_REPLACE(R64_FLOAT, R32_FLOAT);
+ FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT);
+ FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT);
+ FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT);
+
+ FORMAT_REPLACE(R32_UNORM, R32_FLOAT);
+ FORMAT_REPLACE(R32G32_UNORM, R32G32_FLOAT);
+ FORMAT_REPLACE(R32G32B32_UNORM, R32G32B32_FLOAT);
+ FORMAT_REPLACE(R32G32B32A32_UNORM, R32G32B32A32_FLOAT);
+
+ FORMAT_REPLACE(R32_USCALED, R32_FLOAT);
+ FORMAT_REPLACE(R32G32_USCALED, R32G32_FLOAT);
+ FORMAT_REPLACE(R32G32B32_USCALED, R32G32B32_FLOAT);
+ FORMAT_REPLACE(R32G32B32A32_USCALED,R32G32B32A32_FLOAT);
+
+ FORMAT_REPLACE(R32_SNORM, R32_FLOAT);
+ FORMAT_REPLACE(R32G32_SNORM, R32G32_FLOAT);
+ FORMAT_REPLACE(R32G32B32_SNORM, R32G32B32_FLOAT);
+ FORMAT_REPLACE(R32G32B32A32_SNORM, R32G32B32A32_FLOAT);
+
+ FORMAT_REPLACE(R32_SSCALED, R32_FLOAT);
+ FORMAT_REPLACE(R32G32_SSCALED, R32G32_FLOAT);
+ FORMAT_REPLACE(R32G32B32_SSCALED, R32G32B32_FLOAT);
+ FORMAT_REPLACE(R32G32B32A32_SSCALED,R32G32B32A32_FLOAT);
+
+ FORMAT_REPLACE(R32_FIXED, R32_FLOAT);
+ FORMAT_REPLACE(R32G32_FIXED, R32G32_FLOAT);
+ FORMAT_REPLACE(R32G32B32_FIXED, R32G32B32_FLOAT);
+ FORMAT_REPLACE(R32G32B32A32_FIXED, R32G32B32A32_FLOAT);
default:;
}
- size = util_format_get_blocksize(*format);
-
- if (size % 4 != 0) {
- /* XXX Shouldn't we align the format? */
- fprintf(stderr, "r300_create_vertex_elements_state: "
- "Unaligned format %s:%i isn't supported\n",
- util_format_name(*format), size);
- assert(0);
- abort();
- }
+ velems->incompatible_layout =
+ velems->incompatible_layout ||
+ velems->velem[i].src_format != velems->hw_format[i] ||
+ velems->velem[i].src_offset % 4 != 0;
}
+ /* Now setup PSC.
+ * The unused components will be replaced by (..., 0, 1). */
+ r300_vertex_psc(velems);
+
+ /* Align the formats to the size of DWORD.
+ * We only care about the blocksizes of the formats since
+ * swizzles are already set up.
+ * Also compute the vertex size. */
+ for (i = 0; i < count; i++) {
+ /* This is OK because we check for aligned strides too. */
+ velems->hw_format_size[i] =
+ align(util_format_get_blocksize(velems->hw_format[i]), 4);
+ velems->vertex_size_dwords += velems->hw_format_size[i] / 4;
+ }
}
}
return velems;
@@ -1383,8 +1690,8 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe,
r300->velems = velems;
if (r300->draw) {
- draw_flush(r300->draw);
draw_set_vertex_elements(r300->draw, velems->count, velems->velem);
+ return;
}
UPDATE_STATE(&velems->vertex_stream, r300->vertex_stream_state);
@@ -1400,7 +1707,6 @@ static void* r300_create_vs_state(struct pipe_context* pipe,
const struct pipe_shader_state* shader)
{
struct r300_context* r300 = r300_context(pipe);
-
struct r300_vertex_shader* vs = CALLOC_STRUCT(r300_vertex_shader);
/* Copy state directly into shader. */
@@ -1408,9 +1714,10 @@ static void* r300_create_vs_state(struct pipe_context* pipe,
vs->state.tokens = tgsi_dup_tokens(shader->tokens);
if (r300->screen->caps.has_tcl) {
- r300_translate_vertex_shader(r300, vs, vs->state.tokens);
+ r300_init_vs_outputs(vs);
+ r300_translate_vertex_shader(r300, vs);
} else {
- vs->draw_vs = draw_create_vertex_shader(r300->draw, shader);
+ r300_draw_init_vertex_shader(r300->draw, vs);
}
return vs;
@@ -1430,22 +1737,16 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader)
}
r300->vs_state.state = vs;
- // VS output mapping for HWTCL or stream mapping for SWTCL to the RS block
- if (r300->fs.state) {
- r300_vertex_shader_setup_wpos(r300);
- }
- memcpy(r300->vap_output_state.state, &vs->vap_out,
- sizeof(struct r300_vap_output_state));
- r300->vap_output_state.dirty = TRUE;
-
/* The majority of the RS block bits is dependent on the vertex shader. */
r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */
if (r300->screen->caps.has_tcl) {
+ unsigned fc_op_dwords = r300->screen->caps.is_r500 ? 3 : 2;
r300->vs_state.dirty = TRUE;
r300->vs_state.size =
vs->code.length + 9 +
- (vs->immediates_count ? vs->immediates_count * 4 + 3 : 0);
+ (vs->immediates_count ? vs->immediates_count * 4 + 3 : 0) +
+ (vs->code.num_fc_ops ? vs->code.num_fc_ops * fc_op_dwords + 4 : 0);
if (vs->externals_count) {
r300->vs_constants.dirty = TRUE;
@@ -1454,9 +1755,11 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader)
r300->vs_constants.size = 0;
}
+ ((struct r300_constant_buffer*)r300->vs_constants.state)->remap_table =
+ vs->code.constants_remap_table;
+
r300->pvs_flush.dirty = TRUE;
} else {
- draw_flush(r300->draw);
draw_bind_vertex_shader(r300->draw,
(struct draw_vertex_shader*)vs->draw_vs);
}
@@ -1469,6 +1772,8 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader)
if (r300->screen->caps.has_tcl) {
rc_constants_destroy(&vs->code.constants);
+ if (vs->code.constants_remap_table)
+ FREE(vs->code.constants_remap_table);
} else {
draw_delete_vertex_shader(r300->draw,
(struct draw_vertex_shader*)vs->draw_vs);
@@ -1484,22 +1789,14 @@ static void r300_set_constant_buffer(struct pipe_context *pipe,
{
struct r300_context* r300 = r300_context(pipe);
struct r300_constant_buffer *cbuf;
- struct pipe_transfer *tr;
- void *mapped;
- int max_size = 0;
+ uint32_t *mapped = r300_buffer(buf)->user_buffer;
switch (shader) {
case PIPE_SHADER_VERTEX:
cbuf = (struct r300_constant_buffer*)r300->vs_constants.state;
- max_size = 256;
break;
case PIPE_SHADER_FRAGMENT:
cbuf = (struct r300_constant_buffer*)r300->fs_constants.state;
- if (r300->screen->caps.is_r500) {
- max_size = 256;
- } else {
- max_size = 32;
- }
break;
default:
assert(0);
@@ -1507,26 +1804,16 @@ static void r300_set_constant_buffer(struct pipe_context *pipe,
}
if (buf == NULL || buf->width0 == 0 ||
- (mapped = pipe_buffer_map(pipe, buf, PIPE_TRANSFER_READ, &tr)) == NULL)
- {
- cbuf->count = 0;
+ (mapped = r300_buffer(buf)->constant_buffer) == NULL) {
return;
}
- assert((buf->width0 % 4 * sizeof(float)) == 0);
-
- /* Check the size of the constant buffer. */
- /* XXX Subtract immediates and RC_STATE_* variables. */
- if (buf->width0 > (sizeof(float) * 4 * max_size)) {
- fprintf(stderr, "r300: Max size of the constant buffer is "
- "%i*4 floats.\n", max_size);
- abort();
+ if (shader == PIPE_SHADER_FRAGMENT ||
+ (shader == PIPE_SHADER_VERTEX && r300->screen->caps.has_tcl)) {
+ assert((buf->width0 % (4 * sizeof(float))) == 0);
+ cbuf->ptr = mapped + index*4;
}
- memcpy(cbuf->constants, mapped, buf->width0);
- cbuf->count = buf->width0 / (4 * sizeof(float));
- pipe_buffer_unmap(pipe, buf, tr);
-
if (shader == PIPE_SHADER_VERTEX) {
if (r300->screen->caps.has_tcl) {
if (r300->vs_constants.size) {
@@ -1535,8 +1822,7 @@ static void r300_set_constant_buffer(struct pipe_context *pipe,
r300->pvs_flush.dirty = TRUE;
} else if (r300->draw) {
draw_set_mapped_constant_buffer(r300->draw, PIPE_SHADER_VERTEX,
- 0, cbuf->constants,
- buf->width0);
+ 0, mapped, buf->width0);
}
} else if (shader == PIPE_SHADER_FRAGMENT) {
r300->fs_constants.dirty = TRUE;
@@ -1552,6 +1838,7 @@ void r300_init_state_functions(struct r300_context* r300)
r300->context.set_blend_color = r300_set_blend_color;
r300->context.set_clip_state = r300_set_clip_state;
+ r300->context.set_sample_mask = r300_set_sample_mask;
r300->context.set_constant_buffer = r300_set_constant_buffer;
@@ -1587,6 +1874,7 @@ void r300_init_state_functions(struct r300_context* r300)
r300->context.set_viewport_state = r300_set_viewport_state;
r300->context.set_vertex_buffers = r300_set_vertex_buffers;
+ r300->context.set_index_buffer = r300_set_index_buffer;
r300->context.create_vertex_elements_state = r300_create_vertex_elements_state;
r300->context.bind_vertex_elements_state = r300_bind_vertex_elements_state;
diff --git a/src/gallium/drivers/r300/r300_state.h b/src/gallium/drivers/r300/r300_state.h
deleted file mode 100644
index 1d557506cf3..00000000000
--- a/src/gallium/drivers/r300/r300_state.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright 2008 Marek Olšák <[email protected]>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#ifndef R300_STATE_H
-#define R300_STATE_H
-
-struct r300_context;
-
-void r300_mark_fs_code_dirty(struct r300_context *r300);
-
-#endif /* R300_STATE_H */
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 10ea579be3b..904736ef06d 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -31,9 +31,9 @@
#include "r300_hyperz.h"
#include "r300_screen.h"
#include "r300_shader_semantics.h"
-#include "r300_state.h"
#include "r300_state_derived.h"
#include "r300_state_inlines.h"
+#include "r300_texture.h"
#include "r300_vs.h"
/* r300_state_derived: Various bits of state which are dependent upon
@@ -43,6 +43,12 @@ enum r300_rs_swizzle {
SWIZ_XYZW = 0,
SWIZ_X001,
SWIZ_XY01,
+ SWIZ_0001,
+};
+
+enum r300_rs_col_write_type {
+ WRITE_COLOR = 0,
+ WRITE_FACE
};
static void r300_draw_emit_attrib(struct r300_context* r300,
@@ -88,14 +94,21 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300)
}
}
- /* XXX Back-face colors. */
+ /* Back-face colors. */
+ for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+ if (vs_outputs->bcolor[i] != ATTR_UNUSED) {
+ r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR,
+ vs_outputs->bcolor[i]);
+ }
+ }
/* Texture coordinates. */
/* Only 8 generic vertex attributes can be used. If there are more,
* they won't be rasterized. */
gen_count = 0;
for (i = 0; i < ATTR_GENERIC_COUNT && gen_count < 8; i++) {
- if (vs_outputs->generic[i] != ATTR_UNUSED) {
+ if (vs_outputs->generic[i] != ATTR_UNUSED &&
+ !(r300->sprite_coord_enable & (1 << i))) {
r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
vs_outputs->generic[i]);
gen_count++;
@@ -108,40 +121,54 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300)
vs_outputs->fog);
gen_count++;
}
+
+ /* WPOS. */
+ if (r300_fs(r300)->shader->inputs.wpos != ATTR_UNUSED && gen_count < 8) {
+ DBG(r300, DBG_SWTCL, "draw_emit_attrib: WPOS, index: %i\n",
+ vs_outputs->wpos);
+ r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
+ vs_outputs->wpos);
+ }
}
/* Update the PSC tables for SW TCL, using Draw. */
static void r300_swtcl_vertex_psc(struct r300_context *r300)
{
struct r300_vertex_stream_state *vstream = r300->vertex_stream_state.state;
- struct r300_vertex_shader* vs = r300->vs_state.state;
- struct vertex_info* vinfo = &r300->vertex_info;
+ struct vertex_info *vinfo = &r300->vertex_info;
uint16_t type, swizzle;
enum pipe_format format;
unsigned i, attrib_count;
- int* vs_output_tab = vs->stream_loc_notcl;
+ int* vs_output_tab = r300->stream_loc_notcl;
- /* XXX hax */
memset(vstream, 0, sizeof(struct r300_vertex_stream_state));
/* For each Draw attribute, route it to the fragment shader according
* to the vs_output_tab. */
attrib_count = vinfo->num_attribs;
- DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count);
+ DBG(r300, DBG_SWTCL, "r300: attrib count: %d\n", attrib_count);
for (i = 0; i < attrib_count; i++) {
- DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d,"
- " vs_output_tab %d\n", vinfo->attrib[i].src_index,
- vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit,
- vs_output_tab[i]);
-
- /* Make sure we have a proper destination for our attribute. */
- assert(vs_output_tab[i] != -1);
+ if (vs_output_tab[i] == -1) {
+ assert(0);
+ abort();
+ }
format = draw_translate_vinfo_format(vinfo->attrib[i].emit);
+ DBG(r300, DBG_SWTCL,
+ "r300: swtcl_vertex_psc [%i] <- %s\n",
+ vs_output_tab[i], util_format_short_name(format));
+
/* Obtain the type of data in this attribute. */
- type = r300_translate_vertex_data_type(format) |
- vs_output_tab[i] << R300_DST_VEC_LOC_SHIFT;
+ type = r300_translate_vertex_data_type(format);
+ if (type == R300_INVALID_FORMAT) {
+ fprintf(stderr, "r300: Bad vertex format %s.\n",
+ util_format_short_name(format));
+ assert(0);
+ abort();
+ }
+
+ type |= vs_output_tab[i] << R300_DST_VEC_LOC_SHIFT;
/* Obtain the swizzle for this attribute. Note that the default
* swizzle in the hardware is not XYZW! */
@@ -170,10 +197,10 @@ static void r300_swtcl_vertex_psc(struct r300_context *r300)
}
static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr,
- boolean swizzle_0001)
+ enum r300_rs_swizzle swiz)
{
rs->ip[id] |= R300_RS_COL_PTR(ptr);
- if (swizzle_0001) {
+ if (swiz == SWIZ_0001) {
rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_0001);
} else {
rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
@@ -181,8 +208,10 @@ static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr,
rs->inst[id] |= R300_RS_INST_COL_ID(id);
}
-static void r300_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset)
+static void r300_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset,
+ enum r300_rs_col_write_type type)
{
+ assert(type == WRITE_COLOR);
rs->inst[id] |= R300_RS_INST_COL_CN_WRITE |
R300_RS_INST_COL_ADDR(fp_offset);
}
@@ -191,19 +220,19 @@ static void r300_rs_tex(struct r300_rs_block* rs, int id, int ptr,
enum r300_rs_swizzle swiz)
{
if (swiz == SWIZ_X001) {
- rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) |
+ rs->ip[id] |= R300_RS_TEX_PTR(ptr) |
R300_RS_SEL_S(R300_RS_SEL_C0) |
R300_RS_SEL_T(R300_RS_SEL_K0) |
R300_RS_SEL_R(R300_RS_SEL_K0) |
R300_RS_SEL_Q(R300_RS_SEL_K1);
} else if (swiz == SWIZ_XY01) {
- rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) |
+ rs->ip[id] |= R300_RS_TEX_PTR(ptr) |
R300_RS_SEL_S(R300_RS_SEL_C0) |
R300_RS_SEL_T(R300_RS_SEL_C1) |
R300_RS_SEL_R(R300_RS_SEL_K0) |
R300_RS_SEL_Q(R300_RS_SEL_K1);
} else {
- rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) |
+ rs->ip[id] |= R300_RS_TEX_PTR(ptr) |
R300_RS_SEL_S(R300_RS_SEL_C0) |
R300_RS_SEL_T(R300_RS_SEL_C1) |
R300_RS_SEL_R(R300_RS_SEL_C2) |
@@ -219,10 +248,10 @@ static void r300_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset)
}
static void r500_rs_col(struct r300_rs_block* rs, int id, int ptr,
- boolean swizzle_0001)
+ enum r300_rs_swizzle swiz)
{
rs->ip[id] |= R500_RS_COL_PTR(ptr);
- if (swizzle_0001) {
+ if (swiz == SWIZ_0001) {
rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001);
} else {
rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
@@ -230,32 +259,36 @@ static void r500_rs_col(struct r300_rs_block* rs, int id, int ptr,
rs->inst[id] |= R500_RS_INST_COL_ID(id);
}
-static void r500_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset)
+static void r500_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset,
+ enum r300_rs_col_write_type type)
{
- rs->inst[id] |= R500_RS_INST_COL_CN_WRITE |
- R500_RS_INST_COL_ADDR(fp_offset);
+ if (type == WRITE_FACE)
+ rs->inst[id] |= R500_RS_INST_COL_CN_WRITE_BACKFACE |
+ R500_RS_INST_COL_ADDR(fp_offset);
+ else
+ rs->inst[id] |= R500_RS_INST_COL_CN_WRITE |
+ R500_RS_INST_COL_ADDR(fp_offset);
+
}
static void r500_rs_tex(struct r300_rs_block* rs, int id, int ptr,
enum r300_rs_swizzle swiz)
{
- int rs_tex_comp = ptr*4;
-
if (swiz == SWIZ_X001) {
- rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) |
+ rs->ip[id] |= R500_RS_SEL_S(ptr) |
R500_RS_SEL_T(R500_RS_IP_PTR_K0) |
R500_RS_SEL_R(R500_RS_IP_PTR_K0) |
R500_RS_SEL_Q(R500_RS_IP_PTR_K1);
} else if (swiz == SWIZ_XY01) {
- rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) |
- R500_RS_SEL_T(rs_tex_comp + 1) |
+ rs->ip[id] |= R500_RS_SEL_S(ptr) |
+ R500_RS_SEL_T(ptr + 1) |
R500_RS_SEL_R(R500_RS_IP_PTR_K0) |
R500_RS_SEL_Q(R500_RS_IP_PTR_K1);
} else {
- rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) |
- R500_RS_SEL_T(rs_tex_comp + 1) |
- R500_RS_SEL_R(rs_tex_comp + 2) |
- R500_RS_SEL_Q(rs_tex_comp + 3);
+ rs->ip[id] |= R500_RS_SEL_S(ptr) |
+ R500_RS_SEL_T(ptr + 1) |
+ R500_RS_SEL_R(ptr + 2) |
+ R500_RS_SEL_Q(ptr + 3);
}
rs->inst[id] |= R500_RS_INST_TEX_ID(id);
}
@@ -268,21 +301,30 @@ static void r500_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset)
/* Set up the RS block.
*
- * This is the part of the chipset that actually does the rasterization
- * of vertices into fragments. This is also the part of the chipset that
- * locks up if any part of it is even slightly wrong. */
-static void r300_update_rs_block(struct r300_context* r300,
- struct r300_shader_semantics* vs_outputs,
- struct r300_shader_semantics* fs_inputs)
+ * This is the part of the chipset that is responsible for linking vertex
+ * and fragment shaders and stuffed texture coordinates.
+ *
+ * The rasterizer reads data from VAP, which produces vertex shader outputs,
+ * and GA, which produces stuffed texture coordinates. VAP outputs have
+ * precedence over GA. All outputs must be rasterized otherwise it locks up.
+ * If there are more outputs rasterized than is set in VAP/GA, it locks up
+ * too. The funky part is that this info has been pretty much obtained by trial
+ * and error. */
+static void r300_update_rs_block(struct r300_context *r300)
{
- struct r300_rs_block rs = { { 0 } };
- int i, col_count = 0, tex_count = 0, fp_offset = 0, count;
- void (*rX00_rs_col)(struct r300_rs_block*, int, int, boolean);
- void (*rX00_rs_col_write)(struct r300_rs_block*, int, int);
+ struct r300_vertex_shader *vs = r300->vs_state.state;
+ struct r300_shader_semantics *vs_outputs = &vs->outputs;
+ struct r300_shader_semantics *fs_inputs = &r300_fs(r300)->shader->inputs;
+ struct r300_rs_block rs = {0};
+ int i, col_count = 0, tex_count = 0, fp_offset = 0, count, loc = 0, tex_ptr = 0;
+ void (*rX00_rs_col)(struct r300_rs_block*, int, int, enum r300_rs_swizzle);
+ void (*rX00_rs_col_write)(struct r300_rs_block*, int, int, enum r300_rs_col_write_type);
void (*rX00_rs_tex)(struct r300_rs_block*, int, int, enum r300_rs_swizzle);
void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int);
boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED ||
vs_outputs->bcolor[1] != ATTR_UNUSED;
+ int *stream_loc_notcl = r300->stream_loc_notcl;
+ uint32_t stuffing_enable = 0;
if (r300->screen->caps.is_r500) {
rX00_rs_col = r500_rs_col;
@@ -296,18 +338,44 @@ static void r300_update_rs_block(struct r300_context* r300,
rX00_rs_tex_write = r300_rs_tex_write;
}
- /* Rasterize colors. */
+ /* 0x5555 copied from classic, which means:
+ * Select user color 0 for COLOR0 up to COLOR7.
+ * What the hell does that mean? */
+ rs.vap_vtx_state_cntl = 0x5555;
+
+ /* The position is always present in VAP. */
+ rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_POS;
+ rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
+ stream_loc_notcl[loc++] = 0;
+
+ /* Set up the point size in VAP. */
+ if (vs_outputs->psize != ATTR_UNUSED) {
+ rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
+ stream_loc_notcl[loc++] = 1;
+ }
+
+ /* Set up and rasterize colors. */
for (i = 0; i < ATTR_COLOR_COUNT; i++) {
if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used ||
vs_outputs->color[1] != ATTR_UNUSED) {
- /* Always rasterize if it's written by the VS,
- * otherwise it locks up. */
- rX00_rs_col(&rs, col_count, i, FALSE);
+ /* Set up the color in VAP. */
+ rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR;
+ rs.vap_out_vtx_fmt[0] |=
+ R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i;
+ stream_loc_notcl[loc++] = 2 + i;
+
+ /* Rasterize it. */
+ rX00_rs_col(&rs, col_count, col_count, SWIZ_XYZW);
- /* Write it to the FS input register if it's used by the FS. */
+ /* Write it to the FS input register if it's needed by the FS. */
if (fs_inputs->color[i] != ATTR_UNUSED) {
- rX00_rs_col_write(&rs, col_count, fp_offset);
+ rX00_rs_col_write(&rs, col_count, fp_offset, WRITE_COLOR);
fp_offset++;
+
+ DBG(r300, DBG_RS,
+ "r300: Rasterized color %i written to FS.\n", i);
+ } else {
+ DBG(r300, DBG_RS, "r300: Rasterized color %i unused.\n", i);
}
col_count++;
} else {
@@ -315,92 +383,254 @@ static void r300_update_rs_block(struct r300_context* r300,
/* If we try to set it to (0,0,0,1), it will lock up. */
if (fs_inputs->color[i] != ATTR_UNUSED) {
fp_offset++;
+
+ DBG(r300, DBG_RS, "r300: FS input color %i unassigned%s.\n",
+ i);
+ }
+ }
+ }
+
+ /* Set up back-face colors. The rasterizer will do the color selection
+ * automatically. */
+ if (any_bcolor_used) {
+ if (r300->two_sided_color) {
+ /* Rasterize as back-face colors. */
+ for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+ rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR;
+ rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << (2+i);
+ stream_loc_notcl[loc++] = 4 + i;
+ }
+ } else {
+ /* Rasterize two fake texcoords to prevent from the two-sided color
+ * selection. */
+ /* XXX Consider recompiling the vertex shader to save 2 RS units. */
+ for (i = 0; i < 2; i++) {
+ rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count);
+ rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count));
+ stream_loc_notcl[loc++] = 6 + tex_count;
+
+ /* Rasterize it. */
+ rX00_rs_tex(&rs, tex_count, tex_ptr, SWIZ_XYZW);
+ tex_count++;
+ tex_ptr += 4;
}
}
}
+ /* gl_FrontFacing.
+ * Note that we can use either the two-sided color selection based on
+ * the front and back vertex shader colors, or gl_FrontFacing,
+ * but not both! It locks up otherwise.
+ *
+ * In Direct3D 9, the two-sided color selection can be used
+ * with shaders 2.0 only, while gl_FrontFacing can be used
+ * with shaders 3.0 only. The hardware apparently hasn't been designed
+ * to support both at the same time. */
+ if (r300->screen->caps.is_r500 && fs_inputs->face != ATTR_UNUSED &&
+ !(any_bcolor_used && r300->two_sided_color)) {
+ rX00_rs_col(&rs, col_count, col_count, SWIZ_XYZW);
+ rX00_rs_col_write(&rs, col_count, fp_offset, WRITE_FACE);
+ fp_offset++;
+ col_count++;
+ DBG(r300, DBG_RS, "r300: Rasterized FACE written to FS.\n");
+ }
+
/* Rasterize texture coordinates. */
- for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
- bool sprite_coord = !!(r300->sprite_coord_enable & (1 << i));
+ for (i = 0; i < ATTR_GENERIC_COUNT && tex_count < 8; i++) {
+ bool sprite_coord = false;
+
+ if (fs_inputs->generic[i] != ATTR_UNUSED) {
+ sprite_coord = !!(r300->sprite_coord_enable & (1 << i));
+ }
if (vs_outputs->generic[i] != ATTR_UNUSED || sprite_coord) {
- /* Always rasterize if it's written by the VS,
- * otherwise it locks up. */
- rX00_rs_tex(&rs, tex_count, tex_count,
+ if (!sprite_coord) {
+ /* Set up the texture coordinates in VAP. */
+ rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count);
+ rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count));
+ stream_loc_notcl[loc++] = 6 + tex_count;
+ } else
+ stuffing_enable |=
+ R300_GB_TEX_ST << (R300_GB_TEX0_SOURCE_SHIFT + (tex_count*2));
+
+ /* Rasterize it. */
+ rX00_rs_tex(&rs, tex_count, tex_ptr,
sprite_coord ? SWIZ_XY01 : SWIZ_XYZW);
- /* Write it to the FS input register if it's used by the FS. */
+ /* Write it to the FS input register if it's needed by the FS. */
if (fs_inputs->generic[i] != ATTR_UNUSED) {
rX00_rs_tex_write(&rs, tex_count, fp_offset);
- if (sprite_coord)
- debug_printf("r300: SpriteCoord (generic index %i) is being written to reg %i\n", i, fp_offset);
fp_offset++;
+
+ DBG(r300, DBG_RS,
+ "r300: Rasterized generic %i written to FS%s in texcoord %d.\n",
+ i, sprite_coord ? " (sprite coord)" : "", tex_count);
+ } else {
+ DBG(r300, DBG_RS,
+ "r300: Rasterized generic %i unused%s.\n",
+ i, sprite_coord ? " (sprite coord)" : "");
}
tex_count++;
+ tex_ptr += sprite_coord ? 2 : 4;
} else {
/* Skip the FS input register, leave it uninitialized. */
/* If we try to set it to (0,0,0,1), it will lock up. */
if (fs_inputs->generic[i] != ATTR_UNUSED) {
fp_offset++;
+
+ DBG(r300, DBG_RS, "r300: FS input generic %i unassigned%s.\n",
+ i, sprite_coord ? " (sprite coord)" : "");
+ }
+ }
+ }
+
+ if (DBG_ON(r300, DBG_RS)) {
+ for (; i < ATTR_GENERIC_COUNT; i++) {
+ if (fs_inputs->generic[i] != ATTR_UNUSED) {
+ DBG(r300, DBG_RS,
+ "r300: FS input generic %i unassigned.\n", i);
}
}
}
/* Rasterize fog coordinates. */
- if (vs_outputs->fog != ATTR_UNUSED) {
- /* Always rasterize if it's written by the VS,
- * otherwise it locks up. */
- rX00_rs_tex(&rs, tex_count, tex_count, SWIZ_X001);
+ if (vs_outputs->fog != ATTR_UNUSED && tex_count < 8) {
+ /* Set up the fog coordinates in VAP. */
+ rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count);
+ rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count));
+ stream_loc_notcl[loc++] = 6 + tex_count;
- /* Write it to the FS input register if it's used by the FS. */
+ /* Rasterize it. */
+ rX00_rs_tex(&rs, tex_count, tex_ptr, SWIZ_X001);
+
+ /* Write it to the FS input register if it's needed by the FS. */
if (fs_inputs->fog != ATTR_UNUSED) {
rX00_rs_tex_write(&rs, tex_count, fp_offset);
fp_offset++;
+
+ DBG(r300, DBG_RS, "r300: Rasterized fog written to FS.\n");
+ } else {
+ DBG(r300, DBG_RS, "r300: Rasterized fog unused.\n");
}
tex_count++;
+ tex_ptr += 4;
} else {
/* Skip the FS input register, leave it uninitialized. */
/* If we try to set it to (0,0,0,1), it will lock up. */
if (fs_inputs->fog != ATTR_UNUSED) {
fp_offset++;
+
+ DBG(r300, DBG_RS, "r300: FS input fog unassigned.\n");
}
}
/* Rasterize WPOS. */
- /* If the FS doesn't need it, it's not written by the VS. */
- if (vs_outputs->wpos != ATTR_UNUSED && fs_inputs->wpos != ATTR_UNUSED) {
- rX00_rs_tex(&rs, tex_count, tex_count, SWIZ_XYZW);
+ /* Don't set it in VAP if the FS doesn't need it. */
+ if (fs_inputs->wpos != ATTR_UNUSED && tex_count < 8) {
+ /* Set up the WPOS coordinates in VAP. */
+ rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count);
+ rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count));
+ stream_loc_notcl[loc++] = 6 + tex_count;
+
+ /* Rasterize it. */
+ rX00_rs_tex(&rs, tex_count, tex_ptr, SWIZ_XYZW);
+
+ /* Write it to the FS input register. */
rX00_rs_tex_write(&rs, tex_count, fp_offset);
+ DBG(r300, DBG_RS, "r300: Rasterized WPOS written to FS.\n");
+
fp_offset++;
tex_count++;
+ tex_ptr += 4;
+ }
+
+ /* Invalidate the rest of the no-TCL (GA) stream locations. */
+ for (; loc < 16;) {
+ stream_loc_notcl[loc++] = -1;
}
/* Rasterize at least one color, or bad things happen. */
if (col_count == 0 && tex_count == 0) {
- rX00_rs_col(&rs, 0, 0, TRUE);
+ rX00_rs_col(&rs, 0, 0, SWIZ_0001);
col_count++;
+
+ DBG(r300, DBG_RS, "r300: Rasterized color 0 to prevent lockups.\n");
}
- rs.count = (tex_count*4) | (col_count << R300_IC_COUNT_SHIFT) |
+ DBG(r300, DBG_RS, "r300: --- Rasterizer status ---: colors: %i, "
+ "generics: %i.\n", col_count, tex_count);
+
+ rs.count = MIN2(tex_ptr, 32) | (col_count << R300_IC_COUNT_SHIFT) |
R300_HIRES_EN;
count = MAX3(col_count, tex_count, 1);
rs.inst_count = count - 1;
+ /* set the GB enable flags */
+ if (r300->sprite_coord_enable)
+ stuffing_enable |= R300_GB_POINT_STUFF_ENABLE;
+
+ rs.gb_enable = stuffing_enable;
+
/* Now, after all that, see if we actually need to update the state. */
if (memcmp(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block))) {
memcpy(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block));
- r300->rs_block_state.size = 5 + count*2;
+ r300->rs_block_state.size = 13 + count*2;
}
}
-/* Update the shader-dependant states. */
-static void r300_update_derived_shader_state(struct r300_context* r300)
+static uint32_t r300_get_border_color(enum pipe_format format,
+ const float border[4])
{
- struct r300_vertex_shader* vs = r300->vs_state.state;
+ const struct util_format_description *desc;
+ float border_swizzled[4] = {
+ border[2],
+ border[1],
+ border[0],
+ border[3]
+ };
+ uint32_t r;
+
+ desc = util_format_description(format);
+
+ /* We don't use util_pack_format because it does not handle the formats
+ * we want, e.g. R4G4B4A4 is non-existent in Gallium. */
+ switch (desc->channel[0].size) {
+ case 4:
+ r = ((float_to_ubyte(border_swizzled[0]) & 0xf0) >> 4) |
+ ((float_to_ubyte(border_swizzled[1]) & 0xf0) << 0) |
+ ((float_to_ubyte(border_swizzled[2]) & 0xf0) << 4) |
+ ((float_to_ubyte(border_swizzled[3]) & 0xf0) << 8);
+ break;
+
+ case 5:
+ if (desc->channel[1].size == 5) {
+ r = ((float_to_ubyte(border_swizzled[0]) & 0xf8) >> 3) |
+ ((float_to_ubyte(border_swizzled[1]) & 0xf8) << 2) |
+ ((float_to_ubyte(border_swizzled[2]) & 0xf8) << 7) |
+ ((float_to_ubyte(border_swizzled[3]) & 0x80) << 8);
+ } else if (desc->channel[1].size == 6) {
+ r = ((float_to_ubyte(border_swizzled[0]) & 0xf8) >> 3) |
+ ((float_to_ubyte(border_swizzled[1]) & 0xfc) << 3) |
+ ((float_to_ubyte(border_swizzled[2]) & 0xf8) << 8);
+ } else {
+ assert(0);
+ r = 0;
+ }
+ break;
+
+ default:
+ /* I think the fat formats (16, 32) are specified
+ * as the 8-bit ones. I am not sure how compressed formats
+ * work here. */
+ r = ((float_to_ubyte(border_swizzled[0]) & 0xff) << 0) |
+ ((float_to_ubyte(border_swizzled[1]) & 0xff) << 8) |
+ ((float_to_ubyte(border_swizzled[2]) & 0xff) << 16) |
+ ((float_to_ubyte(border_swizzled[3]) & 0xff) << 24);
+ }
- r300_update_rs_block(r300, &vs->outputs, &r300_fs(r300)->shader->inputs);
+ return r;
}
static void r300_merge_textures_and_samplers(struct r300_context* r300)
@@ -411,10 +641,14 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
struct r300_sampler_state *sampler;
struct r300_sampler_view *view;
struct r300_texture *tex;
- unsigned min_level, max_level, i, size;
+ unsigned min_level, max_level, i, j, size;
unsigned count = MIN2(state->sampler_view_count,
state->sampler_state_count);
+ /* The KIL opcode fix, see below. */
+ if (!count && !r300->screen->caps.is_r500)
+ count = 1;
+
state->tx_enable = 0;
state->count = 0;
size = 2;
@@ -431,15 +665,70 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
texstate->format = view->format;
texstate->filter0 = sampler->filter0;
texstate->filter1 = sampler->filter1;
- texstate->border_color = sampler->border_color;
+
+ /* Set the border color. */
+ texstate->border_color =
+ r300_get_border_color(view->base.format,
+ sampler->state.border_color);
+
+ /* determine min/max levels */
+ max_level = MIN3(sampler->max_lod + view->base.first_level,
+ tex->desc.b.b.last_level, view->base.last_level);
+ min_level = MIN2(sampler->min_lod + view->base.first_level,
+ max_level);
+
+ if (tex->desc.is_npot && min_level > 0) {
+ /* Even though we do not implement mipmapping for NPOT
+ * textures, we should at least honor the minimum level
+ * which is allowed to be displayed. We do this by setting up
+ * an i-th mipmap level as the zero level. */
+ r300_texture_setup_format_state(r300->screen, &tex->desc,
+ min_level,
+ &texstate->format);
+ texstate->format.tile_config |=
+ tex->desc.offset_in_bytes[min_level] & 0xffffffe0;
+ assert((tex->desc.offset_in_bytes[min_level] & 0x1f) == 0);
+ }
+
+ /* Assign a texture cache region. */
+ texstate->format.format1 |= view->texcache_region;
+
+ /* Depth textures are kinda special. */
+ if (util_format_is_depth_or_stencil(tex->desc.b.b.format)) {
+ unsigned char depth_swizzle[4];
+
+ if (!r300->screen->caps.is_r500 &&
+ util_format_get_blocksizebits(tex->desc.b.b.format) == 32) {
+ /* X24x8 is sampled as Y16X16 on r3xx-r4xx.
+ * The depth here is at the Y component. */
+ for (j = 0; j < 4; j++)
+ depth_swizzle[j] = UTIL_FORMAT_SWIZZLE_Y;
+ } else {
+ for (j = 0; j < 4; j++)
+ depth_swizzle[j] = UTIL_FORMAT_SWIZZLE_X;
+ }
+
+ /* If compare mode is disabled, sampler view swizzles
+ * are stored in the format.
+ * Otherwise, the swizzles must be applied after the compare
+ * mode in the fragment shader. */
+ if (sampler->state.compare_mode == PIPE_TEX_COMPARE_NONE) {
+ texstate->format.format1 |=
+ r300_get_swizzle_combined(depth_swizzle,
+ view->swizzle);
+ } else {
+ texstate->format.format1 |=
+ r300_get_swizzle_combined(depth_swizzle, 0);
+ }
+ }
/* to emulate 1D textures through 2D ones correctly */
- if (tex->b.b.target == PIPE_TEXTURE_1D) {
+ if (tex->desc.b.b.target == PIPE_TEXTURE_1D) {
texstate->filter0 &= ~R300_TX_WRAP_T_MASK;
texstate->filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE);
}
- if (tex->uses_pitch) {
+ if (tex->desc.is_npot) {
/* NPOT textures don't support mip filter, unfortunately.
* This prevents incorrect rendering. */
texstate->filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK;
@@ -463,12 +752,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
texstate->filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE);
}
} else {
- /* determine min/max levels */
/* the MAX_MIP level is the largest (finest) one */
- max_level = MIN3(sampler->max_lod + view->base.first_level,
- tex->b.b.last_level, view->base.last_level);
- min_level = MIN2(sampler->min_lod + view->base.first_level,
- max_level);
texstate->format.format0 |= R300_TX_NUM_LEVELS(max_level);
texstate->filter0 |= R300_TX_MAX_MIP_LEVEL(min_level);
}
@@ -477,6 +761,36 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
size += 16;
state->count = i+1;
+ } else {
+ /* For the KIL opcode to work on r3xx-r4xx, the texture unit
+ * assigned to this opcode (it's always the first one) must be
+ * enabled. Otherwise the opcode doesn't work.
+ *
+ * In order to not depend on the fragment shader, we just make
+ * the first unit enabled all the time. */
+ if (i == 0 && !r300->screen->caps.is_r500) {
+ pipe_sampler_view_reference(
+ (struct pipe_sampler_view**)&state->sampler_views[i],
+ &r300->texkill_sampler->base);
+
+ state->tx_enable |= 1 << i;
+
+ texstate = &state->regs[i];
+
+ /* Just set some valid state. */
+ texstate->format = r300->texkill_sampler->format;
+ texstate->filter0 =
+ r300_translate_tex_filters(PIPE_TEX_FILTER_NEAREST,
+ PIPE_TEX_FILTER_NEAREST,
+ PIPE_TEX_FILTER_NEAREST,
+ FALSE);
+ texstate->filter1 = 0;
+ texstate->border_color = 0;
+
+ texstate->filter0 |= i << 28;
+ size += 16;
+ state->count = i+1;
+ }
}
}
@@ -491,21 +805,57 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
}
}
+/* We can't use compressed zbuffers as samplers. */
+static void r300_flush_depth_textures(struct r300_context *r300)
+{
+ struct r300_textures_state *state =
+ (struct r300_textures_state*)r300->textures_state.state;
+ unsigned i, level;
+ unsigned count = MIN2(state->sampler_view_count,
+ state->sampler_state_count);
+
+ if (r300->z_decomp_rd)
+ return;
+
+ for (i = 0; i < count; i++)
+ if (state->sampler_views[i] && state->sampler_states[i]) {
+ struct pipe_resource *tex = state->sampler_views[i]->base.texture;
+
+ if (tex->target == PIPE_TEXTURE_3D ||
+ tex->target == PIPE_TEXTURE_CUBE)
+ continue;
+
+ /* Ignore non-depth textures.
+ * Also ignore reinterpreted depth textures, e.g. resource_copy. */
+ if (!util_format_is_depth_or_stencil(tex->format))
+ continue;
+
+ for (level = 0; level <= tex->last_level; level++)
+ if (r300_texture(tex)->zmask_in_use[level]) {
+ /* We don't handle 3D textures and cubemaps yet. */
+ r300_flush_depth_stencil(&r300->context, tex,
+ u_subresource(0, level), 0);
+ }
+ }
+}
+
void r300_update_derived_state(struct r300_context* r300)
{
+ r300_flush_depth_textures(r300);
+
if (r300->textures_state.dirty) {
r300_merge_textures_and_samplers(r300);
}
if (r300->rs_block_state.dirty) {
- r300_update_derived_shader_state(r300);
- }
+ r300_update_rs_block(r300);
- if (r300->draw) {
- memset(&r300->vertex_info, 0, sizeof(struct vertex_info));
- r300_draw_emit_all_attribs(r300);
- draw_compute_vertex_size(&r300->vertex_info);
- r300_swtcl_vertex_psc(r300);
+ if (r300->draw) {
+ memset(&r300->vertex_info, 0, sizeof(struct vertex_info));
+ r300_draw_emit_all_attribs(r300);
+ draw_compute_vertex_size(&r300->vertex_info);
+ r300_swtcl_vertex_psc(r300);
+ }
}
r300_update_hyperz_state(r300);
diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index fcbdb91b67e..7e501221b1f 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -348,41 +348,43 @@ static INLINE uint32_t r300_translate_gb_pipes(int pipe_count)
switch (pipe_count) {
case 1:
return R300_GB_TILE_PIPE_COUNT_RV300;
- break;
case 2:
return R300_GB_TILE_PIPE_COUNT_R300;
- break;
case 3:
return R300_GB_TILE_PIPE_COUNT_R420_3P;
- break;
case 4:
return R300_GB_TILE_PIPE_COUNT_R420;
- break;
}
return 0;
}
+
/* Translate pipe_formats into PSC vertex types. */
static INLINE uint16_t
r300_translate_vertex_data_type(enum pipe_format format) {
uint32_t result = 0;
const struct util_format_description *desc;
+ unsigned i;
desc = util_format_description(format);
if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
- fprintf(stderr, "r300: Bad format %s in %s:%d\n", util_format_name(format),
- __FUNCTION__, __LINE__);
- assert(0);
- abort();
+ return R300_INVALID_FORMAT;
}
- switch (desc->channel[0].type) {
+ /* Find the first non-VOID channel. */
+ for (i = 0; i < 4; i++) {
+ if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
+ break;
+ }
+ }
+
+ switch (desc->channel[i].type) {
/* Half-floats, floats, doubles */
case UTIL_FORMAT_TYPE_FLOAT:
- switch (desc->channel[0].size) {
+ switch (desc->channel[i].size) {
case 16:
- /* XXX Supported only on RV350 and later. */
+ /* Supported only on RV350 and later. */
if (desc->nr_channels > 2) {
result = R300_DATA_TYPE_FLT16_4;
} else {
@@ -393,17 +395,14 @@ r300_translate_vertex_data_type(enum pipe_format format) {
result = R300_DATA_TYPE_FLOAT_1 + (desc->nr_channels - 1);
break;
default:
- fprintf(stderr, "r300: Bad format %s in %s:%d\n",
- util_format_name(format), __FUNCTION__, __LINE__);
- assert(0);
- abort();
+ return R300_INVALID_FORMAT;
}
break;
/* Unsigned ints */
case UTIL_FORMAT_TYPE_UNSIGNED:
/* Signed ints */
case UTIL_FORMAT_TYPE_SIGNED:
- switch (desc->channel[0].size) {
+ switch (desc->channel[i].size) {
case 8:
result = R300_DATA_TYPE_BYTE;
break;
@@ -415,25 +414,17 @@ r300_translate_vertex_data_type(enum pipe_format format) {
}
break;
default:
- fprintf(stderr, "r300: Bad format %s in %s:%d\n",
- util_format_name(format), __FUNCTION__, __LINE__);
- fprintf(stderr, "r300: desc->channel[0].size == %d\n",
- desc->channel[0].size);
- assert(0);
- abort();
+ return R300_INVALID_FORMAT;
}
break;
default:
- fprintf(stderr, "r300: Bad format %s in %s:%d\n",
- util_format_name(format), __FUNCTION__, __LINE__);
- assert(0);
- abort();
+ return R300_INVALID_FORMAT;
}
- if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
+ if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
result |= R300_SIGNED;
}
- if (desc->channel[0].normalized) {
+ if (desc->channel[i].normalized) {
result |= R300_NORMALIZE;
}
@@ -449,7 +440,7 @@ r300_translate_vertex_data_swizzle(enum pipe_format format) {
if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
fprintf(stderr, "r300: Bad format %s in %s:%d\n",
- util_format_name(format), __FUNCTION__, __LINE__);
+ util_format_short_name(format), __FUNCTION__, __LINE__);
return 0;
}
diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c
deleted file mode 100644
index cd9443fa260..00000000000
--- a/src/gallium/drivers/r300/r300_state_invariant.c
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright 2009 Joakim Sindholt <[email protected]>
- * Corbin Simpson <[email protected]>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#include "r300_context.h"
-#include "r300_cs.h"
-#include "r300_reg.h"
-#include "r300_screen.h"
-#include "r300_state_invariant.h"
-
-struct pipe_viewport_state r300_viewport_identity = {
- .scale = {1.0, 1.0, 1.0, 1.0},
- .translate = {0.0, 0.0, 0.0, 0.0},
-};
-
-/* Calculate and emit invariant state. This is data that the 3D engine
- * will probably want at the beginning of every CS, but it's not currently
- * handled by any CSO setup, and in addition it doesn't really change much.
- *
- * Note that eventually this should be empty, but it's useful for development
- * and general unduplication of code. */
-void r300_emit_invariant_state(struct r300_context* r300,
- unsigned size, void* state)
-{
- CS_LOCALS(r300);
-
- if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) {
- /* Subpixel multisampling for AA. */
- BEGIN_CS(4);
- OUT_CS_REG(R300_GB_MSPOS0, 0x66666666);
- OUT_CS_REG(R300_GB_MSPOS1, 0x6666666);
- END_CS;
- }
-
- BEGIN_CS(12 + (r300->screen->caps.has_tcl ? 2 : 0));
-
- /*** Graphics Backend (GB) ***/
- /* Source of fog depth */
- OUT_CS_REG(R300_GB_SELECT, R300_GB_FOG_SELECT_1_1_W);
-
- /*** Fog (FG) ***/
- OUT_CS_REG(R300_FG_FOG_BLEND, 0x0);
- OUT_CS_REG(R300_FG_FOG_COLOR_R, 0x0);
- OUT_CS_REG(R300_FG_FOG_COLOR_G, 0x0);
- OUT_CS_REG(R300_FG_FOG_COLOR_B, 0x0);
-
- /*** VAP ***/
- /* Sign/normalize control */
- OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO);
- /* TCL-only stuff */
- if (r300->screen->caps.has_tcl) {
- /* Amount of time to wait for vertex fetches in PVS */
- OUT_CS_REG(VAP_PVS_VTX_TIMEOUT_REG, 0xffff);
- }
-
- END_CS;
-
- /* XXX unsorted stuff from surface_fill */
- BEGIN_CS(38 + (r300->screen->caps.has_tcl ? 7 : 0) +
- (r300->screen->caps.is_rv350 ? 4 : 0));
-
- if (r300->screen->caps.has_tcl) {
- /*Flushing PVS is required before the VAP_GB registers can be changed*/
- OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
- OUT_CS_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4);
- OUT_CS_32F(1.0);
- OUT_CS_32F(1.0);
- OUT_CS_32F(1.0);
- OUT_CS_32F(1.0);
- }
- /* XXX line tex stuffing */
- OUT_CS_REG_SEQ(R300_GA_LINE_S0, 1);
- OUT_CS_32F(0.0);
- OUT_CS_REG_SEQ(R300_GA_LINE_S1, 1);
- OUT_CS_32F(1.0);
- OUT_CS_REG(R300_GA_TRIANGLE_STIPPLE, 0x5 |
- (0x5 << R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT));
- /* XXX this big chunk should be refactored into rs_state */
- OUT_CS_REG(R300_GA_SOLID_RG, 0x00000000);
- OUT_CS_REG(R300_GA_SOLID_BA, 0x00000000);
- OUT_CS_REG(R300_GA_ROUND_MODE, 0x00000001);
- OUT_CS_REG(R300_GA_OFFSET, 0x00000000);
- OUT_CS_REG(R300_GA_FOG_SCALE, 0x3DBF1412);
- OUT_CS_REG(R300_GA_FOG_OFFSET, 0x00000000);
- OUT_CS_REG(R300_SU_TEX_WRAP, 0x00000000);
- OUT_CS_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF);
- OUT_CS_REG(R300_SU_DEPTH_OFFSET, 0x00000000);
- OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C);
- OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525);
- OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000);
-
- if (r300->screen->caps.is_rv350) {
- OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101);
- OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE);
- }
-
- OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000);
- OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000);
- OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000);
- OUT_CS_REG(R300_ZB_HIZ_PITCH, 0x00000000);
- END_CS;
-}
diff --git a/src/gallium/drivers/r300/r300_state_invariant.h b/src/gallium/drivers/r300/r300_state_invariant.h
deleted file mode 100644
index 83d031c7fe9..00000000000
--- a/src/gallium/drivers/r300/r300_state_invariant.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright 2008 Corbin Simpson <[email protected]>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#ifndef R300_STATE_INVARIANT_H
-#define R300_STATE_INVARIANT_H
-
-struct r300_context;
-
-void r300_emit_invariant_state(struct r300_context* r300,
- unsigned size, void* state);
-
-#endif /* R300_STATE_INVARIANT_H */
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index bffda846d70..cee56bccdcd 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -21,44 +21,76 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
-#include "pipe/p_screen.h"
-
-#include "util/u_format.h"
-#include "util/u_math.h"
-#include "util/u_memory.h"
+/* Always include headers in the reverse order!! ~ M. */
+#include "r300_texture.h"
#include "r300_context.h"
#include "r300_reg.h"
-#include "r300_texture.h"
+#include "r300_texture_desc.h"
#include "r300_transfer.h"
#include "r300_screen.h"
#include "r300_winsys.h"
-/* XXX Enable float textures here. */
-/*#define ENABLE_FLOAT_TEXTURES*/
-
-#define TILE_WIDTH 0
-#define TILE_HEIGHT 1
+#include "util/u_format.h"
+#include "util/u_format_s3tc.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_mm.h"
-static const unsigned microblock_table[5][3][2] = {
- /*linear tiled square-tiled */
- {{32, 1}, {8, 4}, {0, 0}}, /* 8 bits per pixel */
- {{16, 1}, {8, 2}, {4, 4}}, /* 16 bits per pixel */
- {{ 8, 1}, {4, 2}, {0, 0}}, /* 32 bits per pixel */
- {{ 4, 1}, {0, 0}, {2, 2}}, /* 64 bits per pixel */
- {{ 2, 1}, {0, 0}, {0, 0}} /* 128 bits per pixel */
-};
+#include "pipe/p_screen.h"
-/* Return true for non-compressed and non-YUV formats. */
-static boolean r300_format_is_plain(enum pipe_format format)
+unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format,
+ const unsigned char *swizzle_view)
{
- const struct util_format_description *desc = util_format_description(format);
+ unsigned i;
+ unsigned char swizzle[4];
+ unsigned result = 0;
+ const uint32_t swizzle_shift[4] = {
+ R300_TX_FORMAT_R_SHIFT,
+ R300_TX_FORMAT_G_SHIFT,
+ R300_TX_FORMAT_B_SHIFT,
+ R300_TX_FORMAT_A_SHIFT
+ };
+ const uint32_t swizzle_bit[4] = {
+ R300_TX_FORMAT_X,
+ R300_TX_FORMAT_Y,
+ R300_TX_FORMAT_Z,
+ R300_TX_FORMAT_W
+ };
- if (!format) {
- return FALSE;
+ if (swizzle_view) {
+ /* Combine two sets of swizzles. */
+ for (i = 0; i < 4; i++) {
+ swizzle[i] = swizzle_view[i] <= UTIL_FORMAT_SWIZZLE_W ?
+ swizzle_format[swizzle_view[i]] : swizzle_view[i];
+ }
+ } else {
+ memcpy(swizzle, swizzle_format, 4);
}
- return desc->layout == UTIL_FORMAT_LAYOUT_PLAIN;
+ /* Get swizzle. */
+ for (i = 0; i < 4; i++) {
+ switch (swizzle[i]) {
+ case UTIL_FORMAT_SWIZZLE_Y:
+ result |= swizzle_bit[1] << swizzle_shift[i];
+ break;
+ case UTIL_FORMAT_SWIZZLE_Z:
+ result |= swizzle_bit[2] << swizzle_shift[i];
+ break;
+ case UTIL_FORMAT_SWIZZLE_W:
+ result |= swizzle_bit[3] << swizzle_shift[i];
+ break;
+ case UTIL_FORMAT_SWIZZLE_0:
+ result |= R300_TX_FORMAT_ZERO << swizzle_shift[i];
+ break;
+ case UTIL_FORMAT_SWIZZLE_1:
+ result |= R300_TX_FORMAT_ONE << swizzle_shift[i];
+ break;
+ default: /* UTIL_FORMAT_SWIZZLE_X */
+ result |= swizzle_bit[0] << swizzle_shift[i];
+ }
+ }
+ return result;
}
/* Translate a pipe_format into a useful texture format for sampling.
@@ -74,24 +106,13 @@ static boolean r300_format_is_plain(enum pipe_format format)
* The FORMAT specifies how the texture sampler will treat the texture, and
* makes available X, Y, Z, W, ZERO, and ONE for swizzling. */
uint32_t r300_translate_texformat(enum pipe_format format,
- const unsigned char *swizzle)
+ const unsigned char *swizzle_view,
+ boolean is_r500)
{
uint32_t result = 0;
const struct util_format_description *desc;
unsigned i;
boolean uniform = TRUE;
- const uint32_t swizzle_shift[4] = {
- R300_TX_FORMAT_R_SHIFT,
- R300_TX_FORMAT_G_SHIFT,
- R300_TX_FORMAT_B_SHIFT,
- R300_TX_FORMAT_A_SHIFT
- };
- const uint32_t swizzle_bit[4] = {
- R300_TX_FORMAT_X,
- R300_TX_FORMAT_Y,
- R300_TX_FORMAT_Z,
- R300_TX_FORMAT_W
- };
const uint32_t sign_bit[4] = {
R300_TX_FORMAT_SIGNED_X,
R300_TX_FORMAT_SIGNED_Y,
@@ -103,14 +124,18 @@ uint32_t r300_translate_texformat(enum pipe_format format,
/* Colorspace (return non-RGB formats directly). */
switch (desc->colorspace) {
- /* Depth stencil formats. */
+ /* Depth stencil formats.
+ * Swizzles are added in r300_merge_textures_and_samplers. */
case UTIL_FORMAT_COLORSPACE_ZS:
switch (format) {
case PIPE_FORMAT_Z16_UNORM:
- return R300_EASY_TX_FORMAT(X, X, X, X, X16);
+ return R300_TX_FORMAT_X16;
case PIPE_FORMAT_X8Z24_UNORM:
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
- return R300_EASY_TX_FORMAT(X, X, X, X, W24_FP);
+ if (is_r500)
+ return R500_TX_FORMAT_Y8X24;
+ else
+ return R300_TX_FORMAT_Y16X16;
default:
return ~0; /* Unsupported. */
}
@@ -144,53 +169,14 @@ uint32_t r300_translate_texformat(enum pipe_format format,
}
}
- /* Add swizzle. */
- if (!swizzle) {
- swizzle = desc->swizzle;
- } /*else {
- if (swizzle[0] != desc->swizzle[0] ||
- swizzle[1] != desc->swizzle[1] ||
- swizzle[2] != desc->swizzle[2] ||
- swizzle[3] != desc->swizzle[3])
- {
- const char n[6] = "RGBA01";
- fprintf(stderr, "Got different swizzling! Format: %c%c%c%c, "
- "View: %c%c%c%c\n",
- n[desc->swizzle[0]], n[desc->swizzle[1]],
- n[desc->swizzle[2]], n[desc->swizzle[3]],
- n[swizzle[0]], n[swizzle[1]], n[swizzle[2]],
- n[swizzle[3]]);
- }
- }*/
-
- for (i = 0; i < 4; i++) {
- switch (swizzle[i]) {
- case UTIL_FORMAT_SWIZZLE_X:
- case UTIL_FORMAT_SWIZZLE_NONE:
- result |= swizzle_bit[0] << swizzle_shift[i];
- break;
- case UTIL_FORMAT_SWIZZLE_Y:
- result |= swizzle_bit[1] << swizzle_shift[i];
- break;
- case UTIL_FORMAT_SWIZZLE_Z:
- result |= swizzle_bit[2] << swizzle_shift[i];
- break;
- case UTIL_FORMAT_SWIZZLE_W:
- result |= swizzle_bit[3] << swizzle_shift[i];
- break;
- case UTIL_FORMAT_SWIZZLE_0:
- result |= R300_TX_FORMAT_ZERO << swizzle_shift[i];
- break;
- case UTIL_FORMAT_SWIZZLE_1:
- result |= R300_TX_FORMAT_ONE << swizzle_shift[i];
- break;
- default:
- return ~0; /* Unsupported. */
- }
- }
+ result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view);
/* S3TC formats. */
if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
+ if (!util_format_s3tc_enabled) {
+ return ~0; /* Unsupported. */
+ }
+
switch (format) {
case PIPE_FORMAT_DXT1_RGB:
case PIPE_FORMAT_DXT1_RGBA:
@@ -274,16 +260,26 @@ uint32_t r300_translate_texformat(enum pipe_format format,
return ~0; /* Unsupported/unknown. */
}
+ /* Find the first non-VOID channel. */
+ for (i = 0; i < 4; i++) {
+ if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
+ break;
+ }
+ }
+
+ if (i == 4)
+ return ~0; /* Unsupported/unknown. */
+
/* And finally, uniform formats. */
- switch (desc->channel[0].type) {
+ switch (desc->channel[i].type) {
case UTIL_FORMAT_TYPE_UNSIGNED:
case UTIL_FORMAT_TYPE_SIGNED:
- if (!desc->channel[0].normalized &&
+ if (!desc->channel[i].normalized &&
desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) {
return ~0;
}
- switch (desc->channel[0].size) {
+ switch (desc->channel[i].size) {
case 4:
switch (desc->nr_channels) {
case 2:
@@ -316,9 +312,8 @@ uint32_t r300_translate_texformat(enum pipe_format format,
}
return ~0;
-#if defined(ENABLE_FLOAT_TEXTURES)
case UTIL_FORMAT_TYPE_FLOAT:
- switch (desc->channel[0].size) {
+ switch (desc->channel[i].size) {
case 16:
switch (desc->nr_channels) {
case 1:
@@ -340,7 +335,6 @@ uint32_t r300_translate_texformat(enum pipe_format format,
return R300_TX_FORMAT_32F_32F_32F_32F | result;
}
}
-#endif
}
return ~0; /* Unsupported/unknown. */
@@ -351,6 +345,8 @@ uint32_t r500_tx_format_msb_bit(enum pipe_format format)
switch (format) {
case PIPE_FORMAT_RGTC1_UNORM:
case PIPE_FORMAT_RGTC1_SNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
return R500_TXFORMAT_MSB;
default:
return 0;
@@ -373,6 +369,11 @@ static uint32_t r300_translate_colorformat(enum pipe_format format)
return R300_COLOR_FORMAT_I8;
/* 16-bit buffers. */
+ case PIPE_FORMAT_L8A8_UNORM:
+ case PIPE_FORMAT_R8G8_UNORM:
+ case PIPE_FORMAT_R8G8_SNORM:
+ return R300_COLOR_FORMAT_UV88;
+
case PIPE_FORMAT_B5G6R5_UNORM:
return R300_COLOR_FORMAT_RGB565;
@@ -405,16 +406,12 @@ static uint32_t r300_translate_colorformat(enum pipe_format format)
/* 64-bit buffers. */
case PIPE_FORMAT_R16G16B16A16_UNORM:
case PIPE_FORMAT_R16G16B16A16_SNORM:
-#if defined(ENABLE_FLOAT_TEXTURES)
case PIPE_FORMAT_R16G16B16A16_FLOAT:
-#endif
return R300_COLOR_FORMAT_ARGB16161616;
/* 128-bit buffers. */
-#if defined(ENABLE_FLOAT_TEXTURES)
case PIPE_FORMAT_R32G32B32A32_FLOAT:
return R300_COLOR_FORMAT_ARGB32323232;
-#endif
/* YUV buffers. */
case PIPE_FORMAT_UYVY:
@@ -461,15 +458,25 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format)
desc = util_format_description(format);
+ /* Find the first non-VOID channel. */
+ for (i = 0; i < 4; i++) {
+ if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
+ break;
+ }
+ }
+
+ if (i == 4)
+ return ~0; /* Unsupported/unknown. */
+
/* Specifies how the shader output is written to the fog unit. */
- if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT) {
- if (desc->channel[0].size == 32) {
+ if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) {
+ if (desc->channel[i].size == 32) {
modifier |= R300_US_OUT_FMT_C4_32_FP;
} else {
modifier |= R300_US_OUT_FMT_C4_16_FP;
}
} else {
- if (desc->channel[0].size == 16) {
+ if (desc->channel[i].size == 16) {
modifier |= R300_US_OUT_FMT_C4_16;
} else {
/* C4_8 seems to be used for the formats whose pixel size
@@ -486,7 +493,7 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format)
/* Add swizzles and return. */
switch (format) {
- /* 8-bit outputs.
+ /* 8-bit outputs, one channel.
* COLORFORMAT_I8 stores the C2 component. */
case PIPE_FORMAT_A8_UNORM:
return modifier | R300_C2_SEL_A;
@@ -496,6 +503,14 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format)
case PIPE_FORMAT_R8_SNORM:
return modifier | R300_C2_SEL_R;
+ /* 16-bit outputs, two channels.
+ * COLORFORMAT_UV88 stores C2 and C0. */
+ case PIPE_FORMAT_L8A8_UNORM:
+ return modifier | R300_C0_SEL_A | R300_C2_SEL_R;
+ case PIPE_FORMAT_R8G8_UNORM:
+ case PIPE_FORMAT_R8G8_SNORM:
+ return modifier | R300_C0_SEL_G | R300_C2_SEL_R;
+
/* BGRA outputs. */
case PIPE_FORMAT_B5G6R5_UNORM:
case PIPE_FORMAT_B5G5R5A1_UNORM:
@@ -532,7 +547,7 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format)
case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
case PIPE_FORMAT_R16G16B16A16_UNORM:
case PIPE_FORMAT_R16G16B16A16_SNORM:
- //case PIPE_FORMAT_R16G16B16A16_FLOAT: /* not in pipe_format */
+ case PIPE_FORMAT_R16G16B16A16_FLOAT:
case PIPE_FORMAT_R32G32B32A32_FLOAT:
return modifier |
R300_C0_SEL_R | R300_C1_SEL_G |
@@ -556,53 +571,55 @@ boolean r300_is_zs_format_supported(enum pipe_format format)
boolean r300_is_sampler_format_supported(enum pipe_format format)
{
- return r300_translate_texformat(format, 0) != ~0;
+ return r300_translate_texformat(format, 0, TRUE) != ~0;
}
-static void r300_texture_setup_immutable_state(struct r300_screen* screen,
- struct r300_texture* tex)
+void r300_texture_setup_format_state(struct r300_screen *screen,
+ struct r300_texture_desc *desc,
+ unsigned level,
+ struct r300_texture_format_state *out)
{
- struct r300_texture_format_state* f = &tex->tx_format;
- struct pipe_resource *pt = &tex->b.b;
+ struct pipe_resource *pt = &desc->b.b;
boolean is_r500 = screen->caps.is_r500;
+ /* Mask out all the fields we change. */
+ out->format0 = 0;
+ out->format1 &= ~R300_TX_FORMAT_TEX_COORD_TYPE_MASK;
+ out->format2 &= R500_TXFORMAT_MSB;
+ out->tile_config = 0;
+
/* Set sampler state. */
- f->format0 = R300_TX_WIDTH((pt->width0 - 1) & 0x7ff) |
- R300_TX_HEIGHT((pt->height0 - 1) & 0x7ff);
+ out->format0 =
+ R300_TX_WIDTH((u_minify(desc->width0, level) - 1) & 0x7ff) |
+ R300_TX_HEIGHT((u_minify(desc->height0, level) - 1) & 0x7ff) |
+ R300_TX_DEPTH(util_logbase2(u_minify(desc->depth0, level)) & 0xf);
- if (tex->uses_pitch) {
+ if (desc->uses_stride_addressing) {
/* rectangles love this */
- f->format0 |= R300_TX_PITCH_EN;
- f->format2 = (tex->pitch[0] - 1) & 0x1fff;
- } else {
- /* power of two textures (3D, mipmaps, and no pitch) */
- f->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth0) & 0xf);
+ out->format0 |= R300_TX_PITCH_EN;
+ out->format2 = (desc->stride_in_pixels[level] - 1) & 0x1fff;
}
- f->format1 = 0;
if (pt->target == PIPE_TEXTURE_CUBE) {
- f->format1 |= R300_TX_FORMAT_CUBIC_MAP;
+ out->format1 |= R300_TX_FORMAT_CUBIC_MAP;
}
if (pt->target == PIPE_TEXTURE_3D) {
- f->format1 |= R300_TX_FORMAT_3D;
+ out->format1 |= R300_TX_FORMAT_3D;
}
/* large textures on r500 */
if (is_r500)
{
- if (pt->width0 > 2048) {
- f->format2 |= R500_TXWIDTH_BIT11;
+ if (desc->width0 > 2048) {
+ out->format2 |= R500_TXWIDTH_BIT11;
}
- if (pt->height0 > 2048) {
- f->format2 |= R500_TXHEIGHT_BIT11;
+ if (desc->height0 > 2048) {
+ out->format2 |= R500_TXHEIGHT_BIT11;
}
}
- f->tile_config = R300_TXO_MACRO_TILE(tex->macrotile) |
- R300_TXO_MICRO_TILE(tex->microtile);
-
- SCREEN_DBG(screen, DBG_TEX, "r300: Set texture state (%dx%d, %d levels)\n",
- pt->width0, pt->height0, pt->last_level);
+ out->tile_config = R300_TXO_MACRO_TILE(desc->macrotile[level]) |
+ R300_TXO_MICRO_TILE(desc->microtile);
}
static void r300_texture_setup_fb_state(struct r300_screen* screen,
@@ -611,23 +628,23 @@ static void r300_texture_setup_fb_state(struct r300_screen* screen,
unsigned i;
/* Set framebuffer state. */
- if (util_format_is_depth_or_stencil(tex->b.b.format)) {
- for (i = 0; i <= tex->b.b.last_level; i++) {
- tex->fb_state.depthpitch[i] =
- tex->pitch[i] |
- R300_DEPTHMACROTILE(tex->mip_macrotile[i]) |
- R300_DEPTHMICROTILE(tex->microtile);
+ if (util_format_is_depth_or_stencil(tex->desc.b.b.format)) {
+ for (i = 0; i <= tex->desc.b.b.last_level; i++) {
+ tex->fb_state.pitch[i] =
+ tex->desc.stride_in_pixels[i] |
+ R300_DEPTHMACROTILE(tex->desc.macrotile[i]) |
+ R300_DEPTHMICROTILE(tex->desc.microtile);
}
- tex->fb_state.zb_format = r300_translate_zsformat(tex->b.b.format);
+ tex->fb_state.format = r300_translate_zsformat(tex->desc.b.b.format);
} else {
- for (i = 0; i <= tex->b.b.last_level; i++) {
- tex->fb_state.colorpitch[i] =
- tex->pitch[i] |
- r300_translate_colorformat(tex->b.b.format) |
- R300_COLOR_TILE(tex->mip_macrotile[i]) |
- R300_COLOR_MICROTILE(tex->microtile);
+ for (i = 0; i <= tex->desc.b.b.last_level; i++) {
+ tex->fb_state.pitch[i] =
+ tex->desc.stride_in_pixels[i] |
+ r300_translate_colorformat(tex->desc.b.b.format) |
+ R300_COLOR_TILE(tex->desc.macrotile[i]) |
+ R300_COLOR_MICROTILE(tex->desc.microtile);
}
- tex->fb_state.us_out_fmt = r300_translate_out_fmt(tex->b.b.format);
+ tex->fb_state.format = r300_translate_out_fmt(tex->desc.b.b.format);
}
}
@@ -637,242 +654,16 @@ void r300_texture_reinterpret_format(struct pipe_screen *screen,
{
struct r300_screen *r300screen = r300_screen(screen);
- SCREEN_DBG(r300screen, DBG_TEX, "r300: Reinterpreting format: %s -> %s\n",
- util_format_name(tex->format), util_format_name(new_format));
+ SCREEN_DBG(r300screen, DBG_TEX,
+ "r300: texture_reinterpret_format: %s -> %s\n",
+ util_format_short_name(tex->format),
+ util_format_short_name(new_format));
tex->format = new_format;
r300_texture_setup_fb_state(r300_screen(screen), r300_texture(tex));
}
-unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level,
- unsigned zslice, unsigned face)
-{
- unsigned offset = tex->offset[level];
-
- switch (tex->b.b.target) {
- case PIPE_TEXTURE_3D:
- assert(face == 0);
- return offset + zslice * tex->layer_size[level];
-
- case PIPE_TEXTURE_CUBE:
- assert(zslice == 0);
- return offset + face * tex->layer_size[level];
-
- default:
- assert(zslice == 0 && face == 0);
- return offset;
- }
-}
-
-/**
- * Return the width (dim==TILE_WIDTH) or height (dim==TILE_HEIGHT) of one tile
- * of the given texture.
- */
-static unsigned r300_texture_get_tile_size(struct r300_texture* tex,
- int dim, boolean macrotile)
-{
- unsigned pixsize, tile_size;
-
- pixsize = util_format_get_blocksize(tex->b.b.format);
- tile_size = microblock_table[util_logbase2(pixsize)][tex->microtile][dim];
-
- if (macrotile) {
- tile_size *= 8;
- }
-
- assert(tile_size);
- return tile_size;
-}
-
-/* Return true if macrotiling should be enabled on the miplevel. */
-static boolean r300_texture_macro_switch(struct r300_texture *tex,
- unsigned level,
- boolean rv350_mode,
- int dim)
-{
- unsigned tile, texdim;
-
- tile = r300_texture_get_tile_size(tex, dim, TRUE);
- if (dim == TILE_WIDTH) {
- texdim = u_minify(tex->b.b.width0, level);
- } else {
- texdim = u_minify(tex->b.b.height0, level);
- }
-
- /* See TX_FILTER1_n.MACRO_SWITCH. */
- if (rv350_mode) {
- return texdim >= tile;
- } else {
- return texdim > tile;
- }
-}
-
-/**
- * Return the stride, in bytes, of the texture images of the given texture
- * at the given level.
- */
-unsigned r300_texture_get_stride(struct r300_screen* screen,
- struct r300_texture* tex, unsigned level)
-{
- unsigned tile_width, width;
-
- if (tex->stride_override)
- return tex->stride_override;
-
- /* Check the level. */
- if (level > tex->b.b.last_level) {
- SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n",
- __FUNCTION__, level, tex->b.b.last_level);
- return 0;
- }
-
- width = u_minify(tex->b.b.width0, level);
-
- if (r300_format_is_plain(tex->b.b.format)) {
- tile_width = r300_texture_get_tile_size(tex, TILE_WIDTH,
- tex->mip_macrotile[level]);
- width = align(width, tile_width);
-
- return util_format_get_stride(tex->b.b.format, width);
- } else {
- return align(util_format_get_stride(tex->b.b.format, width), 32);
- }
-}
-
-static unsigned r300_texture_get_nblocksy(struct r300_texture* tex,
- unsigned level)
-{
- unsigned height, tile_height;
-
- height = u_minify(tex->b.b.height0, level);
-
- if (r300_format_is_plain(tex->b.b.format)) {
- tile_height = r300_texture_get_tile_size(tex, TILE_HEIGHT,
- tex->mip_macrotile[level]);
- height = align(height, tile_height);
-
- /* This is needed for the kernel checker, unfortunately. */
- height = util_next_power_of_two(height);
- }
-
- return util_format_get_nblocksy(tex->b.b.format, height);
-}
-
-static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen,
- struct r300_texture *tex)
-{
- /* The kernels <= 2.6.34-rc4 compute the size of mipmapped 3D textures
- * incorrectly. This is a workaround to prevent CS from being rejected. */
-
- unsigned i, size;
-
- if (!screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) &&
- tex->b.b.target == PIPE_TEXTURE_3D &&
- tex->b.b.last_level > 0) {
- size = 0;
-
- for (i = 0; i <= tex->b.b.last_level; i++) {
- size += r300_texture_get_stride(screen, tex, i) *
- r300_texture_get_nblocksy(tex, i);
- }
-
- size *= tex->b.b.depth0;
- tex->size = size;
- }
-}
-
-static void r300_setup_miptree(struct r300_screen* screen,
- struct r300_texture* tex)
-{
- struct pipe_resource* base = &tex->b.b;
- unsigned stride, size, layer_size, nblocksy, i;
- boolean rv350_mode = screen->caps.is_rv350;
-
- SCREEN_DBG(screen, DBG_TEX, "r300: Making miptree for texture, format %s\n",
- util_format_name(base->format));
-
- for (i = 0; i <= base->last_level; i++) {
- /* Let's see if this miplevel can be macrotiled. */
- tex->mip_macrotile[i] =
- (tex->macrotile == R300_BUFFER_TILED &&
- r300_texture_macro_switch(tex, i, rv350_mode, TILE_WIDTH) &&
- r300_texture_macro_switch(tex, i, rv350_mode, TILE_HEIGHT)) ?
- R300_BUFFER_TILED : R300_BUFFER_LINEAR;
-
- stride = r300_texture_get_stride(screen, tex, i);
- nblocksy = r300_texture_get_nblocksy(tex, i);
- layer_size = stride * nblocksy;
-
- if (base->target == PIPE_TEXTURE_CUBE)
- size = layer_size * 6;
- else
- size = layer_size * u_minify(base->depth0, i);
-
- tex->offset[i] = tex->size;
- tex->size = tex->offset[i] + size;
- tex->layer_size[i] = layer_size;
- tex->pitch[i] = stride / util_format_get_blocksize(base->format);
-
- SCREEN_DBG(screen, DBG_TEX, "r300: Texture miptree: Level %d "
- "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n",
- i, u_minify(base->width0, i), u_minify(base->height0, i),
- u_minify(base->depth0, i), stride, tex->size,
- tex->mip_macrotile[i] ? "TRUE" : "FALSE");
- }
-}
-
-static void r300_setup_flags(struct r300_texture* tex)
-{
- tex->uses_pitch = !util_is_power_of_two(tex->b.b.width0) ||
- !util_is_power_of_two(tex->b.b.height0) ||
- tex->stride_override;
-}
-
-static void r300_setup_tiling(struct pipe_screen *screen,
- struct r300_texture *tex)
-{
- struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys;
- enum pipe_format format = tex->b.b.format;
- boolean rv350_mode = r300_screen(screen)->caps.is_rv350;
- boolean is_zb = util_format_is_depth_or_stencil(format);
- boolean dbg_no_tiling = SCREEN_DBG_ON(r300_screen(screen), DBG_NO_TILING);
-
- if (!r300_format_is_plain(format)) {
- return;
- }
-
- /* If height == 1, disable microtiling except for zbuffer. */
- if (!is_zb && (tex->b.b.height0 == 1 || dbg_no_tiling)) {
- return;
- }
-
- /* Set microtiling. */
- switch (util_format_get_blocksize(format)) {
- case 1:
- case 4:
- tex->microtile = R300_BUFFER_TILED;
- break;
-
- case 2:
- case 8:
- if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) {
- tex->microtile = R300_BUFFER_SQUARETILED;
- }
- break;
- }
-
- if (dbg_no_tiling) {
- return;
- }
-
- /* Set macrotiling. */
- if (r300_texture_macro_switch(tex, 0, rv350_mode, TILE_WIDTH) &&
- r300_texture_macro_switch(tex, 0, rv350_mode, TILE_HEIGHT)) {
- tex->macrotile = R300_BUFFER_TILED;
- }
-}
-
static unsigned r300_texture_is_referenced(struct pipe_context *context,
struct pipe_resource *texture,
unsigned face, unsigned level)
@@ -880,7 +671,8 @@ static unsigned r300_texture_is_referenced(struct pipe_context *context,
struct r300_context *r300 = r300_context(context);
struct r300_texture *rtex = (struct r300_texture *)texture;
- if (r300->rws->is_buffer_referenced(r300->rws, rtex->buffer, R300_REF_CS))
+ if (r300->rws->cs_is_buffer_referenced(r300->cs,
+ rtex->buffer, R300_REF_CS))
return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
return PIPE_UNREFERENCED;
@@ -891,8 +683,16 @@ static void r300_texture_destroy(struct pipe_screen *screen,
{
struct r300_texture* tex = (struct r300_texture*)texture;
struct r300_winsys_screen *rws = (struct r300_winsys_screen *)texture->screen->winsys;
+ int i;
rws->buffer_reference(rws, &tex->buffer, NULL);
+ for (i = 0; i < R300_MAX_TEXTURE_LEVELS; i++) {
+ if (tex->hiz_mem[i])
+ u_mmFreeMem(tex->hiz_mem[i]);
+ if (tex->zmask_mem[i])
+ u_mmFreeMem(tex->zmask_mem[i]);
+ }
+
FREE(tex);
}
@@ -902,20 +702,16 @@ static boolean r300_texture_get_handle(struct pipe_screen* screen,
{
struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys;
struct r300_texture* tex = (struct r300_texture*)texture;
- unsigned stride;
if (!tex) {
return FALSE;
}
- stride = r300_texture_get_stride(r300_screen(screen), tex, 0);
-
- rws->buffer_get_handle(rws, tex->buffer, stride, whandle);
-
- return TRUE;
+ return rws->buffer_get_handle(rws, tex->buffer,
+ tex->desc.stride_in_bytes[0], whandle);
}
-struct u_resource_vtbl r300_texture_vtbl =
+struct u_resource_vtbl r300_texture_vtbl =
{
r300_texture_get_handle, /* get_handle */
r300_texture_destroy, /* resource_destroy */
@@ -928,162 +724,208 @@ struct u_resource_vtbl r300_texture_vtbl =
u_default_transfer_inline_write /* transfer_inline_write */
};
-/* Create a new texture. */
-struct pipe_resource* r300_texture_create(struct pipe_screen* screen,
- const struct pipe_resource* base)
+/* The common texture constructor. */
+static struct r300_texture*
+r300_texture_create_object(struct r300_screen *rscreen,
+ const struct pipe_resource *base,
+ enum r300_buffer_tiling microtile,
+ enum r300_buffer_tiling macrotile,
+ unsigned stride_in_bytes_override,
+ unsigned max_buffer_size,
+ struct r300_winsys_buffer *buffer)
{
- struct r300_texture* tex = CALLOC_STRUCT(r300_texture);
- struct r300_screen* rscreen = r300_screen(screen);
- struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys;
-
+ struct r300_winsys_screen *rws = rscreen->rws;
+ struct r300_texture *tex = CALLOC_STRUCT(r300_texture);
if (!tex) {
+ if (buffer)
+ rws->buffer_reference(rws, &buffer, NULL);
return NULL;
}
- tex->b.b = *base;
- tex->b.vtbl = &r300_texture_vtbl;
- pipe_reference_init(&tex->b.b.reference, 1);
- tex->b.b.screen = screen;
-
- r300_setup_flags(tex);
- if (!(base->flags & R300_RESOURCE_FLAG_TRANSFER) &&
- !(base->bind & PIPE_BIND_SCANOUT)) {
- r300_setup_tiling(screen, tex);
+ /* Initialize the descriptor. */
+ if (!r300_texture_desc_init(rscreen, &tex->desc, base,
+ microtile, macrotile,
+ stride_in_bytes_override,
+ max_buffer_size)) {
+ if (buffer)
+ rws->buffer_reference(rws, &buffer, NULL);
+ FREE(tex);
+ return NULL;
}
- r300_setup_miptree(rscreen, tex);
- r300_texture_3d_fix_mipmapping(rscreen, tex);
- r300_texture_setup_immutable_state(rscreen, tex);
+ /* Initialize the hardware state. */
+ r300_texture_setup_format_state(rscreen, &tex->desc, 0, &tex->tx_format);
r300_texture_setup_fb_state(rscreen, tex);
- tex->buffer = rws->buffer_create(rws, 2048,
- PIPE_BIND_SAMPLER_VIEW, /* XXX */
- tex->size);
- rws->buffer_set_tiling(rws, tex->buffer,
- tex->pitch[0],
- tex->microtile,
- tex->macrotile);
+ tex->desc.b.vtbl = &r300_texture_vtbl;
+ pipe_reference_init(&tex->desc.b.b.reference, 1);
+ tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ?
+ R300_DOMAIN_GTT :
+ R300_DOMAIN_VRAM | R300_DOMAIN_GTT;
+ tex->buffer = buffer;
+ /* Create the backing buffer if needed. */
if (!tex->buffer) {
- FREE(tex);
- return NULL;
+ tex->buffer = rws->buffer_create(rws, tex->desc.size_in_bytes, 2048,
+ base->bind, base->usage, tex->domain);
+
+ if (!tex->buffer) {
+ FREE(tex);
+ return NULL;
+ }
}
- return (struct pipe_resource*)tex;
+ rws->buffer_set_tiling(rws, tex->buffer,
+ tex->desc.microtile, tex->desc.macrotile[0],
+ tex->desc.stride_in_bytes[0]);
+
+ return tex;
}
-/* Not required to implement u_resource_vtbl, consider moving to another file:
- */
-struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen,
- struct pipe_resource* texture,
- unsigned face,
- unsigned level,
- unsigned zslice,
- unsigned flags)
+/* Create a new texture. */
+struct pipe_resource *r300_texture_create(struct pipe_screen *screen,
+ const struct pipe_resource *base)
{
- struct r300_texture* tex = r300_texture(texture);
- struct pipe_surface* surface = CALLOC_STRUCT(pipe_surface);
- unsigned offset;
+ struct r300_screen *rscreen = r300_screen(screen);
+ enum r300_buffer_tiling microtile, macrotile;
- offset = r300_texture_get_offset(tex, level, zslice, face);
-
- if (surface) {
- pipe_reference_init(&surface->reference, 1);
- pipe_resource_reference(&surface->texture, texture);
- surface->format = texture->format;
- surface->width = u_minify(texture->width0, level);
- surface->height = u_minify(texture->height0, level);
- surface->offset = offset;
- surface->usage = flags;
- surface->zslice = zslice;
- surface->texture = texture;
- surface->face = face;
- surface->level = level;
+ if ((base->flags & R300_RESOURCE_FLAG_TRANSFER) ||
+ (base->bind & PIPE_BIND_SCANOUT)) {
+ microtile = R300_BUFFER_LINEAR;
+ macrotile = R300_BUFFER_LINEAR;
+ } else {
+ microtile = R300_BUFFER_SELECT_LAYOUT;
+ macrotile = R300_BUFFER_SELECT_LAYOUT;
}
- return surface;
-}
-
-/* Not required to implement u_resource_vtbl, consider moving to another file:
- */
-void r300_tex_surface_destroy(struct pipe_surface* s)
-{
- pipe_resource_reference(&s->texture, NULL);
- FREE(s);
+ return (struct pipe_resource*)
+ r300_texture_create_object(rscreen, base, microtile, macrotile,
+ 0, 0, NULL);
}
-struct pipe_resource*
-r300_texture_from_handle(struct pipe_screen* screen,
- const struct pipe_resource* base,
- struct winsys_handle *whandle)
+struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen,
+ const struct pipe_resource *base,
+ struct winsys_handle *whandle)
{
struct r300_winsys_screen *rws = (struct r300_winsys_screen*)screen->winsys;
- struct r300_screen* rscreen = r300_screen(screen);
+ struct r300_screen *rscreen = r300_screen(screen);
struct r300_winsys_buffer *buffer;
- struct r300_texture* tex;
- unsigned stride;
- boolean override_zb_flags;
+ enum r300_buffer_tiling microtile, macrotile;
+ unsigned stride, size;
/* Support only 2D textures without mipmaps */
- if (base->target != PIPE_TEXTURE_2D ||
+ if ((base->target != PIPE_TEXTURE_2D &&
+ base->target != PIPE_TEXTURE_RECT) ||
base->depth0 != 1 ||
base->last_level != 0) {
return NULL;
}
- buffer = rws->buffer_from_handle(rws, screen, whandle, &stride);
- if (!buffer) {
- return NULL;
- }
-
- tex = CALLOC_STRUCT(r300_texture);
- if (!tex) {
+ buffer = rws->buffer_from_handle(rws, whandle, &stride, &size);
+ if (!buffer)
return NULL;
- }
- tex->b.b = *base;
- tex->b.vtbl = &r300_texture_vtbl;
- pipe_reference_init(&tex->b.b.reference, 1);
- tex->b.b.screen = screen;
+ rws->buffer_get_tiling(rws, buffer, &microtile, &macrotile);
- tex->stride_override = stride;
-
- /* one ref already taken */
- tex->buffer = buffer;
-
- rws->buffer_get_tiling(rws, buffer, &tex->microtile, &tex->macrotile);
- r300_setup_flags(tex);
-
- /* Enforce microtiled zbuffer. */
- override_zb_flags = util_format_is_depth_or_stencil(base->format) &&
- tex->microtile == R300_BUFFER_LINEAR;
-
- if (override_zb_flags) {
+ /* Enforce a microtiled zbuffer. */
+ if (util_format_is_depth_or_stencil(base->format) &&
+ microtile == R300_BUFFER_LINEAR) {
switch (util_format_get_blocksize(base->format)) {
case 4:
- tex->microtile = R300_BUFFER_TILED;
+ microtile = R300_BUFFER_TILED;
break;
case 2:
- if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) {
- tex->microtile = R300_BUFFER_SQUARETILED;
- break;
- }
- /* Pass through. */
-
- default:
- override_zb_flags = FALSE;
+ if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT))
+ microtile = R300_BUFFER_SQUARETILED;
+ break;
}
}
- r300_setup_miptree(rscreen, tex);
- r300_texture_setup_immutable_state(rscreen, tex);
- r300_texture_setup_fb_state(rscreen, tex);
+ return (struct pipe_resource*)
+ r300_texture_create_object(rscreen, base, microtile, macrotile,
+ stride, size, buffer);
+}
- if (override_zb_flags) {
- rws->buffer_set_tiling(rws, tex->buffer,
- tex->pitch[0],
- tex->microtile,
- tex->macrotile);
+/* Not required to implement u_resource_vtbl, consider moving to another file:
+ */
+struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen,
+ struct pipe_resource* texture,
+ unsigned face,
+ unsigned level,
+ unsigned zslice,
+ unsigned flags)
+{
+ struct r300_texture* tex = r300_texture(texture);
+ struct r300_surface* surface = CALLOC_STRUCT(r300_surface);
+
+ if (surface) {
+ uint32_t offset, tile_height;
+
+ pipe_reference_init(&surface->base.reference, 1);
+ pipe_resource_reference(&surface->base.texture, texture);
+ surface->base.format = texture->format;
+ surface->base.width = u_minify(texture->width0, level);
+ surface->base.height = u_minify(texture->height0, level);
+ surface->base.usage = flags;
+ surface->base.zslice = zslice;
+ surface->base.face = face;
+ surface->base.level = level;
+
+ surface->buffer = tex->buffer;
+
+ /* Prefer VRAM if there are multiple domains to choose from. */
+ surface->domain = tex->domain;
+ if (surface->domain & R300_DOMAIN_VRAM)
+ surface->domain &= ~R300_DOMAIN_GTT;
+
+ surface->offset = r300_texture_get_offset(&tex->desc,
+ level, zslice, face);
+ surface->pitch = tex->fb_state.pitch[level];
+ surface->format = tex->fb_state.format;
+
+ /* Parameters for the CBZB clear. */
+ surface->cbzb_allowed = tex->desc.cbzb_allowed[level];
+ surface->cbzb_width = align(surface->base.width, 64);
+
+ /* Height must be aligned to the size of a tile. */
+ tile_height = r300_get_pixel_alignment(tex->desc.b.b.format,
+ tex->desc.b.b.nr_samples,
+ tex->desc.microtile,
+ tex->desc.macrotile[level],
+ DIM_HEIGHT);
+
+ surface->cbzb_height = align((surface->base.height + 1) / 2,
+ tile_height);
+
+ /* Offset must be aligned to 2K and must point at the beginning
+ * of a scanline. */
+ offset = surface->offset +
+ tex->desc.stride_in_bytes[level] * surface->cbzb_height;
+ surface->cbzb_midpoint_offset = offset & ~2047;
+
+ surface->cbzb_pitch = surface->pitch & 0x1ffffc;
+
+ if (util_format_get_blocksizebits(surface->base.format) == 32)
+ surface->cbzb_format = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
+ else
+ surface->cbzb_format = R300_DEPTHFORMAT_16BIT_INT_Z;
+
+ SCREEN_DBG(r300_screen(screen), DBG_CBZB,
+ "CBZB Allowed: %s, Dim: %ix%i, Misalignment: %i, Micro: %s, Macro: %s\n",
+ surface->cbzb_allowed ? "YES" : " NO",
+ surface->cbzb_width, surface->cbzb_height,
+ offset & 2047,
+ tex->desc.microtile ? "YES" : " NO",
+ tex->desc.macrotile[level] ? "YES" : " NO");
}
- return (struct pipe_resource*)tex;
+
+ return &surface->base;
+}
+
+/* Not required to implement u_resource_vtbl, consider moving to another file:
+ */
+void r300_tex_surface_destroy(struct pipe_surface* s)
+{
+ pipe_resource_reference(&s->texture, NULL);
+ FREE(s);
}
diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h
index 453d42b188f..c4588a0c90b 100644
--- a/src/gallium/drivers/r300/r300_texture.h
+++ b/src/gallium/drivers/r300/r300_texture.h
@@ -23,21 +23,26 @@
#ifndef R300_TEXTURE_H
#define R300_TEXTURE_H
-#include "util/u_format.h"
-
+#include "pipe/p_compiler.h"
+#include "pipe/p_format.h"
+
+struct pipe_screen;
+struct pipe_resource;
+struct winsys_handle;
+struct r300_texture_format_state;
+struct r300_texture_desc;
struct r300_texture;
+struct r300_screen;
+
+unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format,
+ const unsigned char *swizzle_view);
uint32_t r300_translate_texformat(enum pipe_format format,
- const unsigned char *swizzle);
+ const unsigned char *swizzle_view,
+ boolean is_r500);
uint32_t r500_tx_format_msb_bit(enum pipe_format format);
-unsigned r300_texture_get_stride(struct r300_screen* screen,
- struct r300_texture* tex, unsigned level);
-
-unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level,
- unsigned zslice, unsigned face);
-
void r300_texture_reinterpret_format(struct pipe_screen *screen,
struct pipe_resource *tex,
enum pipe_format new_format);
@@ -48,6 +53,10 @@ boolean r300_is_zs_format_supported(enum pipe_format format);
boolean r300_is_sampler_format_supported(enum pipe_format format);
+void r300_texture_setup_format_state(struct r300_screen *screen,
+ struct r300_texture_desc *desc,
+ unsigned level,
+ struct r300_texture_format_state *out);
struct pipe_resource*
r300_texture_from_handle(struct pipe_screen* screen,
diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c
new file mode 100644
index 00000000000..543d0fdc15b
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_texture_desc.c
@@ -0,0 +1,495 @@
+/*
+ * Copyright 2008 Corbin Simpson <[email protected]>
+ * Copyright 2010 Marek Olšák <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "r300_texture_desc.h"
+
+#include "r300_context.h"
+#include "r300_winsys.h"
+
+#include "util/u_format.h"
+
+/* Returns the number of pixels that the texture should be aligned to
+ * in the given dimension. */
+unsigned r300_get_pixel_alignment(enum pipe_format format,
+ unsigned num_samples,
+ enum r300_buffer_tiling microtile,
+ enum r300_buffer_tiling macrotile,
+ enum r300_dim dim)
+{
+ static const unsigned table[2][5][3][2] =
+ {
+ {
+ /* Macro: linear linear linear
+ Micro: linear tiled square-tiled */
+ {{ 32, 1}, { 8, 4}, { 0, 0}}, /* 8 bits per pixel */
+ {{ 16, 1}, { 8, 2}, { 4, 4}}, /* 16 bits per pixel */
+ {{ 8, 1}, { 4, 2}, { 0, 0}}, /* 32 bits per pixel */
+ {{ 4, 1}, { 2, 2}, { 0, 0}}, /* 64 bits per pixel */
+ {{ 2, 1}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */
+ },
+ {
+ /* Macro: tiled tiled tiled
+ Micro: linear tiled square-tiled */
+ {{256, 8}, {64, 32}, { 0, 0}}, /* 8 bits per pixel */
+ {{128, 8}, {64, 16}, {32, 32}}, /* 16 bits per pixel */
+ {{ 64, 8}, {32, 16}, { 0, 0}}, /* 32 bits per pixel */
+ {{ 32, 8}, {16, 16}, { 0, 0}}, /* 64 bits per pixel */
+ {{ 16, 8}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */
+ }
+ };
+ static const unsigned aa_block[2] = {4, 8};
+ unsigned tile = 0;
+ unsigned pixsize = util_format_get_blocksize(format);
+
+ assert(macrotile <= R300_BUFFER_TILED);
+ assert(microtile <= R300_BUFFER_SQUARETILED);
+ assert(pixsize <= 16);
+ assert(dim <= DIM_HEIGHT);
+
+ if (num_samples > 1) {
+ /* Multisampled textures have their own alignment scheme. */
+ if (pixsize == 4)
+ tile = aa_block[dim];
+ /* XXX FP16 AA. */
+ } else {
+ /* Standard alignment. */
+ tile = table[macrotile][util_logbase2(pixsize)][microtile][dim];
+ }
+
+ assert(tile);
+ return tile;
+}
+
+/* Return true if macrotiling should be enabled on the miplevel. */
+static boolean r300_texture_macro_switch(struct r300_texture_desc *desc,
+ unsigned level,
+ boolean rv350_mode,
+ enum r300_dim dim)
+{
+ unsigned tile, texdim;
+
+ tile = r300_get_pixel_alignment(desc->b.b.format, desc->b.b.nr_samples,
+ desc->microtile, R300_BUFFER_TILED, dim);
+ if (dim == DIM_WIDTH) {
+ texdim = u_minify(desc->width0, level);
+ } else {
+ texdim = u_minify(desc->height0, level);
+ }
+
+ /* See TX_FILTER1_n.MACRO_SWITCH. */
+ if (rv350_mode) {
+ return texdim >= tile;
+ } else {
+ return texdim > tile;
+ }
+}
+
+/**
+ * Return the stride, in bytes, of the texture image of the given texture
+ * at the given level.
+ */
+static unsigned r300_texture_get_stride(struct r300_screen *screen,
+ struct r300_texture_desc *desc,
+ unsigned level)
+{
+ unsigned tile_width, width, stride;
+
+ if (desc->stride_in_bytes_override)
+ return desc->stride_in_bytes_override;
+
+ /* Check the level. */
+ if (level > desc->b.b.last_level) {
+ SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n",
+ __FUNCTION__, level, desc->b.b.last_level);
+ return 0;
+ }
+
+ width = u_minify(desc->width0, level);
+
+ if (util_format_is_plain(desc->b.b.format)) {
+ tile_width = r300_get_pixel_alignment(desc->b.b.format,
+ desc->b.b.nr_samples,
+ desc->microtile,
+ desc->macrotile[level],
+ DIM_WIDTH);
+ width = align(width, tile_width);
+
+ stride = util_format_get_stride(desc->b.b.format, width);
+
+ /* Some IGPs need a minimum stride of 64 bytes, hmm... */
+ if (!desc->macrotile[level] &&
+ (screen->caps.family == CHIP_FAMILY_RS600 ||
+ screen->caps.family == CHIP_FAMILY_RS690 ||
+ screen->caps.family == CHIP_FAMILY_RS740)) {
+ unsigned min_stride;
+
+ if (desc->microtile) {
+ unsigned tile_height =
+ r300_get_pixel_alignment(desc->b.b.format,
+ desc->b.b.nr_samples,
+ desc->microtile,
+ desc->macrotile[level],
+ DIM_HEIGHT);
+
+ min_stride = 64 / tile_height;
+ } else {
+ min_stride = 64;
+ }
+
+ return stride < min_stride ? min_stride : stride;
+ }
+
+ /* The alignment to 32 bytes is sort of implied by the layout... */
+ return stride;
+ } else {
+ return align(util_format_get_stride(desc->b.b.format, width), 32);
+ }
+}
+
+static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc,
+ unsigned level,
+ boolean *out_aligned_for_cbzb)
+{
+ unsigned height, tile_height;
+
+ height = u_minify(desc->height0, level);
+
+ if (util_format_is_plain(desc->b.b.format)) {
+ tile_height = r300_get_pixel_alignment(desc->b.b.format,
+ desc->b.b.nr_samples,
+ desc->microtile,
+ desc->macrotile[level],
+ DIM_HEIGHT);
+ height = align(height, tile_height);
+
+ /* This is needed for the kernel checker, unfortunately. */
+ if ((desc->b.b.target != PIPE_TEXTURE_1D &&
+ desc->b.b.target != PIPE_TEXTURE_2D &&
+ desc->b.b.target != PIPE_TEXTURE_RECT) ||
+ desc->b.b.last_level != 0) {
+ height = util_next_power_of_two(height);
+ }
+
+ /* See if the CBZB clear can be used on the buffer,
+ * taking the texture size into account. */
+ if (out_aligned_for_cbzb) {
+ if (desc->macrotile[level]) {
+ /* When clearing, the layer (width*height) is horizontally split
+ * into two, and the upper and lower halves are cleared by the CB
+ * and ZB units, respectively. Therefore, the number of macrotiles
+ * in the Y direction must be even. */
+
+ /* Align the height so that there is an even number of macrotiles.
+ * Do so for 3 or more macrotiles in the Y direction. */
+ if (level == 0 && desc->b.b.last_level == 0 &&
+ (desc->b.b.target == PIPE_TEXTURE_1D ||
+ desc->b.b.target == PIPE_TEXTURE_2D ||
+ desc->b.b.target == PIPE_TEXTURE_RECT) &&
+ height >= tile_height * 3) {
+ height = align(height, tile_height * 2);
+ }
+
+ *out_aligned_for_cbzb = height % (tile_height * 2) == 0;
+ } else {
+ *out_aligned_for_cbzb = FALSE;
+ }
+ }
+ }
+
+ return util_format_get_nblocksy(desc->b.b.format, height);
+}
+
+static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen,
+ struct r300_texture_desc *desc)
+{
+ /* The kernels <= 2.6.34-rc4 compute the size of mipmapped 3D textures
+ * incorrectly. This is a workaround to prevent CS from being rejected. */
+
+ unsigned i, size;
+
+ if (!screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) &&
+ desc->b.b.target == PIPE_TEXTURE_3D &&
+ desc->b.b.last_level > 0) {
+ size = 0;
+
+ for (i = 0; i <= desc->b.b.last_level; i++) {
+ size += desc->stride_in_bytes[i] *
+ r300_texture_get_nblocksy(desc, i, FALSE);
+ }
+
+ size *= desc->depth0;
+ desc->size_in_bytes = size;
+ }
+}
+
+/* Get a width in pixels from a stride in bytes. */
+static unsigned stride_to_width(enum pipe_format format,
+ unsigned stride_in_bytes)
+{
+ return (stride_in_bytes / util_format_get_blocksize(format)) *
+ util_format_get_blockwidth(format);
+}
+
+static void r300_setup_miptree(struct r300_screen *screen,
+ struct r300_texture_desc *desc,
+ boolean align_for_cbzb)
+{
+ struct pipe_resource *base = &desc->b.b;
+ unsigned stride, size, layer_size, nblocksy, i;
+ boolean rv350_mode = screen->caps.family >= CHIP_FAMILY_R350;
+ boolean aligned_for_cbzb;
+
+ desc->size_in_bytes = 0;
+
+ SCREEN_DBG(screen, DBG_TEXALLOC,
+ "r300: Making miptree for texture, format %s\n",
+ util_format_short_name(base->format));
+
+ for (i = 0; i <= base->last_level; i++) {
+ /* Let's see if this miplevel can be macrotiled. */
+ desc->macrotile[i] =
+ (desc->macrotile[0] == R300_BUFFER_TILED &&
+ r300_texture_macro_switch(desc, i, rv350_mode, DIM_WIDTH) &&
+ r300_texture_macro_switch(desc, i, rv350_mode, DIM_HEIGHT)) ?
+ R300_BUFFER_TILED : R300_BUFFER_LINEAR;
+
+ stride = r300_texture_get_stride(screen, desc, i);
+
+ /* Compute the number of blocks in Y, see if the CBZB clear can be
+ * used on the texture. */
+ aligned_for_cbzb = FALSE;
+ if (align_for_cbzb && desc->cbzb_allowed[i])
+ nblocksy = r300_texture_get_nblocksy(desc, i, &aligned_for_cbzb);
+ else
+ nblocksy = r300_texture_get_nblocksy(desc, i, NULL);
+
+ layer_size = stride * nblocksy;
+
+ if (base->nr_samples) {
+ layer_size *= base->nr_samples;
+ }
+
+ if (base->target == PIPE_TEXTURE_CUBE)
+ size = layer_size * 6;
+ else
+ size = layer_size * u_minify(desc->depth0, i);
+
+ desc->offset_in_bytes[i] = desc->size_in_bytes;
+ desc->size_in_bytes = desc->offset_in_bytes[i] + size;
+ desc->layer_size_in_bytes[i] = layer_size;
+ desc->stride_in_bytes[i] = stride;
+ desc->stride_in_pixels[i] = stride_to_width(desc->b.b.format, stride);
+ desc->cbzb_allowed[i] = desc->cbzb_allowed[i] && aligned_for_cbzb;
+
+ SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d "
+ "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n",
+ i, u_minify(desc->width0, i), u_minify(desc->height0, i),
+ u_minify(desc->depth0, i), stride, desc->size_in_bytes,
+ desc->macrotile[i] ? "TRUE" : "FALSE");
+ }
+}
+
+static void r300_setup_flags(struct r300_texture_desc *desc)
+{
+ desc->uses_stride_addressing =
+ !util_is_power_of_two(desc->b.b.width0) ||
+ (desc->stride_in_bytes_override &&
+ stride_to_width(desc->b.b.format,
+ desc->stride_in_bytes_override) != desc->b.b.width0);
+
+ desc->is_npot =
+ desc->uses_stride_addressing ||
+ !util_is_power_of_two(desc->b.b.height0) ||
+ !util_is_power_of_two(desc->b.b.depth0);
+}
+
+static void r300_setup_cbzb_flags(struct r300_screen *rscreen,
+ struct r300_texture_desc *desc)
+{
+ unsigned i, bpp;
+ boolean first_level_valid;
+
+ bpp = util_format_get_blocksizebits(desc->b.b.format);
+
+ /* 1) The texture must be point-sampled,
+ * 2) The depth must be 16 or 32 bits.
+ * 3) If the midpoint ZB offset is not aligned to 2048, it returns garbage
+ * with certain texture sizes. Macrotiling ensures the alignment. */
+ first_level_valid = desc->b.b.nr_samples <= 1 &&
+ (bpp == 16 || bpp == 32) &&
+ desc->macrotile[0];
+
+ if (SCREEN_DBG_ON(rscreen, DBG_NO_CBZB))
+ first_level_valid = FALSE;
+
+ for (i = 0; i <= desc->b.b.last_level; i++)
+ desc->cbzb_allowed[i] = first_level_valid && desc->macrotile[i];
+}
+
+static void r300_setup_tiling(struct r300_screen *screen,
+ struct r300_texture_desc *desc)
+{
+ struct r300_winsys_screen *rws = screen->rws;
+ enum pipe_format format = desc->b.b.format;
+ boolean rv350_mode = screen->caps.family >= CHIP_FAMILY_R350;
+ boolean is_zb = util_format_is_depth_or_stencil(format);
+ boolean dbg_no_tiling = SCREEN_DBG_ON(screen, DBG_NO_TILING);
+
+ if (!util_format_is_plain(format)) {
+ return;
+ }
+
+ /* If height == 1, disable microtiling except for zbuffer. */
+ if (!is_zb && (desc->b.b.height0 == 1 || dbg_no_tiling)) {
+ return;
+ }
+
+ /* Set microtiling. */
+ switch (util_format_get_blocksize(format)) {
+ case 1:
+ case 4:
+ case 8:
+ desc->microtile = R300_BUFFER_TILED;
+ break;
+
+ case 2:
+ if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) {
+ desc->microtile = R300_BUFFER_SQUARETILED;
+ }
+ break;
+ }
+
+ if (dbg_no_tiling) {
+ return;
+ }
+
+ /* Set macrotiling. */
+ if (r300_texture_macro_switch(desc, 0, rv350_mode, DIM_WIDTH) &&
+ r300_texture_macro_switch(desc, 0, rv350_mode, DIM_HEIGHT)) {
+ desc->macrotile[0] = R300_BUFFER_TILED;
+ }
+}
+
+static void r300_tex_print_info(struct r300_screen *rscreen,
+ struct r300_texture_desc *desc,
+ const char *func)
+{
+ fprintf(stderr,
+ "r300: %s: Macro: %s, Micro: %s, Pitch: %i, Dim: %ix%ix%i, "
+ "LastLevel: %i, Size: %i, Format: %s\n",
+ func,
+ desc->macrotile[0] ? "YES" : " NO",
+ desc->microtile ? "YES" : " NO",
+ desc->stride_in_pixels[0],
+ desc->b.b.width0, desc->b.b.height0, desc->b.b.depth0,
+ desc->b.b.last_level, desc->size_in_bytes,
+ util_format_short_name(desc->b.b.format));
+}
+
+boolean r300_texture_desc_init(struct r300_screen *rscreen,
+ struct r300_texture_desc *desc,
+ const struct pipe_resource *base,
+ enum r300_buffer_tiling microtile,
+ enum r300_buffer_tiling macrotile,
+ unsigned stride_in_bytes_override,
+ unsigned max_buffer_size)
+{
+ desc->b.b = *base;
+ desc->b.b.screen = &rscreen->screen;
+ desc->stride_in_bytes_override = stride_in_bytes_override;
+ desc->width0 = base->width0;
+ desc->height0 = base->height0;
+ desc->depth0 = base->depth0;
+
+ r300_setup_flags(desc);
+
+ /* Align a 3D NPOT texture to POT. */
+ if (base->target == PIPE_TEXTURE_3D && desc->is_npot) {
+ desc->width0 = util_next_power_of_two(desc->width0);
+ desc->height0 = util_next_power_of_two(desc->height0);
+ desc->depth0 = util_next_power_of_two(desc->depth0);
+ }
+
+ /* Setup tiling. */
+ if (microtile == R300_BUFFER_SELECT_LAYOUT ||
+ macrotile == R300_BUFFER_SELECT_LAYOUT) {
+ r300_setup_tiling(rscreen, desc);
+ } else {
+ desc->microtile = microtile;
+ desc->macrotile[0] = macrotile;
+ assert(desc->b.b.last_level == 0);
+ }
+
+ r300_setup_cbzb_flags(rscreen, desc);
+
+ /* Setup the miptree description. */
+ r300_setup_miptree(rscreen, desc, TRUE);
+ /* If the required buffer size is larger the given max size,
+ * try again without the alignment for the CBZB clear. */
+ if (max_buffer_size && desc->size_in_bytes > max_buffer_size) {
+ r300_setup_miptree(rscreen, desc, FALSE);
+ }
+
+ r300_texture_3d_fix_mipmapping(rscreen, desc);
+
+ if (max_buffer_size) {
+ /* Make sure the buffer we got is large enough. */
+ if (desc->size_in_bytes > max_buffer_size) {
+ fprintf(stderr, "r300: texture_desc_init: The buffer is not "
+ "large enough. Got: %i, Need: %i, Info:\n",
+ max_buffer_size, desc->size_in_bytes);
+ r300_tex_print_info(rscreen, desc, "texture_desc_init");
+ return FALSE;
+ }
+
+ desc->buffer_size_in_bytes = max_buffer_size;
+ } else {
+ desc->buffer_size_in_bytes = desc->size_in_bytes;
+ }
+
+ if (SCREEN_DBG_ON(rscreen, DBG_TEX))
+ r300_tex_print_info(rscreen, desc, "texture_desc_init");
+
+ return TRUE;
+}
+
+unsigned r300_texture_get_offset(struct r300_texture_desc *desc,
+ unsigned level, unsigned zslice,
+ unsigned face)
+{
+ unsigned offset = desc->offset_in_bytes[level];
+
+ switch (desc->b.b.target) {
+ case PIPE_TEXTURE_3D:
+ assert(face == 0);
+ return offset + zslice * desc->layer_size_in_bytes[level];
+
+ case PIPE_TEXTURE_CUBE:
+ assert(zslice == 0);
+ return offset + face * desc->layer_size_in_bytes[level];
+
+ default:
+ assert(zslice == 0 && face == 0);
+ return offset;
+ }
+}
diff --git a/src/gallium/drivers/r300/r300_texture_desc.h b/src/gallium/drivers/r300/r300_texture_desc.h
new file mode 100644
index 00000000000..3d7fe1fb473
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_texture_desc.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2008 Corbin Simpson <[email protected]>
+ * Copyright 2010 Marek Olšák <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_TEXTURE_DESC_H
+#define R300_TEXTURE_DESC_H
+
+#include "pipe/p_format.h"
+#include "r300_defines.h"
+
+struct pipe_resource;
+struct r300_screen;
+struct r300_texture_desc;
+struct r300_texture;
+
+enum r300_dim {
+ DIM_WIDTH = 0,
+ DIM_HEIGHT = 1
+};
+
+unsigned r300_get_pixel_alignment(enum pipe_format format,
+ unsigned num_samples,
+ enum r300_buffer_tiling microtile,
+ enum r300_buffer_tiling macrotile,
+ enum r300_dim dim);
+
+boolean r300_texture_desc_init(struct r300_screen *rscreen,
+ struct r300_texture_desc *desc,
+ const struct pipe_resource *base,
+ enum r300_buffer_tiling microtile,
+ enum r300_buffer_tiling macrotile,
+ unsigned stride_in_bytes_override,
+ unsigned max_buffer_size);
+
+unsigned r300_texture_get_offset(struct r300_texture_desc *desc,
+ unsigned level, unsigned zslice,
+ unsigned face);
+
+#endif
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 89f39af9761..a4911b9a2a6 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -71,7 +71,7 @@ static unsigned translate_opcode(unsigned opcode)
case TGSI_OPCODE_COS: return RC_OPCODE_COS;
case TGSI_OPCODE_DDX: return RC_OPCODE_DDX;
case TGSI_OPCODE_DDY: return RC_OPCODE_DDY;
- /* case TGSI_OPCODE_KILP: return RC_OPCODE_KILP; */
+ case TGSI_OPCODE_KILP: return RC_OPCODE_KILP;
/* case TGSI_OPCODE_PK2H: return RC_OPCODE_PK2H; */
/* case TGSI_OPCODE_PK2US: return RC_OPCODE_PK2US; */
/* case TGSI_OPCODE_PK4B: return RC_OPCODE_PK4B; */
@@ -97,20 +97,20 @@ static unsigned translate_opcode(unsigned opcode)
/* case TGSI_OPCODE_BRA: return RC_OPCODE_BRA; */
/* case TGSI_OPCODE_CAL: return RC_OPCODE_CAL; */
/* case TGSI_OPCODE_RET: return RC_OPCODE_RET; */
- /* case TGSI_OPCODE_SSG: return RC_OPCODE_SSG; */
+ case TGSI_OPCODE_SSG: return RC_OPCODE_SSG;
case TGSI_OPCODE_CMP: return RC_OPCODE_CMP;
case TGSI_OPCODE_SCS: return RC_OPCODE_SCS;
case TGSI_OPCODE_TXB: return RC_OPCODE_TXB;
/* case TGSI_OPCODE_NRM: return RC_OPCODE_NRM; */
/* case TGSI_OPCODE_DIV: return RC_OPCODE_DIV; */
- /* case TGSI_OPCODE_DP2: return RC_OPCODE_DP2; */
+ case TGSI_OPCODE_DP2: return RC_OPCODE_DP2;
case TGSI_OPCODE_TXL: return RC_OPCODE_TXL;
- /* case TGSI_OPCODE_BRK: return RC_OPCODE_BRK; */
+ case TGSI_OPCODE_BRK: return RC_OPCODE_BRK;
case TGSI_OPCODE_IF: return RC_OPCODE_IF;
- /* case TGSI_OPCODE_LOOP: return RC_OPCODE_LOOP; */
+ case TGSI_OPCODE_BGNLOOP: return RC_OPCODE_BGNLOOP;
case TGSI_OPCODE_ELSE: return RC_OPCODE_ELSE;
case TGSI_OPCODE_ENDIF: return RC_OPCODE_ENDIF;
- /* case TGSI_OPCODE_ENDLOOP: return RC_OPCODE_ENDLOOP; */
+ case TGSI_OPCODE_ENDLOOP: return RC_OPCODE_ENDLOOP;
/* case TGSI_OPCODE_PUSHA: return RC_OPCODE_PUSHA; */
/* case TGSI_OPCODE_POPA: return RC_OPCODE_POPA; */
case TGSI_OPCODE_CEIL: return RC_OPCODE_CEIL;
@@ -126,7 +126,7 @@ static unsigned translate_opcode(unsigned opcode)
/* case TGSI_OPCODE_SAD: return RC_OPCODE_SAD; */
/* case TGSI_OPCODE_TXF: return RC_OPCODE_TXF; */
/* case TGSI_OPCODE_TXQ: return RC_OPCODE_TXQ; */
- /* case TGSI_OPCODE_CONT: return RC_OPCODE_CONT; */
+ case TGSI_OPCODE_CONT: return RC_OPCODE_CONT;
/* case TGSI_OPCODE_EMIT: return RC_OPCODE_EMIT; */
/* case TGSI_OPCODE_ENDPRIM: return RC_OPCODE_ENDPRIM; */
/* case TGSI_OPCODE_BGNLOOP2: return RC_OPCODE_BGNLOOP2; */
diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c
index 0dae9ef98b7..e9333b35ef5 100644
--- a/src/gallium/drivers/r300/r300_transfer.c
+++ b/src/gallium/drivers/r300/r300_transfer.c
@@ -21,12 +21,9 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
-#include "r300_context.h"
#include "r300_transfer.h"
-#include "r300_texture.h"
-#include "r300_screen.h"
-
-#include "r300_winsys.h"
+#include "r300_texture_desc.h"
+#include "r300_screen_buffer.h"
#include "util/u_memory.h"
#include "util/u_format.h"
@@ -38,11 +35,8 @@ struct r300_transfer {
/* Offset from start of buffer. */
unsigned offset;
- /* Detiled texture. */
- struct r300_texture *detiled_texture;
-
- /* Transfer and format flags. */
- unsigned render_target_usage;
+ /* Linear texture. */
+ struct r300_texture *linear_texture;
};
/* Convenience cast wrapper. */
@@ -56,63 +50,38 @@ r300_transfer(struct pipe_transfer* transfer)
static void r300_copy_from_tiled_texture(struct pipe_context *ctx,
struct r300_transfer *r300transfer)
{
- struct pipe_screen *screen = ctx->screen;
struct pipe_transfer *transfer = (struct pipe_transfer*)r300transfer;
struct pipe_resource *tex = transfer->resource;
- struct pipe_surface *src, *dst;
-
- src = screen->get_tex_surface(screen, tex,
- transfer->sr.face,
- transfer->sr.level,
- transfer->box.z,
- PIPE_BIND_BLIT_SOURCE);
+ struct pipe_subresource subdst;
- dst = screen->get_tex_surface(screen, &r300transfer->detiled_texture->b.b,
- 0, 0, 0,
- PIPE_BIND_BLIT_DESTINATION);
+ subdst.face = 0;
+ subdst.level = 0;
- ctx->surface_copy(ctx, dst, 0, 0, src,
- transfer->box.x, transfer->box.y,
- transfer->box.width, transfer->box.height);
-
- pipe_surface_reference(&src, NULL);
- pipe_surface_reference(&dst, NULL);
+ ctx->resource_copy_region(ctx, &r300transfer->linear_texture->desc.b.b, subdst,
+ 0, 0, 0,
+ tex, transfer->sr,
+ transfer->box.x, transfer->box.y, transfer->box.z,
+ transfer->box.width, transfer->box.height);
}
/* Copy a detiled texture to a tiled one. */
static void r300_copy_into_tiled_texture(struct pipe_context *ctx,
struct r300_transfer *r300transfer)
{
- struct pipe_screen *screen = ctx->screen;
struct pipe_transfer *transfer = (struct pipe_transfer*)r300transfer;
struct pipe_resource *tex = transfer->resource;
- struct pipe_surface *src, *dst;
-
- src = screen->get_tex_surface(screen, &r300transfer->detiled_texture->b.b,
- 0, 0, 0,
- PIPE_BIND_BLIT_SOURCE);
+ struct pipe_subresource subsrc;
- dst = screen->get_tex_surface(screen, tex,
- transfer->sr.face,
- transfer->sr.level,
- transfer->box.z,
- PIPE_BIND_BLIT_DESTINATION);
+ subsrc.face = 0;
+ subsrc.level = 0;
- /* XXX this flush prevents the following DRM error from occuring:
- * [drm:radeon_cs_ioctl] *ERROR* Failed to parse relocation !
- * Reproducible with perf/copytex. */
- ctx->flush(ctx, 0, NULL);
-
- ctx->surface_copy(ctx, dst,
- transfer->box.x, transfer->box.y,
- src, 0, 0,
- transfer->box.width, transfer->box.height);
+ ctx->resource_copy_region(ctx, tex, transfer->sr,
+ transfer->box.x, transfer->box.y, transfer->box.z,
+ &r300transfer->linear_texture->desc.b.b, subsrc,
+ 0, 0, 0,
+ transfer->box.width, transfer->box.height);
- /* XXX this flush fixes a few piglit tests (e.g. glean/pixelFormats). */
ctx->flush(ctx, 0, NULL);
-
- pipe_surface_reference(&src, NULL);
- pipe_surface_reference(&dst, NULL);
}
struct pipe_transfer*
@@ -122,10 +91,26 @@ r300_texture_get_transfer(struct pipe_context *ctx,
unsigned usage,
const struct pipe_box *box)
{
+ struct r300_context *r300 = r300_context(ctx);
struct r300_texture *tex = r300_texture(texture);
- struct r300_screen *r300screen = r300_screen(ctx->screen);
struct r300_transfer *trans;
struct pipe_resource base;
+ boolean referenced_cs, referenced_hw, blittable;
+
+ referenced_cs =
+ r300->rws->cs_is_buffer_referenced(r300->cs,
+ tex->buffer, R300_REF_CS);
+ if (referenced_cs) {
+ referenced_hw = TRUE;
+ } else {
+ referenced_hw =
+ r300->rws->cs_is_buffer_referenced(r300->cs,
+ tex->buffer, R300_REF_HW);
+ }
+
+ blittable = ctx->screen->is_format_supported(
+ ctx->screen, texture->format, texture->target, 0,
+ PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET, 0);
trans = CALLOC_STRUCT(r300_transfer);
if (trans) {
@@ -136,13 +121,10 @@ r300_texture_get_transfer(struct pipe_context *ctx,
trans->transfer.box = *box;
/* If the texture is tiled, we must create a temporary detiled texture
- * for this transfer. */
- if (tex->microtile || tex->macrotile) {
- trans->render_target_usage =
- util_format_is_depth_or_stencil(texture->format) ?
- PIPE_BIND_DEPTH_STENCIL :
- PIPE_BIND_RENDER_TARGET;
-
+ * for this transfer.
+ * Also make write transfers pipelined. */
+ if (tex->desc.microtile || tex->desc.macrotile[sr.level] ||
+ ((referenced_hw & !(usage & PIPE_TRANSFER_READ)) && blittable)) {
base.target = PIPE_TEXTURE_2D;
base.format = texture->format;
base.width0 = box->width;
@@ -157,7 +139,7 @@ r300_texture_get_transfer(struct pipe_context *ctx,
/* For texture reading, the temporary (detiled) texture is used as
* a render target when blitting from a tiled texture. */
if (usage & PIPE_TRANSFER_READ) {
- base.bind |= trans->render_target_usage;
+ base.bind |= PIPE_BIND_RENDER_TARGET;
}
/* For texture writing, the temporary texture is used as a sampler
* when blitting into a tiled texture. */
@@ -166,12 +148,36 @@ r300_texture_get_transfer(struct pipe_context *ctx,
}
/* Create the temporary texture. */
- trans->detiled_texture = r300_texture(
+ trans->linear_texture = r300_texture(
ctx->screen->resource_create(ctx->screen,
&base));
- assert(!trans->detiled_texture->microtile &&
- !trans->detiled_texture->macrotile);
+ if (!trans->linear_texture) {
+ /* Oh crap, the thing can't create the texture.
+ * Let's flush and try again. */
+ ctx->flush(ctx, 0, NULL);
+
+ trans->linear_texture = r300_texture(
+ ctx->screen->resource_create(ctx->screen,
+ &base));
+
+ if (!trans->linear_texture) {
+ /* For linear textures, it's safe to fallback to
+ * an unpipelined transfer. */
+ if (!tex->desc.microtile && !tex->desc.macrotile[sr.level]) {
+ goto unpipelined;
+ }
+
+ /* Otherwise, go to hell. */
+ fprintf(stderr,
+ "r300: Failed to create a transfer object, praise.\n");
+ FREE(trans);
+ return NULL;
+ }
+ }
+
+ assert(!trans->linear_texture->desc.microtile &&
+ !trans->linear_texture->desc.macrotile[0]);
/* Set the stride.
*
@@ -181,20 +187,30 @@ r300_texture_get_transfer(struct pipe_context *ctx,
* right thing internally.
*/
trans->transfer.stride =
- r300_texture_get_stride(r300screen, trans->detiled_texture, 0);
+ trans->linear_texture->desc.stride_in_bytes[0];
if (usage & PIPE_TRANSFER_READ) {
/* We cannot map a tiled texture directly because the data is
* in a different order, therefore we do detiling using a blit. */
r300_copy_from_tiled_texture(ctx, trans);
+
+ /* Always referenced in the blit. */
+ ctx->flush(ctx, 0, NULL);
}
- } else {
- trans->transfer.stride =
- r300_texture_get_stride(r300screen, tex, sr.level);
- trans->offset = r300_texture_get_offset(tex, sr.level, box->z, sr.face);
+ return &trans->transfer;
}
+
+ unpipelined:
+ /* Unpipelined transfer. */
+ trans->transfer.stride = tex->desc.stride_in_bytes[sr.level];
+ trans->offset = r300_texture_get_offset(&tex->desc,
+ sr.level, box->z, sr.face);
+
+ if (referenced_cs)
+ ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL);
+ return &trans->transfer;
}
- return &trans->transfer;
+ return NULL;
}
void r300_texture_transfer_destroy(struct pipe_context *ctx,
@@ -202,13 +218,13 @@ void r300_texture_transfer_destroy(struct pipe_context *ctx,
{
struct r300_transfer *r300transfer = r300_transfer(trans);
- if (r300transfer->detiled_texture) {
+ if (r300transfer->linear_texture) {
if (trans->usage & PIPE_TRANSFER_WRITE) {
r300_copy_into_tiled_texture(ctx, r300transfer);
}
pipe_resource_reference(
- (struct pipe_resource**)&r300transfer->detiled_texture, NULL);
+ (struct pipe_resource**)&r300transfer->linear_texture, NULL);
}
pipe_resource_reference(&trans->resource, NULL);
FREE(trans);
@@ -217,21 +233,23 @@ void r300_texture_transfer_destroy(struct pipe_context *ctx,
void* r300_texture_transfer_map(struct pipe_context *ctx,
struct pipe_transfer *transfer)
{
+ struct r300_context *r300 = r300_context(ctx);
struct r300_winsys_screen *rws = (struct r300_winsys_screen *)ctx->winsys;
struct r300_transfer *r300transfer = r300_transfer(transfer);
struct r300_texture *tex = r300_texture(transfer->resource);
char *map;
- enum pipe_format format = tex->b.b.format;
+ enum pipe_format format = tex->desc.b.b.format;
- if (r300transfer->detiled_texture) {
+ if (r300transfer->linear_texture) {
/* The detiled texture is of the same size as the region being mapped
* (no offset needed). */
return rws->buffer_map(rws,
- r300transfer->detiled_texture->buffer,
+ r300transfer->linear_texture->buffer,
+ r300->cs,
transfer->usage);
} else {
/* Tiling is disabled. */
- map = rws->buffer_map(rws, tex->buffer,
+ map = rws->buffer_map(rws, tex->buffer, r300->cs,
transfer->usage);
if (!map) {
@@ -251,10 +269,9 @@ void r300_texture_transfer_unmap(struct pipe_context *ctx,
struct r300_transfer *r300transfer = r300_transfer(transfer);
struct r300_texture *tex = r300_texture(transfer->resource);
- if (r300transfer->detiled_texture) {
- rws->buffer_unmap(rws, r300transfer->detiled_texture->buffer);
+ if (r300transfer->linear_texture) {
+ rws->buffer_unmap(rws, r300transfer->linear_texture->buffer);
} else {
rws->buffer_unmap(rws, tex->buffer);
}
}
-
diff --git a/src/gallium/drivers/r300/r300_transfer.h b/src/gallium/drivers/r300/r300_transfer.h
index d72e54e5ed9..0d32a68d1fa 100644
--- a/src/gallium/drivers/r300/r300_transfer.h
+++ b/src/gallium/drivers/r300/r300_transfer.h
@@ -24,7 +24,7 @@
#ifndef R300_TRANSFER
#define R300_TRANSFER
-#include "pipe/p_screen.h"
+#include "pipe/p_context.h"
struct r300_context;
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index bfab9c3b014..e2b9af9d018 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -22,7 +22,6 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
#include "r300_vs.h"
-#include "r300_fs.h"
#include "r300_context.h"
#include "r300_screen.h"
@@ -94,94 +93,6 @@ static void r300_shader_read_vs_outputs(
vs_outputs->wpos = i;
}
-/* This function sets up:
- * - VAP mapping, which maps VS registers to output semantics and
- * at the same time it indicates which attributes are enabled and should
- * be rasterized.
- * - Stream mapping to VS outputs if TCL is not present. */
-static void r300_init_vs_output_mapping(struct r300_vertex_shader* vs)
-{
- struct r300_shader_semantics* vs_outputs = &vs->outputs;
- struct r300_vap_output_state *vap_out = &vs->vap_out;
- int *stream_loc = vs->stream_loc_notcl;
- int i, gen_count, tabi = 0;
- boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED ||
- vs_outputs->bcolor[1] != ATTR_UNUSED;
-
- vap_out->vap_vtx_state_cntl = 0x5555; /* XXX this is classic Mesa bonghits */
-
- /* Position. */
- if (vs_outputs->pos != ATTR_UNUSED) {
- vap_out->vap_vsm_vtx_assm |= R300_INPUT_CNTL_POS;
- vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
-
- stream_loc[tabi++] = 0;
- } else {
- assert(0);
- }
-
- /* Point size. */
- if (vs_outputs->psize != ATTR_UNUSED) {
- vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
-
- stream_loc[tabi++] = 1;
- }
-
- /* Colors. */
- for (i = 0; i < ATTR_COLOR_COUNT; i++) {
- if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used ||
- vs_outputs->color[1] != ATTR_UNUSED) {
- vap_out->vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR;
- vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i;
-
- stream_loc[tabi++] = 2 + i;
- }
- }
-
- /* Back-face colors. */
- if (any_bcolor_used) {
- for (i = 0; i < ATTR_COLOR_COUNT; i++) {
- vap_out->vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR;
- vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << (2+i);
-
- stream_loc[tabi++] = 4 + i;
- }
- }
-
- /* Texture coordinates. */
- gen_count = 0;
- for (i = 0; i < ATTR_GENERIC_COUNT && gen_count < 8; i++) {
- if (vs_outputs->generic[i] != ATTR_UNUSED) {
- vap_out->vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << gen_count);
- vap_out->vap_out_vtx_fmt[1] |= (4 << (3 * gen_count));
-
- stream_loc[tabi++] = 6 + gen_count;
- gen_count++;
- }
- }
-
- /* Fog coordinates. */
- if (gen_count < 8 && vs_outputs->fog != ATTR_UNUSED) {
- vap_out->vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << gen_count);
- vap_out->vap_out_vtx_fmt[1] |= (4 << (3 * gen_count));
-
- stream_loc[tabi++] = 6 + gen_count;
- gen_count++;
- }
-
- /* WPOS. */
- if (gen_count < 8) {
- vs->wpos_tex_output = gen_count;
- stream_loc[tabi++] = 6 + gen_count;
- } else {
- vs_outputs->wpos = ATTR_UNUSED;
- }
-
- for (; tabi < 16;) {
- stream_loc[tabi++] = -1;
- }
-}
-
static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c)
{
struct r300_vertex_shader * vs = c->UserData;
@@ -246,16 +157,19 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c)
}
/* WPOS. */
- if (outputs->wpos != ATTR_UNUSED) {
- c->code->outputs[outputs->wpos] = reg++;
- }
+ c->code->outputs[outputs->wpos] = reg++;
+}
+
+void r300_init_vs_outputs(struct r300_vertex_shader *vs)
+{
+ tgsi_scan_shader(vs->state.tokens, &vs->info);
+ r300_shader_read_vs_outputs(&vs->info, &vs->outputs);
}
static void r300_dummy_vertex_shader(
struct r300_context* r300,
struct r300_vertex_shader* shader)
{
- struct pipe_shader_state state;
struct ureg_program *ureg;
struct ureg_dst dst;
struct ureg_src imm;
@@ -269,35 +183,40 @@ static void r300_dummy_vertex_shader(
ureg_MOV(ureg, dst, imm);
ureg_END(ureg);
- state.tokens = ureg_finalize(ureg);
+ shader->state.tokens = tgsi_dup_tokens(ureg_finalize(ureg));
+ ureg_destroy(ureg);
shader->dummy = TRUE;
- r300_translate_vertex_shader(r300, shader, state.tokens);
-
- ureg_destroy(ureg);
+ r300_init_vs_outputs(shader);
+ r300_translate_vertex_shader(r300, shader);
}
-void r300_translate_vertex_shader(struct r300_context* r300,
- struct r300_vertex_shader* vs,
- const struct tgsi_token *tokens)
+void r300_translate_vertex_shader(struct r300_context *r300,
+ struct r300_vertex_shader *vs)
{
struct r300_vertex_program_compiler compiler;
struct tgsi_to_rc ttr;
-
- tgsi_scan_shader(tokens, &vs->info);
- r300_shader_read_vs_outputs(&vs->info, &vs->outputs);
- r300_init_vs_output_mapping(vs);
+ unsigned i;
/* Setup the compiler */
+ memset(&compiler, 0, sizeof(compiler));
rc_init(&compiler.Base);
compiler.Base.Debug = DBG_ON(r300, DBG_VP);
compiler.code = &vs->code;
compiler.UserData = vs;
+ compiler.Base.is_r500 = r300->screen->caps.is_r500;
+ compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT);
+ compiler.Base.has_half_swizzles = FALSE;
+ compiler.Base.has_presub = FALSE;
+ compiler.Base.max_temp_regs = 32;
+ compiler.Base.max_constants = 256;
+ compiler.Base.max_alu_insts = r300->screen->caps.is_r500 ? 1024 : 256;
+ compiler.Base.remove_unused_constants = TRUE;
if (compiler.Base.Debug) {
- debug_printf("r300: Initial vertex program\n");
- tgsi_dump(tokens, 0);
+ DBG(r300, DBG_VP, "r300: Initial vertex program\n");
+ tgsi_dump(vs->state.tokens, 0);
}
/* Translate TGSI to our internal representation */
@@ -305,23 +224,17 @@ void r300_translate_vertex_shader(struct r300_context* r300,
ttr.info = &vs->info;
ttr.use_half_swizzles = FALSE;
- r300_tgsi_to_rc(&ttr, tokens);
-
- compiler.RequiredOutputs =
- ~(~0 << (vs->info.num_outputs +
- (vs->outputs.wpos != ATTR_UNUSED ? 1 : 0)));
+ r300_tgsi_to_rc(&ttr, vs->state.tokens);
+ compiler.RequiredOutputs = ~(~0 << (vs->info.num_outputs + 1));
compiler.SetHwInputOutput = &set_vertex_inputs_outputs;
/* Insert the WPOS output. */
- if (vs->outputs.wpos != ATTR_UNUSED) {
- rc_copy_output(&compiler.Base, 0, vs->outputs.wpos);
- }
+ rc_copy_output(&compiler.Base, 0, vs->outputs.wpos);
/* Invoke the compiler */
r3xx_compile_vertex_program(&compiler);
if (compiler.Base.Error) {
- /* XXX We should fallback using Draw. */
fprintf(stderr, "r300 VP: Compiler error:\n%sUsing a dummy shader"
" instead.\n", compiler.Base.ErrorMsg);
@@ -337,38 +250,17 @@ void r300_translate_vertex_shader(struct r300_context* r300,
}
/* Initialize numbers of constants for each type. */
- vs->externals_count = ttr.immediate_offset;
+ vs->externals_count = 0;
+ for (i = 0;
+ i < vs->code.constants.Count &&
+ vs->code.constants.Constants[i].Type == RC_CONSTANT_EXTERNAL; i++) {
+ vs->externals_count = i+1;
+ }
+ for (; i < vs->code.constants.Count; i++) {
+ assert(vs->code.constants.Constants[i].Type == RC_CONSTANT_IMMEDIATE);
+ }
vs->immediates_count = vs->code.constants.Count - vs->externals_count;
/* And, finally... */
rc_destroy(&compiler.Base);
}
-
-boolean r300_vertex_shader_setup_wpos(struct r300_context* r300)
-{
- struct r300_vertex_shader* vs = r300->vs_state.state;
- struct r300_vap_output_state *vap_out = &vs->vap_out;
- int tex_output = vs->wpos_tex_output;
- uint32_t tex_fmt = R300_INPUT_CNTL_TC0 << tex_output;
-
- if (vs->outputs.wpos == ATTR_UNUSED) {
- return FALSE;
- }
-
- if (r300_fs(r300)->shader->inputs.wpos != ATTR_UNUSED) {
- /* Enable WPOS in VAP. */
- if (!(vap_out->vap_vsm_vtx_assm & tex_fmt)) {
- vap_out->vap_vsm_vtx_assm |= tex_fmt;
- vap_out->vap_out_vtx_fmt[1] |= (4 << (3 * tex_output));
- return TRUE;
- }
- } else {
- /* Disable WPOS in VAP. */
- if (vap_out->vap_vsm_vtx_assm & tex_fmt) {
- vap_out->vap_vsm_vtx_assm &= ~tex_fmt;
- vap_out->vap_out_vtx_fmt[1] &= ~(4 << (3 * tex_output));
- return TRUE;
- }
- }
- return FALSE;
-}
diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h
index 56bcc3b70b8..170de6c79db 100644
--- a/src/gallium/drivers/r300/r300_vs.h
+++ b/src/gallium/drivers/r300/r300_vs.h
@@ -39,7 +39,6 @@ struct r300_vertex_shader {
struct tgsi_shader_info info;
struct r300_shader_semantics outputs;
- struct r300_vap_output_state vap_out;
/* Whether the shader was replaced by a dummy one due to a shader
* compilation failure. */
@@ -49,12 +48,6 @@ struct r300_vertex_shader {
unsigned externals_count;
unsigned immediates_count;
- /* Stream locations for SWTCL or if TCL is bypassed. */
- int stream_loc_notcl[16];
-
- /* Output stream location for WPOS. */
- int wpos_tex_output;
-
/* HWTCL-specific. */
/* Machine code (if translated) */
struct r300_vertex_program_code code;
@@ -63,11 +56,12 @@ struct r300_vertex_shader {
void *draw_vs;
};
-void r300_translate_vertex_shader(struct r300_context* r300,
- struct r300_vertex_shader* vs,
- const struct tgsi_token *tokens);
+void r300_init_vs_outputs(struct r300_vertex_shader *vs);
+
+void r300_translate_vertex_shader(struct r300_context *r300,
+ struct r300_vertex_shader *vs);
-/* Return TRUE if VAP (hwfmt) needs to be re-emitted. */
-boolean r300_vertex_shader_setup_wpos(struct r300_context* r300);
+void r300_draw_init_vertex_shader(struct draw_context *draw,
+ struct r300_vertex_shader *vs);
#endif /* R300_VS_H */
diff --git a/src/gallium/drivers/r300/r300_vs_draw.c b/src/gallium/drivers/r300/r300_vs_draw.c
new file mode 100644
index 00000000000..2939963c355
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_vs_draw.c
@@ -0,0 +1,358 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Marek Olšák <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* This file contains the vertex shader tranformations for SW TCL needed
+ * to overcome the limitations of the r300 rasterizer.
+ *
+ * Transformations:
+ * 1) If the secondary color output is present, the primary color must be
+ * inserted before it.
+ * 2) If any back-face color output is present, there must be all 4 color
+ * outputs and missing ones must be inserted.
+ * 3) Insert a trailing texcoord output containing a copy of POS, for WPOS.
+ *
+ * I know this code is cumbersome, but I don't know of any nicer way
+ * of transforming TGSI shaders. ~ M.
+ */
+
+#include "r300_vs.h"
+
+#include <stdio.h>
+
+#include "tgsi/tgsi_transform.h"
+#include "tgsi/tgsi_dump.h"
+
+#include "draw/draw_context.h"
+
+struct vs_transform_context {
+ struct tgsi_transform_context base;
+
+ boolean color_used[2];
+ boolean bcolor_used[2];
+ boolean temp_used[128];
+
+ /* Index of the pos output, typically 0. */
+ unsigned pos_output;
+ /* Index of the pos temp where all writes of pos are redirected to. */
+ unsigned pos_temp;
+ /* The index of the last generic output, after which we insert a new
+ * output for WPOS. */
+ int last_generic;
+
+ unsigned num_outputs;
+ /* Used to shift output decl. indices when inserting new ones. */
+ unsigned decl_shift;
+ /* Used to remap writes to output decls if their indices changed. */
+ unsigned out_remap[32];
+
+ /* First instruction processed? */
+ boolean first_instruction;
+ /* End instruction processed? */
+ boolean end_instruction;
+};
+
+static void emit_temp(struct tgsi_transform_context *ctx, unsigned reg)
+{
+ struct tgsi_full_declaration decl;
+
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_TEMPORARY;
+ decl.Range.First = decl.Range.Last = reg;
+ ctx->emit_declaration(ctx, &decl);
+}
+
+static void emit_output(struct tgsi_transform_context *ctx,
+ unsigned name, unsigned index, unsigned interp,
+ unsigned reg)
+{
+ struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx;
+ struct tgsi_full_declaration decl;
+
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_OUTPUT;
+ decl.Declaration.Interpolate = interp;
+ decl.Declaration.Semantic = TRUE;
+ decl.Semantic.Name = name;
+ decl.Semantic.Index = index;
+ decl.Range.First = decl.Range.Last = reg;
+ ctx->emit_declaration(ctx, &decl);
+ ++vsctx->num_outputs;
+}
+
+static void insert_output(struct tgsi_transform_context *ctx,
+ struct tgsi_full_declaration *before,
+ unsigned name, unsigned index, unsigned interp)
+{
+ struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx;
+ unsigned i;
+
+ /* Make a place for the new output. */
+ for (i = before->Range.First; i < Elements(vsctx->out_remap); i++) {
+ ++vsctx->out_remap[i];
+ }
+
+ /* Insert the new output. */
+ emit_output(ctx, name, index, interp, before->Range.First);
+
+ ++vsctx->decl_shift;
+}
+
+static void insert_trailing_bcolor(struct tgsi_transform_context *ctx,
+ struct tgsi_full_declaration *before)
+{
+ struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx;
+
+ /* If BCOLOR0 is used, make sure BCOLOR1 is present too. Otherwise
+ * the rasterizer doesn't do the color selection correctly. */
+ if (vsctx->bcolor_used[0] && !vsctx->bcolor_used[1]) {
+ if (before) {
+ insert_output(ctx, before, TGSI_SEMANTIC_BCOLOR, 1,
+ TGSI_INTERPOLATE_LINEAR);
+ } else {
+ emit_output(ctx, TGSI_SEMANTIC_BCOLOR, 1,
+ TGSI_INTERPOLATE_LINEAR, vsctx->num_outputs);
+ }
+ vsctx->bcolor_used[1] = TRUE;
+ }
+}
+
+static void transform_decl(struct tgsi_transform_context *ctx,
+ struct tgsi_full_declaration *decl)
+{
+ struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx;
+ unsigned i;
+
+ if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
+ switch (decl->Semantic.Name) {
+ case TGSI_SEMANTIC_POSITION:
+ vsctx->pos_output = decl->Range.First;
+ break;
+
+ case TGSI_SEMANTIC_COLOR:
+ assert(decl->Semantic.Index < 2);
+ vsctx->color_used[decl->Semantic.Index] = TRUE;
+
+ /* We must rasterize the first color if the second one is
+ * used, otherwise the rasterizer doesn't do the color
+ * selection correctly. Declare it, but don't write to it. */
+ if (decl->Semantic.Index == 1 && !vsctx->color_used[0]) {
+ insert_output(ctx, decl, TGSI_SEMANTIC_COLOR, 0,
+ TGSI_INTERPOLATE_LINEAR);
+ vsctx->color_used[0] = TRUE;
+ }
+ break;
+
+ case TGSI_SEMANTIC_BCOLOR:
+ assert(decl->Semantic.Index < 2);
+ vsctx->bcolor_used[decl->Semantic.Index] = TRUE;
+
+ /* We must rasterize all 4 colors if back-face colors are
+ * used, otherwise the rasterizer doesn't do the color
+ * selection correctly. Declare it, but don't write to it. */
+ if (!vsctx->color_used[0]) {
+ insert_output(ctx, decl, TGSI_SEMANTIC_COLOR, 0,
+ TGSI_INTERPOLATE_LINEAR);
+ vsctx->color_used[0] = TRUE;
+ }
+ if (!vsctx->color_used[1]) {
+ insert_output(ctx, decl, TGSI_SEMANTIC_COLOR, 1,
+ TGSI_INTERPOLATE_LINEAR);
+ vsctx->color_used[1] = TRUE;
+ }
+ if (decl->Semantic.Index == 1 && !vsctx->bcolor_used[0]) {
+ insert_output(ctx, decl, TGSI_SEMANTIC_BCOLOR, 0,
+ TGSI_INTERPOLATE_LINEAR);
+ vsctx->bcolor_used[0] = TRUE;
+ }
+ /* One more case is handled in insert_trailing_bcolor. */
+ break;
+
+ case TGSI_SEMANTIC_GENERIC:
+ vsctx->last_generic = MAX2(vsctx->last_generic, decl->Semantic.Index);
+ break;
+ }
+
+ if (decl->Semantic.Name != TGSI_SEMANTIC_BCOLOR) {
+ /* Insert it as soon as possible. */
+ insert_trailing_bcolor(ctx, decl);
+ }
+
+ /* Since we're inserting new outputs in between, the following outputs
+ * should be moved to the right so that they don't overlap with
+ * the newly added ones. */
+ decl->Range.First += vsctx->decl_shift;
+ decl->Range.Last += vsctx->decl_shift;
+
+ ++vsctx->num_outputs;
+ } else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
+ for (i = decl->Range.First; i <= decl->Range.Last; i++) {
+ vsctx->temp_used[i] = TRUE;
+ }
+ }
+
+ ctx->emit_declaration(ctx, decl);
+}
+
+static void transform_inst(struct tgsi_transform_context *ctx,
+ struct tgsi_full_instruction *inst)
+{
+ struct vs_transform_context *vsctx = (struct vs_transform_context *) ctx;
+ struct tgsi_full_instruction new_inst;
+ unsigned i;
+
+ if (!vsctx->first_instruction) {
+ vsctx->first_instruction = TRUE;
+
+ /* The trailing BCOLOR should be inserted before the code
+ * if it hasn't already been done so. */
+ insert_trailing_bcolor(ctx, NULL);
+
+ /* Insert the generic output for WPOS. */
+ emit_output(ctx, TGSI_SEMANTIC_GENERIC, vsctx->last_generic + 1,
+ TGSI_INTERPOLATE_PERSPECTIVE, vsctx->num_outputs);
+
+ /* Find a free temp for POSITION. */
+ for (i = 0; i < Elements(vsctx->temp_used); i++) {
+ if (!vsctx->temp_used[i]) {
+ emit_temp(ctx, i);
+ vsctx->pos_temp = i;
+ break;
+ }
+ }
+ }
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_END) {
+ /* MOV OUT[pos_output], TEMP[pos_temp]; */
+ new_inst = tgsi_default_full_instruction();
+ new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
+ new_inst.Instruction.NumDstRegs = 1;
+ new_inst.Dst[0].Register.File = TGSI_FILE_OUTPUT;
+ new_inst.Dst[0].Register.Index = vsctx->pos_output;
+ new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
+ new_inst.Instruction.NumSrcRegs = 1;
+ new_inst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
+ new_inst.Src[0].Register.Index = vsctx->pos_temp;
+ ctx->emit_instruction(ctx, &new_inst);
+
+ /* MOV OUT[n-1], TEMP[pos_temp]; */
+ new_inst = tgsi_default_full_instruction();
+ new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
+ new_inst.Instruction.NumDstRegs = 1;
+ new_inst.Dst[0].Register.File = TGSI_FILE_OUTPUT;
+ new_inst.Dst[0].Register.Index = vsctx->num_outputs - 1;
+ new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
+ new_inst.Instruction.NumSrcRegs = 1;
+ new_inst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
+ new_inst.Src[0].Register.Index = vsctx->pos_temp;
+ ctx->emit_instruction(ctx, &new_inst);
+
+ vsctx->end_instruction = TRUE;
+ } else {
+ /* Not an END instruction. */
+ /* Fix writes to outputs. */
+ for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
+ struct tgsi_full_dst_register *dst = &inst->Dst[i];
+ if (dst->Register.File == TGSI_FILE_OUTPUT) {
+ if (dst->Register.Index == vsctx->pos_output) {
+ /* Replace writes to OUT[pos_output] with TEMP[pos_temp]. */
+ dst->Register.File = TGSI_FILE_TEMPORARY;
+ dst->Register.Index = vsctx->pos_temp;
+ } else {
+ /* Not a position, good...
+ * Since we were changing the indices of output decls,
+ * we must redirect writes into them too. */
+ dst->Register.Index = vsctx->out_remap[dst->Register.Index];
+ }
+ }
+ }
+
+ /* Inserting 2 instructions before the END opcode moves all following
+ * labels by 2. Subroutines are always after the END opcode so
+ * they're always moved. */
+ if (inst->Instruction.Opcode == TGSI_OPCODE_CAL) {
+ inst->Label.Label += 2;
+ }
+ /* The labels of the following opcodes are moved only after
+ * the END opcode. */
+ if (vsctx->end_instruction &&
+ (inst->Instruction.Opcode == TGSI_OPCODE_IF ||
+ inst->Instruction.Opcode == TGSI_OPCODE_ELSE ||
+ inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP ||
+ inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP)) {
+ inst->Label.Label += 2;
+ }
+ }
+
+ ctx->emit_instruction(ctx, inst);
+}
+
+void r300_draw_init_vertex_shader(struct draw_context *draw,
+ struct r300_vertex_shader *vs)
+{
+ struct pipe_shader_state new_vs;
+ struct vs_transform_context transform;
+ const uint newLen = tgsi_num_tokens(vs->state.tokens) + 100 /* XXX */;
+ unsigned i;
+
+ new_vs.tokens = tgsi_alloc_tokens(newLen);
+ if (new_vs.tokens == NULL)
+ return;
+
+ memset(&transform, 0, sizeof(transform));
+ for (i = 0; i < Elements(transform.out_remap); i++) {
+ transform.out_remap[i] = i;
+ }
+ transform.last_generic = -1;
+ transform.base.transform_instruction = transform_inst;
+ transform.base.transform_declaration = transform_decl;
+
+ tgsi_transform_shader(vs->state.tokens,
+ (struct tgsi_token*)new_vs.tokens,
+ newLen, &transform.base);
+
+#if 0
+ printf("----------------------------------------------\norig shader:\n");
+ tgsi_dump(vs->state.tokens, 0);
+ printf("----------------------------------------------\nnew shader:\n");
+ tgsi_dump(new_vs.tokens, 0);
+ printf("----------------------------------------------\n");
+#endif
+
+ /* Free old tokens. */
+ FREE((void*)vs->state.tokens);
+
+ vs->draw_vs = draw_create_vertex_shader(draw, &new_vs);
+
+ /* Instead of duplicating and freeing the tokens, copy the pointer directly. */
+ vs->state.tokens = new_vs.tokens;
+
+ /* Init the VS output table for the rasterizer. */
+ r300_init_vs_outputs(vs);
+
+ /* Make the last generic be WPOS. */
+ vs->outputs.wpos = vs->outputs.generic[transform.last_generic + 1];
+ vs->outputs.generic[transform.last_generic + 1] = ATTR_UNUSED;
+}
diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h
index 1642981eaa8..4597332399a 100644
--- a/src/gallium/drivers/r300/r300_winsys.h
+++ b/src/gallium/drivers/r300/r300_winsys.h
@@ -1,5 +1,6 @@
/*
* Copyright 2008 Corbin Simpson <[email protected]>
+ * Copyright 2010 Marek Olšák <[email protected]>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -23,23 +24,34 @@
#ifndef R300_WINSYS_H
#define R300_WINSYS_H
-/* The public interface header for the r300 pipe driver.
- * Any winsys hosting this pipe needs to implement r300_winsys and then
- * call r300_create_screen to start things. */
+/* The public winsys interface header for the r300 pipe driver.
+ * Any winsys hosting this pipe needs to implement r300_winsys_screen and then
+ * call r300_screen_create to start things. */
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "r300_defines.h"
+struct winsys_handle;
+struct r300_winsys_screen;
+
struct r300_winsys_buffer;
+struct r300_winsys_cs {
+ uint32_t *ptr; /* Pointer to the beginning of the CS. */
+ unsigned cdw; /* Number of used dwords. */
+ unsigned ndw; /* Size of the CS in dwords. */
+};
+
enum r300_value_id {
R300_VID_PCI_ID,
R300_VID_GB_PIPES,
R300_VID_Z_PIPES,
R300_VID_SQUARE_TILING_SUPPORT,
R300_VID_DRM_2_3_0,
+ R300_VID_DRM_2_6_0,
+ R300_CAN_HYPERZ,
};
enum r300_reference_domain { /* bitfield */
@@ -48,135 +60,251 @@ enum r300_reference_domain { /* bitfield */
};
struct r300_winsys_screen {
+ /**
+ * Destroy this winsys.
+ *
+ * \param ws The winsys this function is called from.
+ */
void (*destroy)(struct r300_winsys_screen *ws);
-
+
/**
+ * Query a system value from a winsys.
+ *
+ * \param ws The winsys this function is called from.
+ * \param vid One of the R300_VID_* enums.
+ */
+ uint32_t (*get_value)(struct r300_winsys_screen *ws,
+ enum r300_value_id vid);
+
+ /**************************************************************************
* Buffer management. Buffer attributes are mostly fixed over its lifetime.
*
* Remember that gallium gets to choose the interface it needs, and the
* window systems must then implement that interface (rather than the
* other way around...).
+ *************************************************************************/
+
+ /**
+ * Create a buffer object.
*
- * usage is a bitmask of R300_WINSYS_BUFFER_USAGE_PIXEL/VERTEX/INDEX/CONSTANT. This
- * usage argument is only an optimization hint, not a guarantee, therefore
- * proper behavior must be observed in all circumstances.
- *
- * alignment indicates the client's alignment requirements, eg for
- * SSE instructions.
+ * \param ws The winsys this function is called from.
+ * \param size The size to allocate.
+ * \param alignment An alignment of the buffer in memory.
+ * \param bind A bitmask of the PIPE_BIND_* flags.
+ * \param usage A bitmask of the PIPE_USAGE_* flags.
+ * \param domain A bitmask of the R300_DOMAIN_* flags.
+ * \return The created buffer object.
*/
struct r300_winsys_buffer *(*buffer_create)(struct r300_winsys_screen *ws,
- unsigned alignment,
- unsigned usage,
- unsigned size);
-
+ unsigned size,
+ unsigned alignment,
+ unsigned bind,
+ unsigned usage,
+ enum r300_buffer_domain domain);
+
/**
- * Map the entire data store of a buffer object into the client's address.
- * flags is bitmask of R300_WINSYS_BUFFER_USAGE_CPU_READ/WRITE flags.
+ * Reference a buffer object (assign with reference counting).
+ *
+ * \param ws The winsys this function is called from.
+ * \param pdst A destination pointer to set the source buffer to.
+ * \param src A source buffer object.
*/
- void *(*buffer_map)( struct r300_winsys_screen *ws,
- struct r300_winsys_buffer *buf,
- unsigned usage);
+ void (*buffer_reference)(struct r300_winsys_screen *ws,
+ struct r300_winsys_buffer **pdst,
+ struct r300_winsys_buffer *src);
- void (*buffer_unmap)( struct r300_winsys_screen *ws,
- struct r300_winsys_buffer *buf );
-
- void (*buffer_destroy)( struct r300_winsys_buffer *buf );
+ /**
+ * Map the entire data store of a buffer object into the client's address
+ * space.
+ *
+ * \param ws The winsys this function is called from.
+ * \param buf A winsys buffer object to map.
+ * \param cs A command stream to flush if the buffer is referenced by it.
+ * \param usage A bitmask of the PIPE_TRANSFER_* flags.
+ * \return The pointer at the beginning of the buffer.
+ */
+ void *(*buffer_map)(struct r300_winsys_screen *ws,
+ struct r300_winsys_buffer *buf,
+ struct r300_winsys_cs *cs,
+ enum pipe_transfer_usage usage);
+ /**
+ * Unmap a buffer object from the client's address space.
+ *
+ * \param ws The winsys this function is called from.
+ * \param buf A winsys buffer object to unmap.
+ */
+ void (*buffer_unmap)(struct r300_winsys_screen *ws,
+ struct r300_winsys_buffer *buf);
- void (*buffer_reference)(struct r300_winsys_screen *rws,
- struct r300_winsys_buffer **pdst,
- struct r300_winsys_buffer *src);
+ /**
+ * Wait for a buffer object until it is not used by a GPU. This is
+ * equivalent to a fence placed after the last command using the buffer,
+ * and synchronizing to the fence.
+ *
+ * \param ws The winsys this function is called from.
+ * \param buf A winsys buffer object to wait for.
+ */
+ void (*buffer_wait)(struct r300_winsys_screen *ws,
+ struct r300_winsys_buffer *buf);
- boolean (*buffer_references)(struct r300_winsys_buffer *a,
- struct r300_winsys_buffer *b);
+ /**
+ * Return tiling flags describing a memory layout of a buffer object.
+ *
+ * \param ws The winsys this function is called from.
+ * \param buf A winsys buffer object to get the flags from.
+ * \param macrotile A pointer to the return value of the microtile flag.
+ * \param microtile A pointer to the return value of the macrotile flag.
+ *
+ * \note microtile and macrotile are not bitmasks!
+ */
+ void (*buffer_get_tiling)(struct r300_winsys_screen *ws,
+ struct r300_winsys_buffer *buf,
+ enum r300_buffer_tiling *microtile,
+ enum r300_buffer_tiling *macrotile);
- void (*buffer_flush_range)(struct r300_winsys_screen *rws,
- struct r300_winsys_buffer *buf,
- unsigned offset,
- unsigned length);
+ /**
+ * Set tiling flags describing a memory layout of a buffer object.
+ *
+ * \param ws The winsys this function is called from.
+ * \param buf A winsys buffer object to set the flags for.
+ * \param macrotile A macrotile flag.
+ * \param microtile A microtile flag.
+ * \param stride A stride of the buffer in bytes, for texturing.
+ *
+ * \note microtile and macrotile are not bitmasks!
+ */
+ void (*buffer_set_tiling)(struct r300_winsys_screen *ws,
+ struct r300_winsys_buffer *buf,
+ enum r300_buffer_tiling microtile,
+ enum r300_buffer_tiling macrotile,
+ unsigned stride);
- /* Add a pipe_resource to the list of buffer objects to validate. */
- boolean (*add_buffer)(struct r300_winsys_screen *winsys,
- struct r300_winsys_buffer *buf,
- uint32_t rd,
- uint32_t wd);
+ /**
+ * Get a winsys buffer from a winsys handle. The internal structure
+ * of the handle is platform-specific and only a winsys should access it.
+ *
+ * \param ws The winsys this function is called from.
+ * \param whandle A winsys handle pointer as was received from a state
+ * tracker.
+ * \param stride The returned buffer stride in bytes.
+ * \param size The returned buffer size.
+ */
+ struct r300_winsys_buffer *(*buffer_from_handle)(struct r300_winsys_screen *ws,
+ struct winsys_handle *whandle,
+ unsigned *stride,
+ unsigned *size);
+ /**
+ * Get a winsys handle from a winsys buffer. The internal structure
+ * of the handle is platform-specific and only a winsys should access it.
+ *
+ * \param ws The winsys this function is called from.
+ * \param buf A winsys buffer object to get the handle from.
+ * \param whandle A winsys handle pointer.
+ * \param stride A stride of the buffer in bytes, for texturing.
+ * \return TRUE on success.
+ */
+ boolean (*buffer_get_handle)(struct r300_winsys_screen *ws,
+ struct r300_winsys_buffer *buf,
+ unsigned stride,
+ struct winsys_handle *whandle);
- /* Revalidate all currently setup pipe_buffers.
- * Returns TRUE if a flush is required. */
- boolean (*validate)(struct r300_winsys_screen* winsys);
+ /**************************************************************************
+ * Command submission.
+ *
+ * Each pipe context should create its own command stream and submit
+ * commands independently of other contexts.
+ *************************************************************************/
- /* Check to see if there's room for commands. */
- boolean (*check_cs)(struct r300_winsys_screen* winsys, int size);
+ /**
+ * Create a command stream.
+ *
+ * \param ws The winsys this function is called from.
+ */
+ struct r300_winsys_cs *(*cs_create)(struct r300_winsys_screen *ws);
- /* Start a command emit. */
- void (*begin_cs)(struct r300_winsys_screen* winsys,
- int size,
- const char* file,
- const char* function,
- int line);
+ /**
+ * Destroy a command stream.
+ *
+ * \param cs A command stream to destroy.
+ */
+ void (*cs_destroy)(struct r300_winsys_cs *cs);
- /* Write a dword to the command buffer. */
- void (*write_cs_dword)(struct r300_winsys_screen* winsys, uint32_t dword);
+ /**
+ * Add a buffer object to the list of buffers to validate.
+ *
+ * \param cs A command stream to add buffer for validation against.
+ * \param buf A winsys buffer to validate.
+ * \param rd A read domain containing a bitmask
+ * of the R300_DOMAIN_* flags.
+ * \param wd A write domain containing a bitmask
+ * of the R300_DOMAIN_* flags.
+ */
+ void (*cs_add_buffer)(struct r300_winsys_cs *cs,
+ struct r300_winsys_buffer *buf,
+ enum r300_buffer_domain rd,
+ enum r300_buffer_domain wd);
- /* Write a table of dwords to the command buffer. */
- void (*write_cs_table)(struct r300_winsys_screen* winsys,
- const void *dwords, unsigned count);
+ /**
+ * Revalidate all currently set up winsys buffers.
+ * Returns TRUE if a flush is required.
+ *
+ * \param cs A command stream to validate.
+ */
+ boolean (*cs_validate)(struct r300_winsys_cs *cs);
- /* Write a relocated dword to the command buffer. */
- void (*write_cs_reloc)(struct r300_winsys_screen *winsys,
+ /**
+ * Write a relocated dword to a command buffer.
+ *
+ * \param cs A command stream the relocation is written to.
+ * \param buf A winsys buffer to write the relocation for.
+ * \param rd A read domain containing a bitmask of the R300_DOMAIN_* flags.
+ * \param wd A write domain containing a bitmask of the R300_DOMAIN_* flags.
+ */
+ void (*cs_write_reloc)(struct r300_winsys_cs *cs,
struct r300_winsys_buffer *buf,
- uint32_t rd,
- uint32_t wd,
- uint32_t flags);
-
- /* Finish a command emit. */
- void (*end_cs)(struct r300_winsys_screen* winsys,
- const char* file,
- const char* function,
- int line);
-
- /* Flush the CS. */
- void (*flush_cs)(struct r300_winsys_screen* winsys);
-
- /* winsys flush - callback from winsys when flush required */
- void (*set_flush_cb)(struct r300_winsys_screen *winsys,
- void (*flush_cb)(void *), void *data);
-
- void (*reset_bos)(struct r300_winsys_screen *winsys);
-
- void (*buffer_get_tiling)(struct r300_winsys_screen *winsys,
- struct r300_winsys_buffer *buffer,
- enum r300_buffer_tiling *microtiled,
- enum r300_buffer_tiling *macrotiled);
-
- void (*buffer_set_tiling)(struct r300_winsys_screen *winsys,
- struct r300_winsys_buffer *buffer,
- uint32_t pitch,
- enum r300_buffer_tiling microtiled,
- enum r300_buffer_tiling macrotiled);
-
- uint32_t (*get_value)(struct r300_winsys_screen *winsys,
- enum r300_value_id vid);
-
- struct r300_winsys_buffer *(*buffer_from_handle)(struct r300_winsys_screen *winsys,
- struct pipe_screen *screen,
- struct winsys_handle *whandle,
- unsigned *stride);
- boolean (*buffer_get_handle)(struct r300_winsys_screen *winsys,
- struct r300_winsys_buffer *buffer,
- unsigned stride,
- struct winsys_handle *whandle);
-
- boolean (*is_buffer_referenced)(struct r300_winsys_screen *winsys,
- struct r300_winsys_buffer *buffer,
- enum r300_reference_domain domain);
-};
+ enum r300_buffer_domain rd,
+ enum r300_buffer_domain wd);
+
+ /**
+ * Flush a command stream.
+ *
+ * \param cs A command stream to flush.
+ */
+ void (*cs_flush)(struct r300_winsys_cs *cs);
+
+ /**
+ * Set a flush callback which is called from winsys when flush is
+ * required.
+ *
+ * \param cs A command stream to set the callback for.
+ * \param flush A flush callback function associated with the command stream.
+ * \param user A user pointer that will be passed to the flush callback.
+ */
+ void (*cs_set_flush)(struct r300_winsys_cs *cs,
+ void (*flush)(void *),
+ void *user);
-struct r300_winsys_screen *
-r300_winsys_screen(struct pipe_screen *screen);
+ /**
+ * Reset the list of buffer objects to validate, usually called
+ * prior to adding buffer objects for validation.
+ *
+ * \param cs A command stream to reset buffers for.
+ */
+ void (*cs_reset_buffers)(struct r300_winsys_cs *cs);
-/* Creates a new r300 screen. */
-struct pipe_screen* r300_create_screen(struct r300_winsys_screen *rws);
+ /**
+ * Return TRUE if a buffer is referenced by a command stream or by hardware
+ * (i.e. is busy), based on the domain parameter.
+ *
+ * \param cs A command stream.
+ * \param buf A winsys buffer.
+ * \param domain A bitmask of the R300_REF_* enums.
+ */
+ boolean (*cs_is_buffer_referenced)(struct r300_winsys_cs *cs,
+ struct r300_winsys_buffer *buf,
+ enum r300_reference_domain domain);
+};
#endif /* R300_WINSYS_H */