summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/drivers/nvfx/nvfx_context.c4
-rw-r--r--src/gallium/drivers/nvfx/nvfx_context.h6
-rw-r--r--src/gallium/drivers/nvfx/nvfx_fragtex.c4
-rw-r--r--src/gallium/drivers/nvfx/nvfx_miptree.c51
-rw-r--r--src/gallium/drivers/nvfx/nvfx_resource.h36
-rw-r--r--src/gallium/drivers/nvfx/nvfx_state_emit.c81
-rw-r--r--src/gallium/drivers/nvfx/nvfx_state_fb.c251
-rw-r--r--src/gallium/drivers/nvfx/nvfx_surface.c121
8 files changed, 408 insertions, 146 deletions
diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c
index 3d45f5f0baf..7ab81de7dd5 100644
--- a/src/gallium/drivers/nvfx/nvfx_context.c
+++ b/src/gallium/drivers/nvfx/nvfx_context.c
@@ -15,6 +15,7 @@ nvfx_flush(struct pipe_context *pipe, unsigned flags,
struct nouveau_channel *chan = screen->base.channel;
struct nouveau_grobj *eng3d = screen->eng3d;
+ /* XXX: we need to actually be intelligent here */
if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
BEGIN_RING(chan, eng3d, 0x1fd8, 1);
OUT_RING (chan, 2);
@@ -87,5 +88,8 @@ nvfx_create(struct pipe_screen *pscreen, void *priv)
/* set these to that we init them on first validation */
nvfx->state.scissor_enabled = ~0;
nvfx->state.stipple_enabled = ~0;
+
+ LIST_INITHEAD(&nvfx->render_cache);
+
return &nvfx->pipe;
}
diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h
index 278be94d525..a6ea9139675 100644
--- a/src/gallium/drivers/nvfx/nvfx_context.h
+++ b/src/gallium/drivers/nvfx/nvfx_context.h
@@ -11,6 +11,7 @@
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_inlines.h"
+#include "util/u_double_list.h"
#include "draw/draw_vertex.h"
#include "util/u_blitter.h"
@@ -67,6 +68,7 @@ struct nvfx_state {
unsigned scissor_enabled;
unsigned stipple_enabled;
unsigned fp_samplers;
+ unsigned render_temps;
};
struct nvfx_vtxelt_state {
@@ -90,6 +92,7 @@ struct nvfx_context {
struct draw_context *draw;
struct blitter_context* blitter;
+ struct list_head render_cache;
/* HW state derived from pipe states */
struct nvfx_state state;
@@ -185,7 +188,8 @@ extern void nvfx_draw_elements_swtnl(struct pipe_context *pipe,
extern void nvfx_vtxfmt_validate(struct nvfx_context *nvfx);
/* nvfx_fb.c */
-extern void nvfx_state_framebuffer_validate(struct nvfx_context *nvfx);
+extern int nvfx_framebuffer_prepare(struct nvfx_context *nvfx);
+extern void nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result);
void
nvfx_framebuffer_relocate(struct nvfx_context *nvfx);
diff --git a/src/gallium/drivers/nvfx/nvfx_fragtex.c b/src/gallium/drivers/nvfx/nvfx_fragtex.c
index 0b4a434fecc..66057454337 100644
--- a/src/gallium/drivers/nvfx/nvfx_fragtex.c
+++ b/src/gallium/drivers/nvfx/nvfx_fragtex.c
@@ -16,6 +16,10 @@ nvfx_fragtex_validate(struct nvfx_context *nvfx)
samplers &= ~(1 << unit);
if(nvfx->fragment_sampler_views[unit] && nvfx->tex_sampler[unit]) {
+ util_dirty_surfaces_use_for_sampling(&nvfx->pipe,
+ &((struct nvfx_miptree*)nvfx->fragment_sampler_views[unit]->texture)->dirty_surfaces,
+ nvfx_surface_flush);
+
if(!nvfx->is_nv4x)
nv30_fragtex_set(nvfx, unit);
else
diff --git a/src/gallium/drivers/nvfx/nvfx_miptree.c b/src/gallium/drivers/nvfx/nvfx_miptree.c
index 7deb9d7b9a3..530d705e136 100644
--- a/src/gallium/drivers/nvfx/nvfx_miptree.c
+++ b/src/gallium/drivers/nvfx/nvfx_miptree.c
@@ -11,6 +11,7 @@
#include "nvfx_screen.h"
#include "nvfx_resource.h"
#include "nvfx_transfer.h"
+#include "nv04_2d.h"
static void
nvfx_miptree_choose_format(struct nvfx_miptree *mt)
@@ -115,16 +116,23 @@ nvfx_miptree_get_handle(struct pipe_screen *pscreen,
static void
+nvfx_miptree_surface_final_destroy(struct pipe_surface* ps)
+{
+ struct nvfx_surface* ns = (struct nvfx_surface*)ps;
+ pipe_resource_reference(&ps->texture, 0);
+ pipe_resource_reference((struct pipe_resource**)&ns->temp, 0);
+ FREE(ps);
+}
+
+static void
nvfx_miptree_destroy(struct pipe_screen *screen, struct pipe_resource *pt)
{
struct nvfx_miptree *mt = (struct nvfx_miptree *)pt;
+ util_surfaces_destroy(&mt->surfaces, pt, nvfx_miptree_surface_final_destroy);
nouveau_screen_bo_release(screen, mt->base.bo);
FREE(mt);
}
-
-
-
struct u_resource_vtbl nvfx_miptree_vtbl =
{
nvfx_miptree_get_handle, /* get_handle */
@@ -152,6 +160,8 @@ nvfx_miptree_create_skeleton(struct pipe_screen *pscreen, const struct pipe_reso
mt->base.base = *pt;
mt->base.vtbl = &nvfx_miptree_vtbl;
+ util_dirty_surfaces_init(&mt->dirty_surfaces);
+
pipe_reference_init(&mt->base.base.reference, 1);
mt->base.base.screen = pscreen;
@@ -218,29 +228,28 @@ nvfx_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt,
unsigned face, unsigned level, unsigned zslice,
unsigned flags)
{
+ struct nvfx_miptree* mt = (struct nvfx_miptree*)pt;
struct nvfx_surface *ns;
- ns = CALLOC_STRUCT(nvfx_surface);
- if (!ns)
- return NULL;
- pipe_resource_reference(&ns->base.texture, pt);
- ns->base.format = pt->format;
- ns->base.width = u_minify(pt->width0, level);
- ns->base.height = u_minify(pt->height0, level);
- ns->base.usage = flags;
- pipe_reference_init(&ns->base.reference, 1);
- ns->base.face = face;
- ns->base.level = level;
- ns->base.zslice = zslice;
- ns->pitch = nvfx_subresource_pitch(pt, level);
- ns->base.offset = nvfx_subresource_offset(pt, face, level, zslice);
-
- return &ns->base;
+ ns = (struct nvfx_surface*)util_surfaces_get(&mt->surfaces, sizeof(struct nvfx_surface), pscreen, pt, face, level, zslice, flags);
+ if(ns->base.base.offset == ~0) {
+ util_dirty_surface_init(&ns->base);
+ ns->pitch = nvfx_subresource_pitch(pt, level);
+ ns->base.base.offset = nvfx_subresource_offset(pt, face, level, zslice);
+ }
+
+ return &ns->base.base;
}
void
nvfx_miptree_surface_del(struct pipe_surface *ps)
{
- pipe_resource_reference(&ps->texture, NULL);
- FREE(ps);
+ struct nvfx_surface* ns = (struct nvfx_surface*)ps;
+
+ if(!ns->temp)
+ {
+ util_surfaces_detach(&((struct nvfx_miptree*)ps->texture)->surfaces, ps);
+ pipe_resource_reference(&ps->texture, 0);
+ FREE(ps);
+ }
}
diff --git a/src/gallium/drivers/nvfx/nvfx_resource.h b/src/gallium/drivers/nvfx/nvfx_resource.h
index 42d04ebb370..be1845dd9c1 100644
--- a/src/gallium/drivers/nvfx/nvfx_resource.h
+++ b/src/gallium/drivers/nvfx/nvfx_resource.h
@@ -1,13 +1,16 @@
-
#ifndef NVFX_RESOURCE_H
#define NVFX_RESOURCE_H
#include "util/u_transfer.h"
#include "util/u_format.h"
#include "util/u_math.h"
+#include "util/u_double_list.h"
+#include "util/u_surfaces.h"
+#include "util/u_dirty_surfaces.h"
#include <nouveau/nouveau_bo.h>
struct pipe_resource;
+struct nv04_region;
/* This gets further specialized into either buffer or texture
@@ -38,17 +41,34 @@ nvfx_resource_on_gpu(struct pipe_resource* pr)
#define NVFX_MAX_TEXTURE_LEVELS 16
+/* We have the following invariants for render temporaries
+ *
+ * 1. Render temporaries are always linear
+ * 2. Render temporaries are always up to date
+ * 3. Currently, render temporaries are destroyed when the resource is used for sampling, but kept for any other use
+ *
+ * Also, we do NOT flush temporaries on any pipe->flush().
+ * This is fine, as long as scanout targets and shared resources never need temps.
+ *
+ * TODO: we may want to also support swizzled temporaries to improve performance in some cases.
+ */
+
struct nvfx_miptree {
struct nvfx_resource base;
unsigned linear_pitch; /* for linear textures, 0 for swizzled and compressed textures with level-dependent minimal pitch */
unsigned face_size; /* 128-byte aligned face/total size */
unsigned level_offset[NVFX_MAX_TEXTURE_LEVELS];
+
+ struct util_surfaces surfaces;
+ struct util_dirty_surfaces dirty_surfaces;
};
struct nvfx_surface {
- struct pipe_surface base;
+ struct util_dirty_surface base;
unsigned pitch;
+
+ struct nvfx_miptree* temp;
};
static INLINE
@@ -65,6 +85,12 @@ nvfx_surface_buffer(struct pipe_surface *surf)
return mt->bo;
}
+static INLINE struct util_dirty_surfaces*
+nvfx_surface_get_dirty_surfaces(struct pipe_surface* surf)
+{
+ struct nvfx_miptree *mt = (struct nvfx_miptree *)surf->texture;
+ return &mt->dirty_surfaces;
+}
void
nvfx_init_resource_functions(struct pipe_context *pipe);
@@ -141,4 +167,10 @@ nvfx_subresource_pitch(struct pipe_resource* pt, unsigned level)
}
}
+void
+nvfx_surface_create_temp(struct pipe_context* pipe, struct pipe_surface* surf);
+
+void
+nvfx_surface_flush(struct pipe_context* pipe, struct pipe_surface* surf);
+
#endif
diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c
index f91ae19ecd3..dc70f3de870 100644
--- a/src/gallium/drivers/nvfx/nvfx_state_emit.c
+++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c
@@ -1,15 +1,48 @@
#include "nvfx_context.h"
#include "nvfx_state.h"
+#include "nvfx_resource.h"
#include "draw/draw_context.h"
static boolean
nvfx_state_validate_common(struct nvfx_context *nvfx)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
- unsigned dirty = nvfx->dirty;
+ unsigned dirty;
+ int all_swizzled = -1;
+ boolean flush_tex_cache = FALSE;
if(nvfx != nvfx->screen->cur_ctx)
- dirty = ~0;
+ {
+ nvfx->dirty = ~0;
+ nvfx->screen->cur_ctx = nvfx;
+ }
+
+ /* These can trigger use the of 3D engine to copy temporaries.
+ * That will recurse here and thus dirty all 3D state, so we need to this before anything else, and in a loop..
+ * This converges to having clean temps, then binding both fragtexes and framebuffers.
+ */
+ while(nvfx->dirty & (NVFX_NEW_FB | NVFX_NEW_SAMPLER))
+ {
+ if(nvfx->dirty & NVFX_NEW_SAMPLER)
+ {
+ nvfx->dirty &=~ NVFX_NEW_SAMPLER;
+ nvfx_fragtex_validate(nvfx);
+
+ // TODO: only set this if really necessary
+ flush_tex_cache = TRUE;
+ }
+
+ if(nvfx->dirty & NVFX_NEW_FB)
+ {
+ nvfx->dirty &=~ NVFX_NEW_FB;
+ all_swizzled = nvfx_framebuffer_prepare(nvfx);
+
+ // TODO: make sure this doesn't happen, i.e. fbs have matching formats
+ assert(all_swizzled >= 0);
+ }
+ }
+
+ dirty = nvfx->dirty;
if(nvfx->render_mode == HW)
{
@@ -35,9 +68,6 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
nvfx_vtxfmt_validate(nvfx);
}
- if(dirty & NVFX_NEW_FB)
- nvfx_state_framebuffer_validate(nvfx);
-
if(dirty & NVFX_NEW_RAST)
sb_emit(chan, nvfx->rasterizer->sb, nvfx->rasterizer->sb_len);
@@ -48,10 +78,14 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
nvfx_state_stipple_validate(nvfx);
if(dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_FRAGCONST))
+ {
nvfx_fragprog_validate(nvfx);
+ if(dirty & NVFX_NEW_FRAGPROG)
+ flush_tex_cache = TRUE; // TODO: do we need this?
+ }
- if(dirty & NVFX_NEW_SAMPLER)
- nvfx_fragtex_validate(nvfx);
+ if(all_swizzled >= 0)
+ nvfx_framebuffer_validate(nvfx, all_swizzled);
if(dirty & NVFX_NEW_BLEND)
sb_emit(chan, nvfx->blend->sb, nvfx->blend->sb_len);
@@ -72,13 +106,17 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
if(dirty & (NVFX_NEW_VIEWPORT | NVFX_NEW_FB))
nvfx_state_viewport_validate(nvfx);
- /* TODO: could nv30 need this or something similar too? */
- if((dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_SAMPLER)) && nvfx->is_nv4x) {
- WAIT_RING(chan, 4);
- OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1));
- OUT_RING(chan, 2);
- OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1));
- OUT_RING(chan, 1);
+ if(flush_tex_cache)
+ {
+ // TODO: what about nv30?
+ if(nvfx->is_nv4x)
+ {
+ WAIT_RING(chan, 4);
+ OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1));
+ OUT_RING(chan, 2);
+ OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1));
+ OUT_RING(chan, 1);
+ }
}
nvfx->dirty = 0;
return TRUE;
@@ -99,6 +137,21 @@ nvfx_state_emit(struct nvfx_context *nvfx)
;
MARK_RING(chan, max_relocs * 2, max_relocs * 2);
nvfx_state_relocate(nvfx);
+
+ unsigned render_temps = nvfx->state.render_temps;
+ if(render_temps)
+ {
+ for(int i = 0; i < nvfx->framebuffer.nr_cbufs; ++i)
+ {
+ if(render_temps & (1 << i))
+ util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.cbufs[i]),
+ (struct util_dirty_surface*)nvfx->framebuffer.cbufs[i]);
+ }
+
+ if(render_temps & 0x80)
+ util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.zsbuf),
+ (struct util_dirty_surface*)nvfx->framebuffer.zsbuf);
+ }
}
void
diff --git a/src/gallium/drivers/nvfx/nvfx_state_fb.c b/src/gallium/drivers/nvfx/nvfx_state_fb.c
index e111d11627f..80b0f21575f 100644
--- a/src/gallium/drivers/nvfx/nvfx_state_fb.c
+++ b/src/gallium/drivers/nvfx/nvfx_state_fb.c
@@ -1,19 +1,56 @@
#include "nvfx_context.h"
#include "nvfx_resource.h"
#include "nouveau/nouveau_util.h"
+#include "util/u_format.h"
-void
-nvfx_state_framebuffer_validate(struct nvfx_context *nvfx)
+static inline boolean
+nvfx_surface_linear_renderable(struct pipe_surface* surf)
+{
+ return (surf->texture->flags & NVFX_RESOURCE_FLAG_LINEAR)
+ && !(surf->offset & 63)
+ && !(((struct nvfx_surface*)surf)->pitch & 63);
+}
+
+static inline boolean
+nvfx_surface_swizzled_renderable(struct pipe_framebuffer_state* fb, struct pipe_surface* surf)
+{
+ /* TODO: return FALSE if we have a format not supporting swizzled rendering (e.g. r8); currently those are not supported at all */
+ return !((struct nvfx_miptree*)surf->texture)->linear_pitch
+ && (surf->texture->target != PIPE_TEXTURE_3D || u_minify(surf->texture->depth0, surf->level) <= 1)
+ && !(surf->offset & 127)
+ && (surf->width == fb->width)
+ && (surf->height == fb->height)
+ && !((struct nvfx_surface*)surf)->temp;
+}
+
+static boolean
+nvfx_surface_get_render_target(struct pipe_surface* surf, int all_swizzled, struct nvfx_render_target* target)
+{
+ struct nvfx_surface* ns = (struct nvfx_surface*)surf;
+ if(!ns->temp)
+ {
+ target->bo = ((struct nvfx_miptree*)surf->texture)->base.bo;
+ target->offset = surf->offset;
+ target->pitch = align(ns->pitch, 64);
+ assert(target->pitch);
+ return FALSE;
+ }
+ else
+ {
+ target->offset = 0;
+ target->pitch = ns->temp->linear_pitch;
+ target->bo = ns->temp->base.bo;
+ assert(target->pitch);
+ return TRUE;
+ }
+}
+
+int
+nvfx_framebuffer_prepare(struct nvfx_context *nvfx)
{
struct pipe_framebuffer_state *fb = &nvfx->framebuffer;
- struct nouveau_channel *chan = nvfx->screen->base.channel;
- uint32_t rt_enable = 0, rt_format = 0;
- int i, colour_format = 0, zeta_format = 0;
- int depth_only = 0;
- unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
- unsigned w = fb->width;
- unsigned h = fb->height;
- int colour_bits = 32, zeta_bits = 32;
+ int i, color_format = 0, zeta_format = 0;
+ int all_swizzled = 1;
if(!nvfx->is_nv4x)
assert(fb->nr_cbufs <= 2);
@@ -21,113 +58,135 @@ nvfx_state_framebuffer_validate(struct nvfx_context *nvfx)
assert(fb->nr_cbufs <= 4);
for (i = 0; i < fb->nr_cbufs; i++) {
- if (colour_format)
- assert(colour_format == fb->cbufs[i]->format);
- else
- colour_format = fb->cbufs[i]->format;
-
- rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i);
- nvfx->hw_rt[i].bo = ((struct nvfx_miptree*)fb->cbufs[i]->texture)->base.bo;
- nvfx->hw_rt[i].offset = fb->cbufs[i]->offset;
- nvfx->hw_rt[i].pitch = ((struct nvfx_surface *)fb->cbufs[i])->pitch;
+ if (color_format) {
+ if(color_format != fb->cbufs[i]->format)
+ return -1;
+ } else
+ color_format = fb->cbufs[i]->format;
+
+ if(!nvfx_surface_swizzled_renderable(fb, fb->cbufs[i]))
+ all_swizzled = 0;
}
- for(; i < 4; ++i)
- nvfx->hw_rt[i].bo = 0;
+ if (fb->zsbuf) {
+ /* TODO: return FALSE if we have a format not supporting a depth buffer (e.g. r8); currently those are not supported at all */
+ if(!nvfx_surface_swizzled_renderable(fb, fb->zsbuf))
+ all_swizzled = 0;
+
+ if(all_swizzled && util_format_get_blocksize(color_format) != util_format_get_blocksize(zeta_format))
+ all_swizzled = 0;
+ }
+
+ for (i = 0; i < fb->nr_cbufs; i++) {
+ if(!((struct nvfx_surface*)fb->cbufs[i])->temp && !all_swizzled && !nvfx_surface_linear_renderable(fb->cbufs[i]))
+ nvfx_surface_create_temp(&nvfx->pipe, fb->cbufs[i]);
+ }
+
+ if(fb->zsbuf) {
+ if(!((struct nvfx_surface*)fb->zsbuf)->temp && !all_swizzled && !nvfx_surface_linear_renderable(fb->zsbuf))
+ nvfx_surface_create_temp(&nvfx->pipe, fb->zsbuf);
+ }
+
+ return all_swizzled;
+}
+
+void
+nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result)
+{
+ struct pipe_framebuffer_state *fb = &nvfx->framebuffer;
+ struct nouveau_channel *chan = nvfx->screen->base.channel;
+ uint32_t rt_enable, rt_format;
+ int i;
+ unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
+ unsigned w = fb->width;
+ unsigned h = fb->height;
+
+ rt_enable = (NV34TCL_RT_ENABLE_COLOR0 << fb->nr_cbufs) - 1;
if (rt_enable & (NV34TCL_RT_ENABLE_COLOR1 |
NV40TCL_RT_ENABLE_COLOR2 | NV40TCL_RT_ENABLE_COLOR3))
rt_enable |= NV34TCL_RT_ENABLE_MRT;
+ nvfx->state.render_temps = 0;
+
+ for (i = 0; i < fb->nr_cbufs; i++)
+ nvfx->state.render_temps |= nvfx_surface_get_render_target(fb->cbufs[i], prepare_result, &nvfx->hw_rt[i]) << i;
+
+ for(; i < 4; ++i)
+ nvfx->hw_rt[i].bo = 0;
+
if (fb->zsbuf) {
- zeta_format = fb->zsbuf->format;
- nvfx->hw_zeta.bo = ((struct nvfx_miptree*)fb->zsbuf->texture)->base.bo;
- nvfx->hw_zeta.offset = fb->zsbuf->offset;
- nvfx->hw_zeta.pitch = ((struct nvfx_surface *)fb->zsbuf)->pitch;
- }
- else
- nvfx->hw_zeta.bo = 0;
-
- if (rt_enable & (NV34TCL_RT_ENABLE_COLOR0 | NV34TCL_RT_ENABLE_COLOR1 |
- NV40TCL_RT_ENABLE_COLOR2 | NV40TCL_RT_ENABLE_COLOR3)) {
- /* Render to at least a colour buffer */
- if (!(fb->cbufs[0]->texture->flags & NVFX_RESOURCE_FLAG_LINEAR)) {
- assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
- for (i = 1; i < fb->nr_cbufs; i++)
- assert(!(fb->cbufs[i]->texture->flags & NVFX_RESOURCE_FLAG_LINEAR));
-
- rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
- (log2i(fb->cbufs[0]->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) |
- (log2i(fb->cbufs[0]->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT);
- }
- else
- rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;
- } else if (fb->zsbuf) {
- depth_only = 1;
-
- /* Render to depth buffer only */
- if (!(fb->zsbuf->texture->flags & NVFX_RESOURCE_FLAG_LINEAR)) {
- assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
-
- rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
- (log2i(fb->zsbuf->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) |
- (log2i(fb->zsbuf->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT);
- }
- else
- rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;
- } else {
- return;
+ nvfx->state.render_temps |= nvfx_surface_get_render_target(fb->zsbuf, prepare_result, &nvfx->hw_zeta) << 7;
+
+ assert(util_format_get_stride(fb->zsbuf->format, fb->width) <= nvfx->hw_zeta.pitch);
+ assert(nvfx->hw_zeta.offset + nvfx->hw_zeta.pitch * fb->height <= nvfx->hw_zeta.bo->size);
}
- switch (colour_format) {
- case PIPE_FORMAT_B8G8R8X8_UNORM:
- rt_format |= NV34TCL_RT_FORMAT_COLOR_X8R8G8B8;
- break;
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- case 0:
- rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8;
- break;
- case PIPE_FORMAT_B5G6R5_UNORM:
+ if (prepare_result) {
+ assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
+
+ rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
+ (log2i(fb->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) |
+ (log2i(fb->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT);
+ } else
+ rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;
+
+ if(fb->nr_cbufs > 0) {
+ switch (fb->cbufs[0]->format) {
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
+ rt_format |= NV34TCL_RT_FORMAT_COLOR_X8R8G8B8;
+ break;
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case 0:
+ rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8;
+ break;
+ case PIPE_FORMAT_B5G6R5_UNORM:
+ rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5;
+ break;
+ default:
+ assert(0);
+ }
+ } else if(fb->zsbuf && util_format_get_blocksize(fb->zsbuf->format) == 2)
rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5;
- colour_bits = 16;
- break;
- default:
- assert(0);
- }
+ else
+ rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8;
- switch (zeta_format) {
- case PIPE_FORMAT_Z16_UNORM:
+ if(fb->zsbuf) {
+ switch (fb->zsbuf->format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16;
+ break;
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case 0:
+ rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8;
+ break;
+ default:
+ assert(0);
+ }
+ } else if(fb->nr_cbufs && util_format_get_blocksize(fb->cbufs[0]->format) == 2)
rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16;
- zeta_bits = 16;
- break;
- case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
- case PIPE_FORMAT_X8Z24_UNORM:
- case 0:
+ else
rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8;
- break;
- default:
- assert(0);
- }
- if ((!nvfx->is_nv4x) && colour_bits > zeta_bits) {
- /* TODO: does this limitation really exist?
- TODO: can it be worked around somehow? */
- assert(0);
- }
+ if ((rt_enable & NV34TCL_RT_ENABLE_COLOR0) || fb->zsbuf) {
+ struct nvfx_render_target *rt0 = &nvfx->hw_rt[0];
+ uint32_t pitch;
+
+ if(!(rt_enable & NV34TCL_RT_ENABLE_COLOR0))
+ rt0 = &nvfx->hw_zeta;
- if ((rt_enable & NV34TCL_RT_ENABLE_COLOR0)
- || ((!nvfx->is_nv4x) && depth_only)) {
- struct nvfx_render_target *rt0 = (depth_only ? &nvfx->hw_zeta : &nvfx->hw_rt[0]);
- uint32_t pitch = rt0->pitch;
+ pitch = rt0->pitch;
if(!nvfx->is_nv4x)
{
- if (nvfx->hw_zeta.bo) {
+ if (nvfx->hw_zeta.bo)
pitch |= (nvfx->hw_zeta.pitch << 16);
- } else {
+ else
pitch |= (pitch << 16);
- }
}
+ //printf("rendering to bo %p [%i] at offset %i with pitch %i\n", rt0->bo, rt0->bo->handle, rt0->offset, pitch);
+
OUT_RING(chan, RING_3D(NV34TCL_DMA_COLOR0, 1));
OUT_RELOC(chan, rt0->bo, 0,
rt_flags | NOUVEAU_BO_OR,
@@ -180,7 +239,7 @@ nvfx_state_framebuffer_validate(struct nvfx_context *nvfx)
}
}
- if (zeta_format) {
+ if (fb->zsbuf) {
OUT_RING(chan, RING_3D(NV34TCL_DMA_ZETA, 1));
OUT_RELOC(chan, nvfx->hw_zeta.bo, 0,
rt_flags | NOUVEAU_BO_OR,
diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c
index a97f342c646..8208c67f2a2 100644
--- a/src/gallium/drivers/nvfx/nvfx_surface.c
+++ b/src/gallium/drivers/nvfx/nvfx_surface.c
@@ -94,23 +94,44 @@ nvfx_region_fixup_swizzled(struct nv04_region* rgn, unsigned zslice, unsigned wi
}
static INLINE void
-nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf, unsigned x, unsigned y)
+nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf, unsigned x, unsigned y, bool for_write)
{
- rgn->bo = ((struct nvfx_resource*)surf->base.texture)->bo;
- rgn->offset = surf->base.offset;
- rgn->pitch = surf->pitch;
rgn->x = x;
rgn->y = y;
rgn->z = 0;
+ nvfx_region_set_format(rgn, surf->base.base.format);
- nvfx_region_set_format(rgn, surf->base.format);
- if(!(surf->base.texture->flags & NVFX_RESOURCE_FLAG_LINEAR))
- nvfx_region_fixup_swizzled(rgn, surf->base.zslice, surf->base.width, surf->base.height, u_minify(surf->base.texture->depth0, surf->base.level));
+ if(surf->temp)
+ {
+ rgn->bo = surf->temp->base.bo;
+ rgn->offset = 0;
+ rgn->pitch = surf->temp->linear_pitch;
+
+ if(for_write)
+ util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(&surf->base.base), &surf->base);
+ } else {
+ rgn->bo = ((struct nvfx_resource*)surf->base.base.texture)->bo;
+ rgn->offset = surf->base.base.offset;
+ rgn->pitch = surf->pitch;
+
+ if(!(surf->base.base.texture->flags & NVFX_RESOURCE_FLAG_LINEAR))
+ nvfx_region_fixup_swizzled(rgn, surf->base.base.zslice, surf->base.base.width, surf->base.base.height, u_minify(surf->base.base.texture->depth0, surf->base.base.level));
+ }
}
static INLINE void
-nvfx_region_init_for_subresource(struct nv04_region* rgn, struct pipe_resource* pt, struct pipe_subresource sub, unsigned x, unsigned y, unsigned z)
+nvfx_region_init_for_subresource(struct nv04_region* rgn, struct pipe_resource* pt, struct pipe_subresource sub, unsigned x, unsigned y, unsigned z, bool for_write)
{
+ if(pt->target != PIPE_BUFFER)
+ {
+ struct nvfx_surface* ns = (struct nvfx_surface*)util_surfaces_peek(&((struct nvfx_miptree*)pt)->surfaces, pt, sub.face, sub.level, z);
+ if(ns && util_dirty_surface_is_dirty(&ns->base))
+ {
+ nvfx_region_init_for_surface(rgn, ns, x, y, for_write);
+ return;
+ }
+ }
+
rgn->bo = ((struct nvfx_resource*)pt)->bo;
rgn->offset = nvfx_subresource_offset(pt, sub.face, sub.level, z);
rgn->pitch = nvfx_subresource_pitch(pt, sub.level);
@@ -165,6 +186,7 @@ nv04_scaled_image_format(enum pipe_format format)
}
}
+// XXX: must save index buffer too!
static struct blitter_context*
nvfx_get_blitter(struct pipe_context* pipe, int copy)
{
@@ -237,8 +259,8 @@ nvfx_resource_copy_region(struct pipe_context *pipe,
int dst_to_gpu = dstr->usage != PIPE_USAGE_DYNAMIC && dstr->usage != PIPE_USAGE_STAGING;
int src_on_gpu = nvfx_resource_on_gpu(srcr);
- nvfx_region_init_for_subresource(&dst, dstr, subdst, dstx, dsty, dstz);
- nvfx_region_init_for_subresource(&src, srcr, subsrc, srcx, srcy, srcz);
+ nvfx_region_init_for_subresource(&dst, dstr, subdst, dstx, dsty, dstz, TRUE);
+ nvfx_region_init_for_subresource(&src, srcr, subsrc, srcx, srcy, srcz, FALSE);
w = util_format_get_stride(dstr->format, w) >> dst.bpps;
h = util_format_get_nblocksy(dstr->format, h);
@@ -293,10 +315,11 @@ nvfx_surface_fill(struct pipe_context* pipe, struct pipe_surface *dsts,
struct nv04_2d_context *ctx = nvfx_screen(pipe->screen)->eng2d;
struct nv04_region dst;
/* Always try to use the GPU right now, if possible
- * If the user wanted the surface data on the CPU, he would have cleared with memset */
+ * If the user wanted the surface data on the CPU, he would have cleared with memset (hopefully) */
// we don't care about interior pixel order since we set all them to the same value
- nvfx_region_init_for_surface(&dst, (struct nvfx_surface*)dsts, dx, dy);
+ nvfx_region_init_for_surface(&dst, (struct nvfx_surface*)dsts, dx, dy, TRUE);
+
w = util_format_get_stride(dsts->format, w) >> dst.bpps;
h = util_format_get_nblocksy(dsts->format, h);
@@ -342,6 +365,80 @@ nvfx_screen_surface_init(struct pipe_screen *pscreen)
}
static void
+nvfx_surface_copy_temp(struct pipe_context* pipe, struct pipe_surface* surf, int to_temp)
+{
+ struct nvfx_surface* ns = (struct nvfx_surface*)surf;
+ struct pipe_subresource tempsr, surfsr;
+ struct pipe_resource *idxbuf_buffer;
+ unsigned idxbuf_format;
+
+ tempsr.face = 0;
+ tempsr.level = 0;
+ surfsr.face = surf->face;
+ surfsr.level = surf->level;
+
+ // TODO: do this properly, in blitter save
+ idxbuf_buffer = ((struct nvfx_context*)pipe)->idxbuf_buffer;
+ idxbuf_format = ((struct nvfx_context*)pipe)->idxbuf_format;
+
+ if(to_temp)
+ nvfx_resource_copy_region(pipe, &ns->temp->base.base, tempsr, 0, 0, 0, surf->texture, surfsr, 0, 0, surf->zslice, surf->width, surf->height);
+ else
+ nvfx_resource_copy_region(pipe, surf->texture, surfsr, 0, 0, surf->zslice, &ns->temp->base.base, tempsr, 0, 0, 0, surf->width, surf->height);
+
+ ((struct nvfx_context*)pipe)->idxbuf_buffer = idxbuf_buffer;
+ ((struct nvfx_context*)pipe)->idxbuf_format = idxbuf_format;
+}
+
+void
+nvfx_surface_create_temp(struct pipe_context* pipe, struct pipe_surface* surf)
+{
+ struct nvfx_surface* ns = (struct nvfx_surface*)surf;
+ struct pipe_resource template;
+ memset(&template, 0, sizeof(struct pipe_resource));
+ template.target = PIPE_TEXTURE_2D;
+ template.format = surf->format;
+ template.width0 = surf->width;
+ template.height0 = surf->height;
+ template.depth0 = 1;
+ template.nr_samples = surf->texture->nr_samples;
+ template.flags = NVFX_RESOURCE_FLAG_LINEAR;
+
+ ns->temp = (struct nvfx_miptree*)nvfx_miptree_create(pipe->screen, &template);
+ nvfx_surface_copy_temp(pipe, surf, 1);
+}
+
+void
+nvfx_surface_flush(struct pipe_context* pipe, struct pipe_surface* surf)
+{
+ struct nvfx_context* nvfx = (struct nvfx_context*)pipe;
+ struct nvfx_surface* ns = (struct nvfx_surface*)surf;
+ boolean bound = FALSE;
+
+ /* must be done before the copy, otherwise the copy will use the temp as destination */
+ util_dirty_surface_set_clean(nvfx_surface_get_dirty_surfaces(surf), &ns->base);
+
+ nvfx_surface_copy_temp(pipe, surf, 0);
+
+ if(nvfx->framebuffer.zsbuf == surf)
+ bound = TRUE;
+ else
+ {
+ for(unsigned i = 0; i < nvfx->framebuffer.nr_cbufs; ++i)
+ {
+ if(nvfx->framebuffer.cbufs[i] == surf)
+ {
+ bound = TRUE;
+ break;
+ }
+ }
+ }
+
+ if(!bound)
+ pipe_resource_reference((struct pipe_resource**)&ns->temp, 0);
+}
+
+static void
nvfx_clear_render_target(struct pipe_context *pipe,
struct pipe_surface *dst,
const float *rgba,