summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYounes Manton <[email protected]>2009-10-06 22:07:47 -0400
committerYounes Manton <[email protected]>2009-10-06 22:11:07 -0400
commitaec2c010f6dc2febcd5f3a10a0dc92738db68e1a (patch)
tree0f9752853c250b6b6413cad229fca25525320250
parent9c778a90ea24f25437b68bb67856c81add61e261 (diff)
nv04-nv40: Fix swizzle transfers for NPOT sizes.
Workarounds not necessary, SIFM can handle NPOT, we just weren't setting dst dimensions properly. SIFM can't handle odd w,h though, that still needs fixing.
-rw-r--r--src/gallium/drivers/nv04/nv04_surface_2d.c147
1 files changed, 34 insertions, 113 deletions
diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c
index f88e138c79d..b2ab50ee21f 100644
--- a/src/gallium/drivers/nv04/nv04_surface_2d.c
+++ b/src/gallium/drivers/nv04/nv04_surface_2d.c
@@ -1,5 +1,6 @@
#include "pipe/p_context.h"
#include "pipe/p_format.h"
+#include "util/u_math.h"
#include "util/u_memory.h"
#include "nouveau/nouveau_winsys.h"
@@ -107,17 +108,20 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
struct nouveau_bo *src_bo = nouveau_bo(ctx->buf(src));
struct nouveau_bo *dst_bo = nouveau_bo(ctx->buf(dst));
const unsigned src_pitch = ((struct nv04_surface *)src)->pitch;
+ /* Max width & height may not be the same on all HW, but must be POT */
const unsigned max_w = 1024;
const unsigned max_h = 1024;
- const unsigned sub_w = w > max_w ? max_w : w;
- const unsigned sub_h = h > max_h ? max_h : h;
- unsigned cx;
- unsigned cy;
+ unsigned sub_w = w > max_w ? max_w : w;
+ unsigned sub_h = h > max_h ? max_h : h;
+ unsigned x;
+ unsigned y;
-#if 0
- /* That's the way she likes it */
- assert(src_pitch == ((struct nv04_surface *)dst)->pitch);
-#endif
+ /* Swizzled surfaces must be POT */
+ assert(util_is_pot(dst->width) && util_is_pot(dst->height));
+
+ /* If area is too large to copy in one shot we must copy it in POT chunks to meet alignment requirements */
+ assert(sub_w == w || util_is_pot(sub_w));
+ assert(sub_h == h || util_is_pot(sub_h));
BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1);
OUT_RELOCo(chan, dst_bo,
@@ -125,8 +129,8 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_FORMAT, 1);
OUT_RING (chan, nv04_surface_format(dst->format) |
- log2i(w) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_SHIFT |
- log2i(h) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_SHIFT);
+ log2i(dst->width) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_SHIFT |
+ log2i(dst->height) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_SHIFT);
BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE, 1);
OUT_RELOCo(chan, src_bo,
@@ -134,32 +138,37 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_SURFACE, 1);
OUT_RING (chan, swzsurf->handle);
- for (cy = 0; cy < h; cy += sub_h) {
- for (cx = 0; cx < w; cx += sub_w) {
+ for (y = 0; y < h; y += sub_h) {
+ sub_h = MIN2(sub_h, h - y);
+
+ for (x = 0; x < w; x += sub_w) {
+ sub_w = MIN2(sub_w, w - x);
+
+ /* Must be 64-byte aligned */
+ assert(!((dst->offset + nv04_swizzle_bits(dx+x, dy+y) * dst->texture->block.size) & 63));
+
BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1);
- OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(cx+dx, cy+dy) *
- dst->texture->block.size, NOUVEAU_BO_GART |
- NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(dx+x, dy+y) * dst->texture->block.size,
+ NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9);
OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE);
OUT_RING (chan, nv04_scaled_image_format(src->format));
OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY);
OUT_RING (chan, 0);
- OUT_RING (chan, sub_h << 16 | sub_w);
+ OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT | sub_w);
OUT_RING (chan, 0);
- OUT_RING (chan, sub_h << 16 | sub_w);
+ OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_SHIFT | sub_w);
OUT_RING (chan, 1 << 20);
OUT_RING (chan, 1 << 20);
BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_SIZE, 4);
- OUT_RING (chan, sub_h << 16 | sub_w);
+ OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_SIZE_H_SHIFT | sub_w);
OUT_RING (chan, src_pitch |
NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER |
NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE);
- OUT_RELOCl(chan, src_bo, src->offset + (cy+sy) * src_pitch +
- (cx+sx) * src->texture->block.size, NOUVEAU_BO_GART |
- NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, src_bo, src->offset + (sy+y) * src_pitch + (sx+x) * src->texture->block.size,
+ NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
OUT_RING (chan, 0);
}
}
@@ -214,43 +223,6 @@ nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx,
}
static int
-nv04_surface_copy_m2mf_swizzle(struct nv04_surface_2d *ctx,
- struct pipe_surface *dst, int dx, int dy,
- struct pipe_surface *src, int sx, int sy)
-{
- struct nouveau_channel *chan = ctx->m2mf->channel;
- struct nouveau_grobj *m2mf = ctx->m2mf;
- struct nouveau_bo *src_bo = nouveau_bo(ctx->buf(src));
- struct nouveau_bo *dst_bo = nouveau_bo(ctx->buf(dst));
- unsigned src_pitch = ((struct nv04_surface *)src)->pitch;
- unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch;
- unsigned dst_offset = dst->offset + nv04_swizzle_bits(dx, dy) *
- dst->texture->block.size;
- unsigned src_offset = src->offset + sy * src_pitch +
- sx * src->texture->block.size;
-
- BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2);
- OUT_RELOCo(chan, src_bo,
- NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
- OUT_RELOCo(chan, dst_bo,
- NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
-
- BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
- OUT_RELOCl(chan, src_bo, src_offset,
- NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
- OUT_RELOCl(chan, dst_bo, dst_offset,
- NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR);
- OUT_RING (chan, src_pitch);
- OUT_RING (chan, dst_pitch);
- OUT_RING (chan, 1 * src->texture->block.size);
- OUT_RING (chan, 1);
- OUT_RING (chan, 0x0101);
- OUT_RING (chan, 0);
-
- return 0;
-}
-
-static int
nv04_surface_copy_blit(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
int dx, int dy, struct pipe_surface *src, int sx, int sy,
int w, int h)
@@ -299,61 +271,10 @@ nv04_surface_copy(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
assert(src->format == dst->format);
/* Setup transfer to swizzle the texture to vram if needed */
- if (src_linear && !dst_linear) {
- int x,y;
-
- if ((w>1) && (h>1)) {
- int potWidth = 1<<log2i(w);
- int potHeight = 1<<log2i(h);
- int remainWidth = w-potWidth;
- int remainHeight = h-potHeight;
- int squareDim = (potWidth>potHeight ? potHeight : potWidth);
-
- /* top left is always POT, but we can only swizzle squares */
- for (y=0; y<potHeight; y+=squareDim) {
- for (x=0; x<potWidth; x+= squareDim) {
- nv04_surface_copy_swizzle(ctx, dst, dx+x, dy+y,
- src, sx+x, sy+y,
- squareDim, squareDim);
- }
- }
-
- /* top right */
- if (remainWidth>0) {
- nv04_surface_copy(ctx, dst, dx+potWidth, dy,
- src, sx+potWidth, sy,
- remainWidth, potHeight);
- }
-
- /* bottom left */
- if (remainHeight>0) {
- nv04_surface_copy(ctx, dst, dx, dy+potHeight,
- src, sx, sy+potHeight,
- potWidth, remainHeight);
- }
-
- /* bottom right */
- if ((remainWidth>0) && (remainHeight>0)) {
- nv04_surface_copy(ctx, dst, dx+potWidth, dy+potHeight,
- src, sx+potWidth, sy+potHeight,
- remainWidth, remainHeight);
- }
- } else if (w==1) {
- /* We have a column to copy to a swizzled texture */
- for (y=0; y<h; y++) {
- nv04_surface_copy_m2mf_swizzle(ctx, dst, dx, dy+y,
- src, sx, sy+y);
- }
- } else if (h==1) {
- /* We have a row to copy to a swizzled texture */
- for (x=0; x<w; x++) {
- nv04_surface_copy_m2mf_swizzle(ctx, dst, dx+x, dy,
- src, sx+x, sy);
- }
- }
-
- return;
- }
+ if (src_linear && !dst_linear && w > 1 && h > 1) {
+ nv04_surface_copy_swizzle(ctx, dst, dx, dy, src, sx, sy, w, h);
+ return;
+ }
/* NV_CONTEXT_SURFACES_2D has buffer alignment restrictions, fallback
* to NV_MEMORY_TO_MEMORY_FORMAT in this case.