summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/nvc0/nvc0_tex.c
diff options
context:
space:
mode:
authorChristoph Bumiller <[email protected]>2013-02-23 19:40:23 +0100
committerChristoph Bumiller <[email protected]>2013-03-12 12:55:37 +0100
commite066f2f62f6043d43385bcdce4e7fa07ffa3ecbe (patch)
tree6f49b99571ffd3b93727988b635d74b0c397857e /src/gallium/drivers/nvc0/nvc0_tex.c
parent75f1f852b00ad0d766684d01695322b93a2acd55 (diff)
nvc0: implement compute support for nve4
Diffstat (limited to 'src/gallium/drivers/nvc0/nvc0_tex.c')
-rw-r--r--src/gallium/drivers/nvc0/nvc0_tex.c295
1 files changed, 294 insertions, 1 deletions
diff --git a/src/gallium/drivers/nvc0/nvc0_tex.c b/src/gallium/drivers/nvc0/nvc0_tex.c
index 2bce97b32c7..7fbe1e6736b 100644
--- a/src/gallium/drivers/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nvc0/nvc0_tex.c
@@ -23,6 +23,7 @@
#include "nvc0_context.h"
#include "nvc0_resource.h"
#include "nv50/nv50_texture.xml.h"
+#include "nv50/nv50_defs.xml.h"
#include "util/u_format.h"
@@ -413,7 +414,7 @@ nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
return need_flush;
}
-static boolean
+boolean
nve4_validate_tsc(struct nvc0_context *nvc0, int s)
{
struct nouveau_bo *txc = nvc0->screen->txc;
@@ -515,3 +516,295 @@ nve4_set_tex_handles(struct nvc0_context *nvc0)
nvc0->samplers_dirty[s] = 0;
}
}
+
+
+static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT];
+static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT];
+static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT];
+
+void
+nve4_set_surface_info(struct nouveau_pushbuf *push,
+ struct pipe_surface *psf,
+ struct nvc0_screen *screen)
+{
+ struct nv50_surface *sf = nv50_surface(psf);
+ struct nv04_resource *res;
+ uint64_t address;
+ uint32_t *const info = push->cur;
+ uint8_t log2cpp;
+
+ if (psf && !nve4_su_format_map[psf->format])
+ NOUVEAU_ERR("unsupported surface format, try is_format_supported() !\n");
+
+ push->cur += 16;
+
+ if (!psf || !nve4_su_format_map[psf->format]) {
+ memset(info, 0, 16 * sizeof(*info));
+
+ info[0] = 0xbadf0000;
+ info[1] = 0x80004000;
+ info[12] = nve4_suldp_lib_offset[PIPE_FORMAT_R32G32B32A32_UINT] +
+ screen->lib_code->start;
+ return;
+ }
+ res = nv04_resource(sf->base.texture);
+
+ address = res->address + sf->offset;
+
+ info[8] = sf->width;
+ info[9] = sf->height;
+ info[10] = sf->depth;
+ switch (res->base.target) {
+ case PIPE_TEXTURE_1D_ARRAY:
+ info[11] = 1;
+ break;
+ case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
+ info[11] = 2;
+ break;
+ case PIPE_TEXTURE_3D:
+ info[11] = 3;
+ break;
+ case PIPE_TEXTURE_2D_ARRAY:
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ info[11] = 4;
+ break;
+ default:
+ info[11] = 0;
+ break;
+ }
+ log2cpp = (0xf000 & nve4_su_format_aux_map[sf->base.format]) >> 12;
+
+ info[12] = nve4_suldp_lib_offset[sf->base.format] + screen->lib_code->start;
+
+ /* limit in bytes for raw access */
+ info[13] = (0x06 << 22) | ((sf->width << log2cpp) - 1);
+
+ info[1] = nve4_su_format_map[sf->base.format];
+
+#if 0
+ switch (util_format_get_blocksizebits(sf->base.format)) {
+ case 16: info[1] |= 1 << 16; break;
+ case 32: info[1] |= 2 << 16; break;
+ case 64: info[1] |= 3 << 16; break;
+ case 128: info[1] |= 4 << 16; break;
+ default:
+ break;
+ }
+#else
+ info[1] |= log2cpp << 16;
+ info[1] |= 0x4000;
+ info[1] |= (0x0f00 & nve4_su_format_aux_map[sf->base.format]);
+#endif
+
+ if (res->base.target == PIPE_BUFFER) {
+ info[0] = address >> 8;
+ info[2] = sf->width - 1;
+ info[2] |= (0xff & nve4_su_format_aux_map[sf->base.format]) << 22;
+ info[3] = 0;
+ info[4] = 0;
+ info[5] = 0;
+ info[6] = 0;
+ info[7] = 0;
+ info[14] = 0;
+ info[15] = 0;
+ } else {
+ struct nv50_miptree *mt = nv50_miptree(&res->base);
+ struct nv50_miptree_level *lvl = &mt->level[sf->base.u.tex.level];
+ const unsigned z = sf->base.u.tex.first_layer;
+
+ if (z) {
+ if (mt->layout_3d) {
+ address += nvc0_mt_zslice_offset(mt, psf->u.tex.level, z);
+ /* doesn't work if z passes z-tile boundary */
+ assert(sf->depth == 1);
+ } else {
+ address += mt->layer_stride * z;
+ }
+ }
+ info[0] = address >> 8;
+ info[2] = sf->width - 1;
+ /* NOTE: this is really important: */
+ info[2] |= (0xff & nve4_su_format_aux_map[sf->base.format]) << 22;
+ info[3] = (0x88 << 24) | (lvl->pitch / 64);
+ info[4] = sf->height - 1;
+ info[4] |= (lvl->tile_mode & 0x0f0) << 25;
+ info[4] |= NVC0_TILE_SHIFT_Y(lvl->tile_mode) << 22;
+ info[5] = mt->layer_stride >> 8;
+ info[6] = sf->depth - 1;
+ info[6] |= (lvl->tile_mode & 0xf00) << 21;
+ info[6] |= NVC0_TILE_SHIFT_Z(lvl->tile_mode) << 22;
+ info[7] = 0;
+ info[14] = mt->ms_x;
+ info[15] = mt->ms_y;
+ }
+}
+
+static INLINE void
+nvc0_update_surface_bindings(struct nvc0_context *nvc0)
+{
+ /* TODO */
+}
+
+static INLINE void
+nve4_update_surface_bindings(struct nvc0_context *nvc0)
+{
+ /* TODO */
+}
+
+void
+nvc0_validate_surfaces(struct nvc0_context *nvc0)
+{
+ if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
+ nve4_update_surface_bindings(nvc0);
+ } else {
+ nvc0_update_surface_bindings(nvc0);
+ }
+}
+
+
+static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT] =
+{
+ [PIPE_FORMAT_R32G32B32A32_FLOAT] = NVE4_IMAGE_FORMAT_RGBA32_FLOAT,
+ [PIPE_FORMAT_R32G32B32A32_SINT] = NVE4_IMAGE_FORMAT_RGBA32_SINT,
+ [PIPE_FORMAT_R32G32B32A32_UINT] = NVE4_IMAGE_FORMAT_RGBA32_UINT,
+ [PIPE_FORMAT_R16G16B16A16_FLOAT] = NVE4_IMAGE_FORMAT_RGBA16_FLOAT,
+ [PIPE_FORMAT_R16G16B16A16_UNORM] = NVE4_IMAGE_FORMAT_RGBA16_UNORM,
+ [PIPE_FORMAT_R16G16B16A16_SNORM] = NVE4_IMAGE_FORMAT_RGBA16_SNORM,
+ [PIPE_FORMAT_R16G16B16A16_SINT] = NVE4_IMAGE_FORMAT_RGBA16_SINT,
+ [PIPE_FORMAT_R16G16B16A16_UINT] = NVE4_IMAGE_FORMAT_RGBA16_UINT,
+ [PIPE_FORMAT_R8G8B8A8_UNORM] = NVE4_IMAGE_FORMAT_RGBA8_UNORM,
+ [PIPE_FORMAT_R8G8B8A8_SNORM] = NVE4_IMAGE_FORMAT_RGBA8_SNORM,
+ [PIPE_FORMAT_R8G8B8A8_SINT] = NVE4_IMAGE_FORMAT_RGBA8_SINT,
+ [PIPE_FORMAT_R8G8B8A8_UINT] = NVE4_IMAGE_FORMAT_RGBA8_UINT,
+ [PIPE_FORMAT_R11G11B10_FLOAT] = NVE4_IMAGE_FORMAT_R11G11B10_FLOAT,
+ [PIPE_FORMAT_R10G10B10A2_UNORM] = NVE4_IMAGE_FORMAT_RGB10_A2_UNORM,
+/* [PIPE_FORMAT_R10G10B10A2_UINT] = NVE4_IMAGE_FORMAT_RGB10_A2_UINT, */
+ [PIPE_FORMAT_R32G32_FLOAT] = NVE4_IMAGE_FORMAT_RG32_FLOAT,
+ [PIPE_FORMAT_R32G32_SINT] = NVE4_IMAGE_FORMAT_RG32_SINT,
+ [PIPE_FORMAT_R32G32_UINT] = NVE4_IMAGE_FORMAT_RG32_UINT,
+ [PIPE_FORMAT_R16G16_FLOAT] = NVE4_IMAGE_FORMAT_RG16_FLOAT,
+ [PIPE_FORMAT_R16G16_UNORM] = NVE4_IMAGE_FORMAT_RG16_UNORM,
+ [PIPE_FORMAT_R16G16_SNORM] = NVE4_IMAGE_FORMAT_RG16_SNORM,
+ [PIPE_FORMAT_R16G16_SINT] = NVE4_IMAGE_FORMAT_RG16_SINT,
+ [PIPE_FORMAT_R16G16_UINT] = NVE4_IMAGE_FORMAT_RG16_UINT,
+ [PIPE_FORMAT_R8G8_UNORM] = NVE4_IMAGE_FORMAT_RG8_UNORM,
+ [PIPE_FORMAT_R8G8_SNORM] = NVE4_IMAGE_FORMAT_RG8_SNORM,
+ [PIPE_FORMAT_R8G8_SINT] = NVE4_IMAGE_FORMAT_RG8_SINT,
+ [PIPE_FORMAT_R8G8_UINT] = NVE4_IMAGE_FORMAT_RG8_UINT,
+ [PIPE_FORMAT_R32_FLOAT] = NVE4_IMAGE_FORMAT_R32_FLOAT,
+ [PIPE_FORMAT_R32_SINT] = NVE4_IMAGE_FORMAT_R32_SINT,
+ [PIPE_FORMAT_R32_UINT] = NVE4_IMAGE_FORMAT_R32_UINT,
+ [PIPE_FORMAT_R16_FLOAT] = NVE4_IMAGE_FORMAT_R16_FLOAT,
+ [PIPE_FORMAT_R16_UNORM] = NVE4_IMAGE_FORMAT_R16_UNORM,
+ [PIPE_FORMAT_R16_SNORM] = NVE4_IMAGE_FORMAT_R16_SNORM,
+ [PIPE_FORMAT_R16_SINT] = NVE4_IMAGE_FORMAT_R16_SINT,
+ [PIPE_FORMAT_R16_UINT] = NVE4_IMAGE_FORMAT_R16_UINT,
+ [PIPE_FORMAT_R8_UNORM] = NVE4_IMAGE_FORMAT_R8_UNORM,
+ [PIPE_FORMAT_R8_SNORM] = NVE4_IMAGE_FORMAT_R8_SNORM,
+ [PIPE_FORMAT_R8_SINT] = NVE4_IMAGE_FORMAT_R8_SINT,
+ [PIPE_FORMAT_R8_UINT] = NVE4_IMAGE_FORMAT_R8_UINT,
+};
+
+/* Auxiliary format description values for surface instructions.
+ * (log2(bytes per pixel) << 12) | (unk8 << 8) | unk22
+ */
+static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT] =
+{
+ [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x4842,
+ [PIPE_FORMAT_R32G32B32A32_SINT] = 0x4842,
+ [PIPE_FORMAT_R32G32B32A32_UINT] = 0x4842,
+
+ [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x3933,
+ [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x3933,
+ [PIPE_FORMAT_R16G16B16A16_SINT] = 0x3933,
+ [PIPE_FORMAT_R16G16B16A16_UINT] = 0x3933,
+ [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3933,
+
+ [PIPE_FORMAT_R32G32_FLOAT] = 0x3433,
+ [PIPE_FORMAT_R32G32_SINT] = 0x3433,
+ [PIPE_FORMAT_R32G32_UINT] = 0x3433,
+
+ [PIPE_FORMAT_R10G10B10A2_UNORM] = 0x2a24,
+/* [PIPE_FORMAT_R10G10B10A2_UINT] = 0x2a24, */
+ [PIPE_FORMAT_R8G8B8A8_UNORM] = 0x2a24,
+ [PIPE_FORMAT_R8G8B8A8_SNORM] = 0x2a24,
+ [PIPE_FORMAT_R8G8B8A8_SINT] = 0x2a24,
+ [PIPE_FORMAT_R8G8B8A8_UINT] = 0x2a24,
+ [PIPE_FORMAT_R11G11B10_FLOAT] = 0x2a24,
+
+ [PIPE_FORMAT_R16G16_UNORM] = 0x2524,
+ [PIPE_FORMAT_R16G16_SNORM] = 0x2524,
+ [PIPE_FORMAT_R16G16_SINT] = 0x2524,
+ [PIPE_FORMAT_R16G16_UINT] = 0x2524,
+ [PIPE_FORMAT_R16G16_FLOAT] = 0x2524,
+
+ [PIPE_FORMAT_R32_SINT] = 0x2024,
+ [PIPE_FORMAT_R32_UINT] = 0x2024,
+ [PIPE_FORMAT_R32_FLOAT] = 0x2024,
+
+ [PIPE_FORMAT_R8G8_UNORM] = 0x1615,
+ [PIPE_FORMAT_R8G8_SNORM] = 0x1615,
+ [PIPE_FORMAT_R8G8_SINT] = 0x1615,
+ [PIPE_FORMAT_R8G8_UINT] = 0x1615,
+
+ [PIPE_FORMAT_R16_UNORM] = 0x1115,
+ [PIPE_FORMAT_R16_SNORM] = 0x1115,
+ [PIPE_FORMAT_R16_SINT] = 0x1115,
+ [PIPE_FORMAT_R16_UINT] = 0x1115,
+ [PIPE_FORMAT_R16_FLOAT] = 0x1115,
+
+ [PIPE_FORMAT_R8_UNORM] = 0x0206,
+ [PIPE_FORMAT_R8_SNORM] = 0x0206,
+ [PIPE_FORMAT_R8_SINT] = 0x0206,
+ [PIPE_FORMAT_R8_UINT] = 0x0206
+};
+
+/* NOTE: These are hardcoded offsets for the shader library.
+ * TODO: Automate them.
+ */
+static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT] =
+{
+ [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x218,
+ [PIPE_FORMAT_R32G32B32A32_SINT] = 0x218,
+ [PIPE_FORMAT_R32G32B32A32_UINT] = 0x218,
+ [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x248,
+ [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x2b8,
+ [PIPE_FORMAT_R16G16B16A16_SINT] = 0x330,
+ [PIPE_FORMAT_R16G16B16A16_UINT] = 0x388,
+ [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3d8,
+ [PIPE_FORMAT_R32G32_FLOAT] = 0x428,
+ [PIPE_FORMAT_R32G32_SINT] = 0x468,
+ [PIPE_FORMAT_R32G32_UINT] = 0x468,
+ [PIPE_FORMAT_R10G10B10A2_UNORM] = 0x4a8,
+/* [PIPE_FORMAT_R10G10B10A2_UINT] = 0x530, */
+ [PIPE_FORMAT_R8G8B8A8_UNORM] = 0x588,
+ [PIPE_FORMAT_R8G8B8A8_SNORM] = 0x5f8,
+ [PIPE_FORMAT_R8G8B8A8_SINT] = 0x670,
+ [PIPE_FORMAT_R8G8B8A8_UINT] = 0x6c8,
+ [PIPE_FORMAT_B5G6R5_UNORM] = 0x718,
+ [PIPE_FORMAT_B5G5R5X1_UNORM] = 0x7a0,
+ [PIPE_FORMAT_R16G16_UNORM] = 0x828,
+ [PIPE_FORMAT_R16G16_SNORM] = 0x890,
+ [PIPE_FORMAT_R16G16_SINT] = 0x8f0,
+ [PIPE_FORMAT_R16G16_UINT] = 0x948,
+ [PIPE_FORMAT_R16G16_FLOAT] = 0x998,
+ [PIPE_FORMAT_R32_FLOAT] = 0x9e8,
+ [PIPE_FORMAT_R32_SINT] = 0xa30,
+ [PIPE_FORMAT_R32_UINT] = 0xa30,
+ [PIPE_FORMAT_R8G8_UNORM] = 0xa78,
+ [PIPE_FORMAT_R8G8_SNORM] = 0xae0,
+ [PIPE_FORMAT_R8G8_UINT] = 0xb48,
+ [PIPE_FORMAT_R8G8_SINT] = 0xb98,
+ [PIPE_FORMAT_R16_UNORM] = 0xbe8,
+ [PIPE_FORMAT_R16_SNORM] = 0xc48,
+ [PIPE_FORMAT_R16_SINT] = 0xca0,
+ [PIPE_FORMAT_R16_UINT] = 0xce8,
+ [PIPE_FORMAT_R16_FLOAT] = 0xd30,
+ [PIPE_FORMAT_R8_UNORM] = 0xd88,
+ [PIPE_FORMAT_R8_SNORM] = 0xde0,
+ [PIPE_FORMAT_R8_SINT] = 0xe38,
+ [PIPE_FORMAT_R8_UINT] = 0xe88,
+ [PIPE_FORMAT_R11G11B10_FLOAT] = 0xed0
+};