diff options
author | Christoph Bumiller <[email protected]> | 2013-02-23 19:40:23 +0100 |
---|---|---|
committer | Christoph Bumiller <[email protected]> | 2013-03-12 12:55:37 +0100 |
commit | e066f2f62f6043d43385bcdce4e7fa07ffa3ecbe (patch) | |
tree | 6f49b99571ffd3b93727988b635d74b0c397857e /src/gallium/drivers/nvc0/nvc0_tex.c | |
parent | 75f1f852b00ad0d766684d01695322b93a2acd55 (diff) |
nvc0: implement compute support for nve4
Diffstat (limited to 'src/gallium/drivers/nvc0/nvc0_tex.c')
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_tex.c | 295 |
1 files changed, 294 insertions, 1 deletions
diff --git a/src/gallium/drivers/nvc0/nvc0_tex.c b/src/gallium/drivers/nvc0/nvc0_tex.c
index 2bce97b32c7..7fbe1e6736b 100644
--- a/src/gallium/drivers/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nvc0/nvc0_tex.c
@@ -23,6 +23,7 @@
 #include "nvc0_context.h"
 #include "nvc0_resource.h"
 #include "nv50/nv50_texture.xml.h"
+#include "nv50/nv50_defs.xml.h"
 
 #include "util/u_format.h"
 
@@ -413,7 +414,7 @@ nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
    return need_flush;
 }
 
-static boolean
+boolean
 nve4_validate_tsc(struct nvc0_context *nvc0, int s)
 {
    struct nouveau_bo *txc = nvc0->screen->txc;
@@ -515,3 +516,295 @@ nve4_set_tex_handles(struct nvc0_context *nvc0)
       nvc0->samplers_dirty[s] = 0;
    }
 }
+
+/* Per-format lookup tables consulted by nve4_set_surface_info().
+ * They are declared here without initializers so the function can refer to
+ * them; the initialized definitions appear further down in this file.
+ */
+static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT];
+static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT];
+static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT];
+/* Write a 16-word (uint32_t) surface info record at push->cur and advance
+ * push->cur past it.
+ *
+ * push:   push buffer; the record is written through push->cur, which is
+ *         then incremented by 16. No space check is performed here.
+ *         NOTE(review): presumably the caller has already reserved room for
+ *         16 words - confirm at the call sites.
+ * psf:    surface to describe; may be NULL.
+ * screen: only screen->lib_code->start is read (added to the per-format
+ *         library offset stored in word 12).
+ *
+ * If psf is NULL, or psf->format has a zero entry in nve4_su_format_map,
+ * the record is zero-filled except for words 0, 1 and 12 (see below) and the
+ * function returns early. In the non-NULL unsupported-format case an error
+ * is additionally reported through NOUVEAU_ERR.
+ *
+ * Word layout produced for a supported surface, as written by this function:
+ *   [0]  address >> 8
+ *   [1]  format map value | log2cpp << 16 | 0x4000 | (aux map & 0x0f00)
+ *   [2]  (width - 1) | (aux map & 0xff) << 22
+ *   [3]  0 for PIPE_BUFFER, else (0x88 << 24) | (level pitch / 64)
+ *   [4]  0 for PIPE_BUFFER, else (height - 1) | bits derived from tile_mode
+ *   [5]  0 for PIPE_BUFFER, else layer_stride >> 8
+ *   [6]  0 for PIPE_BUFFER, else (depth - 1) | bits derived from tile_mode
+ *   [7]  0
+ *   [8]  width, [9] height, [10] depth
+ *   [11] code 0..4 selected from the resource target
+ *   [12] nve4_suldp_lib_offset[format] + screen->lib_code->start
+ *   [13] (0x06 << 22) | ((width << log2cpp) - 1)
+ *   [14] 0 for PIPE_BUFFER, else mt->ms_x
+ *   [15] 0 for PIPE_BUFFER, else mt->ms_y
+ */
+void
+nve4_set_surface_info(struct nouveau_pushbuf *push,
+                      struct pipe_surface *psf,
+                      struct nvc0_screen *screen)
+{
+   struct nv50_surface *sf = nv50_surface(psf); /* not dereferenced until psf is known to be non-NULL */
+   struct nv04_resource *res;
+   uint64_t address;
+   uint32_t *const info = push->cur; /* start of the record being written */
+   uint8_t log2cpp;
+/* A zero entry in nve4_su_format_map marks the format as unsupported. */
+   if (psf && !nve4_su_format_map[psf->format])
+      NOUVEAU_ERR("unsupported surface format, try is_format_supported() !\n");
+/* The 16 words are consumed on every path, including the early return. */
+   push->cur += 16;
+/* Missing or unsupported surface: emit a placeholder record and stop. */
+   if (!psf || !nve4_su_format_map[psf->format]) {
+      memset(info, 0, 16 * sizeof(*info));
+
+      info[0] = 0xbadf0000;
+      info[1] = 0x80004000;
+      info[12] = nve4_suldp_lib_offset[PIPE_FORMAT_R32G32B32A32_UINT] +
+         screen->lib_code->start;
+      return;
+   }
+   res = nv04_resource(sf->base.texture);
+
+   address = res->address + sf->offset;
+/* Plain dimensions, then a small code chosen from the resource target. */
+   info[8] = sf->width;
+   info[9] = sf->height;
+   info[10] = sf->depth;
+   switch (res->base.target) {
+   case PIPE_TEXTURE_1D_ARRAY:
+      info[11] = 1;
+      break;
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_RECT:
+      info[11] = 2;
+      break;
+   case PIPE_TEXTURE_3D:
+      info[11] = 3;
+      break;
+   case PIPE_TEXTURE_2D_ARRAY:
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_CUBE_ARRAY:
+      info[11] = 4;
+      break;
+   default: /* every target not listed above, PIPE_BUFFER included */
+      info[11] = 0;
+      break;
+   }
+   log2cpp = (0xf000 & nve4_su_format_aux_map[sf->base.format]) >> 12; /* top nibble of the aux entry */
+
+   info[12] = nve4_suldp_lib_offset[sf->base.format] + screen->lib_code->start;
+
+   /* limit in bytes for raw access */
+   info[13] = (0x06 << 22) | ((sf->width << log2cpp) - 1);
+
+   info[1] = nve4_su_format_map[sf->base.format];
+/* The #if 0 branch is a disabled alternative that would derive the value
+ * placed at bit 16 from util_format_get_blocksizebits(); the active #else
+ * branch takes it from log2cpp instead.
+ */
+#if 0
+   switch (util_format_get_blocksizebits(sf->base.format)) {
+   case 16: info[1] |= 1 << 16; break;
+   case 32: info[1] |= 2 << 16; break;
+   case 64: info[1] |= 3 << 16; break;
+   case 128: info[1] |= 4 << 16; break;
+   default:
+      break;
+   }
+#else
+   info[1] |= log2cpp << 16;
+   info[1] |= 0x4000;
+   info[1] |= (0x0f00 & nve4_su_format_aux_map[sf->base.format]);
+#endif
+/* Buffers only need address and width; all tiling/multisample words are 0. */
+   if (res->base.target == PIPE_BUFFER) {
+      info[0] = address >> 8;
+      info[2] = sf->width - 1;
+      info[2] |= (0xff & nve4_su_format_aux_map[sf->base.format]) << 22;
+      info[3] = 0;
+      info[4] = 0;
+      info[5] = 0;
+      info[6] = 0;
+      info[7] = 0;
+      info[14] = 0;
+      info[15] = 0;
+   } else {
+      struct nv50_miptree *mt = nv50_miptree(&res->base);
+      struct nv50_miptree_level *lvl = &mt->level[sf->base.u.tex.level];
+      const unsigned z = sf->base.u.tex.first_layer;
+/* A non-zero first layer moves the base address: via the z-slice helper
+ * for 3D-layout miptrees, by whole layer strides otherwise.
+ */
+      if (z) {
+         if (mt->layout_3d) {
+            address += nvc0_mt_zslice_offset(mt, psf->u.tex.level, z);
+            /* doesn't work if z passes z-tile boundary */
+            assert(sf->depth == 1);
+         } else {
+            address += mt->layer_stride * z;
+         }
+      }
+      info[0] = address >> 8;
+      info[2] = sf->width - 1;
+      /* NOTE: this is really important: */
+      info[2] |= (0xff & nve4_su_format_aux_map[sf->base.format]) << 22;
+      info[3] = (0x88 << 24) | (lvl->pitch / 64);
+      info[4] = sf->height - 1;
+      info[4] |= (lvl->tile_mode & 0x0f0) << 25;
+      info[4] |= NVC0_TILE_SHIFT_Y(lvl->tile_mode) << 22;
+      info[5] = mt->layer_stride >> 8;
+      info[6] = sf->depth - 1;
+      info[6] |= (lvl->tile_mode & 0xf00) << 21;
+      info[6] |= NVC0_TILE_SHIFT_Z(lvl->tile_mode) << 22;
+      info[7] = 0;
+      info[14] = mt->ms_x;
+      info[15] = mt->ms_y;
+   }
+}
+/* Placeholder for the branch nvc0_validate_surfaces() takes when class_3d
+ * is below NVE4_3D_CLASS; the body is empty, so calling it has no effect.
+ */
+static INLINE void
+nvc0_update_surface_bindings(struct nvc0_context *nvc0)
+{
+   /* TODO */
+}
+/* Placeholder for the branch nvc0_validate_surfaces() takes when class_3d
+ * is NVE4_3D_CLASS or above; the body is empty, so calling it has no effect.
+ */
+static INLINE void
+nve4_update_surface_bindings(struct nvc0_context *nvc0)
+{
+   /* TODO */
+}
+/* Dispatch surface-binding validation on nvc0->screen->base.class_3d:
+ * NVE4_3D_CLASS and above use the nve4 variant, everything else the nvc0
+ * variant. Both variants are currently empty stubs.
+ */
+void
+nvc0_validate_surfaces(struct nvc0_context *nvc0)
+{
+   if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
+      nve4_update_surface_bindings(nvc0);
+   } else {
+      nvc0_update_surface_bindings(nvc0);
+   }
+}
+
+/* Maps a pipe_format to its NVE4_IMAGE_FORMAT_* value, which
+ * nve4_set_surface_info() stores in word 1 of the surface info record.
+ * Formats not listed are implicitly zero, and nve4_set_surface_info() treats
+ * a zero entry as "unsupported format".
+ */
+static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT] =
+{
+   [PIPE_FORMAT_R32G32B32A32_FLOAT] = NVE4_IMAGE_FORMAT_RGBA32_FLOAT,
+   [PIPE_FORMAT_R32G32B32A32_SINT] = NVE4_IMAGE_FORMAT_RGBA32_SINT,
+   [PIPE_FORMAT_R32G32B32A32_UINT] = NVE4_IMAGE_FORMAT_RGBA32_UINT,
+   [PIPE_FORMAT_R16G16B16A16_FLOAT] = NVE4_IMAGE_FORMAT_RGBA16_FLOAT,
+   [PIPE_FORMAT_R16G16B16A16_UNORM] = NVE4_IMAGE_FORMAT_RGBA16_UNORM,
+   [PIPE_FORMAT_R16G16B16A16_SNORM] = NVE4_IMAGE_FORMAT_RGBA16_SNORM,
+   [PIPE_FORMAT_R16G16B16A16_SINT] = NVE4_IMAGE_FORMAT_RGBA16_SINT,
+   [PIPE_FORMAT_R16G16B16A16_UINT] = NVE4_IMAGE_FORMAT_RGBA16_UINT,
+   [PIPE_FORMAT_R8G8B8A8_UNORM] = NVE4_IMAGE_FORMAT_RGBA8_UNORM,
+   [PIPE_FORMAT_R8G8B8A8_SNORM] = NVE4_IMAGE_FORMAT_RGBA8_SNORM,
+   [PIPE_FORMAT_R8G8B8A8_SINT] = NVE4_IMAGE_FORMAT_RGBA8_SINT,
+   [PIPE_FORMAT_R8G8B8A8_UINT] = NVE4_IMAGE_FORMAT_RGBA8_UINT,
+   [PIPE_FORMAT_R11G11B10_FLOAT] = NVE4_IMAGE_FORMAT_R11G11B10_FLOAT,
+   [PIPE_FORMAT_R10G10B10A2_UNORM] = NVE4_IMAGE_FORMAT_RGB10_A2_UNORM,
+/* [PIPE_FORMAT_R10G10B10A2_UINT] = NVE4_IMAGE_FORMAT_RGB10_A2_UINT, */
+   [PIPE_FORMAT_R32G32_FLOAT] = NVE4_IMAGE_FORMAT_RG32_FLOAT,
+   [PIPE_FORMAT_R32G32_SINT] = NVE4_IMAGE_FORMAT_RG32_SINT,
+   [PIPE_FORMAT_R32G32_UINT] = NVE4_IMAGE_FORMAT_RG32_UINT,
+   [PIPE_FORMAT_R16G16_FLOAT] = NVE4_IMAGE_FORMAT_RG16_FLOAT,
+   [PIPE_FORMAT_R16G16_UNORM] = NVE4_IMAGE_FORMAT_RG16_UNORM,
+   [PIPE_FORMAT_R16G16_SNORM] = NVE4_IMAGE_FORMAT_RG16_SNORM,
+   [PIPE_FORMAT_R16G16_SINT] = NVE4_IMAGE_FORMAT_RG16_SINT,
+   [PIPE_FORMAT_R16G16_UINT] = NVE4_IMAGE_FORMAT_RG16_UINT,
+   [PIPE_FORMAT_R8G8_UNORM] = NVE4_IMAGE_FORMAT_RG8_UNORM,
+   [PIPE_FORMAT_R8G8_SNORM] = NVE4_IMAGE_FORMAT_RG8_SNORM,
+   [PIPE_FORMAT_R8G8_SINT] = NVE4_IMAGE_FORMAT_RG8_SINT,
+   [PIPE_FORMAT_R8G8_UINT] = NVE4_IMAGE_FORMAT_RG8_UINT,
+   [PIPE_FORMAT_R32_FLOAT] = NVE4_IMAGE_FORMAT_R32_FLOAT,
+   [PIPE_FORMAT_R32_SINT] = NVE4_IMAGE_FORMAT_R32_SINT,
+   [PIPE_FORMAT_R32_UINT] = NVE4_IMAGE_FORMAT_R32_UINT,
+   [PIPE_FORMAT_R16_FLOAT] = NVE4_IMAGE_FORMAT_R16_FLOAT,
+   [PIPE_FORMAT_R16_UNORM] = NVE4_IMAGE_FORMAT_R16_UNORM,
+   [PIPE_FORMAT_R16_SNORM] = NVE4_IMAGE_FORMAT_R16_SNORM,
+   [PIPE_FORMAT_R16_SINT] = NVE4_IMAGE_FORMAT_R16_SINT,
+   [PIPE_FORMAT_R16_UINT] = NVE4_IMAGE_FORMAT_R16_UINT,
+   [PIPE_FORMAT_R8_UNORM] = NVE4_IMAGE_FORMAT_R8_UNORM,
+   [PIPE_FORMAT_R8_SNORM] = NVE4_IMAGE_FORMAT_R8_SNORM,
+   [PIPE_FORMAT_R8_SINT] = NVE4_IMAGE_FORMAT_R8_SINT,
+   [PIPE_FORMAT_R8_UINT] = NVE4_IMAGE_FORMAT_R8_UINT,
+};
+
+/* Auxiliary format description values for surface instructions.
+ * (log2(bytes per pixel) << 12) | (unk8 << 8) | unk22
+ *
+ * How nve4_set_surface_info() consumes each field:
+ *   bits 12-15: extracted as log2cpp; shifted to bit 16 of word 1 and used
+ *               to scale the width in word 13.
+ *   bits  8-11: OR'd in place into word 1.
+ *   bits  0-7:  shifted to bit 22 of word 2.
+ * The meaning of the two "unk" fields is not documented here.
+ */
+static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT] =
+{
+   [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x4842,
+   [PIPE_FORMAT_R32G32B32A32_SINT] = 0x4842,
+   [PIPE_FORMAT_R32G32B32A32_UINT] = 0x4842,
+
+   [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x3933,
+   [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x3933,
+   [PIPE_FORMAT_R16G16B16A16_SINT] = 0x3933,
+   [PIPE_FORMAT_R16G16B16A16_UINT] = 0x3933,
+   [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3933,
+
+   [PIPE_FORMAT_R32G32_FLOAT] = 0x3433,
+   [PIPE_FORMAT_R32G32_SINT] = 0x3433,
+   [PIPE_FORMAT_R32G32_UINT] = 0x3433,
+
+   [PIPE_FORMAT_R10G10B10A2_UNORM] = 0x2a24,
+/* [PIPE_FORMAT_R10G10B10A2_UINT] = 0x2a24, */
+   [PIPE_FORMAT_R8G8B8A8_UNORM] = 0x2a24,
+   [PIPE_FORMAT_R8G8B8A8_SNORM] = 0x2a24,
+   [PIPE_FORMAT_R8G8B8A8_SINT] = 0x2a24,
+   [PIPE_FORMAT_R8G8B8A8_UINT] = 0x2a24,
+   [PIPE_FORMAT_R11G11B10_FLOAT] = 0x2a24,
+
+   [PIPE_FORMAT_R16G16_UNORM] = 0x2524,
+   [PIPE_FORMAT_R16G16_SNORM] = 0x2524,
+   [PIPE_FORMAT_R16G16_SINT] = 0x2524,
+   [PIPE_FORMAT_R16G16_UINT] = 0x2524,
+   [PIPE_FORMAT_R16G16_FLOAT] = 0x2524,
+
+   [PIPE_FORMAT_R32_SINT] = 0x2024,
+   [PIPE_FORMAT_R32_UINT] = 0x2024,
+   [PIPE_FORMAT_R32_FLOAT] = 0x2024,
+
+   [PIPE_FORMAT_R8G8_UNORM] = 0x1615,
+   [PIPE_FORMAT_R8G8_SNORM] = 0x1615,
+   [PIPE_FORMAT_R8G8_SINT] = 0x1615,
+   [PIPE_FORMAT_R8G8_UINT] = 0x1615,
+
+   [PIPE_FORMAT_R16_UNORM] = 0x1115,
+   [PIPE_FORMAT_R16_SNORM] = 0x1115,
+   [PIPE_FORMAT_R16_SINT] = 0x1115,
+   [PIPE_FORMAT_R16_UINT] = 0x1115,
+   [PIPE_FORMAT_R16_FLOAT] = 0x1115,
+
+   [PIPE_FORMAT_R8_UNORM] = 0x0206,
+   [PIPE_FORMAT_R8_SNORM] = 0x0206,
+   [PIPE_FORMAT_R8_SINT] = 0x0206,
+   [PIPE_FORMAT_R8_UINT] = 0x0206
+};
+
+/* NOTE: These are hardcoded offsets for the shader library.
+ * TODO: Automate them.
+ *
+ * nve4_set_surface_info() adds the selected entry to screen->lib_code->start
+ * and stores the sum in word 12 of the surface info record.
+ *
+ * NOTE(review): PIPE_FORMAT_B5G6R5_UNORM and PIPE_FORMAT_B5G5R5X1_UNORM have
+ * offsets here but no entry in nve4_su_format_map, so
+ * nve4_set_surface_info() takes its unsupported-format path for them and
+ * never reads these two entries - confirm whether that is intended.
+ * NOTE(review): the R8G8 group lists UINT before SINT, while every other
+ * integer group in this table lists SINT first - confirm against the actual
+ * layout of the shader library.
+ */
+static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT] =
+{
+   [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x218,
+   [PIPE_FORMAT_R32G32B32A32_SINT] = 0x218,
+   [PIPE_FORMAT_R32G32B32A32_UINT] = 0x218,
+   [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x248,
+   [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x2b8,
+   [PIPE_FORMAT_R16G16B16A16_SINT] = 0x330,
+   [PIPE_FORMAT_R16G16B16A16_UINT] = 0x388,
+   [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3d8,
+   [PIPE_FORMAT_R32G32_FLOAT] = 0x428,
+   [PIPE_FORMAT_R32G32_SINT] = 0x468,
+   [PIPE_FORMAT_R32G32_UINT] = 0x468,
+   [PIPE_FORMAT_R10G10B10A2_UNORM] = 0x4a8,
+/* [PIPE_FORMAT_R10G10B10A2_UINT] = 0x530, */
+   [PIPE_FORMAT_R8G8B8A8_UNORM] = 0x588,
+   [PIPE_FORMAT_R8G8B8A8_SNORM] = 0x5f8,
+   [PIPE_FORMAT_R8G8B8A8_SINT] = 0x670,
+   [PIPE_FORMAT_R8G8B8A8_UINT] = 0x6c8,
+   [PIPE_FORMAT_B5G6R5_UNORM] = 0x718,
+   [PIPE_FORMAT_B5G5R5X1_UNORM] = 0x7a0,
+   [PIPE_FORMAT_R16G16_UNORM] = 0x828,
+   [PIPE_FORMAT_R16G16_SNORM] = 0x890,
+   [PIPE_FORMAT_R16G16_SINT] = 0x8f0,
+   [PIPE_FORMAT_R16G16_UINT] = 0x948,
+   [PIPE_FORMAT_R16G16_FLOAT] = 0x998,
+   [PIPE_FORMAT_R32_FLOAT] = 0x9e8,
+   [PIPE_FORMAT_R32_SINT] = 0xa30,
+   [PIPE_FORMAT_R32_UINT] = 0xa30,
+   [PIPE_FORMAT_R8G8_UNORM] = 0xa78,
+   [PIPE_FORMAT_R8G8_SNORM] = 0xae0,
+   [PIPE_FORMAT_R8G8_UINT] = 0xb48,
+   [PIPE_FORMAT_R8G8_SINT] = 0xb98,
+   [PIPE_FORMAT_R16_UNORM] = 0xbe8,
+   [PIPE_FORMAT_R16_SNORM] = 0xc48,
+   [PIPE_FORMAT_R16_SINT] = 0xca0,
+   [PIPE_FORMAT_R16_UINT] = 0xce8,
+   [PIPE_FORMAT_R16_FLOAT] = 0xd30,
+   [PIPE_FORMAT_R8_UNORM] = 0xd88,
+   [PIPE_FORMAT_R8_SNORM] = 0xde0,
+   [PIPE_FORMAT_R8_SINT] = 0xe38,
+   [PIPE_FORMAT_R8_UINT] = 0xe88,
+   [PIPE_FORMAT_R11G11B10_FLOAT] = 0xed0
+}; |