diff options
author | Christoph Bumiller <[email protected]> | 2012-04-14 23:56:56 +0200 |
---|---|---|
committer | Christoph Bumiller <[email protected]> | 2012-04-15 00:08:51 +0200 |
commit | e44089b2f79aa2dcaacf348911433d1e21235c0c (patch) | |
tree | 955d621392f0068ef8e3c98dc46195ff3916525e /src/gallium/drivers/nv50 | |
parent | 69a921892d2303f1400576aa73980c28880f8654 (diff) |
nvc0: add initial support for nve4+ (Kepler) chipsets
Most things that work on Fermi should work on Kepler too.
There are a few performance optimizations left to do, such as better
placement of texture barriers and adding scheduling data to the
shader instructions (without scheduling data, a thread group will be
masked for 32 cycles after every single instruction issue).
Diffstat (limited to 'src/gallium/drivers/nv50')
-rw-r--r-- | src/gallium/drivers/nv50/codegen/nv50_ir.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/codegen/nv50_ir_driver.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp | 6 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp | 1 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp | 3 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_screen.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_state.c | 7 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_texture.xml.h | 12 |
9 files changed, 26 insertions, 9 deletions
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.h b/src/gallium/drivers/nv50/codegen/nv50_ir.h index 6ec4fc95441..c299cab3f52 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir.h @@ -131,6 +131,7 @@ enum operation OP_POPCNT, // bitcount(src0 & src1) OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7] OP_EXTBF, + OP_TEXBAR, OP_LAST }; @@ -141,6 +142,7 @@ enum operation #define NV50_IR_SUBOP_LDC_ISL 3 #define NV50_IR_SUBOP_SHIFT_WRAP 1 #define NV50_IR_SUBOP_EMU_PRERET 1 +#define NV50_IR_SUBOP_TEXBAR(n) n enum DataType { diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h index e734c5b03bd..9632986fe40 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h @@ -163,6 +163,8 @@ struct nv50_ir_prog_info uint8_t clipDistanceMask; /* mask of clip distances defined */ uint8_t cullDistanceMask; /* clip distance mode (1 bit per output) */ int8_t genUserClip; /* request user clip planes for ClipVertex */ + uint16_t ucpBase; /* base address for UCPs */ + uint8_t ucpBinding; /* constant buffer index of UCP data */ uint8_t pointSize; /* output index for PointSize */ uint8_t instanceId; /* system value index of InstanceID */ uint8_t vertexId; /* system value index of VertexID */ diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp index 4530dc23715..8bd784fa47d 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp @@ -2250,9 +2250,9 @@ Converter::handleUserClipPlanes() for (c = 0; c < 4; ++c) { for (i = 0; i < info->io.genUserClip; ++i) { - Value *ucp; - ucp = mkLoad(TYPE_F32, mkSymbol(FILE_MEMORY_CONST, 15, TYPE_F32, - i * 16 + c * 4), NULL); + Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.ucpBinding, + TYPE_F32, 
info->io.ucpBase + i * 16 + c * 4); + Value *ucp = mkLoad(TYPE_F32, sym, NULL); if (c == 0) res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp); else diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h index 4ce9deb131f..93e502ea609 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h @@ -40,6 +40,7 @@ static inline bool isMemoryFile(DataFile f) return (f >= FILE_MEMORY_CONST && f <= FILE_MEMORY_LOCAL); } +// contrary to asTex(), this will never include SULD/SUST static inline bool isTextureOp(operation op) { return (op >= OP_TEX && op <= OP_TEXCSAA); diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp index 45e61c5e58a..4652bb95f69 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp @@ -147,6 +147,7 @@ const char *operationStr[OP_LAST + 1] = "popcnt", "insbf", "extbf", + "texbar", "(invalid)" }; diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp index 27b9610ed52..e3eae69554c 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp @@ -48,7 +48,7 @@ const uint8_t Target::operationSrcNr[OP_LAST + 1] = 1, 2, // SULD, SUST 1, 1, // DFDX, DFDY 1, 2, 2, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP - 2, 3, 2, // POPCNT, INSBF, EXTBF + 2, 3, 2, 0, // POPCNT, INSBF, EXTBF, TEXBAR 0 }; @@ -61,6 +61,7 @@ Target *Target::create(unsigned int chipset) switch (chipset & 0xf0) { case 0xc0: case 0xd0: + case 0xe0: return getTargetNVC0(chipset); case 0x50: case 0x80: diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 4bcd2049099..e8118d70ca7 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ 
b/src/gallium/drivers/nv50/nv50_screen.c @@ -594,6 +594,7 @@ nv50_screen_create(struct nouveau_device *dev) FAIL_SCREEN_INIT("Not a known NV50 chipset: NV%02x\n", dev->chipset); break; } + screen->base.class_3d = tesla_class; ret = nouveau_object_new(chan, 0xbeef5097, tesla_class, NULL, 0, &screen->tesla); diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index bf554427ca0..5b783da7ad7 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -465,6 +465,13 @@ nv50_sampler_state_create(struct pipe_context *pipe, (nv50_tsc_wrap_mode(cso->wrap_t) << 3) | (nv50_tsc_wrap_mode(cso->wrap_r) << 6)); + if (nouveau_screen(pipe->screen)->class_3d >= NVE4_3D_CLASS) { + if (cso->seamless_cube_map) + so->tsc[1] |= NVE4_TSC_1_CUBE_SEAMLESS; + if (!cso->normalized_coords) + so->tsc[1] |= NVE4_TSC_1_FORCE_NONNORMALIZED_COORDS; + } + switch (cso->mag_img_filter) { case PIPE_TEX_FILTER_LINEAR: so->tsc[1] |= NV50_TSC_1_MAGF_LINEAR; diff --git a/src/gallium/drivers/nv50/nv50_texture.xml.h b/src/gallium/drivers/nv50/nv50_texture.xml.h index 08f6efdd7bf..2b140be8d80 100644 --- a/src/gallium/drivers/nv50/nv50_texture.xml.h +++ b/src/gallium/drivers/nv50/nv50_texture.xml.h @@ -8,12 +8,12 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- rnndb/nv50_texture.xml ( 7947 bytes, from 2011-07-09 13:43:58) -- ./rnndb/copyright.xml ( 6452 bytes, from 2011-07-09 13:43:58) -- ./rnndb/nvchipsets.xml ( 3617 bytes, from 2011-07-09 13:43:58) -- ./rnndb/nv50_defs.xml ( 5468 bytes, from 2011-07-09 13:43:58) +- rnndb/nv50_texture.xml ( 8111 bytes, from 2012-03-31 16:47:45) +- ./rnndb/copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12) +- ./rnndb/nvchipsets.xml ( 3701 bytes, from 2012-03-22 20:40:59) +- ./rnndb/nv50_defs.xml ( 5468 bytes, from 2011-08-11 18:25:12) -Copyright (C) 2006-2011 by the following authors: +Copyright 
(C) 2006-2012 by the following authors: - Artur Huillet <[email protected]> (ahuillet) - Ben Skeggs (darktama, darktama_) - B. R. <[email protected]> (koala_br) @@ -265,8 +265,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV50_TSC_1_MIPF_NONE 0x00000040 #define NV50_TSC_1_MIPF_NEAREST 0x00000080 #define NV50_TSC_1_MIPF_LINEAR 0x000000c0 +#define NVE4_TSC_1_CUBE_SEAMLESS 0x00000200 #define NV50_TSC_1_LOD_BIAS__MASK 0x01fff000 #define NV50_TSC_1_LOD_BIAS__SHIFT 12 +#define NVE4_TSC_1_FORCE_NONNORMALIZED_COORDS 0x02000000 #define NV50_TSC_2 0x00000008 #define NV50_TSC_2_MIN_LOD__MASK 0x00000fff |