diff options
Diffstat (limited to 'src/gallium/drivers/nv50')
-rw-r--r-- | src/gallium/drivers/nv50/nv50_3d.xml.h | 10 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_context.c | 12 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_context.h | 15 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_debug.h | 25 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_formats.c | 126 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_pc.c | 24 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_pc.h | 19 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_pc_optimize.c | 13 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_pc_regalloc.c | 309 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_program.c | 6 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_screen.c | 21 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_shader_state.c | 41 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_state.c | 29 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_state_validate.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_stateobj.h | 4 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_surface.c | 8 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_tgsi_to_nc.c | 18 |
17 files changed, 511 insertions, 171 deletions
diff --git a/src/gallium/drivers/nv50/nv50_3d.xml.h b/src/gallium/drivers/nv50/nv50_3d.xml.h index 9bb3211728c..41a380ec2ec 100644 --- a/src/gallium/drivers/nv50/nv50_3d.xml.h +++ b/src/gallium/drivers/nv50/nv50_3d.xml.h @@ -558,7 +558,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV50_3D_UNK0F8C 0x00000f8c -#define NV50_3D_UNK0F90 0x00000f90 +#define NV50_3D_COLOR_MASK_COMMON 0x00000f90 #define NV50_3D_UNK0F94 0x00000f94 @@ -1007,7 +1007,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV50_3D_TEX_CACHE_CTL_UNK1__MASK 0x00000030 #define NV50_3D_TEX_CACHE_CTL_UNK1__SHIFT 4 -#define NV50_3D_UNK133C 0x0000133c +#define NV50_3D_BLEND_SEPARATE_ALPHA 0x0000133c #define NV50_3D_BLEND_EQUATION_RGB 0x00001340 #define NV50_3D_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006 @@ -1033,7 +1033,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV50_3D_BLEND_FUNC_DST_ALPHA 0x00001358 -#define NV50_3D_UNK135C 0x0000135c +#define NV50_3D_BLEND_ENABLE_COMMON 0x0000135c #define NV50_3D_BLEND_ENABLE(i0) (0x00001360 + 0x4*(i0)) #define NV50_3D_BLEND_ENABLE__ESIZE 0x00000004 @@ -1673,7 +1673,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV50_3D_MAP_SEMANTIC_0_BFC0_ID__SHIFT 8 #define NV50_3D_MAP_SEMANTIC_0_COLR_NR__MASK 0x00ff0000 #define NV50_3D_MAP_SEMANTIC_0_COLR_NR__SHIFT 16 -#define NV50_3D_MAP_SEMANTIC_0_CLMP_EN 0xff000000 +#define NV50_3D_MAP_SEMANTIC_0_CLMP_EN 0x01000000 #define NV50_3D_MAP_SEMANTIC_1 0x00001908 #define NV50_3D_MAP_SEMANTIC_1_CLIP_START__MASK 0x000000ff @@ -1706,7 +1706,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV50_3D_CULL_FACE_BACK 0x00000405 #define NV50_3D_CULL_FACE_FRONT_AND_BACK 0x00000408 -#define NV50_3D_UNK1924 0x00001924 +#define NV50_3D_LINE_LAST_PIXEL 0x00001924 #define NVA3_3D_FP_MULTISAMPLE 0x00001928 #define NVA3_3D_FP_MULTISAMPLE_EXPORT_SAMPLE_MASK 0x00000001 diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 930cee7c1e7..632ca4daf74 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -46,6 +46,17 @@ nv50_flush(struct pipe_context *pipe, FIRE_RING(screen->channel); } +static void +nv50_texture_barrier(struct pipe_context *pipe) +{ + struct nouveau_channel *chan = nv50_context(pipe)->screen->base.channel; + + BEGIN_RING(chan, RING_3D(SERIALIZE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(TEX_CACHE_CTL), 1); + OUT_RING (chan, 0x20); +} + void nv50_default_flush_notify(struct nouveau_channel *chan) { @@ -125,6 +136,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv) pipe->clear = nv50_clear; pipe->flush = nv50_flush; + pipe->texture_barrier = nv50_texture_barrier; if (!screen->cur_ctx) screen->cur_ctx = nv50; diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 46e6c2250af..3f031994f0a 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -1,7 +1,6 @@ #ifndef __NV50_CONTEXT_H__ #define __NV50_CONTEXT_H__ -#include <stdio.h> #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" @@ -13,6 +12,7 @@ #include "draw/draw_vertex.h" +#include "nv50_debug.h" #include "nv50_winsys.h" #include "nv50_stateobj.h" #include "nv50_screen.h" @@ -26,15 +26,6 @@ #include "nv50_3d.xml.h" #include "nv50_2d.xml.h" -#define NOUVEAU_ERR(fmt, args...) \ - fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args); - -#ifdef NOUVEAU_DEBUG -# define NOUVEAU_DBG(args...) printf(args); -#else -# define NOUVEAU_DBG(args...) -#endif - #define NV50_NEW_BLEND (1 << 0) #define NV50_NEW_RASTERIZER (1 << 1) #define NV50_NEW_ZSA (1 << 2) @@ -80,6 +71,8 @@ struct nv50_context { uint32_t instance_elts; /* bitmask of per-instance elements */ uint32_t instance_base; uint32_t interpolant_ctrl; + uint32_t semantic_color; + uint32_t semantic_psize; int32_t index_bias; boolean prim_restart; boolean point_sprite; @@ -183,7 +176,7 @@ void nv50_fragprog_validate(struct nv50_context *); void nv50_fp_linkage_validate(struct nv50_context *); void nv50_gp_linkage_validate(struct nv50_context *); void nv50_constbufs_validate(struct nv50_context *); -void nv50_sprite_coords_validate(struct nv50_context *); +void nv50_validate_derived_rs(struct nv50_context *); /* nv50_state.c */ extern void nv50_init_state_functions(struct nv50_context *); diff --git a/src/gallium/drivers/nv50/nv50_debug.h b/src/gallium/drivers/nv50/nv50_debug.h new file mode 100644 index 00000000000..f3dee621519 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_debug.h @@ -0,0 +1,25 @@ + +#ifndef __NV50_DEBUG_H__ +#define __NV50_DEBUG_H__ + +#include <stdio.h> + +#include "util/u_debug.h" + +#define NV50_DEBUG_MISC 0x0001 +#define NV50_DEBUG_SHADER 0x0100 +#define NV50_DEBUG_PROG_IR 0x0200 +#define NV50_DEBUG_PROG_RA 0x0400 +#define NV50_DEBUG_PROG_CFLOW 0x0800 +#define NV50_DEBUG_PROG_ALL 0x1f00 + +#define NV50_DEBUG 0 + +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args) + +#define NV50_DBGMSG(ch, args...) \ + if ((NV50_DEBUG) & (NV50_DEBUG_##ch)) \ + debug_printf(args) + +#endif /* __NV50_DEBUG_H__ */ diff --git a/src/gallium/drivers/nv50/nv50_formats.c b/src/gallium/drivers/nv50/nv50_formats.c index 7946117cf30..c65189d0671 100644 --- a/src/gallium/drivers/nv50/nv50_formats.c +++ b/src/gallium/drivers/nv50/nv50_formats.c @@ -93,6 +93,10 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 4_4_4_4, 1), SAMPLER_VIEW }, + [PIPE_FORMAT_B4G4R4X4_UNORM] = { 0, + B_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 4_4_4_4, 1), + SAMPLER_VIEW }, + [PIPE_FORMAT_R10G10B10A2_UNORM] = { NV50_SURFACE_FORMAT_A2B10G10R10_UNORM, A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 0), SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER | SCANOUT }, @@ -138,14 +142,62 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = A_(C0, C0, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8, 0), SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_I8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM, - A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 8, 0), + [PIPE_FORMAT_L16_SNORM] = { NV50_SURFACE_FORMAT_R16_SNORM, + B_(C0, C0, C0, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 16, 0), + SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_L16_FLOAT] = { NV50_SURFACE_FORMAT_R16_FLOAT, + B_(C0, C0, C0, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 16, 0), SAMPLER_VIEW | RENDER_TARGET }, + [PIPE_FORMAT_L32_FLOAT] = { NV50_SURFACE_FORMAT_R32_FLOAT, + B_(C0, C0, C0, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 32, 0), + SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_I8_UNORM] = { 0, + A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 8, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_I16_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM, + A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 16, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_I16_SNORM] = { NV50_SURFACE_FORMAT_R16_SNORM, + B_(C0, C0, C0, C0, SNORM, SNORM, SNORM, SNORM, 16, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_I16_FLOAT] = { NV50_SURFACE_FORMAT_R16_FLOAT, + B_(C0, C0, C0, C0, FLOAT, FLOAT, FLOAT, FLOAT, 16, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_I32_FLOAT] = { NV50_SURFACE_FORMAT_R32_FLOAT, + B_(C0, C0, C0, C0, FLOAT, FLOAT, FLOAT, FLOAT, 32, 0), + SAMPLER_VIEW }, + [PIPE_FORMAT_A8_UNORM] = { NV50_SURFACE_FORMAT_A8_UNORM, A_(ZERO, ZERO, ZERO, C0, UNORM, UNORM, UNORM, UNORM, 8, 0), SAMPLER_VIEW | RENDER_TARGET }, + [PIPE_FORMAT_A8_SNORM] = { 0, + A_(ZERO, ZERO, ZERO, C0, SNORM, SNORM, SNORM, SNORM, 8, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_A16_UNORM] = { 0, + A_(ZERO, ZERO, ZERO, C0, UNORM, UNORM, UNORM, UNORM, 16, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_A16_SNORM] = { 0, + A_(ZERO, ZERO, ZERO, C0, SNORM, SNORM, SNORM, SNORM, 16, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_A16_FLOAT] = { 0, + B_(ZERO, ZERO, ZERO, C0, FLOAT, FLOAT, FLOAT, FLOAT, 16, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_A32_FLOAT] = { 0, + B_(ZERO, ZERO, ZERO, C0, FLOAT, FLOAT, FLOAT, FLOAT, 32, 0), + SAMPLER_VIEW }, + [PIPE_FORMAT_L8A8_UNORM] = { 0, A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0), SAMPLER_VIEW }, @@ -154,6 +206,26 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0), SAMPLER_VIEW }, + [PIPE_FORMAT_L16A16_UNORM] = { 0, + A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 16_16, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_L16A16_SNORM] = { 0, + A_(C0, C0, C0, C1, SNORM, SNORM, SNORM, SNORM, 16_16, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_L16A16_FLOAT] = { 0, + B_(C0, C0, C0, C1, FLOAT, FLOAT, FLOAT, FLOAT, 16_16, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_L32A32_FLOAT] = { 0, + B_(C0, C0, C0, C1, FLOAT, FLOAT, FLOAT, FLOAT, 32_32, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_L4A4_UNORM] = { 0, + B_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 4_4, 0), + SAMPLER_VIEW }, + /* DXT, RGTC */ [PIPE_FORMAT_DXT1_RGB] = { 0, @@ -172,6 +244,22 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT5, 0), SAMPLER_VIEW }, + [PIPE_FORMAT_DXT1_SRGB] = { 0, + B_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, DXT1, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_DXT1_SRGBA] = { 0, + B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT1, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_DXT3_SRGBA] = { 0, + B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT3, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_DXT5_SRGBA] = { 0, + B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT5, 0), + SAMPLER_VIEW }, + [PIPE_FORMAT_RGTC1_UNORM] = { 0, B_(C0, ZERO, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, RGTC1, 0), SAMPLER_VIEW }, @@ -237,7 +325,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = /* SNORM 32 */ [PIPE_FORMAT_R32G32B32A32_SNORM] = { 0, - A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0), + A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 32_32_32_32, 0), VERTEX_BUFFER | SAMPLER_VIEW }, [PIPE_FORMAT_R32G32B32_SNORM] = { 0, @@ -255,7 +343,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = /* UNORM 32 */ [PIPE_FORMAT_R32G32B32A32_UNORM] = { 0, - A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0), + A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 32_32_32_32, 0), VERTEX_BUFFER | SAMPLER_VIEW }, [PIPE_FORMAT_R32G32B32_UNORM] = { 0, @@ -281,7 +369,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = VERTEX_BUFFER | SAMPLER_VIEW }, [PIPE_FORMAT_R16G16_SNORM] = { NV50_SURFACE_FORMAT_R16G16_SNORM, - A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16, 0), + A_(C0, C1, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, [PIPE_FORMAT_R16_SNORM] = { NV50_SURFACE_FORMAT_R16_SNORM, @@ -299,7 +387,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = VERTEX_BUFFER | SAMPLER_VIEW }, [PIPE_FORMAT_R16G16_UNORM] = { NV50_SURFACE_FORMAT_R16G16_UNORM, - A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16, 0), + A_(C0, C1, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, [PIPE_FORMAT_R16_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM, @@ -457,4 +545,30 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = [PIPE_FORMAT_R8_USCALED] = { 0, A_(C0, ZERO, ZERO, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 8, 0), VERTEX_BUFFER }, + + /* OTHER FORMATS */ + + [PIPE_FORMAT_R8G8_B8G8_UNORM] = { 0, + B_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, C1_C2_C1_C0, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_G8R8_G8B8_UNORM] = { 0, + B_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, C2_C1_C0_C1, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_R8SG8SB8UX8U_NORM] = { 0, + B_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 8_8_8_8, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_R5SG5SB6U_NORM] = { 0, + B_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 6_5_5, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_R1_UNORM] = { 0, + B_(C0, ZERO, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, BITMAP_8X8, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_A8B8G8R8_UNORM] = { 0, + B_(C3, C2, C1, C0, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0), + SAMPLER_VIEW }, }; diff --git a/src/gallium/drivers/nv50/nv50_pc.c b/src/gallium/drivers/nv50/nv50_pc.c index 82f1b846527..7900bf811df 100644 --- a/src/gallium/drivers/nv50/nv50_pc.c +++ b/src/gallium/drivers/nv50/nv50_pc.c @@ -20,8 +20,6 @@ * SOFTWARE. */ -/* #define NV50PC_DEBUG */ - #include "nv50_pc.h" #include "nv50_program.h" @@ -180,6 +178,7 @@ nv50_op_can_write_flags(uint opcode) switch (opcode) { /* obvious ones like KIL, CALL, etc. not included */ case NV_OP_PHI: case NV_OP_MOV: + case NV_OP_SELECT: case NV_OP_LINTERP: case NV_OP_PINTERP: case NV_OP_LDA: @@ -367,7 +366,7 @@ nv_print_program(struct nv_pc *pc) nv_print_function(pc->root[i]); } -#ifdef NV50PC_DEBUG +#if NV50_DEBUG & NV50_DEBUG_PROG_CFLOW static void nv_do_print_cfgraph(struct nv_pc *pc, FILE *f, struct nv_basic_block *b) { @@ -425,7 +424,7 @@ nv_print_cfgraph(struct nv_pc *pc, const char *filepath, int subr) fclose(f); } -#endif +#endif /* NV50_DEBUG_PROG_CFLOW */ static INLINE void nvcg_show_bincode(struct nv_pc *pc) @@ -446,7 +445,7 @@ nv50_emit_program(struct nv_pc *pc) uint32_t *code = pc->emit; int n; - NV50_DBGMSG("emitting program: size = %u\n", pc->bin_size); + NV50_DBGMSG(SHADER, "emitting program: size = %u\n", pc->bin_size); for (n = 0; n < pc->num_blocks; ++n) { struct nv_instruction *i; @@ -472,7 +471,7 @@ nv50_emit_program(struct nv_pc *pc) pc->emit = code; code[pc->bin_size / 4 - 1] |= 1; -#ifdef NV50PC_DEBUG +#if NV50_DEBUG & NV50_DEBUG_SHADER nvcg_show_bincode(pc); #endif @@ -500,7 +499,7 @@ nv50_generate_code(struct nv50_translation_info *ti) ret = nv50_tgsi_to_nc(pc, ti); if (ret) goto out; -#ifdef NV50PC_DEBUG +#if NV50_DEBUG & NV50_DEBUG_PROG_IR nv_print_program(pc); #endif @@ -510,7 +509,7 @@ nv50_generate_code(struct nv50_translation_info *ti) ret = nv_pc_exec_pass0(pc); if (ret) goto out; -#ifdef NV50PC_DEBUG +#if NV50_DEBUG & NV50_DEBUG_PROG_IR nv_print_program(pc); #endif @@ -518,7 +517,7 @@ nv50_generate_code(struct nv50_translation_info *ti) ret = nv_pc_exec_pass1(pc); if (ret) goto out; -#ifdef NV50PC_DEBUG +#if NV50_DEBUG & NV50_DEBUG_PROG_CFLOW nv_print_program(pc); nv_print_cfgraph(pc, "nv50_shader_cfgraph.dot", 0); #endif @@ -552,7 +551,7 @@ nv50_generate_code(struct nv50_translation_info *ti) ti->p->uses_lmem = ti->store_to_memory; - NV50_DBGMSG("SHADER TRANSLATION - %s\n", ret ? "failure" : "success"); + NV50_DBGMSG(SHADER, "SHADER TRANSLATION - %s\n", ret ? "failed" : "success"); out: nv_pc_free_refs(pc); @@ -624,6 +623,9 @@ nvbb_insert_tail(struct nv_basic_block *b, struct nv_instruction *i) i->bb = b; b->num_instructions++; + + if (i->prev && i->prev->is_terminator) + nv_nvi_permute(i->prev, i); } void @@ -669,7 +671,7 @@ nv_nvi_delete(struct nv_instruction *nvi) if (nvi == b->phi) { if (nvi->opcode != NV_OP_PHI) - NV50_DBGMSG("NOTE: b->phi points to non-PHI instruction\n"); + NV50_DBGMSG(PROG_IR, "NOTE: b->phi points to non-PHI instruction\n"); assert(!nvi->prev); if (!nvi->next || nvi->next->opcode != NV_OP_PHI) diff --git a/src/gallium/drivers/nv50/nv50_pc.h b/src/gallium/drivers/nv50/nv50_pc.h index e6f3815bafe..5bb0e1296bb 100644 --- a/src/gallium/drivers/nv50/nv50_pc.h +++ b/src/gallium/drivers/nv50/nv50_pc.h @@ -23,13 +23,7 @@ #ifndef __NV50_COMPILER_H__ #define __NV50_COMPILER_H__ -#define NV50PC_DEBUG - -#ifdef NV50PC_DEBUG -# define NV50_DBGMSG(args...) debug_printf(args) -#else -# define NV50_DBGMSG(args...) -#endif +#include "nv50_debug.h" #include "pipe/p_defines.h" #include "util/u_inlines.h" @@ -228,6 +222,8 @@ struct nv_ref { ubyte flags; /* not used yet */ }; +#define NV_REF_FLAG_REGALLOC_PRIV (1 << 0) + struct nv_basic_block; struct nv_instruction { @@ -263,6 +259,15 @@ struct nv_instruction { ubyte quadop; }; +static INLINE int +nvi_vector_size(struct nv_instruction *nvi) +{ + int i; + assert(nvi); + for (i = 0; i < 4 && nvi->def[i]; ++i); + return i; +} + #define CFG_EDGE_FORWARD 0 #define CFG_EDGE_BACK 1 #define CFG_EDGE_LOOP_ENTER 2 diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c index 281ccf7ac61..d72b23c137a 100644 --- a/src/gallium/drivers/nv50/nv50_pc_optimize.c +++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c @@ -20,8 +20,6 @@ * SOFTWARE. */ -/* #define NV50PC_DEBUG */ - #include "nv50_pc.h" #define DESCEND_ARBITRARY(j, f) \ @@ -116,7 +114,7 @@ nvi_isnop(struct nv_instruction *nvi) return FALSE; if (nvi->src[0]->value->join->reg.id < 0) { - NV50_DBGMSG("nvi_isnop: orphaned value detected\n"); + NV50_DBGMSG(PROG_IR, "nvi_isnop: orphaned value detected\n"); return TRUE; } @@ -201,7 +199,7 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b) } if (!b->entry) { - NV50_DBGMSG("block %p is now empty\n", b); + NV50_DBGMSG(PROG_IR, "block %p is now empty\n", b); } else if (!b->exit->is_long) { assert(n32); @@ -240,7 +238,7 @@ nv_pc_exec_pass2(struct nv_pc *pc) { int i, ret; - NV50_DBGMSG("preparing %u blocks for emission\n", pc->num_blocks); + NV50_DBGMSG(PROG_IR, "preparing %u blocks for emission\n", pc->num_blocks); pc->num_blocks = 0; /* will reorder bb_list */ @@ -966,7 +964,8 @@ nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b) if (bb_is_if_else_endif(b)) { - NV50_DBGMSG("pass_flatten: IF/ELSE/ENDIF construct at BB:%i\n", b->id); + NV50_DBGMSG(PROG_IR, + "pass_flatten: IF/ELSE/ENDIF construct at BB:%i\n", b->id); for (n0 = 0, nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0) if (!nv50_nvi_can_predicate(nvi)) @@ -975,7 +974,7 @@ nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b) for (n1 = 0, nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1) if (!nv50_nvi_can_predicate(nvi)) break; -#ifdef NV50PC_DEBUG +#if NV50_DEBUG & NV50_DEBUG_PROG_IR if (nvi) { debug_printf("cannot predicate: "); nv_print_instruction(nvi); } diff --git a/src/gallium/drivers/nv50/nv50_pc_regalloc.c b/src/gallium/drivers/nv50/nv50_pc_regalloc.c index 39ae36681c0..e79fd594cea 100644 --- a/src/gallium/drivers/nv50/nv50_pc_regalloc.c +++ b/src/gallium/drivers/nv50/nv50_pc_regalloc.c @@ -20,11 +20,11 @@ * SOFTWARE. */ -/* #define NV50PC_DEBUG */ - -/* #define NV50_RA_DEBUG_LIVEI */ -/* #define NV50_RA_DEBUG_LIVE_SETS */ -/* #define NV50_RA_DEBUG_JOIN */ +#if NV50_DEBUG & NV50_DEBUG_PROG_RA +# define NV50_RA_DEBUG_LIVEI +# define NV50_RA_DEBUG_LIVE_SETS +# define NV50_RA_DEBUG_JOIN +#endif #include "nv50_context.h" #include "nv50_pc.h" @@ -32,14 +32,39 @@ #include "util/u_simple_list.h" #define NUM_REGISTER_FILES 4 +#define MAX_REGISTER_COUNT 256 struct register_set { struct nv_pc *pc; uint32_t last[NUM_REGISTER_FILES]; - uint32_t bits[NUM_REGISTER_FILES][8]; + uint32_t bits[NUM_REGISTER_FILES][(MAX_REGISTER_COUNT + 31) / 32]; }; +/* using OR because a set bit means occupied/unavailable, aliasing is allowed */ +static void +intersect_register_sets(struct register_set *dst, + struct register_set *src1, struct register_set *src2) +{ + int i, j; + + for (i = 0; i < NUM_REGISTER_FILES; ++i) { + for (j = 0; j < (MAX_REGISTER_COUNT + 31) / 32; ++j) + dst->bits[i][j] = src1->bits[i][j] | src2->bits[i][j]; + } +} + +static void +mask_register_set(struct register_set *set, uint32_t mask, uint32_t umask) +{ + int i, j; + + for (i = 0; i < NUM_REGISTER_FILES; ++i) { + for (j = 0; j < (MAX_REGISTER_COUNT + 31) / 32; ++j) + set->bits[i][j] = (set->bits[i][j] | mask) & umask; + } +} + struct nv_pc_pass { struct nv_pc *pc; @@ -61,11 +86,15 @@ ranges_coalesce(struct nv_range *range) } } +/* @return: TRUE if @new_range can be freed (i.e. was not reused) */ static boolean add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range) { struct nv_range *range, **nextp = &val->livei; + if (bgn == end) /* [a, a) is invalid / empty */ + return TRUE; + for (range = val->livei; range; range = range->next) { if (end < range->bgn) break; /* insert before */ @@ -251,6 +280,8 @@ reg_occupy(struct register_set *set, struct nv_value *val) id <<= s; m = (1 << (1 << s)) - 1; + assert(s >= 0); /* XXX: remove me */ + set->bits[f][id / 32] |= m << (id % 32); if (set->pc->max_reg[f] < id) @@ -286,15 +317,12 @@ join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) if (a->join->reg.id == b->join->reg.id) return TRUE; -#if 1 /* either a or b or both have been assigned */ if (a->join->reg.id >= 0 && b->join->reg.id >= 0) return FALSE; else if (b->join->reg.id >= 0) { - if (a->join->reg.id >= 0) - return FALSE; val = a; a = b; b = val; @@ -309,8 +337,6 @@ join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) return FALSE; } return TRUE; -#endif - return FALSE; } static INLINE void @@ -336,14 +362,14 @@ do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) assert(b->join == a->join); } -static INLINE void +static INLINE boolean try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) { if (!join_allowed(ctx, a, b)) { #ifdef NV50_RA_DEBUG_JOIN debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n); #endif - return; + return FALSE; } if (livei_have_overlap(a->join, b->join)) { #ifdef NV50_RA_DEBUG_JOIN @@ -351,10 +377,27 @@ try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) livei_print(a); livei_print(b); #endif - return; + return FALSE; } do_join_values(ctx, a, b); + + return TRUE; +} + +static void +join_values_nofail(struct nv_pc_pass *ctx, + struct nv_value *a, struct nv_value *b, boolean type_only) +{ + if (type_only) { + assert(join_allowed(ctx, a, b)); + do_join_values(ctx, a, b); + } else { + boolean ok = try_join_values(ctx, a, b); + if (!ok) { + NOUVEAU_ERR("failed to coalesce values\n"); + } + } } static INLINE boolean @@ -369,20 +412,32 @@ need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p) return (b->num_in > 1) && (n == 2); } +/* Look for the @phi's operand whose definition reaches @b. */ static int phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b, struct nv_basic_block *tb) { + struct nv_ref *srci, *srcj; int i, j; - for (j = -1, i = 0; i < 4 && phi->src[i]; ++i) { - if (!nvbb_reachable_by(b, phi->src[i]->value->insn->bb, tb)) + for (j = -1, i = 0; i < 6 && phi->src[i]; ++i) { + srci = phi->src[i]; + /* if already replaced, check with original source first */ + if (srci->flags & NV_REF_FLAG_REGALLOC_PRIV) + srci = srci->value->insn->src[0]; + if (!nvbb_reachable_by(b, srci->value->insn->bb, NULL)) continue; /* NOTE: back-edges are ignored by the reachable-by check */ - if (j < 0 || !nvbb_reachable_by(phi->src[j]->value->insn->bb, - phi->src[i]->value->insn->bb, tb)) + if (j < 0 || !nvbb_reachable_by(srcj->value->insn->bb, + srci->value->insn->bb, NULL)) { j = i; + srcj = srci; + } } + if (j >= 0 && nvbb_reachable_by(b, phi->def[0]->insn->bb, NULL)) + if (!nvbb_reachable_by(srcj->value->insn->bb, + phi->def[0]->insn->bb, NULL)) + j = -1; return j; } @@ -429,16 +484,21 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) ctx->pc->current_block = pn; for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) { - if ((j = phi_opnd_for_bb(i, p, b)) < 0) - continue; - val = i->src[j]->value; - - if (i->src[j]->flags) { - val = val->insn->src[0]->value; - while (j < 4 && i->src[j]) - ++j; - assert(j < 4); + j = phi_opnd_for_bb(i, p, b); + + if (j < 0) { + val = i->def[0]; + } else { + val = i->src[j]->value; + if (i->src[j]->flags & NV_REF_FLAG_REGALLOC_PRIV) { + j = -1; + /* use original value, we already encountered & replaced it */ + val = val->insn->src[0]->value; + } } + if (j < 0) /* need an additional source ? */ + for (j = 0; j < 5 && i->src[j] && i->src[j]->value != val; ++j); + assert(j < 5); ni = new_instruction(ctx->pc, NV_OP_MOV); @@ -452,11 +512,13 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) nv_reference(ctx->pc, &i->src[j], ni->def[0]); - i->src[j]->flags = 1; + i->src[j]->flags |= NV_REF_FLAG_REGALLOC_PRIV; } if (pn != p && pn->exit) { - ctx->pc->current_block = b->in[n ? 0 : 1]; + assert(!b->in[!n]->exit || b->in[!n]->exit->is_terminator); + /* insert terminator (branch to ENDIF) in new else block */ + ctx->pc->current_block = pn; ni = new_instruction(ctx->pc, NV_OP_BRA); ni->target = b; ni->is_terminator = 1; @@ -470,45 +532,50 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) return 0; } +#define JOIN_MASK_PHI (1 << 0) +#define JOIN_MASK_SELECT (1 << 1) +#define JOIN_MASK_MOV (1 << 2) +#define JOIN_MASK_TEX (1 << 3) + static int -pass_join_values(struct nv_pc_pass *ctx, int iter) +pass_join_values(struct nv_pc_pass *ctx, unsigned mask) { int c, n; for (n = 0; n < ctx->num_insns; ++n) { - struct nv_instruction *i = ctx->insns[n]; + struct nv_instruction *nvi, *i = ctx->insns[n]; switch (i->opcode) { case NV_OP_PHI: - if (iter != 2) + if (!(mask & JOIN_MASK_PHI)) break; - for (c = 0; c < 4 && i->src[c]; ++c) - try_join_values(ctx, i->def[0], i->src[c]->value); + for (c = 0; c < 5 && i->src[c]; ++c) + join_values_nofail(ctx, i->def[0], i->src[c]->value, FALSE); break; case NV_OP_MOV: - if ((iter == 2) && i->src[0]->value->insn && - !nv_is_vector_op(i->src[0]->value->join->insn->opcode)) + if (!(mask & JOIN_MASK_MOV)) + break; + nvi = i->src[0]->value->join->insn; + if (nvi && !nv_is_vector_op(nvi->opcode)) try_join_values(ctx, i->def[0], i->src[0]->value); break; case NV_OP_SELECT: - if (iter != 1) + if (!(mask & JOIN_MASK_SELECT)) break; - for (c = 0; c < 4 && i->src[c]; ++c) { - assert(join_allowed(ctx, i->def[0], i->src[c]->value)); - do_join_values(ctx, i->def[0], i->src[c]->value); - } + for (c = 0; c < 5 && i->src[c]; ++c) + join_values_nofail(ctx, i->def[0], i->src[c]->value, TRUE); break; case NV_OP_TEX: case NV_OP_TXB: case NV_OP_TXL: case NV_OP_TXQ: - if (iter) + if (!(mask & JOIN_MASK_TEX)) break; - for (c = 0; c < 4; ++c) { - if (!i->src[c]) - break; - do_join_values(ctx, i->def[c], i->src[c]->value); - } + /* This should work without conflicts because we always generate + * extra MOVs for the sources of a TEX. + */ + for (c = 0; c < 4 && i->src[c]; ++c) + join_values_nofail(ctx, i->def[c], i->src[c]->value, TRUE); break; default: break; @@ -643,15 +710,15 @@ static void collect_live_values(struct nv_basic_block *b, const int n) { int i; - if (b->out[0]) { - if (b->out[1]) { /* what to do about back-edges ? */ + if (b->out[0] && b->out_kind[0] != CFG_EDGE_FAKE) { + if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) { for (i = 0; i < n; ++i) b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i]; } else { memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t)); } } else - if (b->out[1]) { + if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) { memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t)); } else { memset(b->live_set, 0, n * sizeof(uint32_t)); @@ -770,8 +837,8 @@ insert_ordered_tail(struct nv_value *list, struct nv_value *nval) struct nv_value *elem; for (elem = list->prev; - elem != list && elem->livei->bgn > nval->livei->bgn; - elem = elem->prev); + elem != list && elem->livei->bgn > nval->livei->bgn; + elem = elem->prev); /* now elem begins before or at the same time as val */ nval->prev = elem; @@ -780,44 +847,49 @@ insert_ordered_tail(struct nv_value *list, struct nv_value *nval) elem->next = nval; } -static int -pass_linear_scan(struct nv_pc_pass *ctx, int iter) +static void +collect_register_values(struct nv_pc_pass *ctx, struct nv_value *head, + boolean assigned_only) { - struct nv_instruction *i; - struct register_set f, free; + struct nv_value *val; int k, n; - struct nv_value *cur, *val, *tmp[2]; - struct nv_value active, inactive, handled, unhandled; - make_empty_list(&active); - make_empty_list(&inactive); - make_empty_list(&handled); - make_empty_list(&unhandled); + make_empty_list(head); - nv50_ctor_register_set(ctx->pc, &free); - - /* joined values should have range = NULL and thus not be added; - * also, fixed memory values won't be added because they're not - * def'd, just used - */ for (n = 0; n < ctx->num_insns; ++n) { - i = ctx->insns[n]; + struct nv_instruction *i = ctx->insns[n]; + /* for joined values, only the representative will have livei != NULL */ for (k = 0; k < 4; ++k) { if (i->def[k] && i->def[k]->livei) - insert_ordered_tail(&unhandled, i->def[k]); - else - if (0 && i->def[k]) - debug_printf("skipping def'd value %i: no livei\n", i->def[k]->n); + if (!assigned_only || i->def[k]->reg.id >= 0) + insert_ordered_tail(head, i->def[k]); } if (i->flags_def && i->flags_def->livei) - insert_ordered_tail(&unhandled, i->flags_def); + if (!assigned_only || i->flags_def->reg.id >= 0) + insert_ordered_tail(head, i->flags_def); } - for (val = unhandled.next; val != unhandled.prev; val = val->next) { + for (val = head->next; val != head->prev; val = val->next) { assert(val->join == val); assert(val->livei->bgn <= val->next->livei->bgn); } +} + +static int +pass_linear_scan(struct nv_pc_pass *ctx, int iter) +{ + struct register_set f, free; + struct nv_value *cur, *val, *tmp[2]; + struct nv_value active, inactive, handled, unhandled; + + make_empty_list(&active); + make_empty_list(&inactive); + make_empty_list(&handled); + + nv50_ctor_register_set(ctx->pc, &free); + + collect_register_values(ctx, &unhandled, FALSE); foreach_s(cur, tmp[0], &unhandled) { remove_from_list(cur); @@ -854,13 +926,7 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter) reg_occupy(&f, val); if (cur->reg.id < 0) { - boolean mem = FALSE; - - if (nv_is_vector_op(cur->insn->opcode)) - mem = !reg_assign(&f, &cur->insn->def[0], 4); - else - if (iter) - mem = !reg_assign(&f, &cur, 1); + boolean mem = !reg_assign(&f, &cur, 1); if (mem) { NOUVEAU_ERR("out of registers\n"); @@ -874,13 +940,80 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter) return 0; } +/* Allocate values defined by instructions such as TEX, which have to be + * assigned to consecutive registers. + * Linear scan doesn't really work here since the values can have different + * live intervals. + */ +static int +pass_allocate_constrained_values(struct nv_pc_pass *ctx) +{ + struct nv_value regvals, *val; + struct nv_instruction *i; + struct nv_value *defs[4]; + struct register_set regs[4]; + int n, vsize, c; + uint32_t mask; + boolean mem; + + collect_register_values(ctx, ®vals, TRUE); + + for (n = 0; n < ctx->num_insns; ++n) { + i = ctx->insns[n]; + vsize = nvi_vector_size(i); + if (!(vsize > 1)) + continue; + assert(vsize <= 4); + for (c = 0; c < vsize; ++c) + defs[c] = i->def[c]->join; + + if (defs[0]->reg.id >= 0) { + for (c = 1; c < vsize; ++c) + assert(defs[c]->reg.id >= 0); + continue; + } + + /* Compute registers available for this "vector" of consecutive registers. + * Each value (component) has its own independent live interval. + */ + for (c = 0; c < vsize; ++c) { + nv50_ctor_register_set(ctx->pc, ®s[c]); + + foreach(val, ®vals) { + if (val->reg.id >= 0 && livei_have_overlap(val, defs[c])) + reg_occupy(®s[c], val); + } + /* Only 32 bit GPRs will be allocated here, but register set + * granularity for GPRs is 16 bit. + */ + mask = 0x03030303; + if (vsize == 2) /* granularity is 2 and not 4 */ + mask |= 0x03030303 << 4; + mask_register_set(®s[c], 0, mask << (c * 2)); + + if (defs[c]->livei) + insert_ordered_tail(®vals, defs[c]); + } + for (c = 1; c < vsize; ++c) + intersect_register_sets(®s[0], ®s[0], ®s[c]); + + mem = !reg_assign(®s[0], &defs[0], vsize); + + if (mem) { + NOUVEAU_ERR("out of registers\n"); + abort(); + } + } + return 0; +} + static int nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root) { struct nv_pc_pass *ctx; int i, ret; - NV50_DBGMSG("REGISTER ALLOCATION - entering\n"); + NV50_DBGMSG(PROG_RA, "REGISTER ALLOCATION - entering\n"); ctx = CALLOC_STRUCT(nv_pc_pass); if (!ctx) @@ -923,16 +1056,16 @@ nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root) livei_print(&pc->values[i]); #endif - ret = pass_join_values(ctx, 0); + ret = pass_join_values(ctx, JOIN_MASK_PHI); if (ret) goto out; - ret = pass_linear_scan(ctx, 0); + ret = pass_join_values(ctx, JOIN_MASK_SELECT | JOIN_MASK_TEX); if (ret) goto out; - ret = pass_join_values(ctx, 1); + ret = pass_join_values(ctx, JOIN_MASK_MOV); if (ret) goto out; - ret = pass_join_values(ctx, 2); + ret = pass_allocate_constrained_values(ctx); if (ret) goto out; ret = pass_linear_scan(ctx, 1); @@ -942,7 +1075,7 @@ nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root) for (i = 0; i < pc->num_values; ++i) livei_release(&pc->values[i]); - NV50_DBGMSG("REGISTER ALLOCATION - leaving\n"); + NV50_DBGMSG(PROG_RA, "REGISTER ALLOCATION - leaving\n"); out: FREE(ctx->insns); diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index a63f9d8a6d5..41d3e14dc0f 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -20,8 +20,6 @@ * SOFTWARE. */ -/* #define NV50_PROGRAM_DEBUG */ - #include "nv50_program.h" #include "nv50_pc.h" #include "nv50_context.h" @@ -486,7 +484,7 @@ nv50_fragprog_prepare(struct nv50_translation_info *ti) ++nintp; } - p->fp.colors = (1 << 24) | 4; /* CLAMP, FFC0_ID = 4 */ + p->fp.colors = 4 << NV50_3D_MAP_SEMANTIC_0_FFC0_ID__SHIFT; /* after HPOS */ for (i = 0; i < p->in_nr; ++i) { int j = p->in[i].id; @@ -564,7 +562,7 @@ nv50_prog_scan(struct nv50_translation_info *ti) tgsi_scan_shader(p->pipe.tokens, &ti->scan); -#ifdef NV50_PROGRAM_DEBUG +#if NV50_DEBUG & NV50_DEBUG_SHADER tgsi_dump(p->pipe.tokens, 0); #endif diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 7690c80eef0..641ad7e2780 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -20,6 +20,7 @@ * SOFTWARE. */ +#include "util/u_format.h" #include "util/u_format_s3tc.h" #include "pipe/p_screen.h" @@ -45,17 +46,8 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, if (sample_count > 1) return FALSE; - if (!util_format_s3tc_enabled) { - switch (format) { - case PIPE_FORMAT_DXT1_RGB: - case PIPE_FORMAT_DXT1_RGBA: - case PIPE_FORMAT_DXT3_RGBA: - case PIPE_FORMAT_DXT5_RGBA: - return FALSE; - default: - break; - } - } + if (!util_format_is_supported(format, bindings)) + return FALSE; switch (format) { case PIPE_FORMAT_Z16_UNORM: @@ -108,6 +100,8 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 1; case PIPE_CAP_MAX_RENDER_TARGETS: return 8; + case PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL: + return 1; case PIPE_CAP_TIMER_QUERY: case PIPE_CAP_OCCLUSION_QUERY: return 1; @@ -129,6 +123,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_PRIMITIVE_RESTART: case PIPE_CAP_TGSI_INSTANCEID: case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: + case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: return 1; default: NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); @@ -417,6 +412,10 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) OUT_RING (chan, NV50_3D_MULTISAMPLE_MODE_MS1); BEGIN_RING(chan, RING_3D(MULTISAMPLE_CTRL), 1); OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(LINE_LAST_PIXEL), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(BLEND_SEPARATE_ALPHA), 1); + OUT_RING (chan, 1); BEGIN_RING(chan, RING_3D(SCREEN_Y_CONTROL), 1); OUT_RING (chan, 0); diff --git a/src/gallium/drivers/nv50/nv50_shader_state.c b/src/gallium/drivers/nv50/nv50_shader_state.c index bea9c095bb3..82c346cb5ea 100644 --- a/src/gallium/drivers/nv50/nv50_shader_state.c +++ b/src/gallium/drivers/nv50/nv50_shader_state.c @@ -226,7 +226,7 @@ nv50_gmtyprog_validate(struct nv50_context *nv50) OUT_RING (chan, gp->code_base); } -void +static void nv50_sprite_coords_validate(struct nv50_context *nv50) { struct nouveau_channel *chan = nv50->screen->base.channel; @@ -282,6 +282,39 @@ nv50_sprite_coords_validate(struct nv50_context *nv50) OUT_RINGp (chan, pntc, 8); } +/* Validate state derived from shaders and the rasterizer cso. */ +void +nv50_validate_derived_rs(struct nv50_context *nv50) +{ + struct nouveau_channel *chan = nv50->screen->base.channel; + uint32_t color, psize; + + nv50_sprite_coords_validate(nv50); + + if (nv50->dirty & NV50_NEW_FRAGPROG) + return; + psize = nv50->state.semantic_psize & ~NV50_3D_MAP_SEMANTIC_3_PTSZ_EN__MASK; + color = nv50->state.semantic_color & ~NV50_3D_MAP_SEMANTIC_0_CLMP_EN; + + if (nv50->rast->pipe.clamp_vertex_color) + color |= NV50_3D_MAP_SEMANTIC_0_CLMP_EN; + + if (color != nv50->state.semantic_color) { + nv50->state.semantic_color = color; + BEGIN_RING(chan, RING_3D(MAP_SEMANTIC_0), 1); + OUT_RING (chan, color); + } + + if (nv50->rast->pipe.point_size_per_vertex) + psize |= NV50_3D_MAP_SEMANTIC_3_PTSZ_EN__MASK; + + if (psize != nv50->state.semantic_psize) { + nv50->state.semantic_psize = psize; + BEGIN_RING(chan, RING_3D(MAP_SEMANTIC_3), 1); + OUT_RING (chan, psize); + } +} + static int nv50_vec4_map(uint8_t *map, int mid, uint32_t lin[4], struct nv50_varying *in, struct nv50_varying *out) @@ -372,6 +405,9 @@ nv50_fp_linkage_validate(struct nv50_context *nv50) map[m++] = vp->vp.psiz; } + if (nv50->rast->pipe.clamp_vertex_color) + colors |= NV50_3D_MAP_SEMANTIC_0_CLMP_EN; + n = (m + 3) / 4; assert(m <= 64); @@ -404,6 +440,9 @@ nv50_fp_linkage_validate(struct nv50_context *nv50) nv50->state.interpolant_ctrl = interp; + nv50->state.semantic_color = colors; + nv50->state.semantic_psize = psiz; + BEGIN_RING(chan, RING_3D(NOPERSPECTIVE_BITMAP(0)), 4); OUT_RINGp (chan, lin, 4); diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index db257159698..799f49619d2 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -97,8 +97,14 @@ nv50_blend_state_create(struct pipe_context *pipe, so->pipe = *cso; - SB_BEGIN_3D(so, BLEND_ENABLE(0), 8); + SB_BEGIN_3D(so, COLOR_MASK_COMMON, 1); + SB_DATA (so, !cso->independent_blend_enable); + + SB_BEGIN_3D(so, BLEND_ENABLE_COMMON, 1); + SB_DATA (so, !cso->independent_blend_enable); + if (cso->independent_blend_enable) { + SB_BEGIN_3D(so, BLEND_ENABLE(0), 8); for (i = 0; i < 8; ++i) { SB_DATA(so, cso->rt[i].blend_enable); if (cso->rt[i].blend_enable) @@ -121,8 +127,8 @@ nv50_blend_state_create(struct pipe_context *pipe, } } } else { - for (i = 0; i < 8; ++i) - SB_DATA(so, cso->rt[0].blend_enable); + SB_BEGIN_3D(so, BLEND_ENABLE(0), 1); + SB_DATA (so, cso->rt[0].blend_enable); } if (emit_common_func) { @@ -145,17 +151,16 @@ nv50_blend_state_create(struct pipe_context *pipe, SB_DATA (so, 0); } - SB_BEGIN_3D(so, COLOR_MASK(0), 8); if (cso->independent_blend_enable) { + SB_BEGIN_3D(so, COLOR_MASK(0), 8); for (i = 0; i < 8; ++i) SB_DATA(so, nv50_colormask(cso->rt[i].colormask)); } else { - uint32_t cmask = nv50_colormask(cso->rt[0].colormask); - for (i = 0; i < 8; ++i) - SB_DATA(so, cmask); + SB_BEGIN_3D(so, COLOR_MASK(0), 1); + SB_DATA (so, nv50_colormask(cso->rt[0].colormask)); } - assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); + assert(so->size <= (sizeof(so->state) / sizeof(so->state[0]))); return so; } @@ -174,6 +179,7 @@ nv50_blend_state_delete(struct pipe_context *pipe, void *hwcso) FREE(hwcso); } +/* NOTE: ignoring line_last_pixel, using FALSE (set on screen init) */ static void * nv50_rasterizer_state_create(struct pipe_context *pipe, const struct pipe_rasterizer_state *cso) @@ -198,6 +204,9 @@ nv50_rasterizer_state_create(struct pipe_context *pipe, SB_BEGIN_3D(so, VERTEX_TWO_SIDE_ENABLE, 1); SB_DATA (so, cso->light_twoside); + SB_BEGIN_3D(so, FRAG_COLOR_CLAMP_EN, 1); + SB_DATA (so, cso->clamp_fragment_color ? 0x11111111 : 0x00000000); + SB_BEGIN_3D(so, LINE_WIDTH, 1); SB_DATA (so, fui(cso->line_width)); SB_BEGIN_3D(so, LINE_SMOOTH_ENABLE, 1); @@ -258,7 +267,7 @@ nv50_rasterizer_state_create(struct pipe_context *pipe, SB_DATA (so, fui(cso->offset_units * 2.0f)); } - assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); + assert(so->size <= (sizeof(so->state) / sizeof(so->state[0]))); return (void *)so; } @@ -337,7 +346,7 @@ nv50_zsa_state_create(struct pipe_context *pipe, SB_DATA (so, 0); } - assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); + assert(so->size <= (sizeof(so->state) / sizeof(so->state[0]))); return (void *)so; } diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index f3d45eb95e0..cdf1a982fcc 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -309,7 +309,7 @@ static struct state_validate { { nv50_fp_linkage_validate, NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, { nv50_gp_linkage_validate, NV50_NEW_GMTYPROG | NV50_NEW_VERTPROG }, - { nv50_sprite_coords_validate, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER | + { nv50_validate_derived_rs, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER | NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, { nv50_constbufs_validate, NV50_NEW_CONSTBUF }, { nv50_validate_textures, NV50_NEW_TEXTURES }, diff --git a/src/gallium/drivers/nv50/nv50_stateobj.h b/src/gallium/drivers/nv50/nv50_stateobj.h index 515e3e78d42..4c98c7e46fc 100644 --- a/src/gallium/drivers/nv50/nv50_stateobj.h +++ b/src/gallium/drivers/nv50/nv50_stateobj.h @@ -21,13 +21,13 @@ struct nv50_blend_stateobj { struct pipe_blend_state pipe; int size; - uint32_t state[78]; + uint32_t state[82]; // TODO: allocate less if !independent_blend_enable }; struct nv50_rasterizer_stateobj { struct pipe_rasterizer_state pipe; int size; - uint32_t state[40]; + uint32_t state[42]; }; struct nv50_zsa_stateobj { diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index dc9e2880f0f..3d7e880ccce 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -27,6 +27,7 @@ #include "util/u_inlines.h" #include "util/u_pack_color.h" #include "util/u_format.h" +#include "util/u_surface.h" #include "nv50_context.h" #include "nv50_resource.h" @@ -198,6 +199,13 @@ nv50_resource_copy_region(struct pipe_context *pipe, int ret; unsigned dst_layer = dstz, src_layer = src_box->z; + /* Fallback for buffers. */ + if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { + util_resource_copy_region(pipe, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box); + return; + } + assert((src->format == dst->format) || (nv50_2d_format_faithful(src->format) && nv50_2d_format_faithful(dst->format))); diff --git a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c index 1449cb04c69..25dcaaea14f 100644 --- a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c +++ b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c @@ -20,8 +20,6 @@ * SOFTWARE. */ -/* #define NV50_TGSI2NC_DEBUG */ - #include <unistd.h> #include "nv50_context.h" @@ -213,7 +211,7 @@ static INLINE void bld_warn_uninitialized(struct bld_context *bld, int kind, struct bld_value_stack *stk, struct nv_basic_block *b) { -#ifdef NV50_TGSI2NC_DEBUG +#if NV50_DEBUG & NV50_DEBUG_PROG_IR long i = (stk - &bld->tvs[0][0]) / 4; long c = (stk - &bld->tvs[0][0]) & 3; @@ -273,6 +271,12 @@ fetch_by_bb(struct bld_value_stack *stack, fetch_by_bb(stack, vals, n, b->in[i]); } +static INLINE boolean +nvbb_is_terminated(struct nv_basic_block *bb) +{ + return bb->exit && bb->exit->is_terminator; +} + static INLINE struct nv_value * bld_load_imm_u32(struct bld_context *bld, uint32_t u); @@ -1556,7 +1560,7 @@ bld_instruction(struct bld_context *bld, int c; uint opcode = translate_opcode(insn->Instruction.Opcode); -#ifdef NV50_TGSI2NC_DEBUG +#if NV50_DEBUG & NV50_DEBUG_PROG_IR debug_printf("bld_instruction:"); tgsi_dump_instruction(insn, 1); #endif @@ -1727,8 +1731,7 @@ bld_instruction(struct bld_context *bld, { struct nv_basic_block *b = new_basic_block(bld->pc); - if (bld->pc->current_block->exit && - !bld->pc->current_block->exit->is_terminator) + if (!nvbb_is_terminated(bld->pc->current_block)) bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, b, FALSE); --bld->cond_lvl; @@ -1800,7 +1803,8 @@ bld_instruction(struct bld_context *bld, { struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1]; - bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, bb, FALSE); + if (!nvbb_is_terminated(bld->pc->current_block)) + bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, bb, FALSE); nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_BACK); |