summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/nv50
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/nv50')
-rw-r--r-- src/gallium/drivers/nv50/nv50_3d.xml.h 10
-rw-r--r-- src/gallium/drivers/nv50/nv50_context.c 12
-rw-r--r-- src/gallium/drivers/nv50/nv50_context.h 15
-rw-r--r-- src/gallium/drivers/nv50/nv50_debug.h 25
-rw-r--r-- src/gallium/drivers/nv50/nv50_formats.c 126
-rw-r--r-- src/gallium/drivers/nv50/nv50_pc.c 24
-rw-r--r-- src/gallium/drivers/nv50/nv50_pc.h 19
-rw-r--r-- src/gallium/drivers/nv50/nv50_pc_optimize.c 13
-rw-r--r-- src/gallium/drivers/nv50/nv50_pc_regalloc.c 309
-rw-r--r-- src/gallium/drivers/nv50/nv50_program.c 6
-rw-r--r-- src/gallium/drivers/nv50/nv50_screen.c 21
-rw-r--r-- src/gallium/drivers/nv50/nv50_shader_state.c 41
-rw-r--r-- src/gallium/drivers/nv50/nv50_state.c 29
-rw-r--r-- src/gallium/drivers/nv50/nv50_state_validate.c 2
-rw-r--r-- src/gallium/drivers/nv50/nv50_stateobj.h 4
-rw-r--r-- src/gallium/drivers/nv50/nv50_surface.c 8
-rw-r--r-- src/gallium/drivers/nv50/nv50_tgsi_to_nc.c 18
17 files changed, 511 insertions, 171 deletions
diff --git a/src/gallium/drivers/nv50/nv50_3d.xml.h b/src/gallium/drivers/nv50/nv50_3d.xml.h
index 9bb3211728c..41a380ec2ec 100644
--- a/src/gallium/drivers/nv50/nv50_3d.xml.h
+++ b/src/gallium/drivers/nv50/nv50_3d.xml.h
@@ -558,7 +558,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50_3D_UNK0F8C 0x00000f8c
-#define NV50_3D_UNK0F90 0x00000f90
+#define NV50_3D_COLOR_MASK_COMMON 0x00000f90
#define NV50_3D_UNK0F94 0x00000f94
@@ -1007,7 +1007,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50_3D_TEX_CACHE_CTL_UNK1__MASK 0x00000030
#define NV50_3D_TEX_CACHE_CTL_UNK1__SHIFT 4
-#define NV50_3D_UNK133C 0x0000133c
+#define NV50_3D_BLEND_SEPARATE_ALPHA 0x0000133c
#define NV50_3D_BLEND_EQUATION_RGB 0x00001340
#define NV50_3D_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006
@@ -1033,7 +1033,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50_3D_BLEND_FUNC_DST_ALPHA 0x00001358
-#define NV50_3D_UNK135C 0x0000135c
+#define NV50_3D_BLEND_ENABLE_COMMON 0x0000135c
#define NV50_3D_BLEND_ENABLE(i0) (0x00001360 + 0x4*(i0))
#define NV50_3D_BLEND_ENABLE__ESIZE 0x00000004
@@ -1673,7 +1673,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50_3D_MAP_SEMANTIC_0_BFC0_ID__SHIFT 8
#define NV50_3D_MAP_SEMANTIC_0_COLR_NR__MASK 0x00ff0000
#define NV50_3D_MAP_SEMANTIC_0_COLR_NR__SHIFT 16
-#define NV50_3D_MAP_SEMANTIC_0_CLMP_EN 0xff000000
+#define NV50_3D_MAP_SEMANTIC_0_CLMP_EN 0x01000000
#define NV50_3D_MAP_SEMANTIC_1 0x00001908
#define NV50_3D_MAP_SEMANTIC_1_CLIP_START__MASK 0x000000ff
@@ -1706,7 +1706,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50_3D_CULL_FACE_BACK 0x00000405
#define NV50_3D_CULL_FACE_FRONT_AND_BACK 0x00000408
-#define NV50_3D_UNK1924 0x00001924
+#define NV50_3D_LINE_LAST_PIXEL 0x00001924
#define NVA3_3D_FP_MULTISAMPLE 0x00001928
#define NVA3_3D_FP_MULTISAMPLE_EXPORT_SAMPLE_MASK 0x00000001
diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c
index 930cee7c1e7..632ca4daf74 100644
--- a/src/gallium/drivers/nv50/nv50_context.c
+++ b/src/gallium/drivers/nv50/nv50_context.c
@@ -46,6 +46,17 @@ nv50_flush(struct pipe_context *pipe,
FIRE_RING(screen->channel);
}
+static void
+nv50_texture_barrier(struct pipe_context *pipe)
+{
+ struct nouveau_channel *chan = nv50_context(pipe)->screen->base.channel;
+ /* Emit two 3D methods on the context's channel: SERIALIZE (0), then TEX_CACHE_CTL (0x20). */
+ BEGIN_RING(chan, RING_3D(SERIALIZE), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D(TEX_CACHE_CTL), 1);
+ OUT_RING (chan, 0x20); /* NOTE(review): presumably flushes the texture cache - confirm */
+}
+
void
nv50_default_flush_notify(struct nouveau_channel *chan)
{
@@ -125,6 +136,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv)
pipe->clear = nv50_clear;
pipe->flush = nv50_flush;
+ pipe->texture_barrier = nv50_texture_barrier;
if (!screen->cur_ctx)
screen->cur_ctx = nv50;
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 46e6c2250af..3f031994f0a 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -1,7 +1,6 @@
#ifndef __NV50_CONTEXT_H__
#define __NV50_CONTEXT_H__
-#include <stdio.h>
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
@@ -13,6 +12,7 @@
#include "draw/draw_vertex.h"
+#include "nv50_debug.h"
#include "nv50_winsys.h"
#include "nv50_stateobj.h"
#include "nv50_screen.h"
@@ -26,15 +26,6 @@
#include "nv50_3d.xml.h"
#include "nv50_2d.xml.h"
-#define NOUVEAU_ERR(fmt, args...) \
- fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args);
-
-#ifdef NOUVEAU_DEBUG
-# define NOUVEAU_DBG(args...) printf(args);
-#else
-# define NOUVEAU_DBG(args...)
-#endif
-
#define NV50_NEW_BLEND (1 << 0)
#define NV50_NEW_RASTERIZER (1 << 1)
#define NV50_NEW_ZSA (1 << 2)
@@ -80,6 +71,8 @@ struct nv50_context {
uint32_t instance_elts; /* bitmask of per-instance elements */
uint32_t instance_base;
uint32_t interpolant_ctrl;
+ uint32_t semantic_color;
+ uint32_t semantic_psize;
int32_t index_bias;
boolean prim_restart;
boolean point_sprite;
@@ -183,7 +176,7 @@ void nv50_fragprog_validate(struct nv50_context *);
void nv50_fp_linkage_validate(struct nv50_context *);
void nv50_gp_linkage_validate(struct nv50_context *);
void nv50_constbufs_validate(struct nv50_context *);
-void nv50_sprite_coords_validate(struct nv50_context *);
+void nv50_validate_derived_rs(struct nv50_context *);
/* nv50_state.c */
extern void nv50_init_state_functions(struct nv50_context *);
diff --git a/src/gallium/drivers/nv50/nv50_debug.h b/src/gallium/drivers/nv50/nv50_debug.h
new file mode 100644
index 00000000000..f3dee621519
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_debug.h
@@ -0,0 +1,25 @@
+
+#ifndef __NV50_DEBUG_H__
+#define __NV50_DEBUG_H__
+
+#include <stdio.h>
+
+#include "util/u_debug.h"
+
+#define NV50_DEBUG_MISC 0x0001
+#define NV50_DEBUG_SHADER 0x0100 /* TGSI dump, emitted binary, translation result */
+#define NV50_DEBUG_PROG_IR 0x0200 /* IR dumps and optimizer notes */
+#define NV50_DEBUG_PROG_RA 0x0400 /* register allocation */
+#define NV50_DEBUG_PROG_CFLOW 0x0800 /* control flow graph dump */
+#define NV50_DEBUG_PROG_ALL 0x1f00 /* all program bits above; 0x1000 has no name here */
+/* Compile-time mask of NV50_DEBUG_* bits to enable (tested with #if and by NV50_DBGMSG). */
+#define NV50_DEBUG 0
+
+#define NOUVEAU_ERR(fmt, args...) \
+ fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args)
+
+#define NV50_DBGMSG(ch, args...) \
+ if ((NV50_DEBUG) & (NV50_DEBUG_##ch)) \
+ debug_printf(args)
+
+#endif /* __NV50_DEBUG_H__ */
diff --git a/src/gallium/drivers/nv50/nv50_formats.c b/src/gallium/drivers/nv50/nv50_formats.c
index 7946117cf30..c65189d0671 100644
--- a/src/gallium/drivers/nv50/nv50_formats.c
+++ b/src/gallium/drivers/nv50/nv50_formats.c
@@ -93,6 +93,10 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 4_4_4_4, 1),
SAMPLER_VIEW },
+ [PIPE_FORMAT_B4G4R4X4_UNORM] = { 0,
+ B_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 4_4_4_4, 1),
+ SAMPLER_VIEW },
+
[PIPE_FORMAT_R10G10B10A2_UNORM] = { NV50_SURFACE_FORMAT_A2B10G10R10_UNORM,
A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 0),
SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER | SCANOUT },
@@ -138,14 +142,62 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
A_(C0, C0, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8, 0),
SAMPLER_VIEW | RENDER_TARGET },
- [PIPE_FORMAT_I8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM,
- A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 8, 0),
+ [PIPE_FORMAT_L16_SNORM] = { NV50_SURFACE_FORMAT_R16_SNORM,
+ B_(C0, C0, C0, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 16, 0),
+ SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_L16_FLOAT] = { NV50_SURFACE_FORMAT_R16_FLOAT,
+ B_(C0, C0, C0, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 16, 0),
SAMPLER_VIEW | RENDER_TARGET },
+ [PIPE_FORMAT_L32_FLOAT] = { NV50_SURFACE_FORMAT_R32_FLOAT,
+ B_(C0, C0, C0, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 32, 0),
+ SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_I8_UNORM] = { 0,
+ A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 8, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_I16_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM,
+ A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 16, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_I16_SNORM] = { NV50_SURFACE_FORMAT_R16_SNORM,
+ B_(C0, C0, C0, C0, SNORM, SNORM, SNORM, SNORM, 16, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_I16_FLOAT] = { NV50_SURFACE_FORMAT_R16_FLOAT,
+ B_(C0, C0, C0, C0, FLOAT, FLOAT, FLOAT, FLOAT, 16, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_I32_FLOAT] = { NV50_SURFACE_FORMAT_R32_FLOAT,
+ B_(C0, C0, C0, C0, FLOAT, FLOAT, FLOAT, FLOAT, 32, 0),
+ SAMPLER_VIEW },
+
[PIPE_FORMAT_A8_UNORM] = { NV50_SURFACE_FORMAT_A8_UNORM,
A_(ZERO, ZERO, ZERO, C0, UNORM, UNORM, UNORM, UNORM, 8, 0),
SAMPLER_VIEW | RENDER_TARGET },
+ [PIPE_FORMAT_A8_SNORM] = { 0,
+ A_(ZERO, ZERO, ZERO, C0, SNORM, SNORM, SNORM, SNORM, 8, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_A16_UNORM] = { 0,
+ A_(ZERO, ZERO, ZERO, C0, UNORM, UNORM, UNORM, UNORM, 16, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_A16_SNORM] = { 0,
+ A_(ZERO, ZERO, ZERO, C0, SNORM, SNORM, SNORM, SNORM, 16, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_A16_FLOAT] = { 0,
+ B_(ZERO, ZERO, ZERO, C0, FLOAT, FLOAT, FLOAT, FLOAT, 16, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_A32_FLOAT] = { 0,
+ B_(ZERO, ZERO, ZERO, C0, FLOAT, FLOAT, FLOAT, FLOAT, 32, 0),
+ SAMPLER_VIEW },
+
[PIPE_FORMAT_L8A8_UNORM] = { 0,
A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
SAMPLER_VIEW },
@@ -154,6 +206,26 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
SAMPLER_VIEW },
+ [PIPE_FORMAT_L16A16_UNORM] = { 0,
+ A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 16_16, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_L16A16_SNORM] = { 0,
+ A_(C0, C0, C0, C1, SNORM, SNORM, SNORM, SNORM, 16_16, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_L16A16_FLOAT] = { 0,
+ B_(C0, C0, C0, C1, FLOAT, FLOAT, FLOAT, FLOAT, 16_16, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_L32A32_FLOAT] = { 0,
+ B_(C0, C0, C0, C1, FLOAT, FLOAT, FLOAT, FLOAT, 32_32, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_L4A4_UNORM] = { 0,
+ B_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 4_4, 0),
+ SAMPLER_VIEW },
+
/* DXT, RGTC */
[PIPE_FORMAT_DXT1_RGB] = { 0,
@@ -172,6 +244,22 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT5, 0),
SAMPLER_VIEW },
+ [PIPE_FORMAT_DXT1_SRGB] = { 0,
+ B_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, DXT1, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_DXT1_SRGBA] = { 0,
+ B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT1, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_DXT3_SRGBA] = { 0,
+ B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT3, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_DXT5_SRGBA] = { 0,
+ B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT5, 0),
+ SAMPLER_VIEW },
+
[PIPE_FORMAT_RGTC1_UNORM] = { 0,
B_(C0, ZERO, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, RGTC1, 0),
SAMPLER_VIEW },
@@ -237,7 +325,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
/* SNORM 32 */
[PIPE_FORMAT_R32G32B32A32_SNORM] = { 0,
- A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0),
+ A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 32_32_32_32, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R32G32B32_SNORM] = { 0,
@@ -255,7 +343,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
/* UNORM 32 */
[PIPE_FORMAT_R32G32B32A32_UNORM] = { 0,
- A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0),
+ A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 32_32_32_32, 0),
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R32G32B32_UNORM] = { 0,
@@ -281,7 +369,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R16G16_SNORM] = { NV50_SURFACE_FORMAT_R16G16_SNORM,
- A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16, 0),
+ A_(C0, C1, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 16_16, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
[PIPE_FORMAT_R16_SNORM] = { NV50_SURFACE_FORMAT_R16_SNORM,
@@ -299,7 +387,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
VERTEX_BUFFER | SAMPLER_VIEW },
[PIPE_FORMAT_R16G16_UNORM] = { NV50_SURFACE_FORMAT_R16G16_UNORM,
- A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16, 0),
+ A_(C0, C1, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 16_16, 0),
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
[PIPE_FORMAT_R16_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM,
@@ -457,4 +545,30 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
[PIPE_FORMAT_R8_USCALED] = { 0,
A_(C0, ZERO, ZERO, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 8, 0),
VERTEX_BUFFER },
+
+ /* OTHER FORMATS */
+
+ [PIPE_FORMAT_R8G8_B8G8_UNORM] = { 0,
+ B_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, C1_C2_C1_C0, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_G8R8_G8B8_UNORM] = { 0,
+ B_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, C2_C1_C0_C1, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R8SG8SB8UX8U_NORM] = { 0,
+ B_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 8_8_8_8, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R5SG5SB6U_NORM] = { 0,
+ B_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 6_5_5, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R1_UNORM] = { 0,
+ B_(C0, ZERO, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, BITMAP_8X8, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_A8B8G8R8_UNORM] = { 0,
+ B_(C3, C2, C1, C0, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0),
+ SAMPLER_VIEW },
};
diff --git a/src/gallium/drivers/nv50/nv50_pc.c b/src/gallium/drivers/nv50/nv50_pc.c
index 82f1b846527..7900bf811df 100644
--- a/src/gallium/drivers/nv50/nv50_pc.c
+++ b/src/gallium/drivers/nv50/nv50_pc.c
@@ -20,8 +20,6 @@
* SOFTWARE.
*/
-/* #define NV50PC_DEBUG */
-
#include "nv50_pc.h"
#include "nv50_program.h"
@@ -180,6 +178,7 @@ nv50_op_can_write_flags(uint opcode)
switch (opcode) { /* obvious ones like KIL, CALL, etc. not included */
case NV_OP_PHI:
case NV_OP_MOV:
+ case NV_OP_SELECT:
case NV_OP_LINTERP:
case NV_OP_PINTERP:
case NV_OP_LDA:
@@ -367,7 +366,7 @@ nv_print_program(struct nv_pc *pc)
nv_print_function(pc->root[i]);
}
-#ifdef NV50PC_DEBUG
+#if NV50_DEBUG & NV50_DEBUG_PROG_CFLOW
static void
nv_do_print_cfgraph(struct nv_pc *pc, FILE *f, struct nv_basic_block *b)
{
@@ -425,7 +424,7 @@ nv_print_cfgraph(struct nv_pc *pc, const char *filepath, int subr)
fclose(f);
}
-#endif
+#endif /* NV50_DEBUG_PROG_CFLOW */
static INLINE void
nvcg_show_bincode(struct nv_pc *pc)
@@ -446,7 +445,7 @@ nv50_emit_program(struct nv_pc *pc)
uint32_t *code = pc->emit;
int n;
- NV50_DBGMSG("emitting program: size = %u\n", pc->bin_size);
+ NV50_DBGMSG(SHADER, "emitting program: size = %u\n", pc->bin_size);
for (n = 0; n < pc->num_blocks; ++n) {
struct nv_instruction *i;
@@ -472,7 +471,7 @@ nv50_emit_program(struct nv_pc *pc)
pc->emit = code;
code[pc->bin_size / 4 - 1] |= 1;
-#ifdef NV50PC_DEBUG
+#if NV50_DEBUG & NV50_DEBUG_SHADER
nvcg_show_bincode(pc);
#endif
@@ -500,7 +499,7 @@ nv50_generate_code(struct nv50_translation_info *ti)
ret = nv50_tgsi_to_nc(pc, ti);
if (ret)
goto out;
-#ifdef NV50PC_DEBUG
+#if NV50_DEBUG & NV50_DEBUG_PROG_IR
nv_print_program(pc);
#endif
@@ -510,7 +509,7 @@ nv50_generate_code(struct nv50_translation_info *ti)
ret = nv_pc_exec_pass0(pc);
if (ret)
goto out;
-#ifdef NV50PC_DEBUG
+#if NV50_DEBUG & NV50_DEBUG_PROG_IR
nv_print_program(pc);
#endif
@@ -518,7 +517,7 @@ nv50_generate_code(struct nv50_translation_info *ti)
ret = nv_pc_exec_pass1(pc);
if (ret)
goto out;
-#ifdef NV50PC_DEBUG
+#if NV50_DEBUG & NV50_DEBUG_PROG_CFLOW
nv_print_program(pc);
nv_print_cfgraph(pc, "nv50_shader_cfgraph.dot", 0);
#endif
@@ -552,7 +551,7 @@ nv50_generate_code(struct nv50_translation_info *ti)
ti->p->uses_lmem = ti->store_to_memory;
- NV50_DBGMSG("SHADER TRANSLATION - %s\n", ret ? "failure" : "success");
+ NV50_DBGMSG(SHADER, "SHADER TRANSLATION - %s\n", ret ? "failed" : "success");
out:
nv_pc_free_refs(pc);
@@ -624,6 +623,9 @@ nvbb_insert_tail(struct nv_basic_block *b, struct nv_instruction *i)
i->bb = b;
b->num_instructions++;
+
+ if (i->prev && i->prev->is_terminator)
+ nv_nvi_permute(i->prev, i);
}
void
@@ -669,7 +671,7 @@ nv_nvi_delete(struct nv_instruction *nvi)
if (nvi == b->phi) {
if (nvi->opcode != NV_OP_PHI)
- NV50_DBGMSG("NOTE: b->phi points to non-PHI instruction\n");
+ NV50_DBGMSG(PROG_IR, "NOTE: b->phi points to non-PHI instruction\n");
assert(!nvi->prev);
if (!nvi->next || nvi->next->opcode != NV_OP_PHI)
diff --git a/src/gallium/drivers/nv50/nv50_pc.h b/src/gallium/drivers/nv50/nv50_pc.h
index e6f3815bafe..5bb0e1296bb 100644
--- a/src/gallium/drivers/nv50/nv50_pc.h
+++ b/src/gallium/drivers/nv50/nv50_pc.h
@@ -23,13 +23,7 @@
#ifndef __NV50_COMPILER_H__
#define __NV50_COMPILER_H__
-#define NV50PC_DEBUG
-
-#ifdef NV50PC_DEBUG
-# define NV50_DBGMSG(args...) debug_printf(args)
-#else
-# define NV50_DBGMSG(args...)
-#endif
+#include "nv50_debug.h"
#include "pipe/p_defines.h"
#include "util/u_inlines.h"
@@ -228,6 +222,8 @@ struct nv_ref {
ubyte flags; /* not used yet */
};
+#define NV_REF_FLAG_REGALLOC_PRIV (1 << 0)
+
struct nv_basic_block;
struct nv_instruction {
@@ -263,6 +259,15 @@ struct nv_instruction {
ubyte quadop;
};
+static INLINE int
+nvi_vector_size(struct nv_instruction *nvi)
+{
+   int count = 0; /* number of leading non-NULL entries in nvi->def[], at most 4 */
+   assert(nvi);
+   while (count < 4 && nvi->def[count]) ++count;
+   return count;
+}
+
#define CFG_EDGE_FORWARD 0
#define CFG_EDGE_BACK 1
#define CFG_EDGE_LOOP_ENTER 2
diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c
index 281ccf7ac61..d72b23c137a 100644
--- a/src/gallium/drivers/nv50/nv50_pc_optimize.c
+++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c
@@ -20,8 +20,6 @@
* SOFTWARE.
*/
-/* #define NV50PC_DEBUG */
-
#include "nv50_pc.h"
#define DESCEND_ARBITRARY(j, f) \
@@ -116,7 +114,7 @@ nvi_isnop(struct nv_instruction *nvi)
return FALSE;
if (nvi->src[0]->value->join->reg.id < 0) {
- NV50_DBGMSG("nvi_isnop: orphaned value detected\n");
+ NV50_DBGMSG(PROG_IR, "nvi_isnop: orphaned value detected\n");
return TRUE;
}
@@ -201,7 +199,7 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
}
if (!b->entry) {
- NV50_DBGMSG("block %p is now empty\n", b);
+ NV50_DBGMSG(PROG_IR, "block %p is now empty\n", b);
} else
if (!b->exit->is_long) {
assert(n32);
@@ -240,7 +238,7 @@ nv_pc_exec_pass2(struct nv_pc *pc)
{
int i, ret;
- NV50_DBGMSG("preparing %u blocks for emission\n", pc->num_blocks);
+ NV50_DBGMSG(PROG_IR, "preparing %u blocks for emission\n", pc->num_blocks);
pc->num_blocks = 0; /* will reorder bb_list */
@@ -966,7 +964,8 @@ nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
if (bb_is_if_else_endif(b)) {
- NV50_DBGMSG("pass_flatten: IF/ELSE/ENDIF construct at BB:%i\n", b->id);
+ NV50_DBGMSG(PROG_IR,
+ "pass_flatten: IF/ELSE/ENDIF construct at BB:%i\n", b->id);
for (n0 = 0, nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0)
if (!nv50_nvi_can_predicate(nvi))
@@ -975,7 +974,7 @@ nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
for (n1 = 0, nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1)
if (!nv50_nvi_can_predicate(nvi))
break;
-#ifdef NV50PC_DEBUG
+#if NV50_DEBUG & NV50_DEBUG_PROG_IR
if (nvi) {
debug_printf("cannot predicate: "); nv_print_instruction(nvi);
}
diff --git a/src/gallium/drivers/nv50/nv50_pc_regalloc.c b/src/gallium/drivers/nv50/nv50_pc_regalloc.c
index 39ae36681c0..e79fd594cea 100644
--- a/src/gallium/drivers/nv50/nv50_pc_regalloc.c
+++ b/src/gallium/drivers/nv50/nv50_pc_regalloc.c
@@ -20,11 +20,11 @@
* SOFTWARE.
*/
-/* #define NV50PC_DEBUG */
-
-/* #define NV50_RA_DEBUG_LIVEI */
-/* #define NV50_RA_DEBUG_LIVE_SETS */
-/* #define NV50_RA_DEBUG_JOIN */
+#if NV50_DEBUG & NV50_DEBUG_PROG_RA
+# define NV50_RA_DEBUG_LIVEI
+# define NV50_RA_DEBUG_LIVE_SETS
+# define NV50_RA_DEBUG_JOIN
+#endif
#include "nv50_context.h"
#include "nv50_pc.h"
@@ -32,14 +32,39 @@
#include "util/u_simple_list.h"
#define NUM_REGISTER_FILES 4
+#define MAX_REGISTER_COUNT 256
struct register_set {
struct nv_pc *pc;
uint32_t last[NUM_REGISTER_FILES];
- uint32_t bits[NUM_REGISTER_FILES][8];
+ uint32_t bits[NUM_REGISTER_FILES][(MAX_REGISTER_COUNT + 31) / 32];
};
+/* Intersect the free registers of src1 and src2: a set bit means occupied, so the words are ORed. dst may alias a source. */
+static void
+intersect_register_sets(struct register_set *dst,
+                        struct register_set *src1, struct register_set *src2)
+{
+   const int words = (MAX_REGISTER_COUNT + 31) / 32; /* 32 bit words per register file */
+   int file, w;
+   for (file = 0; file < NUM_REGISTER_FILES; ++file)
+      for (w = 0; w < words; ++w)
+         dst->bits[file][w] = src1->bits[file][w] | src2->bits[file][w];
+   /* only bits[] is written; the other members of dst are left untouched */
+}
+
+static void
+mask_register_set(struct register_set *set, uint32_t mask, uint32_t umask)
+{
+   int file, w; /* applied to every 32 bit word of every register file */
+   for (file = 0; file < NUM_REGISTER_FILES; ++file)
+      for (w = 0; w < (MAX_REGISTER_COUNT + 31) / 32; ++w) {
+         set->bits[file][w] |= mask;   /* force the @mask bits on ... */
+         set->bits[file][w] &= umask;  /* ... then keep only the @umask bits */
+      }
+}
+
struct nv_pc_pass {
struct nv_pc *pc;
@@ -61,11 +86,15 @@ ranges_coalesce(struct nv_range *range)
}
}
+/* @return: TRUE if @new_range can be freed (i.e. was not reused) */
static boolean
add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range)
{
struct nv_range *range, **nextp = &val->livei;
+ if (bgn == end) /* [a, a) is invalid / empty */
+ return TRUE;
+
for (range = val->livei; range; range = range->next) {
if (end < range->bgn)
break; /* insert before */
@@ -251,6 +280,8 @@ reg_occupy(struct register_set *set, struct nv_value *val)
id <<= s;
m = (1 << (1 << s)) - 1;
+ assert(s >= 0); /* XXX: remove me */
+
set->bits[f][id / 32] |= m << (id % 32);
if (set->pc->max_reg[f] < id)
@@ -286,15 +317,12 @@ join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
if (a->join->reg.id == b->join->reg.id)
return TRUE;
-#if 1
/* either a or b or both have been assigned */
if (a->join->reg.id >= 0 && b->join->reg.id >= 0)
return FALSE;
else
if (b->join->reg.id >= 0) {
- if (a->join->reg.id >= 0)
- return FALSE;
val = a;
a = b;
b = val;
@@ -309,8 +337,6 @@ join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
return FALSE;
}
return TRUE;
-#endif
- return FALSE;
}
static INLINE void
@@ -336,14 +362,14 @@ do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
assert(b->join == a->join);
}
-static INLINE void
+static INLINE boolean
try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
{
if (!join_allowed(ctx, a, b)) {
#ifdef NV50_RA_DEBUG_JOIN
debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n);
#endif
- return;
+ return FALSE;
}
if (livei_have_overlap(a->join, b->join)) {
#ifdef NV50_RA_DEBUG_JOIN
@@ -351,10 +377,27 @@ try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
livei_print(a);
livei_print(b);
#endif
- return;
+ return FALSE;
}
do_join_values(ctx, a, b);
+
+ return TRUE;
+}
+/* Coalesce a and b where the caller expects success. @type_only: only assert join_allowed() and join; else use try_join_values(). */
+static void
+join_values_nofail(struct nv_pc_pass *ctx,
+                   struct nv_value *a, struct nv_value *b, boolean type_only)
+{
+   if (!type_only) {
+      if (!try_join_values(ctx, a, b)) {
+         NOUVEAU_ERR("failed to coalesce values\n");
+      }
+      return;
+   }
+   assert(join_allowed(ctx, a, b));
+   do_join_values(ctx, a, b);
+   /* NOTE: on the try_join_values() path a failed join is only reported, not fatal */
+}
static INLINE boolean
@@ -369,20 +412,32 @@ need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p)
return (b->num_in > 1) && (n == 2);
}
+/* Look for the @phi's operand whose definition reaches @b. */
static int
phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b,
struct nv_basic_block *tb)
{
+ struct nv_ref *srci, *srcj;
int i, j;
- for (j = -1, i = 0; i < 4 && phi->src[i]; ++i) {
- if (!nvbb_reachable_by(b, phi->src[i]->value->insn->bb, tb))
+ for (j = -1, i = 0; i < 6 && phi->src[i]; ++i) {
+ srci = phi->src[i];
+ /* if already replaced, check with original source first */
+ if (srci->flags & NV_REF_FLAG_REGALLOC_PRIV)
+ srci = srci->value->insn->src[0];
+ if (!nvbb_reachable_by(b, srci->value->insn->bb, NULL))
continue;
/* NOTE: back-edges are ignored by the reachable-by check */
- if (j < 0 || !nvbb_reachable_by(phi->src[j]->value->insn->bb,
- phi->src[i]->value->insn->bb, tb))
+ if (j < 0 || !nvbb_reachable_by(srcj->value->insn->bb,
+ srci->value->insn->bb, NULL)) {
j = i;
+ srcj = srci;
+ }
}
+ if (j >= 0 && nvbb_reachable_by(b, phi->def[0]->insn->bb, NULL))
+ if (!nvbb_reachable_by(srcj->value->insn->bb,
+ phi->def[0]->insn->bb, NULL))
+ j = -1;
return j;
}
@@ -429,16 +484,21 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
ctx->pc->current_block = pn;
for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
- if ((j = phi_opnd_for_bb(i, p, b)) < 0)
- continue;
- val = i->src[j]->value;
-
- if (i->src[j]->flags) {
- val = val->insn->src[0]->value;
- while (j < 4 && i->src[j])
- ++j;
- assert(j < 4);
+ j = phi_opnd_for_bb(i, p, b);
+
+ if (j < 0) {
+ val = i->def[0];
+ } else {
+ val = i->src[j]->value;
+ if (i->src[j]->flags & NV_REF_FLAG_REGALLOC_PRIV) {
+ j = -1;
+ /* use original value, we already encountered & replaced it */
+ val = val->insn->src[0]->value;
+ }
}
+ if (j < 0) /* need an additional source ? */
+ for (j = 0; j < 5 && i->src[j] && i->src[j]->value != val; ++j);
+ assert(j < 5);
ni = new_instruction(ctx->pc, NV_OP_MOV);
@@ -452,11 +512,13 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
nv_reference(ctx->pc, &i->src[j], ni->def[0]);
- i->src[j]->flags = 1;
+ i->src[j]->flags |= NV_REF_FLAG_REGALLOC_PRIV;
}
if (pn != p && pn->exit) {
- ctx->pc->current_block = b->in[n ? 0 : 1];
+ assert(!b->in[!n]->exit || b->in[!n]->exit->is_terminator);
+ /* insert terminator (branch to ENDIF) in new else block */
+ ctx->pc->current_block = pn;
ni = new_instruction(ctx->pc, NV_OP_BRA);
ni->target = b;
ni->is_terminator = 1;
@@ -470,45 +532,50 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
return 0;
}
+#define JOIN_MASK_PHI (1 << 0)
+#define JOIN_MASK_SELECT (1 << 1)
+#define JOIN_MASK_MOV (1 << 2)
+#define JOIN_MASK_TEX (1 << 3)
+
static int
-pass_join_values(struct nv_pc_pass *ctx, int iter)
+pass_join_values(struct nv_pc_pass *ctx, unsigned mask)
{
int c, n;
for (n = 0; n < ctx->num_insns; ++n) {
- struct nv_instruction *i = ctx->insns[n];
+ struct nv_instruction *nvi, *i = ctx->insns[n];
switch (i->opcode) {
case NV_OP_PHI:
- if (iter != 2)
+ if (!(mask & JOIN_MASK_PHI))
break;
- for (c = 0; c < 4 && i->src[c]; ++c)
- try_join_values(ctx, i->def[0], i->src[c]->value);
+ for (c = 0; c < 5 && i->src[c]; ++c)
+ join_values_nofail(ctx, i->def[0], i->src[c]->value, FALSE);
break;
case NV_OP_MOV:
- if ((iter == 2) && i->src[0]->value->insn &&
- !nv_is_vector_op(i->src[0]->value->join->insn->opcode))
+ if (!(mask & JOIN_MASK_MOV))
+ break;
+ nvi = i->src[0]->value->join->insn;
+ if (nvi && !nv_is_vector_op(nvi->opcode))
try_join_values(ctx, i->def[0], i->src[0]->value);
break;
case NV_OP_SELECT:
- if (iter != 1)
+ if (!(mask & JOIN_MASK_SELECT))
break;
- for (c = 0; c < 4 && i->src[c]; ++c) {
- assert(join_allowed(ctx, i->def[0], i->src[c]->value));
- do_join_values(ctx, i->def[0], i->src[c]->value);
- }
+ for (c = 0; c < 5 && i->src[c]; ++c)
+ join_values_nofail(ctx, i->def[0], i->src[c]->value, TRUE);
break;
case NV_OP_TEX:
case NV_OP_TXB:
case NV_OP_TXL:
case NV_OP_TXQ:
- if (iter)
+ if (!(mask & JOIN_MASK_TEX))
break;
- for (c = 0; c < 4; ++c) {
- if (!i->src[c])
- break;
- do_join_values(ctx, i->def[c], i->src[c]->value);
- }
+ /* This should work without conflicts because we always generate
+ * extra MOVs for the sources of a TEX.
+ */
+ for (c = 0; c < 4 && i->src[c]; ++c)
+ join_values_nofail(ctx, i->def[c], i->src[c]->value, TRUE);
break;
default:
break;
@@ -643,15 +710,15 @@ static void collect_live_values(struct nv_basic_block *b, const int n)
{
int i;
- if (b->out[0]) {
- if (b->out[1]) { /* what to do about back-edges ? */
+ if (b->out[0] && b->out_kind[0] != CFG_EDGE_FAKE) {
+ if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) {
for (i = 0; i < n; ++i)
b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i];
} else {
memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t));
}
} else
- if (b->out[1]) {
+ if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) {
memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t));
} else {
memset(b->live_set, 0, n * sizeof(uint32_t));
@@ -770,8 +837,8 @@ insert_ordered_tail(struct nv_value *list, struct nv_value *nval)
struct nv_value *elem;
for (elem = list->prev;
- elem != list && elem->livei->bgn > nval->livei->bgn;
- elem = elem->prev);
+ elem != list && elem->livei->bgn > nval->livei->bgn;
+ elem = elem->prev);
/* now elem begins before or at the same time as val */
nval->prev = elem;
@@ -780,44 +847,49 @@ insert_ordered_tail(struct nv_value *list, struct nv_value *nval)
elem->next = nval;
}
-static int
-pass_linear_scan(struct nv_pc_pass *ctx, int iter)
+static void
+collect_register_values(struct nv_pc_pass *ctx, struct nv_value *head,
+ boolean assigned_only)
{
- struct nv_instruction *i;
- struct register_set f, free;
+ struct nv_value *val;
int k, n;
- struct nv_value *cur, *val, *tmp[2];
- struct nv_value active, inactive, handled, unhandled;
- make_empty_list(&active);
- make_empty_list(&inactive);
- make_empty_list(&handled);
- make_empty_list(&unhandled);
+ make_empty_list(head);
- nv50_ctor_register_set(ctx->pc, &free);
-
- /* joined values should have range = NULL and thus not be added;
- * also, fixed memory values won't be added because they're not
- * def'd, just used
- */
for (n = 0; n < ctx->num_insns; ++n) {
- i = ctx->insns[n];
+ struct nv_instruction *i = ctx->insns[n];
+ /* for joined values, only the representative will have livei != NULL */
for (k = 0; k < 4; ++k) {
if (i->def[k] && i->def[k]->livei)
- insert_ordered_tail(&unhandled, i->def[k]);
- else
- if (0 && i->def[k])
- debug_printf("skipping def'd value %i: no livei\n", i->def[k]->n);
+ if (!assigned_only || i->def[k]->reg.id >= 0)
+ insert_ordered_tail(head, i->def[k]);
}
if (i->flags_def && i->flags_def->livei)
- insert_ordered_tail(&unhandled, i->flags_def);
+ if (!assigned_only || i->flags_def->reg.id >= 0)
+ insert_ordered_tail(head, i->flags_def);
}
- for (val = unhandled.next; val != unhandled.prev; val = val->next) {
+ for (val = head->next; val != head->prev; val = val->next) {
assert(val->join == val);
assert(val->livei->bgn <= val->next->livei->bgn);
}
+}
+
+static int
+pass_linear_scan(struct nv_pc_pass *ctx, int iter)
+{
+ struct register_set f, free;
+ struct nv_value *cur, *val, *tmp[2];
+ struct nv_value active, inactive, handled, unhandled;
+
+ make_empty_list(&active);
+ make_empty_list(&inactive);
+ make_empty_list(&handled);
+
+ nv50_ctor_register_set(ctx->pc, &free);
+
+ collect_register_values(ctx, &unhandled, FALSE);
foreach_s(cur, tmp[0], &unhandled) {
remove_from_list(cur);
@@ -854,13 +926,7 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter)
reg_occupy(&f, val);
if (cur->reg.id < 0) {
- boolean mem = FALSE;
-
- if (nv_is_vector_op(cur->insn->opcode))
- mem = !reg_assign(&f, &cur->insn->def[0], 4);
- else
- if (iter)
- mem = !reg_assign(&f, &cur, 1);
+ boolean mem = !reg_assign(&f, &cur, 1);
if (mem) {
NOUVEAU_ERR("out of registers\n");
@@ -874,13 +940,80 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter)
return 0;
}
+/* Allocate values defined by instructions such as TEX, which have to be
+ * assigned to consecutive registers. Linear scan doesn't really work here
+ * since the values can have different live intervals.
+ * @return: always 0; a failed reg_assign() logs an error and calls abort().
+ */
+static int
+pass_allocate_constrained_values(struct nv_pc_pass *ctx)
+{
+ struct nv_value regvals, *val;
+ struct nv_instruction *i;
+ struct nv_value *defs[4];
+ struct register_set regs[4];
+ int n, vsize, c;
+ uint32_t mask;
+ boolean mem;
+ /* regvals: ordered list of the values that already have a register (assigned_only = TRUE) */
+ collect_register_values(ctx, &regvals, TRUE);
+ /* visit every instruction that defines more than one value */
+ for (n = 0; n < ctx->num_insns; ++n) {
+ i = ctx->insns[n];
+ vsize = nvi_vector_size(i);
+ if (!(vsize > 1))
+ continue;
+ assert(vsize <= 4);
+ for (c = 0; c < vsize; ++c)
+ defs[c] = i->def[c]->join;
+ /* first component already has a register: all others are expected to have one too */
+ if (defs[0]->reg.id >= 0) {
+ for (c = 1; c < vsize; ++c)
+ assert(defs[c]->reg.id >= 0);
+ continue;
+ }
+
+ /* Compute registers available for this "vector" of consecutive registers.
+ * Each value (component) has its own independent live interval.
+ */
+ for (c = 0; c < vsize; ++c) {
+ nv50_ctor_register_set(ctx->pc, &regs[c]);
+ /* occupy the registers of assigned values whose live interval overlaps component c */
+ foreach(val, &regvals) {
+ if (val->reg.id >= 0 && livei_have_overlap(val, defs[c]))
+ reg_occupy(&regs[c], val);
+ }
+ /* Only 32 bit GPRs will be allocated here, but register set
+ * granularity for GPRs is 16 bit.
+ */
+ mask = 0x03030303; /* 2 bits (one 32 bit GPR) at the start of every 8 bits (4 GPRs) */
+ if (vsize == 2) /* granularity is 2 and not 4 */
+ mask |= 0x03030303 << 4;
+ mask_register_set(&regs[c], 0, mask << (c * 2)); /* keep only the bits at component c's position */
+ /* NOTE(review): added while reg.id is still < 0; presumably reg_assign() below fills it in - confirm */
+ if (defs[c]->livei)
+ insert_ordered_tail(&regvals, defs[c]);
+ }
+ for (c = 1; c < vsize; ++c)
+ intersect_register_sets(&regs[0], &regs[0], &regs[c]);
+ /* regs[0] now holds the OR of all components' occupancy bits */
+ mem = !reg_assign(&regs[0], &defs[0], vsize);
+ /* a failed assignment is treated as fatal */
+ if (mem) {
+ NOUVEAU_ERR("out of registers\n");
+ abort();
+ }
+ }
+ return 0;
+}
+
static int
nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
{
struct nv_pc_pass *ctx;
int i, ret;
- NV50_DBGMSG("REGISTER ALLOCATION - entering\n");
+ NV50_DBGMSG(PROG_RA, "REGISTER ALLOCATION - entering\n");
ctx = CALLOC_STRUCT(nv_pc_pass);
if (!ctx)
@@ -923,16 +1056,16 @@ nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
livei_print(&pc->values[i]);
#endif
- ret = pass_join_values(ctx, 0);
+ ret = pass_join_values(ctx, JOIN_MASK_PHI);
if (ret)
goto out;
- ret = pass_linear_scan(ctx, 0);
+ ret = pass_join_values(ctx, JOIN_MASK_SELECT | JOIN_MASK_TEX);
if (ret)
goto out;
- ret = pass_join_values(ctx, 1);
+ ret = pass_join_values(ctx, JOIN_MASK_MOV);
if (ret)
goto out;
- ret = pass_join_values(ctx, 2);
+ ret = pass_allocate_constrained_values(ctx);
if (ret)
goto out;
ret = pass_linear_scan(ctx, 1);
@@ -942,7 +1075,7 @@ nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
for (i = 0; i < pc->num_values; ++i)
livei_release(&pc->values[i]);
- NV50_DBGMSG("REGISTER ALLOCATION - leaving\n");
+ NV50_DBGMSG(PROG_RA, "REGISTER ALLOCATION - leaving\n");
out:
FREE(ctx->insns);
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index a63f9d8a6d5..41d3e14dc0f 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -20,8 +20,6 @@
* SOFTWARE.
*/
-/* #define NV50_PROGRAM_DEBUG */
-
#include "nv50_program.h"
#include "nv50_pc.h"
#include "nv50_context.h"
@@ -486,7 +484,7 @@ nv50_fragprog_prepare(struct nv50_translation_info *ti)
++nintp;
}
- p->fp.colors = (1 << 24) | 4; /* CLAMP, FFC0_ID = 4 */
+ p->fp.colors = 4 << NV50_3D_MAP_SEMANTIC_0_FFC0_ID__SHIFT; /* after HPOS */
for (i = 0; i < p->in_nr; ++i) {
int j = p->in[i].id;
@@ -564,7 +562,7 @@ nv50_prog_scan(struct nv50_translation_info *ti)
tgsi_scan_shader(p->pipe.tokens, &ti->scan);
-#ifdef NV50_PROGRAM_DEBUG
+#if NV50_DEBUG & NV50_DEBUG_SHADER
tgsi_dump(p->pipe.tokens, 0);
#endif
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index 7690c80eef0..641ad7e2780 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -20,6 +20,7 @@
* SOFTWARE.
*/
+#include "util/u_format.h"
#include "util/u_format_s3tc.h"
#include "pipe/p_screen.h"
@@ -45,17 +46,8 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
if (sample_count > 1)
return FALSE;
- if (!util_format_s3tc_enabled) {
- switch (format) {
- case PIPE_FORMAT_DXT1_RGB:
- case PIPE_FORMAT_DXT1_RGBA:
- case PIPE_FORMAT_DXT3_RGBA:
- case PIPE_FORMAT_DXT5_RGBA:
- return FALSE;
- default:
- break;
- }
- }
+ if (!util_format_is_supported(format, bindings))
+ return FALSE;
switch (format) {
case PIPE_FORMAT_Z16_UNORM:
@@ -108,6 +100,8 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return 1;
case PIPE_CAP_MAX_RENDER_TARGETS:
return 8;
+ case PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL:
+ return 1;
case PIPE_CAP_TIMER_QUERY:
case PIPE_CAP_OCCLUSION_QUERY:
return 1;
@@ -129,6 +123,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_PRIMITIVE_RESTART:
case PIPE_CAP_TGSI_INSTANCEID:
case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+ case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
return 1;
default:
NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
@@ -417,6 +412,10 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
OUT_RING (chan, NV50_3D_MULTISAMPLE_MODE_MS1);
BEGIN_RING(chan, RING_3D(MULTISAMPLE_CTRL), 1);
OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D(LINE_LAST_PIXEL), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D(BLEND_SEPARATE_ALPHA), 1);
+ OUT_RING (chan, 1);
BEGIN_RING(chan, RING_3D(SCREEN_Y_CONTROL), 1);
OUT_RING (chan, 0);
diff --git a/src/gallium/drivers/nv50/nv50_shader_state.c b/src/gallium/drivers/nv50/nv50_shader_state.c
index bea9c095bb3..82c346cb5ea 100644
--- a/src/gallium/drivers/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nv50/nv50_shader_state.c
@@ -226,7 +226,7 @@ nv50_gmtyprog_validate(struct nv50_context *nv50)
OUT_RING (chan, gp->code_base);
}
-void
+static void
nv50_sprite_coords_validate(struct nv50_context *nv50)
{
struct nouveau_channel *chan = nv50->screen->base.channel;
@@ -282,6 +282,39 @@ nv50_sprite_coords_validate(struct nv50_context *nv50)
OUT_RINGp (chan, pntc, 8);
}
+/* Validate state derived from shaders and the rasterizer cso.
+ *
+ * Always runs nv50_sprite_coords_validate() first. If NV50_NEW_FRAGPROG is
+ * set in nv50->dirty the function then returns without touching the
+ * semantic words. Otherwise it recomputes the CLMP_EN bit of
+ * state.semantic_color from rast->pipe.clamp_vertex_color and the PTSZ_EN
+ * bits of state.semantic_psize from rast->pipe.point_size_per_vertex, and
+ * emits MAP_SEMANTIC_0 / MAP_SEMANTIC_3 only when the cached value changed.
+ */
+void
+nv50_validate_derived_rs(struct nv50_context *nv50)
+{
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ uint32_t color, psize;
+
+ nv50_sprite_coords_validate(nv50);
+
+ if (nv50->dirty & NV50_NEW_FRAGPROG) /* NOTE(review): presumably the fp linkage validation already wrote both words in this case -- confirm */
+ return;
+ psize = nv50->state.semantic_psize & ~NV50_3D_MAP_SEMANTIC_3_PTSZ_EN__MASK; /* cached value with the PTSZ_EN bits cleared */
+ color = nv50->state.semantic_color & ~NV50_3D_MAP_SEMANTIC_0_CLMP_EN; /* cached value with CLMP_EN cleared */
+
+ if (nv50->rast->pipe.clamp_vertex_color)
+ color |= NV50_3D_MAP_SEMANTIC_0_CLMP_EN;
+
+ if (color != nv50->state.semantic_color) { /* emit only when the value differs from the cache */
+ nv50->state.semantic_color = color;
+ BEGIN_RING(chan, RING_3D(MAP_SEMANTIC_0), 1);
+ OUT_RING (chan, color);
+ }
+
+ if (nv50->rast->pipe.point_size_per_vertex)
+ psize |= NV50_3D_MAP_SEMANTIC_3_PTSZ_EN__MASK;
+
+ if (psize != nv50->state.semantic_psize) { /* emit only when the value differs from the cache */
+ nv50->state.semantic_psize = psize;
+ BEGIN_RING(chan, RING_3D(MAP_SEMANTIC_3), 1);
+ OUT_RING (chan, psize);
+ }
+}
+
static int
nv50_vec4_map(uint8_t *map, int mid, uint32_t lin[4],
struct nv50_varying *in, struct nv50_varying *out)
@@ -372,6 +405,9 @@ nv50_fp_linkage_validate(struct nv50_context *nv50)
map[m++] = vp->vp.psiz;
}
+ if (nv50->rast->pipe.clamp_vertex_color)
+ colors |= NV50_3D_MAP_SEMANTIC_0_CLMP_EN;
+
n = (m + 3) / 4;
assert(m <= 64);
@@ -404,6 +440,9 @@ nv50_fp_linkage_validate(struct nv50_context *nv50)
nv50->state.interpolant_ctrl = interp;
+ nv50->state.semantic_color = colors;
+ nv50->state.semantic_psize = psiz;
+
BEGIN_RING(chan, RING_3D(NOPERSPECTIVE_BITMAP(0)), 4);
OUT_RINGp (chan, lin, 4);
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index db257159698..799f49619d2 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -97,8 +97,14 @@ nv50_blend_state_create(struct pipe_context *pipe,
so->pipe = *cso;
- SB_BEGIN_3D(so, BLEND_ENABLE(0), 8);
+ SB_BEGIN_3D(so, COLOR_MASK_COMMON, 1);
+ SB_DATA (so, !cso->independent_blend_enable);
+
+ SB_BEGIN_3D(so, BLEND_ENABLE_COMMON, 1);
+ SB_DATA (so, !cso->independent_blend_enable);
+
if (cso->independent_blend_enable) {
+ SB_BEGIN_3D(so, BLEND_ENABLE(0), 8);
for (i = 0; i < 8; ++i) {
SB_DATA(so, cso->rt[i].blend_enable);
if (cso->rt[i].blend_enable)
@@ -121,8 +127,8 @@ nv50_blend_state_create(struct pipe_context *pipe,
}
}
} else {
- for (i = 0; i < 8; ++i)
- SB_DATA(so, cso->rt[0].blend_enable);
+ SB_BEGIN_3D(so, BLEND_ENABLE(0), 1);
+ SB_DATA (so, cso->rt[0].blend_enable);
}
if (emit_common_func) {
@@ -145,17 +151,16 @@ nv50_blend_state_create(struct pipe_context *pipe,
SB_DATA (so, 0);
}
- SB_BEGIN_3D(so, COLOR_MASK(0), 8);
if (cso->independent_blend_enable) {
+ SB_BEGIN_3D(so, COLOR_MASK(0), 8);
for (i = 0; i < 8; ++i)
SB_DATA(so, nv50_colormask(cso->rt[i].colormask));
} else {
- uint32_t cmask = nv50_colormask(cso->rt[0].colormask);
- for (i = 0; i < 8; ++i)
- SB_DATA(so, cmask);
+ SB_BEGIN_3D(so, COLOR_MASK(0), 1);
+ SB_DATA (so, nv50_colormask(cso->rt[0].colormask));
}
- assert(so->size < (sizeof(so->state) / sizeof(so->state[0])));
+ assert(so->size <= (sizeof(so->state) / sizeof(so->state[0])));
return so;
}
@@ -174,6 +179,7 @@ nv50_blend_state_delete(struct pipe_context *pipe, void *hwcso)
FREE(hwcso);
}
+/* NOTE: ignoring line_last_pixel, using FALSE (set on screen init) */
static void *
nv50_rasterizer_state_create(struct pipe_context *pipe,
const struct pipe_rasterizer_state *cso)
@@ -198,6 +204,9 @@ nv50_rasterizer_state_create(struct pipe_context *pipe,
SB_BEGIN_3D(so, VERTEX_TWO_SIDE_ENABLE, 1);
SB_DATA (so, cso->light_twoside);
+ SB_BEGIN_3D(so, FRAG_COLOR_CLAMP_EN, 1);
+ SB_DATA (so, cso->clamp_fragment_color ? 0x11111111 : 0x00000000);
+
SB_BEGIN_3D(so, LINE_WIDTH, 1);
SB_DATA (so, fui(cso->line_width));
SB_BEGIN_3D(so, LINE_SMOOTH_ENABLE, 1);
@@ -258,7 +267,7 @@ nv50_rasterizer_state_create(struct pipe_context *pipe,
SB_DATA (so, fui(cso->offset_units * 2.0f));
}
- assert(so->size < (sizeof(so->state) / sizeof(so->state[0])));
+ assert(so->size <= (sizeof(so->state) / sizeof(so->state[0])));
return (void *)so;
}
@@ -337,7 +346,7 @@ nv50_zsa_state_create(struct pipe_context *pipe,
SB_DATA (so, 0);
}
- assert(so->size < (sizeof(so->state) / sizeof(so->state[0])));
+ assert(so->size <= (sizeof(so->state) / sizeof(so->state[0])));
return (void *)so;
}
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index f3d45eb95e0..cdf1a982fcc 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -309,7 +309,7 @@ static struct state_validate {
{ nv50_fp_linkage_validate, NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG |
NV50_NEW_GMTYPROG },
{ nv50_gp_linkage_validate, NV50_NEW_GMTYPROG | NV50_NEW_VERTPROG },
- { nv50_sprite_coords_validate, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER |
+ { nv50_validate_derived_rs, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER |
NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
{ nv50_constbufs_validate, NV50_NEW_CONSTBUF },
{ nv50_validate_textures, NV50_NEW_TEXTURES },
diff --git a/src/gallium/drivers/nv50/nv50_stateobj.h b/src/gallium/drivers/nv50/nv50_stateobj.h
index 515e3e78d42..4c98c7e46fc 100644
--- a/src/gallium/drivers/nv50/nv50_stateobj.h
+++ b/src/gallium/drivers/nv50/nv50_stateobj.h
@@ -21,13 +21,13 @@
struct nv50_blend_stateobj {
struct pipe_blend_state pipe;
int size;
- uint32_t state[78];
+ uint32_t state[82]; // TODO: allocate less if !independent_blend_enable
};
struct nv50_rasterizer_stateobj {
struct pipe_rasterizer_state pipe;
int size;
- uint32_t state[40];
+ uint32_t state[42];
};
struct nv50_zsa_stateobj {
diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c
index dc9e2880f0f..3d7e880ccce 100644
--- a/src/gallium/drivers/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nv50/nv50_surface.c
@@ -27,6 +27,7 @@
#include "util/u_inlines.h"
#include "util/u_pack_color.h"
#include "util/u_format.h"
+#include "util/u_surface.h"
#include "nv50_context.h"
#include "nv50_resource.h"
@@ -198,6 +199,13 @@ nv50_resource_copy_region(struct pipe_context *pipe,
int ret;
unsigned dst_layer = dstz, src_layer = src_box->z;
+ /* Fallback for buffers. */
+ if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
+ util_resource_copy_region(pipe, dst, dst_level, dstx, dsty, dstz,
+ src, src_level, src_box);
+ return;
+ }
+
assert((src->format == dst->format) ||
(nv50_2d_format_faithful(src->format) &&
nv50_2d_format_faithful(dst->format)));
diff --git a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c
index 1449cb04c69..25dcaaea14f 100644
--- a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c
+++ b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c
@@ -20,8 +20,6 @@
* SOFTWARE.
*/
-/* #define NV50_TGSI2NC_DEBUG */
-
#include <unistd.h>
#include "nv50_context.h"
@@ -213,7 +211,7 @@ static INLINE void
bld_warn_uninitialized(struct bld_context *bld, int kind,
struct bld_value_stack *stk, struct nv_basic_block *b)
{
-#ifdef NV50_TGSI2NC_DEBUG
+#if NV50_DEBUG & NV50_DEBUG_PROG_IR
long i = (stk - &bld->tvs[0][0]) / 4;
long c = (stk - &bld->tvs[0][0]) & 3;
@@ -273,6 +271,12 @@ fetch_by_bb(struct bld_value_stack *stack,
fetch_by_bb(stack, vals, n, b->in[i]);
}
+static INLINE boolean /* non-zero iff bb has an exit instruction whose is_terminator flag is set */
+nvbb_is_terminated(struct nv_basic_block *bb)
+{
+ return bb->exit && bb->exit->is_terminator; /* a block with no exit instruction (bb->exit NULL) counts as not terminated */
+}
+
static INLINE struct nv_value *
bld_load_imm_u32(struct bld_context *bld, uint32_t u);
@@ -1556,7 +1560,7 @@ bld_instruction(struct bld_context *bld,
int c;
uint opcode = translate_opcode(insn->Instruction.Opcode);
-#ifdef NV50_TGSI2NC_DEBUG
+#if NV50_DEBUG & NV50_DEBUG_PROG_IR
debug_printf("bld_instruction:"); tgsi_dump_instruction(insn, 1);
#endif
@@ -1727,8 +1731,7 @@ bld_instruction(struct bld_context *bld,
{
struct nv_basic_block *b = new_basic_block(bld->pc);
- if (bld->pc->current_block->exit &&
- !bld->pc->current_block->exit->is_terminator)
+ if (!nvbb_is_terminated(bld->pc->current_block))
bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, b, FALSE);
--bld->cond_lvl;
@@ -1800,7 +1803,8 @@ bld_instruction(struct bld_context *bld,
{
struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];
- bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, bb, FALSE);
+ if (!nvbb_is_terminated(bld->pc->current_block))
+ bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, bb, FALSE);
nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_BACK);