summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/freedreno/.gitignore1
-rw-r--r--src/gallium/drivers/freedreno/Makefile.sources1
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_context.c4
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_emit.c3
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_context.c4
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_emit.c3
-rw-r--r--src/gallium/drivers/freedreno/freedreno_context.h2
-rw-r--r--src/gallium/drivers/freedreno/freedreno_screen.c9
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_cmdline.c15
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c122
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_nir.c153
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_nir.h7
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_shader.c24
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_shader.h4
-rw-r--r--src/gallium/drivers/i915/i915_context.h1
-rw-r--r--src/gallium/drivers/i915/i915_screen.c7
-rw-r--r--src/gallium/drivers/i915/i915_state.c3
-rw-r--r--src/gallium/drivers/i915/i915_state_derived.c17
-rw-r--r--src/gallium/drivers/ilo/core/ilo_builder.c8
-rw-r--r--src/gallium/drivers/ilo/ilo_context.c5
-rw-r--r--src/gallium/drivers/ilo/ilo_gpgpu.c2
-rw-r--r--src/gallium/drivers/ilo/ilo_screen.c7
-rw-r--r--src/gallium/drivers/ilo/ilo_state.c6
-rw-r--r--src/gallium/drivers/ilo/shader/ilo_shader_fs.c16
-rw-r--r--src/gallium/drivers/ilo/shader/ilo_shader_vs.c6
-rw-r--r--src/gallium/drivers/ilo/shader/toy_legalize_ra.c4
-rw-r--r--src/gallium/drivers/ilo/shader/toy_tgsi.c8
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.h3
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.h18
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_tri.c290
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_screen.c7
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.c5
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_context.h8
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_line.c16
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_tri.c228
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_derived.c113
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c35
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_setup.c19
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_setup.h8
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_blend.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_conv.c2
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir.h3
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h4
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp1
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp5
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp46
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp11
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp21
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp3
-rw-r--r--src/gallium/drivers/nouveau/nouveau_compiler.c2
-rw-r--r--src/gallium/drivers/nouveau/nouveau_vp3_video.h17
-rw-r--r--src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c91
-rw-r--r--src/gallium/drivers/nouveau/nv30/nv30_draw.c15
-rw-r--r--src/gallium/drivers/nouveau/nv30/nv30_screen.c9
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_context.h3
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_program.c6
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_screen.c8
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_state.c25
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_vbo.c44
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c9
-rw-r--r--src/gallium/drivers/nouveau/nvc0/mme/com9097.mme234
-rw-r--r--src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h183
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_context.h3
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_macros.h4
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_program.c11
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_program.h1
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c3
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_screen.c10
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c1
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_state.c36
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c143
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_video.c51
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_video.h18
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c126
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h1
-rw-r--r--src/gallium/drivers/r300/compiler/r500_fragprog.c2
-rw-r--r--src/gallium/drivers/r300/r300_context.c4
-rw-r--r--src/gallium/drivers/r300/r300_render.c2
-rw-r--r--src/gallium/drivers/r300/r300_render_translate.c6
-rw-r--r--src/gallium/drivers/r300/r300_screen.c9
-rw-r--r--src/gallium/drivers/r300/r300_screen_buffer.c2
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.c24
-rw-r--r--src/gallium/drivers/r600/evergreen_compute.c2
-rw-r--r--src/gallium/drivers/r600/evergreen_state.c2
-rw-r--r--src/gallium/drivers/r600/r600_llvm.c9
-rw-r--r--src/gallium/drivers/r600/r600_llvm.h4
-rw-r--r--src/gallium/drivers/r600/r600_pipe.c8
-rw-r--r--src/gallium/drivers/r600/r600_pipe.h1
-rw-r--r--src/gallium/drivers/r600/r600_shader.c29
-rw-r--r--src/gallium/drivers/r600/r600_state.c2
-rw-r--r--src/gallium/drivers/r600/r600_state_common.c8
-rw-r--r--src/gallium/drivers/radeon/r600_buffer_common.c2
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.c26
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.h4
-rw-r--r--src/gallium/drivers/radeon/radeon_llvm_emit.c75
-rw-r--r--src/gallium/drivers/radeon/radeon_llvm_emit.h6
-rw-r--r--src/gallium/drivers/radeonsi/cik_sdma.c2
-rw-r--r--src/gallium/drivers/radeonsi/si_compute.c44
-rw-r--r--src/gallium/drivers/radeonsi/si_descriptors.c14
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.c9
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c670
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.h69
-rw-r--r--src/gallium/drivers/radeonsi/si_state_draw.c8
-rw-r--r--src/gallium/drivers/radeonsi/si_state_shaders.c133
-rw-r--r--src/gallium/drivers/softpipe/sp_context.h9
-rw-r--r--src/gallium/drivers/softpipe/sp_prim_vbuf.c4
-rw-r--r--src/gallium/drivers/softpipe/sp_query.c2
-rw-r--r--src/gallium/drivers/softpipe/sp_screen.c7
-rw-r--r--src/gallium/drivers/softpipe/sp_setup.c52
-rw-r--r--src/gallium/drivers/softpipe/sp_setup.h27
-rw-r--r--src/gallium/drivers/softpipe/sp_state.h3
-rw-r--r--src/gallium/drivers/softpipe/sp_state_derived.c195
-rw-r--r--src/gallium/drivers/softpipe/sp_state_shader.c3
-rw-r--r--src/gallium/drivers/svga/svga_cmd.c13
-rw-r--r--src/gallium/drivers/svga/svga_cmd_vgpu10.c10
-rw-r--r--src/gallium/drivers/svga/svga_context.c5
-rw-r--r--src/gallium/drivers/svga/svga_context.h9
-rw-r--r--src/gallium/drivers/svga/svga_draw.c26
-rw-r--r--src/gallium/drivers/svga/svga_pipe_rasterizer.c11
-rw-r--r--src/gallium/drivers/svga/svga_pipe_sampler.c16
-rw-r--r--src/gallium/drivers/svga/svga_resource_buffer_upload.c4
-rw-r--r--src/gallium/drivers/svga/svga_screen.c10
-rw-r--r--src/gallium/drivers/svga/svga_state_constants.c3
-rw-r--r--src/gallium/drivers/svga/svga_state_fs.c1
-rw-r--r--src/gallium/drivers/svga/svga_state_vs.c3
-rw-r--r--src/gallium/drivers/svga/svga_swtnl_state.c10
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_insn.c15
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_vgpu10.c10
-rw-r--r--src/gallium/drivers/svga/svga_winsys.h2
-rw-r--r--src/gallium/drivers/vc4/vc4_context.c5
-rw-r--r--src/gallium/drivers/vc4/vc4_draw.c2
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_algebraic.c37
-rw-r--r--src/gallium/drivers/vc4/vc4_program.c304
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.c52
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.h55
-rw-r--r--src/gallium/drivers/vc4/vc4_qir_schedule.c7
-rw-r--r--src/gallium/drivers/vc4/vc4_qpu_emit.c49
-rw-r--r--src/gallium/drivers/vc4/vc4_resource.c2
-rw-r--r--src/gallium/drivers/vc4/vc4_screen.c29
-rw-r--r--src/gallium/drivers/virgl/virgl_context.c6
-rw-r--r--src/gallium/drivers/virgl/virgl_screen.c7
143 files changed, 2957 insertions, 1574 deletions
diff --git a/src/gallium/drivers/freedreno/.gitignore b/src/gallium/drivers/freedreno/.gitignore
new file mode 100644
index 00000000000..150f5d19f5b
--- /dev/null
+++ b/src/gallium/drivers/freedreno/.gitignore
@@ -0,0 +1 @@
+ir3_compiler
diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources
index baae9144005..74ef4168655 100644
--- a/src/gallium/drivers/freedreno/Makefile.sources
+++ b/src/gallium/drivers/freedreno/Makefile.sources
@@ -128,6 +128,7 @@ ir3_SOURCES := \
ir3/ir3_group.c \
ir3/ir3.h \
ir3/ir3_legalize.c \
+ ir3/ir3_nir.c \
ir3/ir3_nir.h \
ir3/ir3_nir_lower_if_else.c \
ir3/ir3_print.c \
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.c b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
index 74cbbf2edd8..e47bbff5643 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_context.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
@@ -171,8 +171,8 @@ fd3_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
fd3_query_context_init(pctx);
- fd3_ctx->border_color_uploader = u_upload_create(pctx, 4096,
- 2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE, 0);
+ fd3_ctx->border_color_uploader = u_upload_create(pctx, 4096, 0,
+ PIPE_USAGE_STREAM);
return pctx;
}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 24afbc9e956..e65a352e7f6 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -145,7 +145,8 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
void *ptr;
u_upload_alloc(fd3_ctx->border_color_uploader,
- 0, 2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE, &off,
+ 0, BORDER_COLOR_UPLOAD_SIZE,
+ BORDER_COLOR_UPLOAD_SIZE, &off,
&fd3_ctx->border_color_buf,
&ptr);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.c b/src/gallium/drivers/freedreno/a4xx/fd4_context.c
index e53e0c56c9a..7d6365bbb6d 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_context.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.c
@@ -171,8 +171,8 @@ fd4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
fd4_query_context_init(pctx);
- fd4_ctx->border_color_uploader = u_upload_create(pctx, 4096,
- 2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE, 0);
+ fd4_ctx->border_color_uploader = u_upload_create(pctx, 4096, 0,
+ PIPE_USAGE_STREAM);
return pctx;
}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index b9a28149722..bc62a5d9a4b 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -133,7 +133,8 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
void *ptr;
u_upload_alloc(fd4_ctx->border_color_uploader,
- 0, 2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE, &off,
+ 0, BORDER_COLOR_UPLOAD_SIZE,
+ BORDER_COLOR_UPLOAD_SIZE, &off,
&fd4_ctx->border_color_buf,
&ptr);
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index 571c8142bf7..418b71b95de 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -40,6 +40,8 @@
#include "freedreno_gmem.h"
#include "freedreno_util.h"
+#define BORDER_COLOR_UPLOAD_SIZE (2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE)
+
struct fd_vertex_stateobj;
struct fd_texture_stateobj {
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 5bbe4016a2a..9d0cdd8e545 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -226,6 +226,8 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
case PIPE_CAP_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
@@ -238,6 +240,11 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_CLEAR_TEXTURE:
+ case PIPE_CAP_DRAW_PARAMETERS:
+ case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+ case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+ case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
return 0;
case PIPE_CAP_MAX_VIEWPORTS:
@@ -414,6 +421,8 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
return PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
+ case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ return 0;
}
debug_printf("unknown shader param %d\n", param);
return 0;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
index d55daeefe06..481859efb17 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
@@ -40,6 +40,7 @@
#include "freedreno_util.h"
#include "ir3_compiler.h"
+#include "ir3_nir.h"
#include "instr-a3xx.h"
#include "ir3.h"
@@ -105,10 +106,10 @@ int main(int argc, char **argv)
const char *filename;
struct tgsi_token toks[65536];
struct tgsi_parse_context parse;
- struct ir3_compiler *compiler;
struct ir3_shader_variant v;
struct ir3_shader s;
struct ir3_shader_key key = {};
+ /* TODO cmdline option to target different gpus: */
unsigned gpu_id = 320;
const char *info;
void *ptr;
@@ -228,7 +229,12 @@ int main(int argc, char **argv)
if (!tgsi_text_translate(ptr, toks, Elements(toks)))
errx(1, "could not parse `%s'", filename);
- s.tokens = toks;
+ if (fd_mesa_debug & FD_DBG_OPTMSGS)
+ tgsi_dump(toks, 0);
+
+ nir_shader *nir = ir3_tgsi_to_nir(toks);
+ s.compiler = ir3_compiler_create(gpu_id);
+ s.nir = ir3_optimize_nir(&s, nir, NULL);
v.key = key;
v.shader = &s;
@@ -246,11 +252,8 @@ int main(int argc, char **argv)
break;
}
- /* TODO cmdline option to target different gpus: */
- compiler = ir3_compiler_create(gpu_id);
-
info = "NIR compiler";
- ret = ir3_compile_shader_nir(compiler, &v);
+ ret = ir3_compile_shader_nir(s.compiler, &v);
if (ret) {
fprintf(stderr, "compiler failed!\n");
return ret;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 224f7806b3c..86afda4ba08 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -32,10 +32,6 @@
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
-#include "tgsi/tgsi_lowering.h"
-#include "tgsi/tgsi_strings.h"
-
-#include "nir/tgsi_to_nir.h"
#include "freedreno_util.h"
@@ -123,97 +119,10 @@ struct ir3_compile {
static struct ir3_instruction * create_immed(struct ir3_block *block, uint32_t val);
static struct ir3_block * get_block(struct ir3_compile *ctx, nir_block *nblock);
-static struct nir_shader *to_nir(struct ir3_compile *ctx,
- const struct tgsi_token *tokens, struct ir3_shader_variant *so)
-{
- static const nir_shader_compiler_options options = {
- .lower_fpow = true,
- .lower_fsat = true,
- .lower_scmp = true,
- .lower_flrp = true,
- .lower_ffract = true,
- .native_integers = true,
- };
- struct nir_lower_tex_options tex_options = {
- .lower_rect = 0,
- };
- bool progress;
-
- switch (so->type) {
- case SHADER_FRAGMENT:
- case SHADER_COMPUTE:
- tex_options.saturate_s = so->key.fsaturate_s;
- tex_options.saturate_t = so->key.fsaturate_t;
- tex_options.saturate_r = so->key.fsaturate_r;
- break;
- case SHADER_VERTEX:
- tex_options.saturate_s = so->key.vsaturate_s;
- tex_options.saturate_t = so->key.vsaturate_t;
- tex_options.saturate_r = so->key.vsaturate_r;
- break;
- }
-
- if (ctx->compiler->gpu_id >= 400) {
- /* a4xx seems to have *no* sam.p */
- tex_options.lower_txp = ~0; /* lower all txp */
- } else {
- /* a3xx just needs to avoid sam.p for 3d tex */
- tex_options.lower_txp = (1 << GLSL_SAMPLER_DIM_3D);
- }
-
- struct nir_shader *s = tgsi_to_nir(tokens, &options);
-
- if (fd_mesa_debug & FD_DBG_DISASM) {
- debug_printf("----------------------\n");
- nir_print_shader(s, stdout);
- debug_printf("----------------------\n");
- }
-
- nir_opt_global_to_local(s);
- nir_convert_to_ssa(s);
- if (s->stage == MESA_SHADER_VERTEX) {
- nir_lower_clip_vs(s, so->key.ucp_enables);
- } else if (s->stage == MESA_SHADER_FRAGMENT) {
- nir_lower_clip_fs(s, so->key.ucp_enables);
- }
- nir_lower_tex(s, &tex_options);
- if (so->key.color_two_side)
- nir_lower_two_sided_color(s);
- nir_lower_idiv(s);
- nir_lower_load_const_to_scalar(s);
-
- do {
- progress = false;
-
- nir_lower_vars_to_ssa(s);
- nir_lower_alu_to_scalar(s);
- nir_lower_phis_to_scalar(s);
-
- progress |= nir_copy_prop(s);
- progress |= nir_opt_dce(s);
- progress |= nir_opt_cse(s);
- progress |= ir3_nir_lower_if_else(s);
- progress |= nir_opt_algebraic(s);
- progress |= nir_opt_constant_folding(s);
-
- } while (progress);
-
- nir_remove_dead_variables(s);
- nir_validate_shader(s);
-
- if (fd_mesa_debug & FD_DBG_DISASM) {
- debug_printf("----------------------\n");
- nir_print_shader(s, stdout);
- debug_printf("----------------------\n");
- }
-
- return s;
-}
static struct ir3_compile *
compile_init(struct ir3_compiler *compiler,
- struct ir3_shader_variant *so,
- const struct tgsi_token *tokens)
+ struct ir3_shader_variant *so)
{
struct ir3_compile *ctx = rzalloc(NULL, struct ir3_compile);
@@ -239,7 +148,28 @@ compile_init(struct ir3_compiler *compiler,
ctx->block_ht = _mesa_hash_table_create(ctx,
_mesa_hash_pointer, _mesa_key_pointer_equal);
- ctx->s = to_nir(ctx, tokens, so);
+ /* TODO: maybe generate some sort of bitmask of what key
+ * lowers vs what shader has (ie. no need to lower
+ * texture clamp lowering if no texture sample instrs)..
+ * although should be done further up the stack to avoid
+ * creating duplicate variants..
+ */
+
+ if (ir3_key_lowers_nir(&so->key)) {
+ nir_shader *s = nir_shader_clone(ctx, so->shader->nir);
+ ctx->s = ir3_optimize_nir(so->shader, s, &so->key);
+ } else {
+ /* fast-path for shader key that lowers nothing in NIR: */
+ ctx->s = so->shader->nir;
+ }
+
+ if (fd_mesa_debug & FD_DBG_DISASM) {
+ DBG("dump nir%dv%d: type=%d, k={bp=%u,cts=%u,hp=%u}",
+ so->shader->id, so->id, so->type,
+ so->key.binning_pass, so->key.color_two_side,
+ so->key.half_precision);
+ nir_print_shader(ctx->s, stdout);
+ }
so->first_driver_param = so->first_immediate = ctx->s->num_uniforms;
@@ -1954,8 +1884,6 @@ emit_instr(struct ir3_compile *ctx, nir_instr *instr)
case nir_texop_query_levels:
emit_tex_query_levels(ctx, tex);
break;
- case nir_texop_samples_identical:
- unreachable("nir_texop_samples_identical");
default:
emit_tex(ctx, tex);
break;
@@ -2170,6 +2098,8 @@ emit_stream_out(struct ir3_compile *ctx)
static void
emit_function(struct ir3_compile *ctx, nir_function_impl *impl)
{
+ nir_metadata_require(impl, nir_metadata_block_index);
+
emit_cf_list(ctx, &impl->body);
emit_block(ctx, impl->end_block);
@@ -2499,7 +2429,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
assert(!so->ir);
- ctx = compile_init(compiler, so, so->shader->tokens);
+ ctx = compile_init(compiler, so);
if (!ctx) {
DBG("INIT failed!");
ret = -1;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
new file mode 100644
index 00000000000..565b9c32c1d
--- /dev/null
+++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
@@ -0,0 +1,153 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2015 Rob Clark <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <[email protected]>
+ */
+
+
+#include "freedreno_util.h"
+
+#include "ir3_nir.h"
+#include "ir3_compiler.h"
+#include "ir3_shader.h"
+
+#include "nir/tgsi_to_nir.h"
+
+struct nir_shader *
+ir3_tgsi_to_nir(const struct tgsi_token *tokens)
+{
+ static const nir_shader_compiler_options options = {
+ .lower_fpow = true,
+ .lower_fsat = true,
+ .lower_scmp = true,
+ .lower_flrp = true,
+ .lower_ffract = true,
+ .native_integers = true,
+ };
+ return tgsi_to_nir(tokens, &options);
+}
+
+/* for given shader key, are any steps handled in nir? */
+bool
+ir3_key_lowers_nir(const struct ir3_shader_key *key)
+{
+ return key->fsaturate_s | key->fsaturate_t | key->fsaturate_r |
+ key->vsaturate_s | key->vsaturate_t | key->vsaturate_r |
+ key->ucp_enables | key->color_two_side;
+}
+
+#define OPT(nir, pass, ...) ({ \
+ bool this_progress = false; \
+ NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \
+ this_progress; \
+})
+
+#define OPT_V(nir, pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__)
+
+struct nir_shader *
+ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
+ const struct ir3_shader_key *key)
+{
+ struct nir_lower_tex_options tex_options = {
+ .lower_rect = 0,
+ };
+ bool progress;
+
+ if (key) {
+ switch (shader->type) {
+ case SHADER_FRAGMENT:
+ case SHADER_COMPUTE:
+ tex_options.saturate_s = key->fsaturate_s;
+ tex_options.saturate_t = key->fsaturate_t;
+ tex_options.saturate_r = key->fsaturate_r;
+ break;
+ case SHADER_VERTEX:
+ tex_options.saturate_s = key->vsaturate_s;
+ tex_options.saturate_t = key->vsaturate_t;
+ tex_options.saturate_r = key->vsaturate_r;
+ break;
+ }
+ }
+
+ if (shader->compiler->gpu_id >= 400) {
+ /* a4xx seems to have *no* sam.p */
+ tex_options.lower_txp = ~0; /* lower all txp */
+ } else {
+ /* a3xx just needs to avoid sam.p for 3d tex */
+ tex_options.lower_txp = (1 << GLSL_SAMPLER_DIM_3D);
+ }
+
+ if (fd_mesa_debug & FD_DBG_DISASM) {
+ debug_printf("----------------------\n");
+ nir_print_shader(s, stdout);
+ debug_printf("----------------------\n");
+ }
+
+ OPT_V(s, nir_opt_global_to_local);
+ OPT_V(s, nir_convert_to_ssa);
+
+ if (key) {
+ if (s->stage == MESA_SHADER_VERTEX) {
+ OPT_V(s, nir_lower_clip_vs, key->ucp_enables);
+ } else if (s->stage == MESA_SHADER_FRAGMENT) {
+ OPT_V(s, nir_lower_clip_fs, key->ucp_enables);
+ }
+ if (key->color_two_side) {
+ OPT_V(s, nir_lower_two_sided_color);
+ }
+ }
+
+ OPT_V(s, nir_lower_tex, &tex_options);
+ OPT_V(s, nir_lower_idiv);
+ OPT_V(s, nir_lower_load_const_to_scalar);
+
+ do {
+ progress = false;
+
+ OPT_V(s, nir_lower_vars_to_ssa);
+ OPT_V(s, nir_lower_alu_to_scalar);
+ OPT_V(s, nir_lower_phis_to_scalar);
+
+ progress |= OPT(s, nir_copy_prop);
+ progress |= OPT(s, nir_opt_dce);
+ progress |= OPT(s, nir_opt_cse);
+ progress |= OPT(s, ir3_nir_lower_if_else);
+ progress |= OPT(s, nir_opt_algebraic);
+ progress |= OPT(s, nir_opt_constant_folding);
+
+ } while (progress);
+
+ OPT_V(s, nir_remove_dead_variables);
+
+ if (fd_mesa_debug & FD_DBG_DISASM) {
+ debug_printf("----------------------\n");
+ nir_print_shader(s, stdout);
+ debug_printf("----------------------\n");
+ }
+
+ nir_sweep(s);
+
+ return s;
+}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.h b/src/gallium/drivers/freedreno/ir3/ir3_nir.h
index 9950782dc38..534199d3744 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_nir.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.h
@@ -32,6 +32,13 @@
#include "glsl/nir/nir.h"
#include "glsl/nir/shader_enums.h"
+#include "ir3_shader.h"
+
bool ir3_nir_lower_if_else(nir_shader *shader);
+struct nir_shader * ir3_tgsi_to_nir(const struct tgsi_token *tokens);
+bool ir3_key_lowers_nir(const struct ir3_shader_key *key);
+struct nir_shader * ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
+ const struct ir3_shader_key *key);
+
#endif /* IR3_NIR_H_ */
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
index 7b565332256..7d17f426ad3 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -39,7 +39,7 @@
#include "ir3_shader.h"
#include "ir3_compiler.h"
-
+#include "ir3_nir.h"
static void
delete_variant(struct ir3_shader_variant *v)
@@ -187,12 +187,6 @@ create_variant(struct ir3_shader *shader, struct ir3_shader_key key)
v->key = key;
v->type = shader->type;
- if (fd_mesa_debug & FD_DBG_DISASM) {
- DBG("dump tgsi: type=%d, k={bp=%u,cts=%u,hp=%u}", shader->type,
- key.binning_pass, key.color_two_side, key.half_precision);
- tgsi_dump(shader->tokens, 0);
- }
-
ret = ir3_compile_shader_nir(shader->compiler, v);
if (ret) {
debug_error("compile failed!");
@@ -267,7 +261,7 @@ ir3_shader_destroy(struct ir3_shader *shader)
v = v->next;
delete_variant(t);
}
- free((void *)shader->tokens);
+ ralloc_free(shader->nir);
free(shader);
}
@@ -281,14 +275,24 @@ ir3_shader_create(struct pipe_context *pctx,
shader->id = ++shader->compiler->shader_count;
shader->pctx = pctx;
shader->type = type;
- shader->tokens = tgsi_dup_tokens(cso->tokens);
+ if (fd_mesa_debug & FD_DBG_DISASM) {
+ DBG("dump tgsi: type=%d", shader->type);
+ tgsi_dump(cso->tokens, 0);
+ }
+ nir_shader *nir = ir3_tgsi_to_nir(cso->tokens);
+ /* do first pass optimization, ignoring the key: */
+ shader->nir = ir3_optimize_nir(shader, nir, NULL);
+ if (fd_mesa_debug & FD_DBG_DISASM) {
+ DBG("dump nir%d: type=%d", shader->id, shader->type);
+ nir_print_shader(shader->nir, stdout);
+ }
shader->stream_output = cso->stream_output;
if (fd_mesa_debug & FD_DBG_SHADERDB) {
/* if shader-db run, create a standard variant immediately
* (as otherwise nothing will trigger the shader to be
* actually compiled)
*/
- static struct ir3_shader_key key = {};
+ static struct ir3_shader_key key = {0};
ir3_shader_variant(shader, key);
}
return shader;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index cf99a4c05ed..b3c28a41387 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -230,6 +230,8 @@ struct ir3_shader_variant {
struct ir3_shader *shader;
};
+typedef struct nir_shader nir_shader;
+
struct ir3_shader {
enum shader_t type;
@@ -240,7 +242,7 @@ struct ir3_shader {
struct ir3_compiler *compiler;
struct pipe_context *pctx; /* TODO replace w/ pipe_screen */
- const struct tgsi_token *tokens;
+ nir_shader *nir;
struct pipe_stream_output_info stream_output;
struct ir3_shader_variant *variants;
diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h
index 1ed685188db..2adaee30fb9 100644
--- a/src/gallium/drivers/i915/i915_context.h
+++ b/src/gallium/drivers/i915/i915_context.h
@@ -195,7 +195,6 @@ struct i915_rasterizer_state {
unsigned light_twoside : 1;
unsigned st;
- enum interp_mode color_interp;
unsigned LIS4;
unsigned LIS7;
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index a5b161882cd..e2a493bc1b5 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -254,6 +254,11 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_CLEAR_TEXTURE:
+ case PIPE_CAP_DRAW_PARAMETERS:
+ case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+ case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+ case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
return 0;
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
@@ -264,6 +269,8 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
case PIPE_CAP_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
case PIPE_CAP_SAMPLER_VIEW_TARGET:
return 0;
diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c
index 6ba9646f7ab..b54a9fbf4f9 100644
--- a/src/gallium/drivers/i915/i915_state.c
+++ b/src/gallium/drivers/i915/i915_state.c
@@ -423,7 +423,7 @@ i915_prepare_vertex_sampling(struct i915_context *i915)
for (j = view->u.tex.first_level; j <= tex->last_level; j++) {
mip_offsets[j] = i915_texture_offset(i915_tex, j , 0 /* FIXME depth */);
row_stride[j] = i915_tex->stride;
- img_stride[j] = 0; /* FIXME */;
+ img_stride[j] = 0; /* FIXME */
}
draw_set_mapped_texture(i915->draw,
@@ -920,7 +920,6 @@ i915_create_rasterizer_state(struct pipe_context *pipe,
struct i915_rasterizer_state *cso = CALLOC_STRUCT( i915_rasterizer_state );
cso->templ = *rasterizer;
- cso->color_interp = rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR;
cso->light_twoside = rasterizer->light_twoside;
cso->ds[0].u = _3DSTATE_DEPTH_OFFSET_SCALE;
cso->ds[1].f = rasterizer->offset_scale;
diff --git a/src/gallium/drivers/i915/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c
index 7ad88a1ce01..bd0f448f645 100644
--- a/src/gallium/drivers/i915/i915_state_derived.c
+++ b/src/gallium/drivers/i915/i915_state_derived.c
@@ -57,7 +57,6 @@ static uint find_mapping(const struct i915_fragment_shader* fs, int unit)
static void calculate_vertex_layout(struct i915_context *i915)
{
const struct i915_fragment_shader *fs = i915->fs;
- const enum interp_mode colorInterp = i915->rasterizer->color_interp;
struct vertex_info vinfo;
boolean texCoords[I915_TEX_UNITS], colors[2], fog, needW, face;
uint i;
@@ -107,12 +106,12 @@ static void calculate_vertex_layout(struct i915_context *i915)
/* pos */
src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_POSITION, 0);
if (needW) {
- draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src);
+ draw_emit_vertex_attr(&vinfo, EMIT_4F, src);
vinfo.hwfmt[0] |= S4_VFMT_XYZW;
vinfo.attrib[0].emit = EMIT_4F;
}
else {
- draw_emit_vertex_attr(&vinfo, EMIT_3F, INTERP_LINEAR, src);
+ draw_emit_vertex_attr(&vinfo, EMIT_3F, src);
vinfo.hwfmt[0] |= S4_VFMT_XYZ;
vinfo.attrib[0].emit = EMIT_3F;
}
@@ -123,21 +122,21 @@ static void calculate_vertex_layout(struct i915_context *i915)
/* primary color */
if (colors[0]) {
src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_COLOR, 0);
- draw_emit_vertex_attr(&vinfo, EMIT_4UB_BGRA, colorInterp, src);
+ draw_emit_vertex_attr(&vinfo, EMIT_4UB_BGRA, src);
vinfo.hwfmt[0] |= S4_VFMT_COLOR;
}
/* secondary color */
if (colors[1]) {
src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_COLOR, 1);
- draw_emit_vertex_attr(&vinfo, EMIT_4UB_BGRA, colorInterp, src);
+ draw_emit_vertex_attr(&vinfo, EMIT_4UB_BGRA, src);
vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG;
}
/* fog coord, not fog blend factor */
if (fog) {
src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_FOG, 0);
- draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src);
+ draw_emit_vertex_attr(&vinfo, EMIT_1F, src);
vinfo.hwfmt[0] |= S4_VFMT_FOG_PARAM;
}
@@ -147,7 +146,7 @@ static void calculate_vertex_layout(struct i915_context *i915)
if (texCoords[i]) {
hwtc = TEXCOORDFMT_4D;
src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_GENERIC, fs->generic_mapping[i]);
- draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
+ draw_emit_vertex_attr(&vinfo, EMIT_4F, src);
}
else {
hwtc = TEXCOORDFMT_NOT_PRESENT;
@@ -164,7 +163,7 @@ static void calculate_vertex_layout(struct i915_context *i915)
* module by adding an extra shader output.
*/
src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_FACE, 0);
- draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_CONSTANT, src);
+ draw_emit_vertex_attr(&vinfo, EMIT_1F, src);
vinfo.hwfmt[1] &= ~(TEXCOORDFMT_NOT_PRESENT << (slot * 4));
vinfo.hwfmt[1] |= TEXCOORDFMT_1D << (slot * 4);
}
@@ -185,7 +184,7 @@ static void calculate_vertex_layout(struct i915_context *i915)
struct i915_tracked_state i915_update_vertex_layout = {
"vertex_layout",
calculate_vertex_layout,
- I915_NEW_RASTERIZER | I915_NEW_FS | I915_NEW_VS
+ I915_NEW_FS | I915_NEW_VS
};
diff --git a/src/gallium/drivers/ilo/core/ilo_builder.c b/src/gallium/drivers/ilo/core/ilo_builder.c
index 9d5195129b7..079872f4306 100644
--- a/src/gallium/drivers/ilo/core/ilo_builder.c
+++ b/src/gallium/drivers/ilo/core/ilo_builder.c
@@ -333,7 +333,7 @@ ilo_builder_init(struct ilo_builder *builder,
const struct ilo_dev *dev,
struct intel_winsys *winsys)
{
- int i;
+ unsigned i;
assert(ilo_is_zeroed(builder, sizeof(*builder)));
@@ -366,7 +366,7 @@ ilo_builder_init(struct ilo_builder *builder,
void
ilo_builder_reset(struct ilo_builder *builder)
{
- int i;
+ unsigned i;
for (i = 0; i < ILO_BUILDER_WRITER_COUNT; i++)
ilo_builder_writer_reset(builder, i);
@@ -382,7 +382,7 @@ ilo_builder_reset(struct ilo_builder *builder)
bool
ilo_builder_begin(struct ilo_builder *builder)
{
- int i;
+ unsigned i;
for (i = 0; i < ILO_BUILDER_WRITER_COUNT; i++) {
if (!ilo_builder_writer_alloc_and_map(builder, i)) {
@@ -407,7 +407,7 @@ struct intel_bo *
ilo_builder_end(struct ilo_builder *builder, unsigned *used)
{
struct ilo_builder_writer *bat;
- int i;
+ unsigned i;
ilo_builder_batch_patch_sba(builder);
diff --git a/src/gallium/drivers/ilo/ilo_context.c b/src/gallium/drivers/ilo/ilo_context.c
index 2a00cf1c93c..6bcd0bcb8f5 100644
--- a/src/gallium/drivers/ilo/ilo_context.c
+++ b/src/gallium/drivers/ilo/ilo_context.c
@@ -189,8 +189,9 @@ ilo_context_create(struct pipe_screen *screen, void *priv, unsigned flags)
* These must be called last as u_upload/u_blitter are clients of the pipe
* context.
*/
- ilo->uploader = u_upload_create(&ilo->base, 1024 * 1024, 16,
- PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_INDEX_BUFFER);
+ ilo->uploader = u_upload_create(&ilo->base, 1024 * 1024,
+ PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_INDEX_BUFFER,
+ PIPE_USAGE_STREAM);
if (!ilo->uploader) {
ilo_context_destroy(&ilo->base);
return NULL;
diff --git a/src/gallium/drivers/ilo/ilo_gpgpu.c b/src/gallium/drivers/ilo/ilo_gpgpu.c
index 9a2ca007f80..b7415901a88 100644
--- a/src/gallium/drivers/ilo/ilo_gpgpu.c
+++ b/src/gallium/drivers/ilo/ilo_gpgpu.c
@@ -92,7 +92,7 @@ ilo_launch_grid(struct pipe_context *pipe,
input_buf.buffer_size =
ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_INPUT_SIZE);
if (input_buf.buffer_size) {
- u_upload_data(ilo->uploader, 0, input_buf.buffer_size, input,
+ u_upload_data(ilo->uploader, 0, input_buf.buffer_size, 16, input,
&input_buf.buffer_offset, &input_buf.buffer);
}
diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c
index cfa2fb41152..d5a82ce80ae 100644
--- a/src/gallium/drivers/ilo/ilo_screen.c
+++ b/src/gallium/drivers/ilo/ilo_screen.c
@@ -463,6 +463,8 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
case PIPE_CAP_MAX_VERTEX_STREAMS:
case PIPE_CAP_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_SAMPLER_VIEW_TARGET:
@@ -476,6 +478,11 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_CLEAR_TEXTURE:
+ case PIPE_CAP_DRAW_PARAMETERS:
+ case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+ case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+ case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
return 0;
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c
index d89765a9d23..8dc2d38e039 100644
--- a/src/gallium/drivers/ilo/ilo_state.c
+++ b/src/gallium/drivers/ilo/ilo_state.c
@@ -376,7 +376,7 @@ finalize_cbuf_state(struct ilo_context *ilo,
if (cbuf->cso[i].resource)
continue;
- u_upload_data(ilo->uploader, 0, cbuf->cso[i].info.size,
+ u_upload_data(ilo->uploader, 0, cbuf->cso[i].info.size, 16,
cbuf->cso[i].user_buffer, &offset, &cbuf->cso[i].resource);
cbuf->cso[i].info.vma = ilo_resource_get_vma(cbuf->cso[i].resource);
@@ -426,12 +426,12 @@ finalize_index_buffer(struct ilo_context *ilo)
unsigned hw_offset;
if (vec->ib.state.user_buffer) {
- u_upload_data(ilo->uploader, 0, size,
+ u_upload_data(ilo->uploader, 0, size, 16,
vec->ib.state.user_buffer + offset,
&hw_offset, &vec->ib.hw_resource);
} else {
u_upload_buffer(ilo->uploader, 0,
- vec->ib.state.offset + offset, size, vec->ib.state.buffer,
+ vec->ib.state.offset + offset, size, 16, vec->ib.state.buffer,
&hw_offset, &vec->ib.hw_resource);
}
diff --git a/src/gallium/drivers/ilo/shader/ilo_shader_fs.c b/src/gallium/drivers/ilo/shader/ilo_shader_fs.c
index 5250115a893..f46126e8427 100644
--- a/src/gallium/drivers/ilo/shader/ilo_shader_fs.c
+++ b/src/gallium/drivers/ilo/shader/ilo_shader_fs.c
@@ -266,7 +266,7 @@ fs_lower_opcode_tgsi_indirect_const(struct fs_compile_context *fcc,
struct toy_inst *inst;
struct toy_src desc, real_src[4];
struct toy_dst tmp, real_dst[4];
- int i;
+ unsigned i;
tsrc_transpose(idx, real_src);
@@ -319,7 +319,7 @@ fs_lower_opcode_tgsi_const_pcb(struct fs_compile_context *fcc,
const int grf_subreg = (idx.val32 & 1) * 16;
struct toy_src src;
struct toy_dst real_dst[4];
- int i;
+ unsigned i;
if (!fcc->variant->use_pcb || dim != 0 || idx.file != TOY_FILE_IMM ||
grf >= fcc->first_attr_grf)
@@ -350,7 +350,7 @@ fs_lower_opcode_tgsi_const_gen6(struct fs_compile_context *fcc,
struct toy_inst *inst;
struct toy_src desc;
struct toy_dst tmp, real_dst[4];
- int i;
+ unsigned i;
if (fs_lower_opcode_tgsi_const_pcb(fcc, dst, dim, idx))
return;
@@ -396,7 +396,7 @@ fs_lower_opcode_tgsi_const_gen7(struct fs_compile_context *fcc,
struct toy_src desc;
struct toy_inst *inst;
struct toy_dst tmp, real_dst[4];
- int i;
+ unsigned i;
if (fs_lower_opcode_tgsi_const_pcb(fcc, dst, dim, idx))
return;
@@ -1168,7 +1168,7 @@ fs_lower_opcode_derivative(struct toy_compiler *tc, struct toy_inst *inst)
{
struct toy_dst dst[4];
struct toy_src src[4];
- int i;
+ unsigned i;
tdst_transpose(inst->dst, dst);
tsrc_transpose(inst->src[0], src);
@@ -1257,7 +1257,7 @@ fs_lower_opcode_kil(struct toy_compiler *tc, struct toy_inst *inst)
}
else {
struct toy_src src[4];
- int i;
+ unsigned i;
tsrc_transpose(inst->src[0], src);
/* mask out killed pixels */
@@ -1583,7 +1583,7 @@ fs_write_fb(struct fs_compile_context *fcc)
static void
fs_setup_shader_out(struct ilo_shader *sh, const struct toy_tgsi *tgsi)
{
- int i;
+ unsigned i;
sh->out.count = tgsi->num_outputs;
for (i = 0; i < tgsi->num_outputs; i++) {
@@ -1603,7 +1603,7 @@ static void
fs_setup_shader_in(struct ilo_shader *sh, const struct toy_tgsi *tgsi,
bool flatshade)
{
- int i;
+ unsigned i;
sh->in.count = tgsi->num_inputs;
for (i = 0; i < tgsi->num_inputs; i++) {
diff --git a/src/gallium/drivers/ilo/shader/ilo_shader_vs.c b/src/gallium/drivers/ilo/shader/ilo_shader_vs.c
index a29baab10c1..0df0afc706b 100644
--- a/src/gallium/drivers/ilo/shader/ilo_shader_vs.c
+++ b/src/gallium/drivers/ilo/shader/ilo_shader_vs.c
@@ -126,7 +126,7 @@ vs_lower_opcode_tgsi_const_gen6(struct vs_compile_context *vcc,
tc_MOV(tc, block_offsets, idx);
msg_type = GEN6_MSG_DP_OWORD_DUAL_BLOCK_READ;
- msg_ctrl = GEN6_MSG_DP_OWORD_DUAL_BLOCK_SIZE_1;;
+ msg_ctrl = GEN6_MSG_DP_OWORD_DUAL_BLOCK_SIZE_1;
msg_len = 2;
desc = tsrc_imm_mdesc_data_port(tc, false, msg_len, 1, true, false,
@@ -522,7 +522,7 @@ vs_prepare_tgsi_sampling(struct vs_compile_context *vcc,
if (num_coords >= 3) {
struct toy_dst tmp, max;
struct toy_src abs_coords[3];
- int i;
+ unsigned i;
tmp = tc_alloc_tmp(tc);
max = tdst_writemask(tmp, TOY_WRITEMASK_W);
@@ -804,7 +804,7 @@ static int
vs_collect_outputs(struct vs_compile_context *vcc, struct toy_src *outs)
{
const struct toy_tgsi *tgsi = &vcc->tgsi;
- int i;
+ unsigned i;
for (i = 0; i < vcc->shader->out.count; i++) {
const int slot = vcc->output_map[i];
diff --git a/src/gallium/drivers/ilo/shader/toy_legalize_ra.c b/src/gallium/drivers/ilo/shader/toy_legalize_ra.c
index b725375fb67..1874faa6be3 100644
--- a/src/gallium/drivers/ilo/shader/toy_legalize_ra.c
+++ b/src/gallium/drivers/ilo/shader/toy_legalize_ra.c
@@ -70,7 +70,7 @@ struct linear_scan {
static void
linear_scan_free_regs(struct linear_scan *ls, int reg, int count)
{
- int i;
+ unsigned i;
for (i = 0; i < count; i++)
ls->free_regs[ls->num_free_regs++] = reg + count - 1 - i;
@@ -221,7 +221,7 @@ linear_scan_spill(struct linear_scan *ls,
static void
linear_scan_spill_range(struct linear_scan *ls, int first, int count)
{
- int i;
+ unsigned i;
for (i = 0; i < count; i++) {
struct linear_scan_live_interval *interval = &ls->intervals[first + i];
diff --git a/src/gallium/drivers/ilo/shader/toy_tgsi.c b/src/gallium/drivers/ilo/shader/toy_tgsi.c
index d38585f1475..9a7140b9a9b 100644
--- a/src/gallium/drivers/ilo/shader/toy_tgsi.c
+++ b/src/gallium/drivers/ilo/shader/toy_tgsi.c
@@ -1593,7 +1593,7 @@ ra_get_type(struct toy_tgsi *tgsi, const struct tgsi_full_instruction *tgsi_inst
tgsi_inst->Src[operand].Register.File;
switch (file) {
case TGSI_FILE_SAMPLER:
- case TGSI_FILE_RESOURCE:
+ case TGSI_FILE_IMAGE:
case TGSI_FILE_SAMPLER_VIEW:
type = TOY_TYPE_D;
break;
@@ -1834,7 +1834,7 @@ ra_get_src_indirect(struct toy_tgsi *tgsi,
src = tsrc_null();
break;
case TGSI_FILE_SAMPLER:
- case TGSI_FILE_RESOURCE:
+ case TGSI_FILE_IMAGE:
case TGSI_FILE_SAMPLER_VIEW:
is_resource = true;
/* fall through */
@@ -1918,7 +1918,7 @@ ra_get_src(struct toy_tgsi *tgsi,
need_vrf = true;
break;
case TGSI_FILE_SAMPLER:
- case TGSI_FILE_RESOURCE:
+ case TGSI_FILE_IMAGE:
case TGSI_FILE_SAMPLER_VIEW:
assert(!s->Register.Dimension);
src = tsrc_imm_d(s->Register.Index);
@@ -2256,7 +2256,7 @@ parse_declaration(struct toy_tgsi *tgsi,
case TGSI_FILE_SAMPLER:
case TGSI_FILE_PREDICATE:
case TGSI_FILE_ADDRESS:
- case TGSI_FILE_RESOURCE:
+ case TGSI_FILE_IMAGE:
case TGSI_FILE_SAMPLER_VIEW:
/* nothing to do */
break;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
index 0a52642e395..9029d2a4180 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
@@ -63,8 +63,7 @@ enum lp_interp {
LP_INTERP_LINEAR,
LP_INTERP_PERSPECTIVE,
LP_INTERP_POSITION,
- LP_INTERP_FACING,
- LP_INTERP_ZERO
+ LP_INTERP_FACING
};
struct lp_shader_input {
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index 9dcc102e758..62d99bbaac8 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -108,28 +108,22 @@ struct llvmpipe_context {
struct vertex_info vertex_info;
/** Which vertex shader output slot contains color */
- uint8_t color_slot[2];
+ int8_t color_slot[2];
/** Which vertex shader output slot contains bcolor */
- uint8_t bcolor_slot[2];
+ int8_t bcolor_slot[2];
/** Which vertex shader output slot contains point size */
- uint8_t psize_slot;
+ int8_t psize_slot;
/** Which vertex shader output slot contains viewport index */
- uint8_t viewport_index_slot;
+ int8_t viewport_index_slot;
/** Which geometry shader output slot contains layer */
- uint8_t layer_slot;
+ int8_t layer_slot;
/** A fake frontface output for unfilled primitives */
- uint8_t face_slot;
-
- /** Which output slot is used for the fake vp index info */
- uint8_t fake_vpindex_slot;
-
- /** Which output slot is used for the fake layer info */
- uint8_t fake_layer_slot;
+ int8_t face_slot;
/** Depth format and bias settings. */
boolean floating_point_depth;
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
index c19f9318006..db45cbbb057 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -115,7 +115,7 @@ struct lp_rast_plane {
int32_t dcdy;
/* one-pixel sized trivial reject offsets for each plane */
- int64_t eo;
+ uint32_t eo;
};
/**
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
index c9b9221d87c..232c8599e42 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -133,36 +133,8 @@ lp_rast_triangle_4_16(struct lp_rasterizer_task *task,
lp_rast_triangle_4(task, arg2);
}
-#if !defined(PIPE_ARCH_SSE)
+#if defined(PIPE_ARCH_SSE)
-void
-lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
- const union lp_rast_cmd_arg arg)
-{
- union lp_rast_cmd_arg arg2;
- arg2.triangle.tri = arg.triangle.tri;
- arg2.triangle.plane_mask = (1<<3)-1;
- lp_rast_triangle_32_3(task, arg2);
-}
-
-void
-lp_rast_triangle_32_4_16(struct lp_rasterizer_task *task,
- const union lp_rast_cmd_arg arg)
-{
- union lp_rast_cmd_arg arg2;
- arg2.triangle.tri = arg.triangle.tri;
- arg2.triangle.plane_mask = (1<<4)-1;
- lp_rast_triangle_32_4(task, arg2);
-}
-
-void
-lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task,
- const union lp_rast_cmd_arg arg)
-{
- lp_rast_triangle_32_3_16(task, arg);
-}
-
-#else
#include <emmintrin.h>
#include "util/u_sse.h"
@@ -265,12 +237,6 @@ sign_bits4(const __m128i *cstep, int cdiff)
#define NR_PLANES 3
-
-
-
-
-
-
void
lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
@@ -381,10 +347,6 @@ lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
0xffff & ~out[i].mask);
}
-
-
-
-
void
lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
@@ -471,6 +433,254 @@ lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task,
}
#undef NR_PLANES
+
+#else
+
+#if defined(_ARCH_PWR8) && defined(PIPE_ARCH_LITTLE_ENDIAN)
+
+#include <altivec.h>
+#include "util/u_pwr8.h"
+
+static inline void
+build_masks_32(int c,
+ int cdiff,
+ int dcdx,
+ int dcdy,
+ unsigned *outmask,
+ unsigned *partmask)
+{
+ __m128i cstep0 = vec_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3);
+ __m128i xdcdy = (__m128i) vec_splats(dcdy);
+
+ /* Get values across the quad
+ */
+ __m128i cstep1 = vec_add_epi32(cstep0, xdcdy);
+ __m128i cstep2 = vec_add_epi32(cstep1, xdcdy);
+ __m128i cstep3 = vec_add_epi32(cstep2, xdcdy);
+
+ {
+ __m128i cstep01, cstep23, result;
+
+ cstep01 = vec_packs_epi32(cstep0, cstep1);
+ cstep23 = vec_packs_epi32(cstep2, cstep3);
+ result = vec_packs_epi16(cstep01, cstep23);
+
+ *outmask |= vec_movemask_epi8(result);
+ }
+
+
+ {
+ __m128i cio4 = (__m128i) vec_splats(cdiff);
+ __m128i cstep01, cstep23, result;
+
+ cstep0 = vec_add_epi32(cstep0, cio4);
+ cstep1 = vec_add_epi32(cstep1, cio4);
+ cstep2 = vec_add_epi32(cstep2, cio4);
+ cstep3 = vec_add_epi32(cstep3, cio4);
+
+ cstep01 = vec_packs_epi32(cstep0, cstep1);
+ cstep23 = vec_packs_epi32(cstep2, cstep3);
+ result = vec_packs_epi16(cstep01, cstep23);
+
+ *partmask |= vec_movemask_epi8(result);
+ }
+}
+
+static inline unsigned
+build_mask_linear_32(int c, int dcdx, int dcdy)
+{
+ __m128i cstep0 = vec_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3);
+ __m128i xdcdy = (__m128i) vec_splats(dcdy);
+
+ /* Get values across the quad
+ */
+ __m128i cstep1 = vec_add_epi32(cstep0, xdcdy);
+ __m128i cstep2 = vec_add_epi32(cstep1, xdcdy);
+ __m128i cstep3 = vec_add_epi32(cstep2, xdcdy);
+
+ /* pack pairs of results into epi16
+ */
+ __m128i cstep01 = vec_packs_epi32(cstep0, cstep1);
+ __m128i cstep23 = vec_packs_epi32(cstep2, cstep3);
+
+ /* pack into epi8, preserving sign bits
+ */
+ __m128i result = vec_packs_epi16(cstep01, cstep23);
+
+ /* extract sign bits to create mask
+ */
+ return vec_movemask_epi8(result);
+}
+
+static inline __m128i
+lp_plane_to_m128i(const struct lp_rast_plane *plane)
+{
+ return vec_setr_epi32((int32_t)plane->c, (int32_t)plane->dcdx,
+ (int32_t)plane->dcdy, (int32_t)plane->eo);
+}
+
+#define NR_PLANES 3
+
+void
+lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ const struct lp_rast_triangle *tri = arg.triangle.tri;
+ const struct lp_rast_plane *plane = GET_PLANES(tri);
+ int x = (arg.triangle.plane_mask & 0xff) + task->x;
+ int y = (arg.triangle.plane_mask >> 8) + task->y;
+ unsigned i, j;
+
+ struct { unsigned mask:16; unsigned i:8; unsigned j:8; } out[16];
+ unsigned nr = 0;
+
+ __m128i p0 = lp_plane_to_m128i(&plane[0]); /* c, dcdx, dcdy, eo */
+ __m128i p1 = lp_plane_to_m128i(&plane[1]); /* c, dcdx, dcdy, eo */
+ __m128i p2 = lp_plane_to_m128i(&plane[2]); /* c, dcdx, dcdy, eo */
+ __m128i zero = vec_splats((unsigned char) 0);
+
+ __m128i c;
+ __m128i dcdx;
+ __m128i dcdy;
+ __m128i rej4;
+
+ __m128i dcdx2;
+ __m128i dcdx3;
+
+ __m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */
+ __m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */
+ __m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */
+ __m128i unused;
+
+ __m128i vshuf_mask0;
+ __m128i vshuf_mask1;
+ __m128i vshuf_mask2;
+
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
+ vshuf_mask0 = (__m128i) vec_splats((unsigned int) 0x03020100);
+ vshuf_mask1 = (__m128i) vec_splats((unsigned int) 0x07060504);
+ vshuf_mask2 = (__m128i) vec_splats((unsigned int) 0x0B0A0908);
+#else
+ vshuf_mask0 = (__m128i) vec_splats((unsigned int) 0x0C0D0E0F);
+ vshuf_mask1 = (__m128i) vec_splats((unsigned int) 0x08090A0B);
+ vshuf_mask2 = (__m128i) vec_splats((unsigned int) 0x04050607);
+#endif
+
+ transpose4_epi32(&p0, &p1, &p2, &zero,
+ &c, &dcdx, &dcdy, &rej4);
+
+ /* Adjust dcdx;
+ */
+ dcdx = vec_sub_epi32(zero, dcdx);
+
+ c = vec_add_epi32(c, vec_mullo_epi32(dcdx, (__m128i) vec_splats(x)));
+ c = vec_add_epi32(c, vec_mullo_epi32(dcdy, (__m128i) vec_splats(y)));
+ rej4 = vec_slli_epi32(rej4, 2);
+
+ /*
+ * Adjust so we can just check the sign bit (< 0 comparison),
+ * instead of having to do a less efficient <= 0 comparison
+ */
+ c = vec_sub_epi32(c, (__m128i) vec_splats((unsigned int) 1));
+ rej4 = vec_add_epi32(rej4, (__m128i) vec_splats((unsigned int) 1));
+
+ dcdx2 = vec_add_epi32(dcdx, dcdx);
+ dcdx3 = vec_add_epi32(dcdx2, dcdx);
+
+ transpose4_epi32(&zero, &dcdx, &dcdx2, &dcdx3,
+ &span_0, &span_1, &span_2, &unused);
+
+ for (i = 0; i < 4; i++) {
+ __m128i cx = c;
+
+ for (j = 0; j < 4; j++) {
+ __m128i c4rej = vec_add_epi32(cx, rej4);
+ __m128i rej_masks = vec_srai_epi32(c4rej, 31);
+
+ /* if (is_zero(rej_masks)) */
+ if (vec_movemask_epi8(rej_masks) == 0) {
+ __m128i c0_0 = vec_add_epi32(vec_perm(cx, cx, vshuf_mask0), span_0);
+ __m128i c1_0 = vec_add_epi32(vec_perm(cx, cx, vshuf_mask1), span_1);
+ __m128i c2_0 = vec_add_epi32(vec_perm(cx, cx, vshuf_mask2), span_2);
+
+ __m128i c_0 = vec_or(vec_or(c0_0, c1_0), c2_0);
+
+ __m128i c0_1 = vec_add_epi32(c0_0, vec_perm(dcdy, dcdy, vshuf_mask0));
+ __m128i c1_1 = vec_add_epi32(c1_0, vec_perm(dcdy, dcdy, vshuf_mask1));
+ __m128i c2_1 = vec_add_epi32(c2_0, vec_perm(dcdy, dcdy, vshuf_mask2));
+
+ __m128i c_1 = vec_or(vec_or(c0_1, c1_1), c2_1);
+ __m128i c_01 = vec_packs_epi32(c_0, c_1);
+
+ __m128i c0_2 = vec_add_epi32(c0_1, vec_perm(dcdy, dcdy, vshuf_mask0));
+ __m128i c1_2 = vec_add_epi32(c1_1, vec_perm(dcdy, dcdy, vshuf_mask1));
+ __m128i c2_2 = vec_add_epi32(c2_1, vec_perm(dcdy, dcdy, vshuf_mask2));
+
+ __m128i c_2 = vec_or(vec_or(c0_2, c1_2), c2_2);
+
+ __m128i c0_3 = vec_add_epi32(c0_2, vec_perm(dcdy, dcdy, vshuf_mask0));
+ __m128i c1_3 = vec_add_epi32(c1_2, vec_perm(dcdy, dcdy, vshuf_mask1));
+ __m128i c2_3 = vec_add_epi32(c2_2, vec_perm(dcdy, dcdy, vshuf_mask2));
+
+ __m128i c_3 = vec_or(vec_or(c0_3, c1_3), c2_3);
+ __m128i c_23 = vec_packs_epi32(c_2, c_3);
+ __m128i c_0123 = vec_packs_epi16(c_01, c_23);
+
+ unsigned mask = vec_movemask_epi8(c_0123);
+
+ out[nr].i = i;
+ out[nr].j = j;
+ out[nr].mask = mask;
+ if (mask != 0xffff)
+ nr++;
+ }
+ cx = vec_add_epi32(cx, vec_slli_epi32(dcdx, 2));
+ }
+
+ c = vec_add_epi32(c, vec_slli_epi32(dcdy, 2));
+ }
+
+ for (i = 0; i < nr; i++)
+ lp_rast_shade_quads_mask(task,
+ &tri->inputs,
+ x + 4 * out[i].j,
+ y + 4 * out[i].i,
+ 0xffff & ~out[i].mask);
+}
+
+#undef NR_PLANES
+
+#else
+
+void
+lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ union lp_rast_cmd_arg arg2;
+ arg2.triangle.tri = arg.triangle.tri;
+ arg2.triangle.plane_mask = (1<<3)-1;
+ lp_rast_triangle_32_3(task, arg2);
+}
+
+#endif /* _ARCH_PWR8 && PIPE_ARCH_LITTLE_ENDIAN */
+
+void
+lp_rast_triangle_32_4_16(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ union lp_rast_cmd_arg arg2;
+ arg2.triangle.tri = arg.triangle.tri;
+ arg2.triangle.plane_mask = (1<<4)-1;
+ lp_rast_triangle_32_4(task, arg2);
+}
+
+void
+lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ lp_rast_triangle_32_3_16(task, arg);
+}
+
#endif
@@ -512,7 +722,7 @@ lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task,
#define NR_PLANES 8
#include "lp_rast_tri_tmp.h"
-#ifdef PIPE_ARCH_SSE
+#if defined(PIPE_ARCH_SSE) || (defined(_ARCH_PWR8) && defined(PIPE_ARCH_LITTLE_ENDIAN))
#undef BUILD_MASKS
#undef BUILD_MASK_LINEAR
#define BUILD_MASKS(c, cdiff, dcdx, dcdy, omask, pmask) build_masks_32((int)c, (int)cdiff, dcdx, dcdy, omask, pmask)
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
index 52f6e999683..e0aea94205e 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
@@ -82,7 +82,7 @@ TAG(do_block_16)(struct lp_rasterizer_task *task,
const int64_t dcdx = -IMUL64(plane[j].dcdx, 4);
const int64_t dcdy = IMUL64(plane[j].dcdy, 4);
const int64_t cox = IMUL64(plane[j].eo, 4);
- const int64_t ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo;
+ const int64_t ei = plane[j].dcdy - plane[j].dcdx - (int64_t)plane[j].eo;
const int64_t cio = IMUL64(ei, 4) - 1;
BUILD_MASKS(c[j] + cox,
@@ -182,7 +182,7 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
const int64_t dcdx = -IMUL64(plane[j].dcdx, 16);
const int64_t dcdy = IMUL64(plane[j].dcdy, 16);
const int64_t cox = IMUL64(plane[j].eo, 16);
- const int64_t ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo;
+ const int64_t ei = plane[j].dcdy - plane[j].dcdx - (int64_t)plane[j].eo;
const int64_t cio = IMUL64(ei, 16) - 1;
BUILD_MASKS(c[j] + cox,
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 899f28da7d3..e29b008c7e8 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -301,6 +301,13 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_CLEAR_TEXTURE:
+ case PIPE_CAP_DRAW_PARAMETERS:
+ case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
+ case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+ case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
return 0;
}
/* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index ddbb88eb107..bd850519468 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -486,6 +486,11 @@ lp_setup_try_clear_zs(struct lp_setup_context *setup,
depth,
stencil);
+ /*
+ * XXX: should make a full mask here for things like D24X8,
+ * otherwise we'll do a read-modify-write clear later which
+ * should be unnecessary.
+ */
zsmask = util_pack64_mask_z_stencil(setup->fb.zsbuf->format,
zmask32,
smask8);
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h
index 4451284c303..80acd74bddd 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h
@@ -105,10 +105,10 @@ struct lp_setup_context
float pixel_offset;
float line_width;
float point_size;
- uint8_t psize_slot;
- uint8_t viewport_index_slot;
- uint8_t layer_slot;
- uint8_t face_slot;
+ int8_t psize_slot;
+ int8_t viewport_index_slot;
+ int8_t layer_slot;
+ int8_t face_slot;
struct pipe_framebuffer_state fb;
struct u_rect framebuffer;
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c
index fac1cd61d77..a0de599c9c6 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_line.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c
@@ -644,19 +644,25 @@ try_setup_line( struct lp_setup_context *setup,
line->inputs.layer = layer;
line->inputs.viewport_index = viewport_index;
+ /*
+ * XXX: this code is mostly identical to the one in lp_setup_tri, except it
+ * uses 4 planes instead of 3. Could share the code (including the sse
+ * assembly, in fact we'd get the 4th plane for free).
+ * The only difference apart from storing the 4th plane would be some
+ * different shuffle for calculating dcdx/dcdy.
+ */
for (i = 0; i < 4; i++) {
- /* half-edge constants, will be interated over the whole render
+ /* half-edge constants, will be iterated over the whole render
* target.
*/
plane[i].c = IMUL64(plane[i].dcdx, x[i]) - IMUL64(plane[i].dcdy, y[i]);
-
- /* correct for top-left vs. bottom-left fill convention.
- */
+ /* correct for top-left vs. bottom-left fill convention.
+ */
if (plane[i].dcdx < 0) {
/* both fill conventions want this - adjust for left edges */
- plane[i].c++;
+ plane[i].c++;
}
else if (plane[i].dcdx == 0) {
if (setup->pixel_offset == 0) {
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index b1671dd0ae2..358da442ea7 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -46,6 +46,9 @@
#if defined(PIPE_ARCH_SSE)
#include <emmintrin.h>
+#elif defined(_ARCH_PWR8) && defined(PIPE_ARCH_LITTLE_ENDIAN)
+#include <altivec.h>
+#include "util/u_pwr8.h"
#endif
static inline int
@@ -387,25 +390,21 @@ do_triangle_ccw(struct lp_setup_context *setup,
plane = GET_PLANES(tri);
#if defined(PIPE_ARCH_SSE)
- if (setup->fb.width <= MAX_FIXED_LENGTH32 &&
- setup->fb.height <= MAX_FIXED_LENGTH32 &&
- (bbox.x1 - bbox.x0) <= MAX_FIXED_LENGTH32 &&
- (bbox.y1 - bbox.y0) <= MAX_FIXED_LENGTH32) {
+ if (1) {
__m128i vertx, verty;
__m128i shufx, shufy;
- __m128i dcdx, dcdy, c;
- __m128i unused;
+ __m128i dcdx, dcdy;
+ __m128i cdx02, cdx13, cdy02, cdy13, c02, c13;
+ __m128i c01, c23, unused;
__m128i dcdx_neg_mask;
__m128i dcdy_neg_mask;
__m128i dcdx_zero_mask;
- __m128i top_left_flag;
- __m128i c_inc_mask, c_inc;
+ __m128i top_left_flag, c_dec;
__m128i eo, p0, p1, p2;
__m128i zero = _mm_setzero_si128();
- PIPE_ALIGN_VAR(16) int32_t temp_vec[4];
- vertx = _mm_loadu_si128((__m128i *)position->x); /* vertex x coords */
- verty = _mm_loadu_si128((__m128i *)position->y); /* vertex y coords */
+ vertx = _mm_load_si128((__m128i *)position->x); /* vertex x coords */
+ verty = _mm_load_si128((__m128i *)position->y); /* vertex y coords */
shufx = _mm_shuffle_epi32(vertx, _MM_SHUFFLE(3,0,2,1));
shufy = _mm_shuffle_epi32(verty, _MM_SHUFFLE(3,0,2,1));
@@ -419,42 +418,161 @@ do_triangle_ccw(struct lp_setup_context *setup,
top_left_flag = _mm_set1_epi32((setup->bottom_edge_rule == 0) ? ~0 : 0);
- c_inc_mask = _mm_or_si128(dcdx_neg_mask,
- _mm_and_si128(dcdx_zero_mask,
- _mm_xor_si128(dcdy_neg_mask,
- top_left_flag)));
-
- c_inc = _mm_srli_epi32(c_inc_mask, 31);
-
- c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx),
- mm_mullo_epi32(dcdy, verty));
+ c_dec = _mm_or_si128(dcdx_neg_mask,
+ _mm_and_si128(dcdx_zero_mask,
+ _mm_xor_si128(dcdy_neg_mask,
+ top_left_flag)));
- c = _mm_add_epi32(c, c_inc);
+ /*
+ * 64 bit arithmetic.
+ * Note we need _signed_ mul (_mm_mul_epi32) which we emulate.
+ */
+ cdx02 = mm_mullohi_epi32(dcdx, vertx, &cdx13);
+ cdy02 = mm_mullohi_epi32(dcdy, verty, &cdy13);
+ c02 = _mm_sub_epi64(cdx02, cdy02);
+ c13 = _mm_sub_epi64(cdx13, cdy13);
+ c02 = _mm_sub_epi64(c02, _mm_shuffle_epi32(c_dec,
+ _MM_SHUFFLE(2,2,0,0)));
+ c13 = _mm_sub_epi64(c13, _mm_shuffle_epi32(c_dec,
+ _MM_SHUFFLE(3,3,1,1)));
+
+ /*
+ * Useful for very small fbs/tris (or fewer subpixel bits) only:
+ * c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx),
+ * mm_mullo_epi32(dcdy, verty));
+ *
+ * c = _mm_sub_epi32(c, c_dec);
+ */
/* Scale up to match c:
*/
dcdx = _mm_slli_epi32(dcdx, FIXED_ORDER);
dcdy = _mm_slli_epi32(dcdy, FIXED_ORDER);
- /* Calculate trivial reject values:
+ /*
+ * Calculate trivial reject values:
+ * Note eo cannot overflow even if dcdx/dcdy would already have
+ * 31 bits (which they shouldn't have). This is because eo
+ * is never negative (albeit if we rely on that need to be careful...)
*/
eo = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy),
_mm_and_si128(dcdx_neg_mask, dcdx));
/* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */
+ /*
+ * Pointless transpose which gets undone immediately in
+ * rasterization.
+ * It is actually difficult to do away with it - would essentially
+ * need GET_PLANES_DX, GET_PLANES_DY etc., but the calculations
+ * for this then would need to depend on the number of planes.
+ * The transpose is quite special here due to c being 64bit...
+ * The store has to be unaligned (unless we'd make the plane size
+ * a multiple of 128), and of course storing eo separately...
+ */
+ c01 = _mm_unpacklo_epi64(c02, c13);
+ c23 = _mm_unpackhi_epi64(c02, c13);
+ transpose2_64_2_32(&c01, &c23, &dcdx, &dcdy,
+ &p0, &p1, &p2, &unused);
+ _mm_storeu_si128((__m128i *)&plane[0], p0);
+ plane[0].eo = (uint32_t)_mm_cvtsi128_si32(eo);
+ _mm_storeu_si128((__m128i *)&plane[1], p1);
+ eo = _mm_shuffle_epi32(eo, _MM_SHUFFLE(3,2,0,1));
+ plane[1].eo = (uint32_t)_mm_cvtsi128_si32(eo);
+ _mm_storeu_si128((__m128i *)&plane[2], p2);
+ eo = _mm_shuffle_epi32(eo, _MM_SHUFFLE(0,0,0,2));
+ plane[2].eo = (uint32_t)_mm_cvtsi128_si32(eo);
+ } else
+#elif defined(_ARCH_PWR8) && defined(PIPE_ARCH_LITTLE_ENDIAN)
+ /*
+ * XXX this code is effectively disabled for all practical purposes,
+ * as the allowed fb size is tiny if FIXED_ORDER is 8.
+ */
+ if (setup->fb.width <= MAX_FIXED_LENGTH32 &&
+ setup->fb.height <= MAX_FIXED_LENGTH32 &&
+ (bbox.x1 - bbox.x0) <= MAX_FIXED_LENGTH32 &&
+ (bbox.y1 - bbox.y0) <= MAX_FIXED_LENGTH32) {
+ unsigned int bottom_edge;
+ __m128i vertx, verty;
+ __m128i shufx, shufy;
+ __m128i dcdx, dcdy, c;
+ __m128i unused;
+ __m128i dcdx_neg_mask;
+ __m128i dcdy_neg_mask;
+ __m128i dcdx_zero_mask;
+ __m128i top_left_flag;
+ __m128i c_inc_mask, c_inc;
+ __m128i eo, p0, p1, p2;
+ __m128i_union vshuf_mask;
+ __m128i zero = vec_splats((unsigned char) 0);
+ PIPE_ALIGN_VAR(16) int32_t temp_vec[4];
+
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
+ vshuf_mask.i[0] = 0x07060504;
+ vshuf_mask.i[1] = 0x0B0A0908;
+ vshuf_mask.i[2] = 0x03020100;
+ vshuf_mask.i[3] = 0x0F0E0D0C;
+#else
+ vshuf_mask.i[0] = 0x00010203;
+ vshuf_mask.i[1] = 0x0C0D0E0F;
+ vshuf_mask.i[2] = 0x04050607;
+ vshuf_mask.i[3] = 0x08090A0B;
+#endif
+
+ /* vertex x coords */
+ vertx = vec_load_si128((const uint32_t *) position->x);
+ /* vertex y coords */
+ verty = vec_load_si128((const uint32_t *) position->y);
+
+ shufx = vec_perm (vertx, vertx, vshuf_mask.m128i);
+ shufy = vec_perm (verty, verty, vshuf_mask.m128i);
+
+ dcdx = vec_sub_epi32(verty, shufy);
+ dcdy = vec_sub_epi32(vertx, shufx);
+
+ dcdx_neg_mask = vec_srai_epi32(dcdx, 31);
+ dcdx_zero_mask = vec_cmpeq_epi32(dcdx, zero);
+ dcdy_neg_mask = vec_srai_epi32(dcdy, 31);
+
+ bottom_edge = (setup->bottom_edge_rule == 0) ? ~0 : 0;
+ top_left_flag = (__m128i) vec_splats(bottom_edge);
+
+ c_inc_mask = vec_or(dcdx_neg_mask,
+ vec_and(dcdx_zero_mask,
+ vec_xor(dcdy_neg_mask,
+ top_left_flag)));
+
+ c_inc = vec_srli_epi32(c_inc_mask, 31);
+
+ c = vec_sub_epi32(vec_mullo_epi32(dcdx, vertx),
+ vec_mullo_epi32(dcdy, verty));
+
+ c = vec_add_epi32(c, c_inc);
+
+ /* Scale up to match c:
+ */
+ dcdx = vec_slli_epi32(dcdx, FIXED_ORDER);
+ dcdy = vec_slli_epi32(dcdy, FIXED_ORDER);
+
+ /* Calculate trivial reject values:
+ */
+ eo = vec_sub_epi32(vec_andc(dcdy_neg_mask, dcdy),
+ vec_and(dcdx_neg_mask, dcdx));
+
+ /* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */
+
/* Pointless transpose which gets undone immediately in
* rasterization:
*/
transpose4_epi32(&c, &dcdx, &dcdy, &eo,
&p0, &p1, &p2, &unused);
-#define STORE_PLANE(plane, vec) do { \
- _mm_store_si128((__m128i *)&temp_vec, vec); \
- plane.c = (int64_t)temp_vec[0]; \
- plane.dcdx = temp_vec[1]; \
- plane.dcdy = temp_vec[2]; \
- plane.eo = temp_vec[3]; \
+#define STORE_PLANE(plane, vec) do { \
+ vec_store_si128((uint32_t *)&temp_vec, vec); \
+ plane.c = (int64_t)temp_vec[0]; \
+ plane.dcdx = temp_vec[1]; \
+ plane.dcdy = temp_vec[2]; \
+ plane.eo = temp_vec[3]; \
} while(0)
STORE_PLANE(plane[0], p0);
@@ -473,17 +591,17 @@ do_triangle_ccw(struct lp_setup_context *setup,
plane[2].dcdx = position->dy20;
for (i = 0; i < 3; i++) {
- /* half-edge constants, will be interated over the whole render
+ /* half-edge constants, will be iterated over the whole render
* target.
*/
plane[i].c = IMUL64(plane[i].dcdx, position->x[i]) -
- IMUL64(plane[i].dcdy, position->y[i]);
+ IMUL64(plane[i].dcdy, position->y[i]);
/* correct for top-left vs. bottom-left fill convention.
- */
+ */
if (plane[i].dcdx < 0) {
/* both fill conventions want this - adjust for left edges */
- plane[i].c++;
+ plane[i].c++;
}
else if (plane[i].dcdx == 0) {
if (setup->bottom_edge_rule == 0){
@@ -517,19 +635,19 @@ do_triangle_ccw(struct lp_setup_context *setup,
}
if (0) {
- debug_printf("p0: %"PRIx64"/%08x/%08x/%"PRIx64"\n",
+ debug_printf("p0: %"PRIx64"/%08x/%08x/%08x\n",
plane[0].c,
plane[0].dcdx,
plane[0].dcdy,
plane[0].eo);
-
- debug_printf("p1: %"PRIx64"/%08x/%08x/%"PRIx64"\n",
+
+ debug_printf("p1: %"PRIx64"/%08x/%08x/%08x\n",
plane[1].c,
plane[1].dcdx,
plane[1].dcdy,
plane[1].eo);
-
- debug_printf("p2: %"PRIx64"/%08x/%08x/%"PRIx64"\n",
+
+ debug_printf("p2: %"PRIx64"/%08x/%08x/%08x\n",
plane[2].c,
plane[2].dcdx,
plane[2].dcdy,
@@ -590,7 +708,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
static inline uint32_t
floor_pot(uint32_t n)
{
-#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)
+#if defined(PIPE_CC_GCC) && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64))
if (n == 0)
return 0;
@@ -738,9 +856,9 @@ lp_setup_bin_triangle( struct lp_setup_context *setup,
ei[i] = (plane[i].dcdy -
plane[i].dcdx -
- plane[i].eo) << TILE_ORDER;
+ (int64_t)plane[i].eo) << TILE_ORDER;
- eo[i] = plane[i].eo << TILE_ORDER;
+ eo[i] = (int64_t)plane[i].eo << TILE_ORDER;
xstep[i] = -(((int64_t)plane[i].dcdx) << TILE_ORDER);
ystep[i] = ((int64_t)plane[i].dcdy) << TILE_ORDER;
}
@@ -932,12 +1050,12 @@ rotate_fixed_position_12( struct fixed_position* position )
/**
* Draw triangle if it's CW, cull otherwise.
*/
-static void triangle_cw( struct lp_setup_context *setup,
- const float (*v0)[4],
- const float (*v1)[4],
- const float (*v2)[4] )
+static void triangle_cw(struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4])
{
- struct fixed_position position;
+ PIPE_ALIGN_VAR(16) struct fixed_position position;
calc_fixed_position(setup, &position, v0, v1, v2);
@@ -953,12 +1071,12 @@ static void triangle_cw( struct lp_setup_context *setup,
}
-static void triangle_ccw( struct lp_setup_context *setup,
- const float (*v0)[4],
- const float (*v1)[4],
- const float (*v2)[4])
+static void triangle_ccw(struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4])
{
- struct fixed_position position;
+ PIPE_ALIGN_VAR(16) struct fixed_position position;
calc_fixed_position(setup, &position, v0, v1, v2);
@@ -969,12 +1087,12 @@ static void triangle_ccw( struct lp_setup_context *setup,
/**
* Draw triangle whether it's CW or CCW.
*/
-static void triangle_both( struct lp_setup_context *setup,
- const float (*v0)[4],
- const float (*v1)[4],
- const float (*v2)[4] )
+static void triangle_both(struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4])
{
- struct fixed_position position;
+ PIPE_ALIGN_VAR(16) struct fixed_position position;
struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
if (lp_context->active_statistics_queries &&
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index f5bcfb2b511..34961cbbac5 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -48,21 +48,26 @@
static void
compute_vertex_info(struct llvmpipe_context *llvmpipe)
{
- const struct lp_fragment_shader *lpfs = llvmpipe->fs;
+ const struct tgsi_shader_info *fsInfo = &llvmpipe->fs->info.base;
struct vertex_info *vinfo = &llvmpipe->vertex_info;
int vs_index;
uint i;
draw_prepare_shader_outputs(llvmpipe->draw);
- llvmpipe->color_slot[0] = 0;
- llvmpipe->color_slot[1] = 0;
- llvmpipe->bcolor_slot[0] = 0;
- llvmpipe->bcolor_slot[1] = 0;
- llvmpipe->viewport_index_slot = 0;
- llvmpipe->layer_slot = 0;
- llvmpipe->face_slot = 0;
- llvmpipe->psize_slot = 0;
+ /*
+ * Those can't actually be 0 (because pos is always at 0).
+ * But use ints anyway to avoid confusion (in vs outputs, they
+ * can very well be at pos 0).
+ */
+ llvmpipe->color_slot[0] = -1;
+ llvmpipe->color_slot[1] = -1;
+ llvmpipe->bcolor_slot[0] = -1;
+ llvmpipe->bcolor_slot[1] = -1;
+ llvmpipe->viewport_index_slot = -1;
+ llvmpipe->layer_slot = -1;
+ llvmpipe->face_slot = -1;
+ llvmpipe->psize_slot = -1;
/*
* Match FS inputs against VS outputs, emitting the necessary
@@ -73,60 +78,49 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
vinfo->num_attribs = 0;
vs_index = draw_find_shader_output(llvmpipe->draw,
- TGSI_SEMANTIC_POSITION,
- 0);
+ TGSI_SEMANTIC_POSITION, 0);
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
- for (i = 0; i < lpfs->info.base.num_inputs; i++) {
+ for (i = 0; i < fsInfo->num_inputs; i++) {
/*
* Search for each input in current vs output:
*/
-
vs_index = draw_find_shader_output(llvmpipe->draw,
- lpfs->info.base.input_semantic_name[i],
- lpfs->info.base.input_semantic_index[i]);
+ fsInfo->input_semantic_name[i],
+ fsInfo->input_semantic_index[i]);
- if (lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_COLOR &&
- lpfs->info.base.input_semantic_index[i] < 2) {
- int idx = lpfs->info.base.input_semantic_index[i];
- llvmpipe->color_slot[idx] = vinfo->num_attribs;
+ if (fsInfo->input_semantic_name[i] == TGSI_SEMANTIC_COLOR &&
+ fsInfo->input_semantic_index[i] < 2) {
+ int idx = fsInfo->input_semantic_index[i];
+ llvmpipe->color_slot[idx] = (int)vinfo->num_attribs;
}
- if (lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_FACE) {
- llvmpipe->face_slot = vinfo->num_attribs;
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
- } else if (lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_PRIMID) {
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
+ if (fsInfo->input_semantic_name[i] == TGSI_SEMANTIC_FACE) {
+ llvmpipe->face_slot = (int)vinfo->num_attribs;
+ draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
/*
* For vp index and layer, if the fs requires them but the vs doesn't
- * provide them, store the slot - we'll later replace the data directly
- * with zero (as required by ARB_fragment_layer_viewport). This is
- * because draw itself just redirects them to whatever was at output 0.
- * We'll also store the real vpindex/layer slot for setup use.
+ * provide them, draw (vbuf) will give us the required 0 (slot -1).
+ * (This means in this case we'll also use those slots in setup, which
+ * isn't necessary but they'll contain the correct (0) value.)
*/
- } else if (lpfs->info.base.input_semantic_name[i] ==
+ } else if (fsInfo->input_semantic_name[i] ==
TGSI_SEMANTIC_VIEWPORT_INDEX) {
- if (vs_index >= 0) {
- llvmpipe->viewport_index_slot = vinfo->num_attribs;
- }
- else {
- llvmpipe->fake_vpindex_slot = vinfo->num_attribs;
- }
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
- } else if (lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_LAYER) {
- if (vs_index >= 0) {
- llvmpipe->layer_slot = vinfo->num_attribs;
- }
- else {
- llvmpipe->fake_layer_slot = vinfo->num_attribs;
- }
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
+ llvmpipe->viewport_index_slot = (int)vinfo->num_attribs;
+ draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
+ } else if (fsInfo->input_semantic_name[i] == TGSI_SEMANTIC_LAYER) {
+ llvmpipe->layer_slot = (int)vinfo->num_attribs;
+ draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
} else {
/*
- * Emit the requested fs attribute for all but position.
+ * Note that we'd actually want to skip position (as we won't use
+ * the attribute in the fs) but can't. The reason is that we don't
+ * actually have a input/output map for setup (even though it looks
+ * like we do...). Could adjust for this though even without a map
+ * (in llvmpipe_create_fs_state()).
*/
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
}
}
@@ -137,8 +131,8 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
TGSI_SEMANTIC_BCOLOR, i);
if (vs_index >= 0) {
- llvmpipe->bcolor_slot[i] = vinfo->num_attribs;
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index);
+ llvmpipe->bcolor_slot[i] = (int)vinfo->num_attribs;
+ draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
}
}
@@ -148,29 +142,29 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
TGSI_SEMANTIC_PSIZE, 0);
if (vs_index >= 0) {
- llvmpipe->psize_slot = vinfo->num_attribs;
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
+ llvmpipe->psize_slot = (int)vinfo->num_attribs;
+ draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
}
/* Figure out if we need viewport index (if it wasn't already in fs input) */
- if (llvmpipe->viewport_index_slot == 0) {
+ if (llvmpipe->viewport_index_slot < 0) {
vs_index = draw_find_shader_output(llvmpipe->draw,
TGSI_SEMANTIC_VIEWPORT_INDEX,
0);
if (vs_index >= 0) {
- llvmpipe->viewport_index_slot = vinfo->num_attribs;
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
+ llvmpipe->viewport_index_slot =(int)vinfo->num_attribs;
+ draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
}
}
/* Figure out if we need layer (if it wasn't already in fs input) */
- if (llvmpipe->layer_slot == 0) {
+ if (llvmpipe->layer_slot < 0) {
vs_index = draw_find_shader_output(llvmpipe->draw,
TGSI_SEMANTIC_LAYER,
0);
if (vs_index >= 0) {
- llvmpipe->layer_slot = vinfo->num_attribs;
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
+ llvmpipe->layer_slot = (int)vinfo->num_attribs;
+ draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
}
}
@@ -197,10 +191,9 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
llvmpipe->dirty |= LP_NEW_SAMPLER_VIEW;
}
- if (llvmpipe->dirty & (LP_NEW_RASTERIZER |
- LP_NEW_FS |
+ if (llvmpipe->dirty & (LP_NEW_FS |
LP_NEW_VS))
- compute_vertex_info( llvmpipe );
+ compute_vertex_info(llvmpipe);
if (llvmpipe->dirty & (LP_NEW_FS |
LP_NEW_FRAMEBUFFER |
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 079083e9601..83ff97659fb 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -2695,34 +2695,35 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
switch (shader->info.base.input_interpolate[i]) {
case TGSI_INTERPOLATE_CONSTANT:
- shader->inputs[i].interp = LP_INTERP_CONSTANT;
- break;
+ shader->inputs[i].interp = LP_INTERP_CONSTANT;
+ break;
case TGSI_INTERPOLATE_LINEAR:
- shader->inputs[i].interp = LP_INTERP_LINEAR;
- break;
+ shader->inputs[i].interp = LP_INTERP_LINEAR;
+ break;
case TGSI_INTERPOLATE_PERSPECTIVE:
- shader->inputs[i].interp = LP_INTERP_PERSPECTIVE;
- break;
+ shader->inputs[i].interp = LP_INTERP_PERSPECTIVE;
+ break;
case TGSI_INTERPOLATE_COLOR:
- shader->inputs[i].interp = LP_INTERP_COLOR;
- break;
+ shader->inputs[i].interp = LP_INTERP_COLOR;
+ break;
default:
- assert(0);
- break;
+ assert(0);
+ break;
}
switch (shader->info.base.input_semantic_name[i]) {
case TGSI_SEMANTIC_FACE:
- shader->inputs[i].interp = LP_INTERP_FACING;
- break;
+ shader->inputs[i].interp = LP_INTERP_FACING;
+ break;
case TGSI_SEMANTIC_POSITION:
- /* Position was already emitted above
- */
- shader->inputs[i].interp = LP_INTERP_POSITION;
- shader->inputs[i].src_index = 0;
- continue;
+ /* Position was already emitted above
+ */
+ shader->inputs[i].interp = LP_INTERP_POSITION;
+ shader->inputs[i].src_index = 0;
+ continue;
}
+ /* XXX this is a completely pointless index map... */
shader->inputs[i].src_index = i+1;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c b/src/gallium/drivers/llvmpipe/lp_state_setup.c
index d7ba5c8ad8e..6a4fbbbf202 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c
@@ -372,9 +372,9 @@ load_attribute(struct gallivm_state *gallivm,
/* Potentially modify it according to twoside, etc:
*/
if (key->twoside) {
- if (vert_attr == key->color_slot && key->bcolor_slot > 0)
+ if (vert_attr == key->color_slot && key->bcolor_slot >= 0)
lp_twoside(gallivm, args, key, key->bcolor_slot, attribv);
- else if (vert_attr == key->spec_slot && key->bspec_slot > 0)
+ else if (vert_attr == key->spec_slot && key->bspec_slot >= 0)
lp_twoside(gallivm, args, key, key->bspec_slot, attribv);
}
}
@@ -602,13 +602,6 @@ emit_tri_coef( struct gallivm_state *gallivm,
*/
break;
- case LP_INTERP_ZERO:
- /*
- * The information we get from the output is bogus, replace it
- * with zero.
- */
- emit_constant_coef4(gallivm, args, slot+1, args->bld.zero);
- break;
case LP_INTERP_FACING:
emit_facing_coef(gallivm, args, slot+1);
break;
@@ -879,13 +872,7 @@ lp_make_setup_variant_key(struct llvmpipe_context *lp,
key->pad = 0;
memcpy(key->inputs, fs->inputs, key->num_inputs * sizeof key->inputs[0]);
for (i = 0; i < key->num_inputs; i++) {
- if (key->inputs[i].interp == LP_INTERP_CONSTANT) {
- if (key->inputs[i].src_index == lp->fake_vpindex_slot ||
- key->inputs[i].src_index == lp->fake_layer_slot) {
- key->inputs[i].interp = LP_INTERP_ZERO;
- }
- }
- else if (key->inputs[i].interp == LP_INTERP_COLOR) {
+ if (key->inputs[i].interp == LP_INTERP_COLOR) {
if (lp->rasterizer->flatshade)
key->inputs[i].interp = LP_INTERP_CONSTANT;
else
diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.h b/src/gallium/drivers/llvmpipe/lp_state_setup.h
index 6cee6fe5eb5..9ad244482de 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_setup.h
+++ b/src/gallium/drivers/llvmpipe/lp_state_setup.h
@@ -17,10 +17,10 @@ struct lp_setup_variant_list_item
struct lp_setup_variant_key {
unsigned size:16;
unsigned num_inputs:8;
- unsigned color_slot:8;
- unsigned bcolor_slot:8;
- unsigned spec_slot:8;
- unsigned bspec_slot:8;
+ int color_slot:8;
+ int bcolor_slot:8;
+ int spec_slot:8;
+ int bspec_slot:8;
unsigned flatshade_first:1;
unsigned pixel_center_half:1;
unsigned twoside:1;
diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c
index 7b19174f345..9139b83f05a 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -184,7 +184,7 @@ add_blend_test(struct gallivm_state *gallivm,
LLVMBuildStore(builder, res, res_ptr);
- LLVMBuildRetVoid(builder);;
+ LLVMBuildRetVoid(builder);
gallivm_verify_function(gallivm, func);
diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c
index a30f35c8149..02a63193af5 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c
@@ -140,7 +140,7 @@ add_conv_test(struct gallivm_state *gallivm,
LLVMBuildStore(builder, dst[i], ptr);
}
- LLVMBuildRetVoid(builder);;
+ LLVMBuildRetVoid(builder);
gallivm_verify_function(gallivm, func);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index d09a0ab0610..d1fdd75495f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -390,6 +390,9 @@ enum SVSemantic
SV_VERTEX_STRIDE,
SV_INVOCATION_INFO,
SV_THREAD_KILL,
+ SV_BASEVERTEX,
+ SV_BASEINSTANCE,
+ SV_DRAWID,
SV_UNDEFINED,
SV_LAST
};
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index b49bf9d53bc..4504240ac5e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -124,6 +124,7 @@ struct nv50_ir_prog_info
union {
struct {
uint32_t inputMask[4]; /* mask of attributes read (1 bit per scalar) */
+ bool usesDrawParameters;
} vp;
struct {
uint8_t inputPatchSize;
@@ -160,8 +161,9 @@ struct nv50_ir_prog_info
uint8_t clipDistances; /* number of clip distance outputs */
uint8_t cullDistances; /* number of cull distance outputs */
int8_t genUserClip; /* request user clip planes for ClipVertex */
+ uint8_t auxCBSlot; /* constant buffer index of UCP/draw data */
uint16_t ucpBase; /* base address for UCPs */
- uint8_t ucpCBSlot; /* constant buffer index of UCP data */
+ uint16_t drawInfoBase; /* base address for draw parameters */
uint8_t pointSize; /* output index for PointSize */
uint8_t instanceId; /* system value index of InstanceID */
uint8_t vertexId; /* system value index of VertexID */
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index e9ddd366391..ec74e7ac811 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -740,6 +740,7 @@ CodeEmitterGM107::emitF2F()
emitCC (0x2f);
emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
emitFMZ (0x2c, 1);
+ emitField(0x29, 1, insn->subOp);
emitRND (0x27, rnd, 0x2a);
emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index 1d4f0d92f6b..0b28047e22b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -1030,7 +1030,10 @@ CodeEmitterNVC0::emitCVT(Instruction *i)
// for 8/16 source types, the byte/word is in subOp. word 1 is
// represented as 2.
- code[1] |= i->subOp << 0x17;
+ if (!isFloatType(i->sType))
+ code[1] |= i->subOp << 0x17;
+ else
+ code[1] |= i->subOp << 0x18;
if (sat)
code[0] |= 0x20;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index b23386040a7..7b313f3c39c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -319,6 +319,10 @@ unsigned int Instruction::srcMask(unsigned int s) const
x |= 2;
return x;
}
+ case TGSI_OPCODE_PK2H:
+ return 0x3;
+ case TGSI_OPCODE_UP2H:
+ return 0x1;
default:
break;
}
@@ -348,7 +352,7 @@ static nv50_ir::DataFile translateFile(uint file)
case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE;
case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE;
case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE;
- case TGSI_FILE_RESOURCE: return nv50_ir::FILE_MEMORY_GLOBAL;
+ //case TGSI_FILE_RESOURCE: return nv50_ir::FILE_MEMORY_GLOBAL;
case TGSI_FILE_SAMPLER:
case TGSI_FILE_NULL:
default:
@@ -377,6 +381,9 @@ static nv50_ir::SVSemantic translateSysVal(uint sysval)
case TGSI_SEMANTIC_TESSINNER: return nv50_ir::SV_TESS_INNER;
case TGSI_SEMANTIC_VERTICESIN: return nv50_ir::SV_VERTEX_COUNT;
case TGSI_SEMANTIC_HELPER_INVOCATION: return nv50_ir::SV_THREAD_KILL;
+ case TGSI_SEMANTIC_BASEVERTEX: return nv50_ir::SV_BASEVERTEX;
+ case TGSI_SEMANTIC_BASEINSTANCE: return nv50_ir::SV_BASEINSTANCE;
+ case TGSI_SEMANTIC_DRAWID: return nv50_ir::SV_DRAWID;
default:
assert(0);
return nv50_ir::SV_CLOCK;
@@ -449,6 +456,7 @@ nv50_ir::DataType Instruction::inferSrcType() const
case TGSI_OPCODE_ATOMUMAX:
case TGSI_OPCODE_UBFE:
case TGSI_OPCODE_UMSB:
+ case TGSI_OPCODE_UP2H:
return nv50_ir::TYPE_U32;
case TGSI_OPCODE_I2F:
case TGSI_OPCODE_I2D:
@@ -513,10 +521,12 @@ nv50_ir::DataType Instruction::inferDstType() const
case TGSI_OPCODE_DSGE:
case TGSI_OPCODE_DSLT:
case TGSI_OPCODE_DSNE:
+ case TGSI_OPCODE_PK2H:
return nv50_ir::TYPE_U32;
case TGSI_OPCODE_I2F:
case TGSI_OPCODE_U2F:
case TGSI_OPCODE_D2F:
+ case TGSI_OPCODE_UP2H:
return nv50_ir::TYPE_F32;
case TGSI_OPCODE_I2D:
case TGSI_OPCODE_U2D:
@@ -861,7 +871,7 @@ bool Source::scanSource()
clipVertexOutput = -1;
textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1);
- resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);
+ //resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);
info->immd.bufSize = 0;
@@ -1128,6 +1138,11 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
case TGSI_SEMANTIC_SAMPLEPOS:
info->prop.fp.sampleInterp = 1;
break;
+ case TGSI_SEMANTIC_BASEVERTEX:
+ case TGSI_SEMANTIC_BASEINSTANCE:
+ case TGSI_SEMANTIC_DRAWID:
+ info->prop.vp.usesDrawParameters = true;
+ break;
default:
break;
}
@@ -1144,6 +1159,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
}
}
break;
+/*
case TGSI_FILE_RESOURCE:
for (i = first; i <= last; ++i) {
resources[i].target = decl->Resource.Resource;
@@ -1151,6 +1167,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
resources[i].slot = i;
}
break;
+*/
case TGSI_FILE_SAMPLER_VIEW:
for (i = first; i <= last; ++i)
textureViews[i].target = decl->SamplerView.Resource;
@@ -1216,11 +1233,13 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
if (src.isIndirect(0))
mainTempsInLMem = true;
} else
+/*
if (src.getFile() == TGSI_FILE_RESOURCE) {
if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL)
info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
0x1 : 0x2;
} else
+*/
if (src.getFile() == TGSI_FILE_OUTPUT) {
if (src.isIndirect(0)) {
// We don't know which one is accessed, just mark everything for
@@ -1271,9 +1290,11 @@ Instruction::getTexture(const tgsi::Source *code, int s) const
unsigned int r;
switch (getSrc(s).getFile()) {
+/*
case TGSI_FILE_RESOURCE:
r = getSrc(s).getIndex(0);
return translateTexture(code->resources.at(r).target);
+*/
case TGSI_FILE_SAMPLER_VIEW:
r = getSrc(s).getIndex(0);
return translateTexture(code->textureViews.at(r).target);
@@ -1639,8 +1660,6 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
// don't load masked inputs, won't be assigned a slot
if (!ptr && !(info->in[idx].mask & (1 << swz)))
return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f);
- if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_FACE)
- return mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_FACE, 0));
return interpolate(src, c, shiftAddress(ptr));
} else
if (prog->getType() == Program::TYPE_GEOMETRY) {
@@ -1681,7 +1700,7 @@ Converter::acquireDst(int d, int c)
const int idx = dst.getIndex(0);
const int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
- if (dst.isMasked(c) || f == TGSI_FILE_RESOURCE)
+ if (dst.isMasked(c)/* || f == TGSI_FILE_RESOURCE*/)
return NULL;
if (dst.isIndirect(0) ||
@@ -2799,6 +2818,21 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c));
break;
+ case TGSI_OPCODE_PK2H:
+ val0 = getScratch();
+ val1 = getScratch();
+ mkCvt(OP_CVT, TYPE_F16, val0, TYPE_F32, fetchSrc(0, 0));
+ mkCvt(OP_CVT, TYPE_F16, val1, TYPE_F32, fetchSrc(0, 1));
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
+ mkOp3(OP_INSBF, TYPE_U32, dst0[c], val1, mkImm(0x1010), val0);
+ break;
+ case TGSI_OPCODE_UP2H:
+ src0 = fetchSrc(0, 0);
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ geni = mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F16, src0);
+ geni->subOp = c & 1;
+ }
+ break;
case TGSI_OPCODE_EMIT:
/* export the saved viewport index */
if (viewport != NULL) {
@@ -3252,7 +3286,7 @@ Converter::handleUserClipPlanes()
for (c = 0; c < 4; ++c) {
for (i = 0; i < info->io.genUserClip; ++i) {
- Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.ucpCBSlot,
+ Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot,
TYPE_F32, info->io.ucpBase + i * 16 + c * 4);
Value *ucp = mkLoadv(TYPE_F32, sym, NULL);
if (c == 0)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index e67bf3eca84..6530078b938 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1576,6 +1576,17 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0));
ld->subOp = NV50_IR_SUBOP_PIXLD_COVMASK;
break;
+ case SV_BASEVERTEX:
+ case SV_BASEINSTANCE:
+ case SV_DRAWID:
+ ld = bld.mkLoad(TYPE_U32, i->getDef(0),
+ bld.mkSymbol(FILE_MEMORY_CONST,
+ prog->driver->io.auxCBSlot,
+ TYPE_U32,
+ prog->driver->io.drawInfoBase +
+ 4 * (sv - SV_BASEVERTEX)),
+ NULL);
+ break;
default:
if (prog->getType() == Program::TYPE_TESSELLATION_EVAL && !i->perPatch)
vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0));
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index c2842c2186f..f5c590eef10 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -676,23 +676,22 @@ ConstantFolding::expr(Instruction *i,
switch (i->op) {
case OP_MAD:
case OP_FMA: {
- i->op = OP_ADD;
+ ImmediateValue src0, src1 = *i->getSrc(0)->asImm();
- /* Move the immediate to the second arg, otherwise the ADD operation
- * won't be emittable
- */
- i->setSrc(1, i->getSrc(0));
+ // Move the immediate into position 1, where we know it might be
+ // emittable. However it might not be anyways, as there may be other
+ // restrictions, so move it into a separate LValue.
+ bld.setPosition(i, false);
+ i->op = OP_ADD;
+ i->setSrc(1, bld.mkMov(bld.getSSA(type), i->getSrc(0), type)->getDef(0));
i->setSrc(0, i->getSrc(2));
i->src(0).mod = i->src(2).mod;
i->setSrc(2, NULL);
- ImmediateValue src0;
if (i->src(0).getImmediate(src0))
- expr(i, src0, *i->getSrc(1)->asImm());
- if (i->saturate && !prog->getTarget()->isSatSupported(i)) {
- bld.setPosition(i, false);
- i->setSrc(1, bld.loadImm(NULL, res.data.u32));
- }
+ expr(i, src0, src1);
+ else
+ opnd(i, src1, 1);
break;
}
case OP_PFETCH:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
index 19637ce33f5..014c652eede 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
@@ -295,6 +295,9 @@ TargetNVC0::getSVAddress(DataFile shaderFile, const Symbol *sym) const
case SV_SAMPLE_INDEX: return 0;
case SV_SAMPLE_POS: return 0;
case SV_SAMPLE_MASK: return 0;
+ case SV_BASEVERTEX: return 0;
+ case SV_BASEINSTANCE: return 0;
+ case SV_DRAWID: return 0;
default:
return 0xffffffff;
}
diff --git a/src/gallium/drivers/nouveau/nouveau_compiler.c b/src/gallium/drivers/nouveau/nouveau_compiler.c
index 670b0c8b135..cd44aa1e1d9 100644
--- a/src/gallium/drivers/nouveau/nouveau_compiler.c
+++ b/src/gallium/drivers/nouveau/nouveau_compiler.c
@@ -112,7 +112,7 @@ nouveau_codegen(int chipset, int type, struct tgsi_token tokens[],
info.bin.sourceRep = NV50_PROGRAM_IR_TGSI;
info.bin.source = tokens;
- info.io.ucpCBSlot = 15;
+ info.io.auxCBSlot = 15;
info.io.ucpBase = NV50_CB_AUX_UCP_OFFSET;
info.io.resInfoCBSlot = 15;
diff --git a/src/gallium/drivers/nouveau/nouveau_vp3_video.h b/src/gallium/drivers/nouveau/nouveau_vp3_video.h
index 58df5ee847f..809e971a678 100644
--- a/src/gallium/drivers/nouveau/nouveau_vp3_video.h
+++ b/src/gallium/drivers/nouveau/nouveau_vp3_video.h
@@ -114,6 +114,11 @@ struct nouveau_vp3_decoder {
unsigned fence_seq, fw_sizes, last_frame_num, tmp_stride, ref_stride;
unsigned bsp_idx, vp_idx, ppp_idx;
+
+ /* End of the bsp bo where new data should be appended between one begin/end
+ * frame.
+ */
+ char *bsp_ptr;
};
struct comm {
@@ -208,11 +213,15 @@ nouveau_vp3_load_firmware(struct nouveau_vp3_decoder *dec,
enum pipe_video_profile profile,
unsigned chipset);
+void
+nouveau_vp3_bsp_begin(struct nouveau_vp3_decoder *dec);
+
+void
+nouveau_vp3_bsp_next(struct nouveau_vp3_decoder *dec, unsigned num_buffers,
+ const void *const *data, const unsigned *num_bytes);
+
uint32_t
-nouveau_vp3_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
- struct nouveau_vp3_video_buffer *target,
- unsigned comm_seq, unsigned num_buffers,
- const void *const *data, const unsigned *num_bytes);
+nouveau_vp3_bsp_end(struct nouveau_vp3_decoder *dec, union pipe_desc desc);
void
nouveau_vp3_vp_caps(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
diff --git a/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c b/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c
index 692772e49d1..a3d07deeb18 100644
--- a/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c
+++ b/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c
@@ -230,20 +230,58 @@ nouveau_vp3_fill_picparm_h264_bsp(struct nouveau_vp3_decoder *dec,
return caps | 3;
}
+static inline struct strparm_bsp *strparm_bsp(struct nouveau_vp3_decoder *dec)
+{
+ unsigned comm_seq = dec->fence_seq;
+ struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
+ return (struct strparm_bsp *)(bsp_bo->map + 0x100);
+}
+
+void
+nouveau_vp3_bsp_begin(struct nouveau_vp3_decoder *dec)
+{
+ struct strparm_bsp *str_bsp = strparm_bsp(dec);
+
+ dec->bsp_ptr = (void *)str_bsp;
+ memset(str_bsp, 0, 0x80);
+ dec->bsp_ptr += 0x100;
+ /* Reserved for picparm_vp */
+ dec->bsp_ptr += 0x300;
+ /* Reserved for comm */
+#if !NOUVEAU_VP3_DEBUG_FENCE
+ memset(dec->bsp_ptr, 0, 0x200);
+#endif
+ dec->bsp_ptr += 0x200;
+}
+
+void
+nouveau_vp3_bsp_next(struct nouveau_vp3_decoder *dec, unsigned num_buffers,
+ const void *const *data, const unsigned *num_bytes)
+{
+#ifndef NDEBUG
+ unsigned comm_seq = dec->fence_seq;
+ struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
+#endif
+ struct strparm_bsp *str_bsp = strparm_bsp(dec);
+ int i;
+
+ for (i = 0; i < num_buffers; ++i) {
+ assert(bsp_bo->size >= str_bsp->w0[0] + num_bytes[i]);
+ memcpy(dec->bsp_ptr, data[i], num_bytes[i]);
+ dec->bsp_ptr += num_bytes[i];
+ str_bsp->w0[0] += num_bytes[i];
+ }
+}
+
uint32_t
-nouveau_vp3_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
- struct nouveau_vp3_video_buffer *target,
- unsigned comm_seq, unsigned num_buffers,
- const void *const *data, const unsigned *num_bytes)
+nouveau_vp3_bsp_end(struct nouveau_vp3_decoder *dec, union pipe_desc desc)
{
enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
+ unsigned comm_seq = dec->fence_seq;
struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
- char *bsp;
uint32_t endmarker, caps;
- struct strparm_bsp *str_bsp;
- int i;
-
- bsp = bsp_bo->map;
+ struct strparm_bsp *str_bsp = strparm_bsp(dec);
+ char *bsp = bsp_bo->map;
/*
* 0x000..0x100: picparm_bsp
* 0x200..0x500: picparm_vp
@@ -277,34 +315,21 @@ nouveau_vp3_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
caps |= 1 << 17; // enable watchdog
caps |= 0 << 18; // do not report error to VP, so it can continue decoding what we have
caps |= 0 << 19; // if enabled, use crypto crap?
- bsp += 0x100;
- str_bsp = (struct strparm_bsp *)bsp;
- memset(str_bsp, 0, 0x80);
- str_bsp->w0[0] = 16;
+ str_bsp = strparm_bsp(dec);
str_bsp->w1[0] = 0x1;
- bsp += 0x100;
- /* Reserved for picparm_vp */
- bsp += 0x300;
- /* Reserved for comm */
-#if !NOUVEAU_VP3_DEBUG_FENCE
- memset(bsp, 0, 0x200);
-#endif
- bsp += 0x200;
- for (i = 0; i < num_buffers; ++i) {
- memcpy(bsp, data[i], num_bytes[i]);
- bsp += num_bytes[i];
- str_bsp->w0[0] += num_bytes[i];
- }
/* Append end sequence */
- *(uint32_t *)bsp = endmarker;
- bsp += 4;
- *(uint32_t *)bsp = 0x00000000;
- bsp += 4;
- *(uint32_t *)bsp = endmarker;
- bsp += 4;
- *(uint32_t *)bsp = 0x00000000;
+ *(uint32_t *)dec->bsp_ptr = endmarker;
+ dec->bsp_ptr += 4;
+ *(uint32_t *)dec->bsp_ptr = 0x00000000;
+ dec->bsp_ptr += 4;
+ *(uint32_t *)dec->bsp_ptr = endmarker;
+ dec->bsp_ptr += 4;
+ *(uint32_t *)dec->bsp_ptr = 0x00000000;
+ str_bsp->w0[0] += 16;
+
+ dec->bsp_ptr = NULL;
return caps;
}
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_draw.c b/src/gallium/drivers/nouveau/nv30/nv30_draw.c
index 098d6e499fa..7b0d0745766 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_draw.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_draw.c
@@ -208,17 +208,16 @@ nv30_render_release_vertices(struct vbuf_render *render)
static const struct {
unsigned emit;
- unsigned interp;
unsigned vp30;
unsigned vp40;
unsigned ow40;
} vroute [] = {
- [TGSI_SEMANTIC_POSITION] = { EMIT_4F, INTERP_PERSPECTIVE, 0, 0, 0x00000000 },
- [TGSI_SEMANTIC_COLOR ] = { EMIT_4F, INTERP_LINEAR , 3, 1, 0x00000001 },
- [TGSI_SEMANTIC_BCOLOR ] = { EMIT_4F, INTERP_LINEAR , 1, 3, 0x00000004 },
- [TGSI_SEMANTIC_FOG ] = { EMIT_4F, INTERP_PERSPECTIVE, 5, 5, 0x00000010 },
- [TGSI_SEMANTIC_PSIZE ] = { EMIT_1F_PSIZE, INTERP_POS , 6, 6, 0x00000020 },
- [TGSI_SEMANTIC_TEXCOORD] = { EMIT_4F, INTERP_PERSPECTIVE, 8, 7, 0x00004000 },
+ [TGSI_SEMANTIC_POSITION] = { EMIT_4F, 0, 0, 0x00000000 },
+ [TGSI_SEMANTIC_COLOR ] = { EMIT_4F, 3, 1, 0x00000001 },
+ [TGSI_SEMANTIC_BCOLOR ] = { EMIT_4F, 1, 3, 0x00000004 },
+ [TGSI_SEMANTIC_FOG ] = { EMIT_4F, 5, 5, 0x00000010 },
+ [TGSI_SEMANTIC_PSIZE ] = { EMIT_1F_PSIZE, 6, 6, 0x00000020 },
+ [TGSI_SEMANTIC_TEXCOORD] = { EMIT_4F, 8, 7, 0x00004000 },
};
static bool
@@ -247,7 +246,7 @@ vroute_add(struct nv30_render *r, uint attrib, uint sem, uint *idx)
if (emit == EMIT_OMIT)
return false;
- draw_emit_vertex_attr(vinfo, emit, vroute[sem].interp, attrib);
+ draw_emit_vertex_attr(vinfo, emit, attrib);
format = draw_translate_vinfo_format(emit);
r->vtxfmt[attrib] = nv30_vtxfmt(&screen->base.base, format)->hw;
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index 854f70cf34c..d9c940232c4 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -157,6 +157,8 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_USER_VERTEX_BUFFERS:
case PIPE_CAP_COMPUTE:
case PIPE_CAP_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_SAMPLER_VIEW_TARGET:
@@ -174,6 +176,11 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_CLEAR_TEXTURE:
+ case PIPE_CAP_DRAW_PARAMETERS:
+ case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+ case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+ case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
return 0;
case PIPE_CAP_VENDOR_ID:
@@ -265,6 +272,7 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
+ case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
@@ -308,6 +316,7 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
+ case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h
index 2cebcd99423..712d00ed2d3 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -134,9 +134,11 @@ struct nv50_context {
struct nv50_constbuf constbuf[3][NV50_MAX_PIPE_CONSTBUFS];
uint16_t constbuf_dirty[3];
uint16_t constbuf_valid[3];
+ uint16_t constbuf_coherent[3];
struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
unsigned num_vtxbufs;
+ uint32_t vtxbufs_coherent;
struct pipe_index_buffer idxbuf;
uint32_t vbo_fifo; /* bitmask of vertex elements to be pushed to FIFO */
uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */
@@ -148,6 +150,7 @@ struct nv50_context {
struct pipe_sampler_view *textures[3][PIPE_MAX_SAMPLERS];
unsigned num_textures[3];
+ uint32_t textures_coherent[3];
struct nv50_tsc_entry *samplers[3][PIPE_MAX_SAMPLERS];
unsigned num_samplers[3];
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index a4b8ddfda95..888d62e1c52 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -148,7 +148,6 @@ nv50_fragprog_assign_slots(struct nv50_ir_prog_info *info)
for (m = 0, i = 0; i < info->numInputs; ++i) {
switch (info->in[i].sn) {
case TGSI_SEMANTIC_POSITION:
- case TGSI_SEMANTIC_FACE:
continue;
default:
m += info->in[i].flat ? 0 : 1;
@@ -166,9 +165,6 @@ nv50_fragprog_assign_slots(struct nv50_ir_prog_info *info)
for (c = 0; c < 4; ++c)
if (info->in[i].mask & (1 << c))
info->in[i].slot[c] = nintp++;
- } else
- if (info->in[i].sn == TGSI_SEMANTIC_FACE) {
- info->in[i].slot[0] = 255;
} else {
unsigned j = info->in[i].flat ? m++ : n++;
@@ -335,7 +331,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
info->bin.sourceRep = NV50_PROGRAM_IR_TGSI;
info->bin.source = (void *)prog->pipe.tokens;
- info->io.ucpCBSlot = 15;
+ info->io.auxCBSlot = 15;
info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET;
info->io.genUserClip = prog->vp.clpd_nr;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 272e1d45bff..56c67e0ddfb 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -182,6 +182,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_CLEAR_TEXTURE:
case PIPE_CAP_COMPUTE:
+ case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP:
return 1; /* class_3d >= NVA0_3D_CLASS; */
@@ -212,11 +213,17 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
case PIPE_CAP_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: /* potentially supported on some hw */
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_DRAW_PARAMETERS:
+ case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+ case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
return 0;
case PIPE_CAP_VENDOR_ID:
@@ -300,6 +307,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
+ case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c
index de655971b66..cb040439139 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -664,6 +664,17 @@ nv50_stage_set_sampler_views(struct nv50_context *nv50, int s,
if (old)
nv50_screen_tic_unlock(nv50->screen, old);
+ if (views[i] && views[i]->texture) {
+ struct pipe_resource *res = views[i]->texture;
+ if (res->target == PIPE_BUFFER &&
+ (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT))
+ nv50->textures_coherent[s] |= 1 << i;
+ else
+ nv50->textures_coherent[s] &= ~(1 << i);
+ } else {
+ nv50->textures_coherent[s] &= ~(1 << i);
+ }
+
pipe_sampler_view_reference(&nv50->textures[s][i], views[i]);
}
@@ -847,13 +858,19 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
nv50->constbuf[s][i].u.data = cb->user_buffer;
nv50->constbuf[s][i].size = MIN2(cb->buffer_size, 0x10000);
nv50->constbuf_valid[s] |= 1 << i;
+ nv50->constbuf_coherent[s] &= ~(1 << i);
} else
if (res) {
nv50->constbuf[s][i].offset = cb->buffer_offset;
nv50->constbuf[s][i].size = MIN2(align(cb->buffer_size, 0x100), 0x10000);
nv50->constbuf_valid[s] |= 1 << i;
+ if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
+ nv50->constbuf_coherent[s] |= 1 << i;
+ else
+ nv50->constbuf_coherent[s] &= ~(1 << i);
} else {
nv50->constbuf_valid[s] &= ~(1 << i);
+ nv50->constbuf_coherent[s] &= ~(1 << i);
}
nv50->constbuf_dirty[s] |= 1 << i;
@@ -1003,6 +1020,7 @@ nv50_set_vertex_buffers(struct pipe_context *pipe,
if (!vb) {
nv50->vbo_user &= ~(((1ull << count) - 1) << start_slot);
nv50->vbo_constant &= ~(((1ull << count) - 1) << start_slot);
+ nv50->vtxbufs_coherent &= ~(((1ull << count) - 1) << start_slot);
return;
}
@@ -1015,9 +1033,16 @@ nv50_set_vertex_buffers(struct pipe_context *pipe,
nv50->vbo_constant |= 1 << dst_index;
else
nv50->vbo_constant &= ~(1 << dst_index);
+ nv50->vtxbufs_coherent &= ~(1 << dst_index);
} else {
nv50->vbo_user &= ~(1 << dst_index);
nv50->vbo_constant &= ~(1 << dst_index);
+
+ if (vb[i].buffer &&
+ vb[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
+ nv50->vtxbufs_coherent |= (1 << dst_index);
+ else
+ nv50->vtxbufs_coherent &= ~(1 << dst_index);
}
}
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index 7de2f1f1d0f..60fa2bc06a8 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -636,8 +636,8 @@ nv50_draw_elements(struct nv50_context *nv50, bool shorten,
BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
PUSH_DATA (push, prim);
- PUSH_REFN(push, buf->bo, NOUVEAU_BO_RD | buf->domain);
nouveau_pushbuf_space(push, 8, 0, 1);
+ PUSH_REFN(push, buf->bo, NOUVEAU_BO_RD | buf->domain);
switch (index_size) {
case 4:
@@ -765,7 +765,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
struct nv50_context *nv50 = nv50_context(pipe);
struct nouveau_pushbuf *push = nv50->base.pushbuf;
bool tex_dirty = false;
- int i, s;
+ int s;
/* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
nv50->vb_elt_first = info->min_index + info->index_bias;
@@ -794,27 +794,9 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
push->kick_notify = nv50_draw_vbo_kick_notify;
- /* TODO: Instead of iterating over all the buffer resources looking for
- * coherent buffers, keep track of a context-wide count.
- */
for (s = 0; s < 3 && !nv50->cb_dirty; ++s) {
- uint32_t valid = nv50->constbuf_valid[s];
-
- while (valid && !nv50->cb_dirty) {
- const unsigned i = ffs(valid) - 1;
- struct pipe_resource *res;
-
- valid &= ~(1 << i);
- if (nv50->constbuf[s][i].user)
- continue;
-
- res = nv50->constbuf[s][i].u.buf;
- if (!res)
- continue;
-
- if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
- nv50->cb_dirty = true;
- }
+ if (nv50->constbuf_coherent[s])
+ nv50->cb_dirty = true;
}
/* If there are any coherent constbufs, flush the cache */
@@ -825,15 +807,10 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
}
for (s = 0; s < 3 && !tex_dirty; ++s) {
- for (i = 0; i < nv50->num_textures[s] && !tex_dirty; ++i) {
- if (!nv50->textures[s][i] ||
- nv50->textures[s][i]->texture->target != PIPE_BUFFER)
- continue;
- if (nv50->textures[s][i]->texture->flags &
- PIPE_RESOURCE_FLAG_MAP_COHERENT)
- tex_dirty = true;
- }
+ if (nv50->textures_coherent[s])
+ tex_dirty = true;
}
+
if (tex_dirty) {
BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1);
PUSH_DATA (push, 0x20);
@@ -853,12 +830,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
PUSH_DATA (push, info->start_instance);
}
- for (i = 0; i < nv50->num_vtxbufs && !nv50->base.vbo_dirty; ++i) {
- if (!nv50->vtxbuf[i].buffer)
- continue;
- if (nv50->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
- nv50->base.vbo_dirty = true;
- }
+ nv50->base.vbo_dirty |= !!nv50->vtxbufs_coherent;
if (nv50->base.vbo_dirty) {
BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1);
diff --git a/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c b/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c
index dbde1bfcebe..4fe0e05c96b 100644
--- a/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c
+++ b/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c
@@ -77,7 +77,7 @@ nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
bsp_size += (1 << 20) - 1;
bsp_size &= ~((1 << 20) - 1);
- ret = nouveau_bo_new(dec->bitplane_bo->device, NOUVEAU_BO_VRAM, 0, bsp_size, NULL, &tmp_bo);
+ ret = nouveau_bo_new(dec->client->device, NOUVEAU_BO_VRAM, 0, bsp_size, NULL, &tmp_bo);
if (ret) {
debug_printf("reallocating bsp %u -> %u failed with %i\n",
bsp_bo ? (unsigned)bsp_bo->size : 0, bsp_size, ret);
@@ -90,7 +90,7 @@ nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
if (!inter_bo || bsp_bo->size * 4 > inter_bo->size) {
struct nouveau_bo *tmp_bo = NULL;
- ret = nouveau_bo_new(dec->bitplane_bo->device, NOUVEAU_BO_VRAM, 0, bsp_bo->size * 4, NULL, &tmp_bo);
+ ret = nouveau_bo_new(dec->client->device, NOUVEAU_BO_VRAM, 0, bsp_bo->size * 4, NULL, &tmp_bo);
if (ret) {
debug_printf("reallocating inter %u -> %u failed with %i\n",
inter_bo ? (unsigned)inter_bo->size : 0, (unsigned)bsp_bo->size * 4, ret);
@@ -106,8 +106,9 @@ nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
return -1;
}
- caps = nouveau_vp3_bsp(dec, desc, target, comm_seq,
- num_buffers, data, num_bytes);
+ nouveau_vp3_bsp_begin(dec);
+ nouveau_vp3_bsp_next(dec, num_buffers, data, num_bytes);
+ caps = nouveau_vp3_bsp_end(dec, desc);
nouveau_vp3_vp_caps(dec, desc, target, comm_seq, vp_caps, is_ref, refs);
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
index b2060d1fa53..4daa57d47bb 100644
--- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
@@ -230,27 +230,43 @@ locn_0f_ts:
* Forcefully sets VERTEX_ID_BASE to the value of VB_ELEMENT_BASE.
*
* arg = mode
- * parm[0] = count
- * parm[1] = instance_count
- * parm[2] = start
- * parm[3] = index_bias
- * parm[4] = start_instance
+ * parm[0] = start_drawid
+ * parm[1] = numparams
+ * parm[2 + 5n + 0] = count
+ * parm[2 + 5n + 1] = instance_count
+ * parm[2 + 5n + 2] = start
+ * parm[2 + 5n + 3] = index_bias
+ * parm[2 + 5n + 4] = start_instance
+ *
+ * SCRATCH[0] = saved VB_ELEMENT_BASE
+ * SCRATCH[1] = saved VB_INSTANCE_BASE
*/
.section #mme9097_draw_elts_indirect
+ read $r6 0x50d /* VB_ELEMENT_BASE */
+ read $r7 0x50e /* VB_INSTANCE_BASE */
+ maddr 0x1d00
+ send $r6 /* SCRATCH[0] = VB_ELEMENT_BASE */
+ send $r7 /* SCRATCH[1] = VB_INSTANCE_BASE */
+ parm $r6 /* start_drawid */
+ parm $r7 /* numparams */
+dei_draw_again:
parm $r3 /* count */
parm $r2 /* instance_count */
parm $r4 maddr 0x5f7 /* INDEX_BATCH_FIRST, start */
parm $r4 send $r4 /* index_bias, send start */
+ maddr 0x18e3 /* CB_POS */
+ send 0x180 /* 256 + 128 */
braz $r2 #dei_end
- parm $r5 /* start_instance */
- read $r6 0x50d /* VB_ELEMENT_BASE */
- read $r7 0x50e /* VB_INSTANCE_BASE */
+ parm $r5 send $r4 /* start_instance, send index_bias */
+ send $r5 /* send start_instance */
+ send $r6 /* draw id */
maddr 0x150d /* VB_ELEMENT,INSTANCE_BASE */
send $r4
send $r5
maddr 0x446
send $r4
mov $r4 0x1
+ mov $r1 (extrinsrt $r1 0x0 0 1 26) /* clear INSTANCE_NEXT */
dei_again:
maddr 0x586 /* VERTEX_BEGIN_GL */
send $r1 /* mode */
@@ -260,46 +276,218 @@ dei_again:
maddrsend 0x585 /* VERTEX_END_GL */
branz $r2 #dei_again
mov $r1 (extrinsrt $r1 $r4 0 1 26) /* set INSTANCE_NEXT */
+dei_end:
+ mov $r7 (add $r7 -1)
+ branz $r7 #dei_draw_again
+ mov $r6 (add $r6 1)
+ read $r6 0xd00
+ read $r7 0xd01
maddr 0x150d /* VB_ELEMENT,INSTANCE_BASE */
send $r6
send $r7
exit maddr 0x446
send $r6
-dei_end:
- exit
- nop
/* NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT:
*
* NOTE: Saves and restores VB_INSTANCE_BASE.
*
* arg = mode
- * parm[0] = count
- * parm[1] = instance_count
- * parm[2] = start
- * parm[3] = start_instance
+ * parm[0] = start_drawid
+ * parm[1] = numparams
+ * parm[2 + 4n + 0] = count
+ * parm[2 + 4n + 1] = instance_count
+ * parm[2 + 4n + 2] = start
+ * parm[2 + 4n + 3] = start_instance
*/
.section #mme9097_draw_arrays_indirect
+ read $r5 0x50e /* VB_INSTANCE_BASE */
+ parm $r6 /* start_drawid */
+ parm $r7 /* numparams */
+dai_draw_again:
parm $r2 /* count */
parm $r3 /* instance_count */
parm $r4 maddr 0x35d /* VERTEX_BUFFER_FIRST, start */
- parm $r4 send $r4 /* start_instance */
braz $r3 #dai_end
- read $r6 0x50e /* VB_INSTANCE_BASE */
+ parm $r4 send $r4 /* start_instance */
+ maddr 0x18e3 /* CB_POS */
+ send 0x180 /* 256 + 128 */
+ send 0x0 /* send 0 as base_vertex */
+ send $r4 /* send start_instance */
+ send $r6 /* draw id */
maddr 0x50e /* VB_INSTANCE_BASE */
- mov $r5 0x1
send $r4
+ mov $r4 0x1
+ mov $r1 (extrinsrt $r1 0x0 0 1 26) /* clear INSTANCE_NEXT */
dai_again:
maddr 0x586 /* VERTEX_BEGIN_GL */
send $r1 /* mode */
maddr 0x35e /* VERTEX_BUFFER_COUNT */
send $r2
- mov $r3 (sub $r3 $r5)
+ mov $r3 (sub $r3 $r4)
maddrsend 0x585 /* VERTEX_END_GL */
branz $r3 #dai_again
- mov $r1 (extrinsrt $r1 $r5 0 1 26) /* set INSTANCE_NEXT */
+ mov $r1 (extrinsrt $r1 $r4 0 1 26) /* set INSTANCE_NEXT */
+dai_end:
+ mov $r7 (add $r7 -1)
+ branz $r7 #dai_draw_again
+ mov $r6 (add $r6 1)
exit maddr 0x50e /* VB_INSTANCE_BASE to restore */
+ send $r5
+
+/* NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT
+ *
+ * NOTE: Saves and restores VB_ELEMENT,INSTANCE_BASE.
+ * Forcefully sets VERTEX_ID_BASE to the value of VB_ELEMENT_BASE.
+ *
+ * arg = mode
+ * parm[0] = start_drawid
+ * parm[1] = numparams
+ * parm[2] = totaldraws
+ * parm[3 + 5n + 0] = count
+ * parm[3 + 5n + 1] = instance_count
+ * parm[3 + 5n + 2] = start
+ * parm[3 + 5n + 3] = index_bias
+ * parm[3 + 5n + 4] = start_instance
+ *
+ * SCRATCH[0] = saved VB_ELEMENT_BASE
+ * SCRATCH[1] = saved VB_INSTANCE_BASE
+ * SCRATCH[2] = draws left
+ */
+.section #mme9097_draw_elts_indirect_count
+ read $r6 0x50d /* VB_ELEMENT_BASE */
+ read $r7 0x50e /* VB_INSTANCE_BASE */
+ maddr 0x1d00
+ send $r6 /* SCRATCH[0] = VB_ELEMENT_BASE */
+ send $r7 /* SCRATCH[1] = VB_INSTANCE_BASE */
+ parm $r6 /* start_drawid */
+ parm $r7 /* numparams */
+ parm $r5 /* totaldraws */
+ mov $r5 (sub $r5 $r6) /* draws left */
+ braz $r5 #deic_runout
+ mov $r3 (extrinsrt 0x0 $r5 31 1 0) /* extract high bit */
+ branz $r3 #deic_runout
+ send $r5
+deic_draw_again:
+ parm $r3 /* count */
+ parm $r2 /* instance_count */
+ parm $r4 maddr 0x5f7 /* INDEX_BATCH_FIRST, start */
+ parm $r4 send $r4 /* index_bias, send start */
+ maddr 0x18e3 /* CB_POS */
+ send 0x180 /* 256 + 128 */
+ braz $r2 #deic_end
+ parm $r5 send $r4 /* start_instance, send index_bias */
+ send $r5 /* send start_instance */
+ send $r6 /* draw id */
+ maddr 0x150d /* VB_ELEMENT,INSTANCE_BASE */
+ send $r4
+ send $r5
+ maddr 0x446
+ send $r4
+ mov $r4 0x1
+ mov $r1 (extrinsrt $r1 0x0 0 1 26) /* clear INSTANCE_NEXT */
+deic_again:
+ maddr 0x586 /* VERTEX_BEGIN_GL */
+ send $r1 /* mode */
+ maddr 0x5f8 /* INDEX_BATCH_COUNT */
+ send $r3 /* count */
+ mov $r2 (sub $r2 $r4)
+ maddrsend 0x585 /* VERTEX_END_GL */
+ branz $r2 #deic_again
+ mov $r1 (extrinsrt $r1 $r4 0 1 26) /* set INSTANCE_NEXT */
+deic_end:
+ read $r5 0xd02
+ mov $r5 (add $r5 -1)
+ braz $r5 #deic_runout_check
+ mov $r7 (add $r7 -1)
+ maddr 0xd02
+ send $r5
+ branz $r7 #deic_draw_again
+ mov $r6 (add $r6 1)
+deic_restore:
+ read $r6 0xd00
+ read $r7 0xd01
+ maddr 0x150d /* VB_ELEMENT,INSTANCE_BASE */
send $r6
-dai_end:
- exit
- nop
+ send $r7
+ exit maddr 0x446
+ send $r6
+deic_runout:
+ parm $r2
+ parm $r2
+ parm $r2
+ parm $r2
+ parm $r2
+ mov $r7 (add $r7 -1)
+deic_runout_check:
+ branz annul $r7 #deic_runout
+ bra annul #deic_restore
+
+/* NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT:
+ *
+ * NOTE: Saves and restores VB_INSTANCE_BASE.
+ *
+ * arg = mode
+ * parm[0] = start_drawid
+ * parm[1] = numparams
+ * parm[2] = totaldraws
+ * parm[3 + 4n + 0] = count
+ * parm[3 + 4n + 1] = instance_count
+ * parm[3 + 4n + 2] = start
+ * parm[3 + 4n + 3] = start_instance
+ *
+ * SCRATCH[0] = VB_INSTANCE_BASE
+ */
+.section #mme9097_draw_arrays_indirect_count
+ read $r5 0x50e /* VB_INSTANCE_BASE */
+ maddr 0xd00
+ parm $r6 send $r5 /* start_drawid, save VB_INSTANCE_BASE */
+ parm $r7 /* numparams */
+ parm $r5 /* totaldraws */
+ mov $r5 (sub $r5 $r6) /* draws left */
+ braz $r5 #daic_runout
+ mov $r3 (extrinsrt 0x0 $r5 31 1 0) /* extract high bit */
+ branz annul $r3 #daic_runout
+daic_draw_again:
+ parm $r2 /* count */
+ parm $r3 /* instance_count */
+ parm $r4 maddr 0x35d /* VERTEX_BUFFER_FIRST, start */
+ braz $r3 #daic_end
+ parm $r4 send $r4 /* start_instance */
+ maddr 0x18e3 /* CB_POS */
+ send 0x180 /* 256 + 128 */
+ send 0x0 /* send 0 as base_vertex */
+ send $r4 /* send start_instance */
+ send $r6 /* draw id */
+ maddr 0x50e /* VB_INSTANCE_BASE */
+ send $r4
+ mov $r4 0x1
+ mov $r1 (extrinsrt $r1 0x0 0 1 26) /* clear INSTANCE_NEXT */
+daic_again:
+ maddr 0x586 /* VERTEX_BEGIN_GL */
+ send $r1 /* mode */
+ maddr 0x35e /* VERTEX_BUFFER_COUNT */
+ send $r2
+ mov $r3 (sub $r3 $r4)
+ maddrsend 0x585 /* VERTEX_END_GL */
+ branz $r3 #daic_again
+ mov $r1 (extrinsrt $r1 $r4 0 1 26) /* set INSTANCE_NEXT */
+daic_end:
+ mov $r5 (add $r5 -1)
+ braz $r5 #daic_runout_check
+ mov $r7 (add $r7 -1)
+ branz $r7 #daic_draw_again
+ mov $r6 (add $r6 1)
+daic_restore:
+ read $r5 0xd00
+ exit maddr 0x50e /* VB_INSTANCE_BASE to restore */
+ send $r5
+daic_runout:
+ parm $r2
+ parm $r2
+ parm $r2
+ parm $r2
+ mov $r7 (add $r7 -1)
+daic_runout_check:
+ branz annul $r7 #daic_runout
+ bra annul #daic_restore
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
index bac9042c2df..bf8625e0584 100644
--- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
@@ -125,22 +125,33 @@ uint32_t mme9097_tep_select[] = {
};
uint32_t mme9097_draw_elts_indirect[] = {
+ 0x01434615,
+/* 0x0007: dei_draw_again */
+ 0x01438715,
+ 0x07400021,
+ 0x00003041,
+ 0x00003841,
+ 0x00000601,
+/* 0x0018: dei_again */
+ 0x00000701,
0x00000301,
+/* 0x0020: dei_end */
0x00000201,
0x017dc451,
-/* 0x000e: dei_again */
0x00002431,
- 0x0005d007,
- 0x00000501,
-/* 0x001b: dei_end */
- 0x01434615,
- 0x01438715,
+ 0x0638c021,
+ 0x00600041,
+ 0x0004d007,
+ 0x00002531,
+ 0x00002841,
+ 0x00003041,
0x05434021,
0x00002041,
0x00002841,
0x01118021,
0x00002041,
0x00004411,
+ 0xd0400912,
0x01618021,
0x00000841,
0x017e0021,
@@ -149,37 +160,175 @@ uint32_t mme9097_draw_elts_indirect[] = {
0x01614071,
0xfffe9017,
0xd0410912,
+ 0xffffff11,
+ 0xfff9b817,
+ 0x00007611,
+ 0x03400615,
+ 0x03404715,
0x05434021,
0x00003041,
0x00003841,
0x011180a1,
0x00003041,
- 0x00000091,
- 0x00000011,
};
uint32_t mme9097_draw_arrays_indirect[] = {
+/* 0x0003: dai_draw_again */
+ 0x01438515,
+ 0x00000601,
+ 0x00000701,
0x00000201,
+/* 0x0011: dai_again */
0x00000301,
-/* 0x0009: dai_again */
0x00d74451,
+/* 0x0019: dai_end */
+ 0x0004d807,
0x00002431,
-/* 0x0013: dai_end */
- 0x0003d807,
- 0x01438615,
+ 0x0638c021,
+ 0x00600041,
+ 0x00000041,
+ 0x00002041,
+ 0x00003041,
0x01438021,
- 0x00004511,
0x00002041,
+ 0x00004411,
+ 0xd0400912,
0x01618021,
0x00000841,
0x00d78021,
0x00001041,
- 0x00055b10,
+ 0x00051b10,
0x01614071,
0xfffe9817,
- 0xd0414912,
+ 0xd0410912,
+ 0xffffff11,
+ 0xfffa7817,
+ 0x00007611,
0x014380a1,
+ 0x00002841,
+};
+
+uint32_t mme9097_draw_elts_indirect_count[] = {
+ 0x01434615,
+ 0x01438715,
+ 0x07400021,
+/* 0x000d: deic_draw_again */
0x00003041,
- 0x00000091,
- 0x00000011,
+ 0x00003841,
+ 0x00000601,
+ 0x00000701,
+/* 0x001e: deic_again */
+ 0x00000501,
+ 0x0005ad10,
+/* 0x0026: deic_end */
+ 0x000b2807,
+ 0x007f4312,
+/* 0x002e: deic_restore */
+ 0x000a9817,
+ 0x00002841,
+/* 0x0035: deic_runout */
+ 0x00000301,
+/* 0x003b: deic_runout_check */
+ 0x00000201,
+ 0x017dc451,
+ 0x00002431,
+ 0x0638c021,
+ 0x00600041,
+ 0x0004d007,
+ 0x00002531,
+ 0x00002841,
+ 0x00003041,
+ 0x05434021,
+ 0x00002041,
+ 0x00002841,
+ 0x01118021,
+ 0x00002041,
+ 0x00004411,
+ 0xd0400912,
+ 0x01618021,
+ 0x00000841,
+ 0x017e0021,
+ 0x00001841,
+ 0x00051210,
+ 0x01614071,
+ 0xfffe9017,
+ 0xd0410912,
+ 0x03408515,
+ 0xffffed11,
+ 0x0004e807,
+ 0xffffff11,
+ 0x03408021,
+ 0x00002841,
+ 0xfff87817,
+ 0x00007611,
+ 0x03400615,
+ 0x03404715,
+ 0x05434021,
+ 0x00003041,
+ 0x00003841,
+ 0x011180a1,
+ 0x00003041,
+ 0x00000201,
+ 0x00000201,
+ 0x00000201,
+ 0x00000201,
+ 0x00000201,
+ 0xffffff11,
+ 0xfffeb837,
+ 0xfffc8027,
+};
+
+uint32_t mme9097_draw_arrays_indirect_count[] = {
+ 0x01438515,
+ 0x03400021,
+/* 0x0009: daic_draw_again */
+ 0x00002e31,
+ 0x00000701,
+ 0x00000501,
+/* 0x0017: daic_again */
+ 0x0005ad10,
+ 0x00086807,
+/* 0x001f: daic_end */
+ 0x007f4312,
+ 0x0007d837,
+/* 0x0024: daic_restore */
+/* 0x0027: daic_runout */
+ 0x00000201,
+ 0x00000301,
+/* 0x002c: daic_runout_check */
+ 0x00d74451,
+ 0x0004d807,
+ 0x00002431,
+ 0x0638c021,
+ 0x00600041,
+ 0x00000041,
+ 0x00002041,
+ 0x00003041,
+ 0x01438021,
+ 0x00002041,
+ 0x00004411,
+ 0xd0400912,
+ 0x01618021,
+ 0x00000841,
+ 0x00d78021,
+ 0x00001041,
+ 0x00051b10,
+ 0x01614071,
+ 0xfffe9817,
+ 0xd0410912,
+ 0xffffed11,
+ 0x00032807,
+ 0xffffff11,
+ 0xfff9f817,
+ 0x00007611,
+ 0x03400515,
+ 0x014380a1,
+ 0x00002841,
+ 0x00000201,
+ 0x00000201,
+ 0x00000201,
+ 0x00000201,
+ 0xffffff11,
+ 0xfffef837,
+ 0xfffdc027,
};
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 39b73ecb0c2..12195489691 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -134,10 +134,12 @@ struct nvc0_context {
struct nvc0_constbuf constbuf[6][NVC0_MAX_PIPE_CONSTBUFS];
uint16_t constbuf_dirty[6];
uint16_t constbuf_valid[6];
+ uint16_t constbuf_coherent[6];
bool cb_dirty;
struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
unsigned num_vtxbufs;
+ uint32_t vtxbufs_coherent;
struct pipe_index_buffer idxbuf;
uint32_t constant_vbos;
uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */
@@ -149,6 +151,7 @@ struct nvc0_context {
struct pipe_sampler_view *textures[6][PIPE_MAX_SAMPLERS];
unsigned num_textures[6];
uint32_t textures_dirty[6];
+ uint32_t textures_coherent[6];
struct nv50_tsc_entry *samplers[6][PIPE_MAX_SAMPLERS];
unsigned num_samplers[6];
uint16_t samplers_dirty[6];
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
index bf2798a44a0..27c026b8b30 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
@@ -29,4 +29,8 @@
#define NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT 0x00003840
+#define NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT 0x00003848
+
+#define NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT 0x00003850
+
#endif /* __NVC0_MACROS_H__ */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 67a25acf778..c3b53621630 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -55,7 +55,6 @@ nvc0_shader_input_address(unsigned sn, unsigned si)
case TGSI_SEMANTIC_INSTANCEID: return 0x2f8;
case TGSI_SEMANTIC_VERTEXID: return 0x2fc;
case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10;
- case TGSI_SEMANTIC_FACE: return 0x3fc;
default:
assert(!"invalid TGSI input semantic");
return ~0;
@@ -285,8 +284,6 @@ nvc0_tp_get_tess_mode(struct nvc0_program *tp, struct nv50_ir_prog_info *info)
break;
case PIPE_PRIM_TRIANGLES:
tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_TRIANGLES;
- if (info->prop.tp.winding > 0)
- tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CW;
break;
case PIPE_PRIM_QUADS:
tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_QUADS;
@@ -295,6 +292,10 @@ nvc0_tp_get_tess_mode(struct nvc0_program *tp, struct nv50_ir_prog_info *info)
tp->tp.tess_mode = ~0;
return;
}
+
+ if (info->prop.tp.winding > 0)
+ tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CW;
+
if (info->prop.tp.outputPrim != PIPE_PRIM_POINTS)
tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CONNECTED;
@@ -533,8 +534,9 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
info->bin.source = (void *)prog->pipe.tokens;
info->io.genUserClip = prog->vp.num_ucps;
+ info->io.auxCBSlot = 15;
info->io.ucpBase = 256;
- info->io.ucpCBSlot = 15;
+ info->io.drawInfoBase = 256 + 128;
if (prog->type == PIPE_SHADER_COMPUTE) {
if (chipset >= NVISA_GK104_CHIPSET) {
@@ -583,6 +585,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
prog->num_barriers = info->numBarriers;
prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS;
+ prog->vp.need_draw_parameters = info->prop.vp.usesDrawParameters;
if (info->io.edgeFlagOut < PIPE_MAX_ATTRIBS)
info->out[info->io.edgeFlagOut].mask = 0; /* for headergen */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
index 9c45e7b3e31..8b8d221edfc 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
@@ -42,6 +42,7 @@ struct nvc0_program {
uint8_t num_ucps; /* also set to max if ClipDistance is used */
uint8_t edgeflag; /* attribute index of edgeflag input */
bool need_vertex_id;
+ bool need_draw_parameters;
} vp;
struct {
uint8_t early_z;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
index a70d524ea85..1bed0162baf 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
@@ -470,10 +470,7 @@ nvc0_hw_query_pushbuf_submit(struct nouveau_pushbuf *push,
{
struct nvc0_hw_query *hq = nvc0_hw_query(q);
-#define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8))
-
PUSH_REFN(push, hq->bo, NOUVEAU_BO_RD | NOUVEAU_BO_GART);
- nouveau_pushbuf_space(push, 0, 0, 1);
nouveau_pushbuf_data(push, hq->bo, hq->offset + result_offset, 4 |
NVC0_IB_ENTRY_1_NO_PREFETCH);
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 39954464b9c..33dd17ebeca 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -184,6 +184,11 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_CLEAR_TEXTURE:
+ case PIPE_CAP_DRAW_PARAMETERS:
+ case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
+ case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
@@ -206,6 +211,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
return 0;
case PIPE_CAP_VENDOR_ID:
@@ -311,6 +318,7 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
+ case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
return 0;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
return 16; /* would be 32 in linked (OpenGL-style) mode */
@@ -1025,6 +1033,8 @@ nvc0_screen_create(struct nouveau_device *dev)
MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mme9097_poly_mode_back);
MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mme9097_draw_arrays_indirect);
MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect);
+ MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count);
+ MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count);
BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1);
PUSH_DATA (push, 1);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
index 5e84ca9e0ea..dc02b011bdf 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
@@ -317,6 +317,7 @@ nvc0_tfb_validate(struct nvc0_context *nvc0)
if (!targ->clean)
nvc0_hw_query_fifo_wait(push, nvc0_query(targ->pq));
+ nouveau_pushbuf_space(push, 0, 0, 1);
BEGIN_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 5);
PUSH_DATA (push, 1);
PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index 41a824a97a0..24a6c222dd5 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -554,6 +554,17 @@ nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s,
continue;
nvc0->textures_dirty[s] |= 1 << i;
+ if (views[i] && views[i]->texture) {
+ struct pipe_resource *res = views[i]->texture;
+ if (res->target == PIPE_BUFFER &&
+ (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT))
+ nvc0->textures_coherent[s] |= 1 << i;
+ else
+ nvc0->textures_coherent[s] &= ~(1 << i);
+ } else {
+ nvc0->textures_coherent[s] &= ~(1 << i);
+ }
+
if (old) {
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i));
nvc0_screen_tic_unlock(nvc0->screen, old);
@@ -596,6 +607,17 @@ nvc0_stage_set_sampler_views_range(struct nvc0_context *nvc0, const unsigned s,
continue;
nvc0->textures_dirty[s] |= 1 << i;
+ if (views[p] && views[p]->texture) {
+ struct pipe_resource *res = views[p]->texture;
+ if (res->target == PIPE_BUFFER &&
+ (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT))
+ nvc0->textures_coherent[s] |= 1 << i;
+ else
+ nvc0->textures_coherent[s] &= ~(1 << i);
+ } else {
+ nvc0->textures_coherent[s] &= ~(1 << i);
+ }
+
if (nvc0->textures[s][i]) {
struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]);
nouveau_bufctx_reset(bctx, bin + i);
@@ -842,14 +864,20 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
nvc0->constbuf[s][i].u.data = cb->user_buffer;
nvc0->constbuf[s][i].size = MIN2(cb->buffer_size, 0x10000);
nvc0->constbuf_valid[s] |= 1 << i;
+ nvc0->constbuf_coherent[s] &= ~(1 << i);
} else
if (cb) {
nvc0->constbuf[s][i].offset = cb->buffer_offset;
nvc0->constbuf[s][i].size = MIN2(align(cb->buffer_size, 0x100), 0x10000);
nvc0->constbuf_valid[s] |= 1 << i;
+ if (res && res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
+ nvc0->constbuf_coherent[s] |= 1 << i;
+ else
+ nvc0->constbuf_coherent[s] &= ~(1 << i);
}
else {
nvc0->constbuf_valid[s] &= ~(1 << i);
+ nvc0->constbuf_coherent[s] &= ~(1 << i);
}
}
@@ -1009,6 +1037,7 @@ nvc0_set_vertex_buffers(struct pipe_context *pipe,
if (!vb) {
nvc0->vbo_user &= ~(((1ull << count) - 1) << start_slot);
nvc0->constant_vbos &= ~(((1ull << count) - 1) << start_slot);
+ nvc0->vtxbufs_coherent &= ~(((1ull << count) - 1) << start_slot);
return;
}
@@ -1021,9 +1050,16 @@ nvc0_set_vertex_buffers(struct pipe_context *pipe,
nvc0->constant_vbos |= 1 << dst_index;
else
nvc0->constant_vbos &= ~(1 << dst_index);
+ nvc0->vtxbufs_coherent &= ~(1 << dst_index);
} else {
nvc0->vbo_user &= ~(1 << dst_index);
nvc0->constant_vbos &= ~(1 << dst_index);
+
+ if (vb[i].buffer &&
+ vb[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
+ nvc0->vtxbufs_coherent |= (1 << dst_index);
+ else
+ nvc0->vtxbufs_coherent &= ~(1 << dst_index);
}
}
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
index 54443bdccc0..ad79d1cbb9c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -787,7 +787,7 @@ nvc0_draw_stream_output(struct nvc0_context *nvc0,
}
while (num_instances--) {
- PUSH_SPACE(push, 8);
+ nouveau_pushbuf_space(push, 9, 0, 1);
BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1);
PUSH_DATA (push, mode);
BEGIN_NVC0(push, NVC0_3D(DRAW_TFB_BASE), 1);
@@ -807,19 +807,31 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nv04_resource *buf = nv04_resource(info->indirect);
- unsigned size;
- const uint32_t offset = buf->offset + info->indirect_offset;
+ struct nv04_resource *buf_count = nv04_resource(info->indirect_params);
+ unsigned size, macro, count = info->indirect_count, drawid = info->drawid;
+ uint32_t offset = buf->offset + info->indirect_offset;
/* must make FIFO wait for engines idle before continuing to process */
- if (buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr))
+ if ((buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr)) ||
+ (buf_count && buf_count->fence_wr &&
+ !nouveau_fence_signalled(buf_count->fence_wr))) {
IMMED_NVC0(push, SUBC_3D(NV10_SUBCHAN_REF_CNT), 0);
+ }
+
+ /* Queue things up to let the macros write params to the driver constbuf */
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ PUSH_DATA (push, 512);
+ PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9));
+ PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9));
- PUSH_SPACE(push, 8);
if (info->indexed) {
assert(nvc0->idxbuf.buffer);
assert(nouveau_resource_mapped_by_gpu(nvc0->idxbuf.buffer));
- size = 5 * 4;
- BEGIN_1IC0(push, NVC0_3D(MACRO_DRAW_ELEMENTS_INDIRECT), 1 + size / 4);
+ size = 5;
+ if (buf_count)
+ macro = NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT;
+ else
+ macro = NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT;
} else {
if (nvc0->state.index_bias) {
/* index_bias is implied 0 if !info->indexed (really ?) */
@@ -827,15 +839,59 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
IMMED_NVC0(push, NVC0_3D(VERTEX_ID_BASE), 0);
nvc0->state.index_bias = 0;
}
- size = 4 * 4;
- BEGIN_1IC0(push, NVC0_3D(MACRO_DRAW_ARRAYS_INDIRECT), 1 + size / 4);
- }
- PUSH_DATA(push, nvc0_prim_gl(info->mode));
-#define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8))
- PUSH_REFN(push, buf->bo, NOUVEAU_BO_RD | buf->domain);
- nouveau_pushbuf_space(push, 0, 0, 1);
- nouveau_pushbuf_data(push,
- buf->bo, offset, NVC0_IB_ENTRY_1_NO_PREFETCH | size);
+ size = 4;
+ if (buf_count)
+ macro = NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT;
+ else
+ macro = NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT;
+ }
+
+ /* If the stride is not the natural stride, we have to stick a separate
+ * push data reference for each draw. Otherwise it can all go in as one.
+ * Of course there is a maximum packet size, so we have to break things up
+ * along those borders as well.
+ */
+ while (count) {
+ unsigned draws = count, pushes, i;
+ if (info->indirect_stride == size * 4) {
+ draws = MIN2(draws, (NV04_PFIFO_MAX_PACKET_LEN - 4) / size);
+ pushes = 1;
+ } else {
+ draws = MIN2(draws, 32);
+ pushes = draws;
+ }
+
+ nouveau_pushbuf_space(push, 16, 0, pushes + !!buf_count);
+ PUSH_REFN(push, buf->bo, NOUVEAU_BO_RD | buf->domain);
+ if (buf_count)
+ PUSH_REFN(push, buf_count->bo, NOUVEAU_BO_RD | buf_count->domain);
+ PUSH_DATA(push,
+ NVC0_FIFO_PKHDR_1I(0, macro, 3 + !!buf_count + draws * size));
+ PUSH_DATA(push, nvc0_prim_gl(info->mode));
+ PUSH_DATA(push, drawid);
+ PUSH_DATA(push, draws);
+ if (buf_count) {
+ nouveau_pushbuf_data(push,
+ buf_count->bo,
+ buf_count->offset + info->indirect_params_offset,
+ NVC0_IB_ENTRY_1_NO_PREFETCH | 4);
+ }
+ if (pushes == 1) {
+ nouveau_pushbuf_data(push,
+ buf->bo, offset,
+ NVC0_IB_ENTRY_1_NO_PREFETCH | (size * 4 * draws));
+ offset += draws * info->indirect_stride;
+ } else {
+ for (i = 0; i < pushes; i++) {
+ nouveau_pushbuf_data(push,
+ buf->bo, offset,
+ NVC0_IB_ENTRY_1_NO_PREFETCH | (size * 4));
+ offset += info->indirect_stride;
+ }
+ }
+ count -= draws;
+ drawid += draws;
+ }
}
static inline void
@@ -864,7 +920,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
- int i, s;
+ int s;
/* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
nvc0->vb_elt_first = info->min_index + info->index_bias;
@@ -901,29 +957,25 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
/* 8 as minimum to avoid immediate double validation of new buffers */
nvc0_state_validate(nvc0, ~0, 8);
+ if (nvc0->vertprog->vp.need_draw_parameters) {
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ PUSH_DATA (push, 512);
+ PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9));
+ PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9));
+ if (!info->indirect) {
+ BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 3);
+ PUSH_DATA (push, 256 + 128);
+ PUSH_DATA (push, info->index_bias);
+ PUSH_DATA (push, info->start_instance);
+ PUSH_DATA (push, info->drawid);
+ }
+ }
+
push->kick_notify = nvc0_draw_vbo_kick_notify;
- /* TODO: Instead of iterating over all the buffer resources looking for
- * coherent buffers, keep track of a context-wide count.
- */
for (s = 0; s < 5 && !nvc0->cb_dirty; ++s) {
- uint32_t valid = nvc0->constbuf_valid[s];
-
- while (valid && !nvc0->cb_dirty) {
- const unsigned i = ffs(valid) - 1;
- struct pipe_resource *res;
-
- valid &= ~(1 << i);
- if (nvc0->constbuf[s][i].user)
- continue;
-
- res = nvc0->constbuf[s][i].u.buf;
- if (!res)
- continue;
-
- if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
- nvc0->cb_dirty = true;
- }
+ if (nvc0->constbuf_coherent[s])
+ nvc0->cb_dirty = true;
}
if (nvc0->cb_dirty) {
@@ -932,14 +984,12 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
}
for (s = 0; s < 5; ++s) {
+ if (!nvc0->textures_coherent[s])
+ continue;
+
for (int i = 0; i < nvc0->num_textures[s]; ++i) {
struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
- struct pipe_resource *res;
- if (!tic)
- continue;
- res = nvc0->textures[s][i]->texture;
- if (res->target != PIPE_BUFFER ||
- !(res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT))
+ if (!(nvc0->textures_coherent[s] & (1 << i)))
continue;
BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
@@ -965,12 +1015,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
PUSH_DATA (push, info->start_instance);
}
- for (i = 0; i < nvc0->num_vtxbufs && !nvc0->base.vbo_dirty; ++i) {
- if (!nvc0->vtxbuf[i].buffer)
- continue;
- if (nvc0->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
- nvc0->base.vbo_dirty = true;
- }
+ nvc0->base.vbo_dirty |= !!nvc0->vtxbufs_coherent;
if (!nvc0->base.vbo_dirty && nvc0->idxbuf.buffer &&
nvc0->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_video.c b/src/gallium/drivers/nouveau/nvc0/nvc0_video.c
index 48ffac1b715..a9fd1d20942 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_video.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video.c
@@ -26,6 +26,24 @@
#include "util/u_format.h"
static void
+nvc0_decoder_begin_frame(struct pipe_video_codec *decoder,
+ struct pipe_video_buffer *target,
+ struct pipe_picture_desc *picture)
+{
+ struct nouveau_vp3_decoder *dec = (struct nouveau_vp3_decoder *)decoder;
+ uint32_t comm_seq = ++dec->fence_seq;
+ unsigned ret = 0;
+
+ assert(dec);
+ assert(target);
+ assert(target->buffer_format == PIPE_FORMAT_NV12);
+
+ ret = nvc0_decoder_bsp_begin(dec, comm_seq);
+
+ assert(ret == 2);
+}
+
+static void
nvc0_decoder_decode_bitstream(struct pipe_video_codec *decoder,
struct pipe_video_buffer *video_target,
struct pipe_picture_desc *picture,
@@ -34,8 +52,24 @@ nvc0_decoder_decode_bitstream(struct pipe_video_codec *decoder,
const unsigned *num_bytes)
{
struct nouveau_vp3_decoder *dec = (struct nouveau_vp3_decoder *)decoder;
+ uint32_t comm_seq = dec->fence_seq;
+ unsigned ret = 0;
+
+ assert(decoder);
+
+ ret = nvc0_decoder_bsp_next(dec, comm_seq, num_buffers, data, num_bytes);
+
+ assert(ret == 2);
+}
+
+static void
+nvc0_decoder_end_frame(struct pipe_video_codec *decoder,
+ struct pipe_video_buffer *video_target,
+ struct pipe_picture_desc *picture)
+{
+ struct nouveau_vp3_decoder *dec = (struct nouveau_vp3_decoder *)decoder;
struct nouveau_vp3_video_buffer *target = (struct nouveau_vp3_video_buffer *)video_target;
- uint32_t comm_seq = ++dec->fence_seq;
+ uint32_t comm_seq = dec->fence_seq;
union pipe_desc desc;
unsigned vp_caps, is_ref, ret;
@@ -43,11 +77,7 @@ nvc0_decoder_decode_bitstream(struct pipe_video_codec *decoder,
desc.base = picture;
- assert(target->base.buffer_format == PIPE_FORMAT_NV12);
-
- ret = nvc0_decoder_bsp(dec, desc, target, comm_seq,
- num_buffers, data, num_bytes,
- &vp_caps, &is_ref, refs);
+ ret = nvc0_decoder_bsp_end(dec, desc, target, comm_seq, &vp_caps, &is_ref, refs);
/* did we decode bitstream correctly? */
assert(ret == 2);
@@ -164,14 +194,19 @@ nvc0_create_decoder(struct pipe_context *context,
PUSH_DATA (push[2], dec->ppp->handle);
dec->base.context = context;
+ dec->base.begin_frame = nvc0_decoder_begin_frame;
dec->base.decode_bitstream = nvc0_decoder_decode_bitstream;
+ dec->base.end_frame = nvc0_decoder_end_frame;
for (i = 0; i < NOUVEAU_VP3_VIDEO_QDEPTH && !ret; ++i)
ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM,
0, 1 << 20, &cfg, &dec->bsp_bo[i]);
- if (!ret)
+ if (!ret) {
+ /* total fudge factor... just has to be bigger for higher bitrates? */
+ unsigned inter_size = align(templ->width * templ->height * 2, 4 << 20);
ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM,
- 0x100, 4 << 20, &cfg, &dec->inter_bo[0]);
+ 0x100, inter_size, &cfg, &dec->inter_bo[0]);
+ }
if (!ret) {
ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM,
0x100, dec->inter_bo[0]->size, &cfg,
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_video.h b/src/gallium/drivers/nouveau/nvc0/nvc0_video.h
index 9ee0280f8ea..cf3c942355b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_video.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video.h
@@ -30,12 +30,18 @@
#include "util/u_video.h"
extern unsigned
-nvc0_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
- struct nouveau_vp3_video_buffer *target,
- unsigned comm_seq, unsigned num_buffers,
- const void *const *data, const unsigned *num_bytes,
- unsigned *vp_caps, unsigned *is_ref,
- struct nouveau_vp3_video_buffer *refs[16]);
+nvc0_decoder_bsp_begin(struct nouveau_vp3_decoder *dec, unsigned comm_seq);
+
+extern unsigned
+nvc0_decoder_bsp_next(struct nouveau_vp3_decoder *dec,
+ unsigned comm_seq, unsigned num_buffers,
+ const void *const *data, const unsigned *num_bytes);
+
+extern unsigned
+nvc0_decoder_bsp_end(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
+ struct nouveau_vp3_video_buffer *target,
+ unsigned comm_seq, unsigned *vp_caps, unsigned *is_ref,
+ struct nouveau_vp3_video_buffer *refs[16]);
extern void
nvc0_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c b/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c
index 4392f62c530..c53f946a762 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c
@@ -32,40 +32,34 @@ static void dump_comm_bsp(struct comm *comm)
#endif
unsigned
-nvc0_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
- struct nouveau_vp3_video_buffer *target,
- unsigned comm_seq, unsigned num_buffers,
- const void *const *data, const unsigned *num_bytes,
- unsigned *vp_caps, unsigned *is_ref,
- struct nouveau_vp3_video_buffer *refs[16])
+nvc0_decoder_bsp_begin(struct nouveau_vp3_decoder *dec, unsigned comm_seq)
{
- struct nouveau_pushbuf *push = dec->pushbuf[0];
- enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
- uint32_t bsp_addr, comm_addr, inter_addr;
- uint32_t slice_size, bucket_size, ring_size, bsp_size;
- uint32_t caps, i;
- int ret;
struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
- struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
- unsigned fence_extra = 0;
- struct nouveau_pushbuf_refn bo_refs[] = {
- { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
- { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
-#if NOUVEAU_VP3_DEBUG_FENCE
- { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART },
-#endif
- { dec->bitplane_bo, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
- };
- int num_refs = ARRAY_SIZE(bo_refs);
+ unsigned ret = 0;
- if (!dec->bitplane_bo)
- num_refs--;
+ ret = nouveau_bo_map(bsp_bo, NOUVEAU_BO_WR, dec->client);
+ if (ret) {
+ debug_printf("map failed: %i %s\n", ret, strerror(-ret));
+ return -1;
+ }
-#if NOUVEAU_VP3_DEBUG_FENCE
- fence_extra = 4;
-#endif
+ nouveau_vp3_bsp_begin(dec);
+
+ return 2;
+}
- bsp_size = NOUVEAU_VP3_BSP_RESERVED_SIZE;
+unsigned
+nvc0_decoder_bsp_next(struct nouveau_vp3_decoder *dec,
+ unsigned comm_seq, unsigned num_buffers,
+ const void *const *data, const unsigned *num_bytes)
+{
+ struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
+ struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
+ uint32_t bsp_size = 0;
+ uint32_t i = 0;
+ unsigned ret = 0;
+
+ bsp_size = dec->bsp_ptr - (char *)bsp_bo->map;
for (i = 0; i < num_buffers; i++)
bsp_size += num_bytes[i];
bsp_size += 256; /* the 4 end markers */
@@ -81,14 +75,29 @@ nvc0_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
bsp_size += (1 << 20) - 1;
bsp_size &= ~((1 << 20) - 1);
- ret = nouveau_bo_new(dec->bitplane_bo->device, NOUVEAU_BO_VRAM, 0, bsp_size, &cfg, &tmp_bo);
+ ret = nouveau_bo_new(dec->client->device, NOUVEAU_BO_VRAM, 0, bsp_size, &cfg, &tmp_bo);
if (ret) {
debug_printf("reallocating bsp %u -> %u failed with %i\n",
bsp_bo ? (unsigned)bsp_bo->size : 0, bsp_size, ret);
return -1;
}
+
+ ret = nouveau_bo_map(tmp_bo, NOUVEAU_BO_WR, dec->client);
+ if (ret) {
+ debug_printf("map failed: %i %s\n", ret, strerror(-ret));
+ return -1;
+ }
+
+ /* Preserve previous buffer. */
+ /* TODO: offload this copy to the GPU, as otherwise we're reading and
+ * writing to VRAM. */
+ memcpy(tmp_bo->map, bsp_bo->map, bsp_bo->size);
+
+ /* update position to current chunk */
+ dec->bsp_ptr = tmp_bo->map + (dec->bsp_ptr - (char *)bsp_bo->map);
+
nouveau_bo_ref(NULL, &bsp_bo);
- bo_refs[0].bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH] = bsp_bo = tmp_bo;
+ dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH] = bsp_bo = tmp_bo;
}
if (!inter_bo || bsp_bo->size * 4 > inter_bo->size) {
@@ -98,24 +107,61 @@ nvc0_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
cfg.nvc0.tile_mode = 0x10;
cfg.nvc0.memtype = 0xfe;
- ret = nouveau_bo_new(dec->bitplane_bo->device, NOUVEAU_BO_VRAM, 0, bsp_bo->size * 4, &cfg, &tmp_bo);
+ ret = nouveau_bo_new(dec->client->device, NOUVEAU_BO_VRAM, 0, bsp_bo->size * 4, &cfg, &tmp_bo);
if (ret) {
debug_printf("reallocating inter %u -> %u failed with %i\n",
inter_bo ? (unsigned)inter_bo->size : 0, (unsigned)bsp_bo->size * 4, ret);
return -1;
}
+
+ ret = nouveau_bo_map(tmp_bo, NOUVEAU_BO_WR, dec->client);
+ if (ret) {
+ debug_printf("map failed: %i %s\n", ret, strerror(-ret));
+ return -1;
+ }
+
nouveau_bo_ref(NULL, &inter_bo);
- bo_refs[1].bo = dec->inter_bo[comm_seq & 1] = inter_bo = tmp_bo;
+ dec->inter_bo[comm_seq & 1] = inter_bo = tmp_bo;
}
- ret = nouveau_bo_map(bsp_bo, NOUVEAU_BO_WR, dec->client);
- if (ret) {
- debug_printf("map failed: %i %s\n", ret, strerror(-ret));
- return -1;
- }
+ nouveau_vp3_bsp_next(dec, num_buffers, data, num_bytes);
+
+ return 2;
+}
+
+
+unsigned
+nvc0_decoder_bsp_end(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
+ struct nouveau_vp3_video_buffer *target, unsigned comm_seq,
+ unsigned *vp_caps, unsigned *is_ref,
+ struct nouveau_vp3_video_buffer *refs[16])
+{
+ struct nouveau_pushbuf *push = dec->pushbuf[0];
+ enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
+ uint32_t bsp_addr, comm_addr, inter_addr;
+ uint32_t slice_size, bucket_size, ring_size;
+ uint32_t caps;
+ struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
+ struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
+ unsigned fence_extra = 0;
+ struct nouveau_pushbuf_refn bo_refs[] = {
+ { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
+ { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
+#if NOUVEAU_VP3_DEBUG_FENCE
+ { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART },
+#endif
+ { dec->bitplane_bo, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+ };
+ int num_refs = ARRAY_SIZE(bo_refs);
+
+ if (!dec->bitplane_bo)
+ num_refs--;
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ fence_extra = 4;
+#endif
- caps = nouveau_vp3_bsp(dec, desc, target, comm_seq,
- num_buffers, data, num_bytes);
+ caps = nouveau_vp3_bsp_end(dec, desc);
nouveau_vp3_vp_caps(dec, desc, target, comm_seq, vp_caps, is_ref, refs);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h b/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h
index 4ea8ca3cfa2..79abe78b77a 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h
@@ -68,6 +68,7 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags)
#define SUBC_SW(m) 7, (m)
#define NVC0_3D_SERIALIZE NV50_GRAPH_SERIALIZE
+#define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8))
static inline uint32_t
NVC0_FIFO_PKHDR_SQ(int subc, int mthd, unsigned size)
diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog.c b/src/gallium/drivers/r300/compiler/r500_fragprog.c
index 88aad8a054f..4c415afcb05 100644
--- a/src/gallium/drivers/r300/compiler/r500_fragprog.c
+++ b/src/gallium/drivers/r300/compiler/r500_fragprog.c
@@ -384,7 +384,7 @@ void r500FragmentProgramDump(struct radeon_compiler *c, void *user)
case R500_INST_TYPE_OUT: str = "OUT"; break;
case R500_INST_TYPE_FC: str = "FC"; break;
case R500_INST_TYPE_TEX: str = "TEX"; break;
- };
+ }
fprintf(stderr,"%s %s %s %s %s ", str,
inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
inst & R500_INST_LAST ? "LAST" : "",
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index b393769c861..82ba0435118 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -421,8 +421,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
r300->context.create_video_codec = vl_create_decoder;
r300->context.create_video_buffer = vl_video_buffer_create;
- r300->uploader = u_upload_create(&r300->context, 256 * 1024, 4,
- PIPE_BIND_CUSTOM);
+ r300->uploader = u_upload_create(&r300->context, 256 * 1024,
+ PIPE_BIND_CUSTOM, PIPE_USAGE_STREAM);
r300->blitter = util_blitter_create(&r300->context);
if (r300->blitter == NULL)
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index b482fa140ed..7eda6753d0d 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -1010,7 +1010,7 @@ static void r300_render_draw_elements(struct vbuf_render* render,
CS_LOCALS(r300);
DBG(r300, DBG_DRAW, "r300: render_draw_elements (count: %d)\n", count);
- u_upload_data(r300->uploader, 0, count * 2, indices,
+ u_upload_data(r300->uploader, 0, count * 2, 4, indices,
&index_buffer_offset, &index_buffer);
if (!index_buffer) {
return;
diff --git a/src/gallium/drivers/r300/r300_render_translate.c b/src/gallium/drivers/r300/r300_render_translate.c
index caeeec05909..7221211deea 100644
--- a/src/gallium/drivers/r300/r300_render_translate.c
+++ b/src/gallium/drivers/r300/r300_render_translate.c
@@ -37,7 +37,7 @@ void r300_translate_index_buffer(struct r300_context *r300,
switch (*index_size) {
case 1:
*out_buffer = NULL;
- u_upload_alloc(r300->uploader, 0, count * 2,
+ u_upload_alloc(r300->uploader, 0, count * 2, 4,
&out_offset, out_buffer, &ptr);
util_shorten_ubyte_elts_to_userptr(
@@ -51,7 +51,7 @@ void r300_translate_index_buffer(struct r300_context *r300,
case 2:
if (index_offset) {
*out_buffer = NULL;
- u_upload_alloc(r300->uploader, 0, count * 2,
+ u_upload_alloc(r300->uploader, 0, count * 2, 4,
&out_offset, out_buffer, &ptr);
util_rebuild_ushort_elts_to_userptr(&r300->context, ib,
@@ -65,7 +65,7 @@ void r300_translate_index_buffer(struct r300_context *r300,
case 4:
if (index_offset) {
*out_buffer = NULL;
- u_upload_alloc(r300->uploader, 0, count * 4,
+ u_upload_alloc(r300->uploader, 0, count * 4, 4,
&out_offset, out_buffer, &ptr);
util_rebuild_uint_elts_to_userptr(&r300->context, ib,
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 606e25f915b..d1b59ab4345 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -183,6 +183,8 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_SAMPLE_SHADING:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_SAMPLER_VIEW_TARGET:
@@ -200,6 +202,11 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_CLEAR_TEXTURE:
+ case PIPE_CAP_DRAW_PARAMETERS:
+ case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+ case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+ case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
return 0;
/* SWTCL-only features. */
@@ -304,6 +311,7 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+ case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
@@ -362,6 +370,7 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+ case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c
index 737a6f5e4f8..42c8e3a0fc5 100644
--- a/src/gallium/drivers/r300/r300_screen_buffer.c
+++ b/src/gallium/drivers/r300/r300_screen_buffer.c
@@ -42,7 +42,7 @@ void r300_upload_index_buffer(struct r300_context *r300,
*index_buffer = NULL;
u_upload_data(r300->uploader,
- 0, count * index_size,
+ 0, count * index_size, 4,
ptr + (*start * index_size),
&index_offset,
index_buffer);
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index da472f4d7f4..741e263e7ed 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -52,7 +52,6 @@ enum r300_rs_col_write_type {
static void r300_draw_emit_attrib(struct r300_context* r300,
enum attrib_emit emit,
- enum interp_mode interp,
int index)
{
struct r300_vertex_shader* vs = r300->vs_state.state;
@@ -62,7 +61,7 @@ static void r300_draw_emit_attrib(struct r300_context* r300,
output = draw_find_shader_output(r300->draw,
info->output_semantic_name[index],
info->output_semantic_index[index]);
- draw_emit_vertex_attr(&r300->vertex_info, emit, interp, output);
+ draw_emit_vertex_attr(&r300->vertex_info, emit, output);
}
static void r300_draw_emit_all_attribs(struct r300_context* r300)
@@ -73,31 +72,27 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300)
/* Position. */
if (vs_outputs->pos != ATTR_UNUSED) {
- r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
- vs_outputs->pos);
+ r300_draw_emit_attrib(r300, EMIT_4F, vs_outputs->pos);
} else {
assert(0);
}
/* Point size. */
if (vs_outputs->psize != ATTR_UNUSED) {
- r300_draw_emit_attrib(r300, EMIT_1F_PSIZE, INTERP_POS,
- vs_outputs->psize);
+ r300_draw_emit_attrib(r300, EMIT_1F_PSIZE, vs_outputs->psize);
}
/* Colors. */
for (i = 0; i < ATTR_COLOR_COUNT; i++) {
if (vs_outputs->color[i] != ATTR_UNUSED) {
- r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR,
- vs_outputs->color[i]);
+ r300_draw_emit_attrib(r300, EMIT_4F, vs_outputs->color[i]);
}
}
/* Back-face colors. */
for (i = 0; i < ATTR_COLOR_COUNT; i++) {
if (vs_outputs->bcolor[i] != ATTR_UNUSED) {
- r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR,
- vs_outputs->bcolor[i]);
+ r300_draw_emit_attrib(r300, EMIT_4F, vs_outputs->bcolor[i]);
}
}
@@ -108,16 +103,14 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300)
for (i = 0; i < ATTR_GENERIC_COUNT && gen_count < 8; i++) {
if (vs_outputs->generic[i] != ATTR_UNUSED &&
!(r300->sprite_coord_enable & (1 << i))) {
- r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
- vs_outputs->generic[i]);
+ r300_draw_emit_attrib(r300, EMIT_4F, vs_outputs->generic[i]);
gen_count++;
}
}
/* Fog coordinates. */
if (gen_count < 8 && vs_outputs->fog != ATTR_UNUSED) {
- r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
- vs_outputs->fog);
+ r300_draw_emit_attrib(r300, EMIT_4F, vs_outputs->fog);
gen_count++;
}
@@ -125,8 +118,7 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300)
if (r300_fs(r300)->shader->inputs.wpos != ATTR_UNUSED && gen_count < 8) {
DBG(r300, DBG_SWTCL, "draw_emit_attrib: WPOS, index: %i\n",
vs_outputs->wpos);
- r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
- vs_outputs->wpos);
+ r300_draw_emit_attrib(r300, EMIT_4F, vs_outputs->wpos);
}
}
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index d83eb17c280..20945ece155 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -600,7 +600,7 @@ static void evergreen_launch_grid(
ctx->screen->has_compressed_msaa_texturing);
bc->type = TGSI_PROCESSOR_COMPUTE;
bc->isa = ctx->isa;
- r600_llvm_compile(mod, ctx->b.family, bc, &use_kill, dump);
+ r600_llvm_compile(mod, ctx->b.family, bc, &use_kill, dump, &ctx->b.debug);
if (dump && !sb_disasm) {
r600_bytecode_disasm(bc);
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 1aee7dd2da8..9dfb84965cf 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1956,7 +1956,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
if (!gs_ring_buffer) {
radeon_set_context_reg_flag(cs, reg_alu_constbuf_size + buffer_index * 4,
- ALIGN_DIVUP(cb->buffer_size, 256), pkt_flags);
+ DIV_ROUND_UP(cb->buffer_size, 256), pkt_flags);
radeon_set_context_reg_flag(cs, reg_alu_const_cache + buffer_index * 4, va >> 8,
pkt_flags);
}
diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
index 1cc30317ba5..8b91372f3ae 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -726,7 +726,7 @@ static void tex_fetch_args(
* That operand should be passed as a float value in the args array
* right after the coord vector. After packing it's not used anymore,
* that's why arg_count is not increased */
- coords[4] = lp_build_emit_fetch(bld_base, inst, 1, 0);
+ coords[4] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
}
if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
@@ -915,14 +915,17 @@ unsigned r600_llvm_compile(
enum radeon_family family,
struct r600_bytecode *bc,
boolean *use_kill,
- unsigned dump)
+ unsigned dump,
+ struct pipe_debug_callback *debug)
{
unsigned r;
struct radeon_shader_binary binary;
const char * gpu_family = r600_get_llvm_processor_name(family);
memset(&binary, 0, sizeof(struct radeon_shader_binary));
- r = radeon_llvm_compile(mod, &binary, gpu_family, dump, dump, NULL);
+ if (dump)
+ LLVMDumpModule(mod);
+ r = radeon_llvm_compile(mod, &binary, gpu_family, NULL, debug);
r = r600_create_shader(bc, &binary, use_kill);
diff --git a/src/gallium/drivers/r600/r600_llvm.h b/src/gallium/drivers/r600/r600_llvm.h
index 9b5304d9fcb..f570b739fbe 100644
--- a/src/gallium/drivers/r600/r600_llvm.h
+++ b/src/gallium/drivers/r600/r600_llvm.h
@@ -7,6 +7,7 @@
#include "radeon/radeon_llvm.h"
#include <llvm-c/Core.h>
+struct pipe_debug_callback;
struct r600_bytecode;
struct r600_shader_ctx;
struct radeon_llvm_context;
@@ -22,7 +23,8 @@ unsigned r600_llvm_compile(
enum radeon_family family,
struct r600_bytecode *bc,
boolean *use_kill,
- unsigned dump);
+ unsigned dump,
+ struct pipe_debug_callback *debug);
unsigned r600_create_shader(struct r600_bytecode *bc,
const struct radeon_shader_binary *binary,
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 17006f70601..e61d9286542 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -348,6 +348,13 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_CLEAR_TEXTURE:
+ case PIPE_CAP_DRAW_PARAMETERS:
+ case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
+ case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+ case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
return 0;
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
@@ -522,6 +529,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+ case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
/* due to a bug in the shader compiler, some loops hang
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 31f2a729494..0e4dd16525b 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -946,7 +946,6 @@ static inline uint32_t S_FIXED(float value, uint32_t frac_bits)
{
return value * (1 << frac_bits);
}
-#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
/* 12.4 fixed-point */
static inline unsigned r600_pack_float_12p4(float x)
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index d411b0be50e..df40f94bdcf 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -162,7 +162,8 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
struct r600_context *rctx = (struct r600_context *)ctx;
struct r600_pipe_shader_selector *sel = shader->selector;
int r;
- bool dump = r600_can_dump_shader(&rctx->screen->b, sel->tokens);
+ bool dump = r600_can_dump_shader(&rctx->screen->b,
+ tgsi_get_processor_type(sel->tokens));
unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB);
unsigned sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM);
unsigned export_shader;
@@ -394,7 +395,7 @@ static int tgsi_last_instruction(unsigned writemask)
static int tgsi_is_supported(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
- int j;
+ unsigned j;
if (i->Instruction.NumDstRegs > 1 && i->Instruction.Opcode != TGSI_OPCODE_DFRACEXP) {
R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
@@ -1166,7 +1167,7 @@ static int allocate_system_value_inputs(struct r600_shader_ctx *ctx, int gpr_off
*/
static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
{
- int i;
+ unsigned i;
int num_baryc;
struct tgsi_parse_context parse;
@@ -1585,7 +1586,7 @@ static int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_regi
static int tgsi_split_gs_inputs(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- int i;
+ unsigned i;
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
struct tgsi_full_src_register *src = &inst->Src[i];
@@ -1854,7 +1855,7 @@ static int fetch_tcs_output(struct r600_shader_ctx *ctx, struct tgsi_full_src_re
static int tgsi_split_lds_inputs(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- int i;
+ unsigned i;
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
struct tgsi_full_src_register *src = &inst->Src[i];
@@ -2784,7 +2785,7 @@ static int r600_tess_factor_read(struct r600_shader_ctx *ctx,
static int r600_emit_tess_factor(struct r600_shader_ctx *ctx)
{
- int i;
+ unsigned i;
int stride, outer_comps, inner_comps;
int tessinner_idx = -1, tessouter_idx = -1;
int r;
@@ -3238,7 +3239,8 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
if (use_llvm) {
struct radeon_llvm_context radeon_llvm_ctx;
LLVMModuleRef mod;
- bool dump = r600_can_dump_shader(&rscreen->b, tokens);
+ bool dump = r600_can_dump_shader(&rscreen->b,
+ tgsi_get_processor_type(tokens));
boolean use_kill = false;
memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx));
@@ -3259,7 +3261,8 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
ctx.shader->has_txq_cube_array_z_comp = radeon_llvm_ctx.has_txq_cube_array_z_comp;
ctx.shader->uses_tex_buffers = radeon_llvm_ctx.uses_tex_buffers;
- if (r600_llvm_compile(mod, rscreen->b.family, ctx.bc, &use_kill, dump)) {
+ if (r600_llvm_compile(mod, rscreen->b.family, ctx.bc, &use_kill,
+ dump, &rctx->b.debug)) {
radeon_llvm_dispose(&radeon_llvm_ctx);
use_llvm = 0;
fprintf(stderr, "R600 LLVM backend failed to compile "
@@ -4424,7 +4427,7 @@ static int cayman_mul_double_instr(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ctx->inst_info->op;
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
- r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1));;
+ r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1));
}
alu.dst.sel = t1;
alu.dst.chan = i;
@@ -4791,7 +4794,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
{
int chan;
int sel;
- int i;
+ unsigned i;
if (ctx->bc->chip_class == CAYMAN) {
for (i = 0; i < 3; i++) {
@@ -7925,7 +7928,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bytecode_alu alu;
int r;
- int i;
+ unsigned i;
/* result.x = 2^floor(src); */
if (inst->Dst[0].Register.WriteMask & 1) {
@@ -8054,7 +8057,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bytecode_alu alu;
int r;
- int i;
+ unsigned i;
/* result.x = floor(log2(|src|)); */
if (inst->Dst[0].Register.WriteMask & 1) {
@@ -8781,7 +8784,7 @@ static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
static int tgsi_endloop(struct r600_shader_ctx *ctx)
{
- int i;
+ unsigned i;
r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_END);
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 43b80742cb5..f60e30486a2 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1768,7 +1768,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
if (!gs_ring_buffer) {
radeon_set_context_reg(cs, reg_alu_constbuf_size + buffer_index * 4,
- ALIGN_DIVUP(cb->buffer_size, 256));
+ DIV_ROUND_UP(cb->buffer_size, 256));
radeon_set_context_reg(cs, reg_alu_const_cache + buffer_index * 4, offset >> 8);
}
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index ca589fa7759..c3346f29811 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1106,10 +1106,10 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint
tmpPtr[i] = util_cpu_to_le32(((uint32_t *)ptr)[i]);
}
- u_upload_data(rctx->b.uploader, 0, size, tmpPtr, &cb->buffer_offset, &cb->buffer);
+ u_upload_data(rctx->b.uploader, 0, size, 256, tmpPtr, &cb->buffer_offset, &cb->buffer);
free(tmpPtr);
} else {
- u_upload_data(rctx->b.uploader, 0, input->buffer_size, ptr, &cb->buffer_offset, &cb->buffer);
+ u_upload_data(rctx->b.uploader, 0, input->buffer_size, 256, ptr, &cb->buffer_offset, &cb->buffer);
}
/* account it in gtt */
rctx->b.gtt += input->buffer_size;
@@ -1732,7 +1732,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
}
}
- u_upload_alloc(rctx->b.uploader, start, count * 2,
+ u_upload_alloc(rctx->b.uploader, start, count * 2, 256,
&out_offset, &out_buffer, &ptr);
util_shorten_ubyte_elts_to_userptr(
@@ -1753,7 +1753,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
if (ib.user_buffer && (R600_BIG_ENDIAN || info.indirect ||
info.instance_count > 1 ||
info.count*ib.index_size > 20)) {
- u_upload_data(rctx->b.uploader, 0, info.count * ib.index_size,
+ u_upload_data(rctx->b.uploader, 0, info.count * ib.index_size, 256,
ib.user_buffer, &ib.offset, &ib.buffer);
ib.user_buffer = NULL;
}
diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
index 18925277d2d..484f5c8d5b7 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -298,7 +298,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
struct r600_resource *staging = NULL;
u_upload_alloc(rctx->uploader, 0, box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT),
- &offset, (struct pipe_resource**)&staging, (void**)&data);
+ 256, &offset, (struct pipe_resource**)&staging, (void**)&data);
if (staging) {
data += box->x % R600_MAP_BUFFER_ALIGNMENT;
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 9a5e9878176..52c365e81d0 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -85,7 +85,7 @@ void r600_draw_rectangle(struct blitter_context *blitter,
/* Upload vertices. The hw rectangle has only 3 vertices,
* I guess the 4th one is derived from the first 3.
* The vertex specification should match u_blitter's vertex element state. */
- u_upload_alloc(rctx->uploader, 0, sizeof(float) * 24, &offset, &buf, (void**)&vb);
+ u_upload_alloc(rctx->uploader, 0, sizeof(float) * 24, 256, &offset, &buf, (void**)&vb);
if (!buf)
return;
@@ -227,6 +227,17 @@ static enum pipe_reset_status r600_get_reset_status(struct pipe_context *ctx)
return PIPE_UNKNOWN_CONTEXT_RESET;
}
+static void r600_set_debug_callback(struct pipe_context *ctx,
+ const struct pipe_debug_callback *cb)
+{
+ struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+
+ if (cb)
+ rctx->debug = *cb;
+ else
+ memset(&rctx->debug, 0, sizeof(rctx->debug));
+}
+
bool r600_common_context_init(struct r600_common_context *rctx,
struct r600_common_screen *rscreen)
{
@@ -252,6 +263,7 @@ bool r600_common_context_init(struct r600_common_context *rctx,
rctx->b.transfer_inline_write = u_default_transfer_inline_write;
rctx->b.memory_barrier = r600_memory_barrier;
rctx->b.flush = r600_flush_from_st;
+ rctx->b.set_debug_callback = r600_set_debug_callback;
if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 43) {
rctx->b.get_device_reset_status = r600_get_reset_status;
@@ -272,9 +284,9 @@ bool r600_common_context_init(struct r600_common_context *rctx,
if (!rctx->allocator_so_filled_size)
return false;
- rctx->uploader = u_upload_create(&rctx->b, 1024 * 1024, 256,
+ rctx->uploader = u_upload_create(&rctx->b, 1024 * 1024,
PIPE_BIND_INDEX_BUFFER |
- PIPE_BIND_CONSTANT_BUFFER);
+ PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_STREAM);
if (!rctx->uploader)
return false;
@@ -999,13 +1011,9 @@ void r600_destroy_common_screen(struct r600_common_screen *rscreen)
}
bool r600_can_dump_shader(struct r600_common_screen *rscreen,
- const struct tgsi_token *tokens)
+ unsigned processor)
{
- /* Compute shader don't have tgsi_tokens */
- if (!tokens)
- return (rscreen->debug_flags & DBG_CS) != 0;
-
- switch (tgsi_get_processor_type(tokens)) {
+ switch (processor) {
case TGSI_PROCESSOR_VERTEX:
return (rscreen->debug_flags & DBG_VS) != 0;
case TGSI_PROCESSOR_TESS_CTRL:
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index c3933b1da98..68b50a9fb0f 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -440,6 +440,8 @@ struct r600_common_context {
* the GPU addresses are updated. */
struct list_head texture_buffers;
+ struct pipe_debug_callback debug;
+
/* Copy one resource to another using async DMA. */
void (*dma_copy)(struct pipe_context *ctx,
struct pipe_resource *dst,
@@ -514,7 +516,7 @@ bool r600_common_context_init(struct r600_common_context *rctx,
void r600_common_context_cleanup(struct r600_common_context *rctx);
void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r);
bool r600_can_dump_shader(struct r600_common_screen *rscreen,
- const struct tgsi_token *tokens);
+ unsigned processor);
void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
unsigned offset, unsigned size, unsigned value,
bool is_framebuffer);
diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c b/src/gallium/drivers/radeon/radeon_llvm_emit.c
index 61ed9402122..3d0987624a6 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
@@ -23,12 +23,15 @@
* Authors: Tom Stellard <[email protected]>
*
*/
+
#include "radeon_llvm_emit.h"
#include "radeon_elf_util.h"
#include "c11/threads.h"
#include "gallivm/lp_bld_misc.h"
+#include "util/u_debug.h"
#include "util/u_memory.h"
#include "pipe/p_shader_tokens.h"
+#include "pipe/p_state.h"
#include <llvm-c/Target.h>
#include <llvm-c/TargetMachine.h>
@@ -123,16 +126,44 @@ LLVMTargetRef radeon_llvm_get_r600_target(const char *triple)
return target;
}
+struct radeon_llvm_diagnostics {
+ struct pipe_debug_callback *debug;
+ unsigned retval;
+};
+
static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef di, void *context)
{
- if (LLVMGetDiagInfoSeverity(di) == LLVMDSError) {
- unsigned int *diagnosticflag = (unsigned int *)context;
- char *diaginfo_message = LLVMGetDiagInfoDescription(di);
+ struct radeon_llvm_diagnostics *diag = (struct radeon_llvm_diagnostics *)context;
+ LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
+ char *description = LLVMGetDiagInfoDescription(di);
+ const char *severity_str = NULL;
+
+ switch (severity) {
+ case LLVMDSError:
+ severity_str = "error";
+ break;
+ case LLVMDSWarning:
+ severity_str = "warning";
+ break;
+ case LLVMDSRemark:
+ severity_str = "remark";
+ break;
+ case LLVMDSNote:
+ severity_str = "note";
+ break;
+ default:
+ severity_str = "unknown";
+ }
+
+ pipe_debug_message(diag->debug, SHADER_INFO,
+ "LLVM diagnostic (%s): %s", severity_str, description);
- *diagnosticflag = 1;
- fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", diaginfo_message);
- LLVMDisposeMessage(diaginfo_message);
+ if (severity == LLVMDSError) {
+ diag->retval = 1;
+ fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description);
}
+
+ LLVMDisposeMessage(description);
}
/**
@@ -141,22 +172,25 @@ static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef di, void *context)
* @returns 0 for success, 1 for failure
*/
unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binary,
- const char *gpu_family, bool dump_ir, bool dump_asm,
- LLVMTargetMachineRef tm)
+ const char *gpu_family,
+ LLVMTargetMachineRef tm,
+ struct pipe_debug_callback *debug)
{
-
+ struct radeon_llvm_diagnostics diag;
char cpu[CPU_STRING_LEN];
char fs[FS_STRING_LEN];
char *err;
bool dispose_tm = false;
LLVMContextRef llvm_ctx;
- unsigned rval = 0;
LLVMMemoryBufferRef out_buffer;
unsigned buffer_size;
const char *buffer_data;
char triple[TRIPLE_STRING_LEN];
LLVMBool mem_err;
+ diag.debug = debug;
+ diag.retval = 0;
+
if (!tm) {
strncpy(triple, "r600--", TRIPLE_STRING_LEN);
LLVMTargetRef target = radeon_llvm_get_r600_target(triple);
@@ -165,20 +199,17 @@ unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binar
}
strncpy(cpu, gpu_family, CPU_STRING_LEN);
memset(fs, 0, sizeof(fs));
- if (dump_asm)
- strncpy(fs, "+DumpCode", FS_STRING_LEN);
+ strncpy(fs, "+DumpCode", FS_STRING_LEN);
tm = LLVMCreateTargetMachine(target, triple, cpu, fs,
LLVMCodeGenLevelDefault, LLVMRelocDefault,
LLVMCodeModelDefault);
dispose_tm = true;
}
- if (dump_ir)
- LLVMDumpModule(M);
+
/* Setup Diagnostic Handler*/
llvm_ctx = LLVMGetModuleContext(M);
- LLVMContextSetDiagnosticHandler(llvm_ctx, radeonDiagnosticHandler, &rval);
- rval = 0;
+ LLVMContextSetDiagnosticHandler(llvm_ctx, radeonDiagnosticHandler, &diag);
/* Compile IR*/
mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
@@ -187,15 +218,13 @@ unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binar
/* Process Errors/Warnings */
if (mem_err) {
fprintf(stderr, "%s: %s", __FUNCTION__, err);
+ pipe_debug_message(debug, SHADER_INFO,
+ "LLVM emit error: %s", err);
FREE(err);
- rval = 1;
+ diag.retval = 1;
goto out;
}
- if (0 != rval) {
- fprintf(stderr, "%s: Processing Diag Flag\n", __FUNCTION__);
- }
-
/* Extract Shader Code*/
buffer_size = LLVMGetBufferSize(out_buffer);
buffer_data = LLVMGetBufferStart(out_buffer);
@@ -209,5 +238,7 @@ out:
if (dispose_tm) {
LLVMDisposeTargetMachine(tm);
}
- return rval;
+ if (diag.retval != 0)
+ pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
+ return diag.retval;
}
diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.h b/src/gallium/drivers/radeon/radeon_llvm_emit.h
index e20aed94c6b..45f05a9e0e1 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.h
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.h
@@ -31,6 +31,7 @@
#include <llvm-c/TargetMachine.h>
#include <stdbool.h>
+struct pipe_debug_callback;
struct radeon_shader_binary;
void radeon_llvm_shader_type(LLVMValueRef F, unsigned type);
@@ -38,7 +39,8 @@ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type);
LLVMTargetRef radeon_llvm_get_r600_target(const char *triple);
unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binary,
- const char *gpu_family, bool dump_ir, bool dump_asm,
- LLVMTargetMachineRef tm);
+ const char *gpu_family,
+ LLVMTargetMachineRef tm,
+ struct pipe_debug_callback *debug);
#endif /* RADEON_LLVM_EMIT_H */
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c
index 2de237b4716..105a1b2a878 100644
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -196,7 +196,7 @@ static void cik_sdma_copy_tile(struct si_context *ctx,
(tile_split << 11) | (mt << 8) | (array_mode << 3) |
lbpe;
cs->buf[cs->cdw++] = y << 16; /* | x */
- cs->buf[cs->cdw++] = 0; /* z */;
+ cs->buf[cs->cdw++] = 0; /* z */
cs->buf[cs->cdw++] = addr & 0xfffffffc;
cs->buf[cs->cdw++] = addr >> 32;
cs->buf[cs->cdw++] = (pitch / bpe) - 1;
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 47a74eea0e0..5a08cbfb198 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -67,9 +67,9 @@ static void init_scratch_buffer(struct si_context *sctx, struct si_compute *prog
program->shader.binary.global_symbol_offsets[i];
unsigned scratch_bytes_needed;
- si_shader_binary_read_config(sctx->screen,
- &program->shader, offset);
- scratch_bytes_needed = program->shader.scratch_bytes_per_wave;
+ si_shader_binary_read_config(&program->shader.binary,
+ &program->shader.config, offset);
+ scratch_bytes_needed = program->shader.config.scratch_bytes_per_wave;
scratch_bytes = MAX2(scratch_bytes, scratch_bytes_needed);
}
@@ -87,7 +87,7 @@ static void init_scratch_buffer(struct si_context *sctx, struct si_compute *prog
* to the maximum bytes needed, so it can compute the stride
* correctly.
*/
- program->shader.scratch_bytes_per_wave = scratch_bytes;
+ program->shader.config.scratch_bytes_per_wave = scratch_bytes;
/* Patch the shader with the scratch buffer address. */
si_shader_apply_scratch_relocs(sctx,
@@ -122,8 +122,12 @@ static void *si_create_compute_state(
for (i = 0; i < program->num_kernels; i++) {
LLVMModuleRef mod = radeon_llvm_get_kernel_module(program->llvm_ctx, i,
code, header->num_bytes);
- si_compile_llvm(sctx->screen, &program->kernels[i], sctx->tm,
- mod);
+ si_compile_llvm(sctx->screen, &program->kernels[i].binary,
+ &program->kernels[i].config, sctx->tm,
+ mod, &sctx->b.debug, TGSI_PROCESSOR_COMPUTE);
+ si_shader_dump(sctx->screen, &program->kernels[i],
+ &sctx->b.debug, TGSI_PROCESSOR_COMPUTE);
+ si_shader_binary_upload(sctx->screen, &program->kernels[i]);
LLVMDisposeModule(mod);
}
}
@@ -136,7 +140,11 @@ static void *si_create_compute_state(
* the shader code to the GPU.
*/
init_scratch_buffer(sctx, program);
- si_shader_binary_read(sctx->screen, &program->shader);
+ si_shader_binary_read_config(&program->shader.binary,
+ &program->shader.config, 0);
+ si_shader_dump(sctx->screen, &program->shader, &sctx->b.debug,
+ TGSI_PROCESSOR_COMPUTE);
+ si_shader_binary_upload(sctx->screen, &program->shader);
#endif
program->input_buffer = si_resource_create_custom(sctx->b.b.screen,
@@ -259,7 +267,7 @@ static void si_launch_grid(
#if HAVE_LLVM >= 0x0306
/* Read the config information */
- si_shader_binary_read_config(sctx->screen, shader, pc);
+ si_shader_binary_read_config(&shader->binary, &shader->config, pc);
#endif
/* Upload the kernel arguments */
@@ -280,12 +288,12 @@ static void si_launch_grid(
memcpy(kernel_args + (num_work_size_bytes / 4), input, program->input_size);
- if (shader->scratch_bytes_per_wave > 0) {
+ if (shader->config.scratch_bytes_per_wave > 0) {
COMPUTE_DBG(sctx->screen, "Waves: %u; Scratch per wave: %u bytes; "
"Total Scratch: %u bytes\n", num_waves_for_scratch,
- shader->scratch_bytes_per_wave,
- shader->scratch_bytes_per_wave *
+ shader->config.scratch_bytes_per_wave,
+ shader->config.scratch_bytes_per_wave *
num_waves_for_scratch);
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
@@ -312,7 +320,7 @@ static void si_launch_grid(
si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 8, scratch_buffer_va);
si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 12,
S_008F04_BASE_ADDRESS_HI(scratch_buffer_va >> 32)
- | S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64));
+ | S_008F04_STRIDE(shader->config.scratch_bytes_per_wave / 64));
si_pm4_set_reg(pm4, R_00B810_COMPUTE_START_X, 0);
si_pm4_set_reg(pm4, R_00B814_COMPUTE_START_Y, 0);
@@ -360,9 +368,9 @@ static void si_launch_grid(
si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, shader_va >> 8);
si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40);
- si_pm4_set_reg(pm4, R_00B848_COMPUTE_PGM_RSRC1, shader->rsrc1);
+ si_pm4_set_reg(pm4, R_00B848_COMPUTE_PGM_RSRC1, shader->config.rsrc1);
- lds_blocks = shader->lds_size;
+ lds_blocks = shader->config.lds_size;
/* XXX: We are over allocating LDS. For SI, the shader reports LDS in
* blocks of 256 bytes, so if there are 4 bytes lds allocated in
* the shader and 4 bytes allocated by the state tracker, then
@@ -376,10 +384,10 @@ static void si_launch_grid(
assert(lds_blocks <= 0xFF);
- shader->rsrc2 &= C_00B84C_LDS_SIZE;
- shader->rsrc2 |= S_00B84C_LDS_SIZE(lds_blocks);
+ shader->config.rsrc2 &= C_00B84C_LDS_SIZE;
+ shader->config.rsrc2 |= S_00B84C_LDS_SIZE(lds_blocks);
- si_pm4_set_reg(pm4, R_00B84C_COMPUTE_PGM_RSRC2, shader->rsrc2);
+ si_pm4_set_reg(pm4, R_00B84C_COMPUTE_PGM_RSRC2, shader->config.rsrc2);
si_pm4_set_reg(pm4, R_00B854_COMPUTE_RESOURCE_LIMITS, 0);
si_pm4_set_reg(pm4, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0,
@@ -401,7 +409,7 @@ static void si_launch_grid(
* COMPUTE_PGM_RSRC2.SCRATCH_EN is enabled.
*/
S_00B860_WAVES(num_waves_for_scratch)
- | S_00B860_WAVESIZE(shader->scratch_bytes_per_wave >> 10))
+ | S_00B860_WAVESIZE(shader->config.scratch_bytes_per_wave >> 10))
;
si_pm4_cmd_begin(pm4, PKT3_DISPATCH_DIRECT);
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index b3719dea252..d157a9ffb00 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -109,7 +109,7 @@ static bool si_upload_descriptors(struct si_context *sctx,
if (!desc->list_dirty)
return true;
- u_upload_alloc(sctx->b.uploader, 0, list_size,
+ u_upload_alloc(sctx->b.uploader, 0, list_size, 256,
&desc->buffer_offset,
(struct pipe_resource**)&desc->buffer, &ptr);
if (!desc->buffer)
@@ -391,7 +391,7 @@ static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
* directly through a staging buffer and don't go through
* the fine-grained upload path.
*/
- u_upload_alloc(sctx->b.uploader, 0, count * 16, &desc->buffer_offset,
+ u_upload_alloc(sctx->b.uploader, 0, count * 16, 256, &desc->buffer_offset,
(struct pipe_resource**)&desc->buffer, (void**)&ptr);
if (!desc->buffer)
return false;
@@ -465,7 +465,7 @@ void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuf
{
void *tmp;
- u_upload_alloc(sctx->b.uploader, 0, size, const_offset,
+ u_upload_alloc(sctx->b.uploader, 0, size, 256, const_offset,
(struct pipe_resource**)rbuffer, &tmp);
if (rbuffer)
util_memcpy_cpu_to_le32(tmp, ptr, size);
@@ -1011,19 +1011,19 @@ void si_init_all_descriptors(struct si_context *sctx)
for (i = 0; i < SI_NUM_SHADERS; i++) {
si_init_buffer_resources(&sctx->const_buffers[i],
- SI_NUM_CONST_BUFFERS, SI_SGPR_CONST,
+ SI_NUM_CONST_BUFFERS, SI_SGPR_CONST_BUFFERS,
RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER);
si_init_buffer_resources(&sctx->rw_buffers[i],
SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
RADEON_USAGE_READWRITE, RADEON_PRIO_RINGS_STREAMOUT);
si_init_descriptors(&sctx->samplers[i].views.desc,
- SI_SGPR_RESOURCE, 8, SI_NUM_SAMPLER_VIEWS);
+ SI_SGPR_SAMPLER_VIEWS, 8, SI_NUM_SAMPLER_VIEWS);
si_init_descriptors(&sctx->samplers[i].states.desc,
- SI_SGPR_SAMPLER, 4, SI_NUM_SAMPLER_STATES);
+ SI_SGPR_SAMPLER_STATES, 4, SI_NUM_SAMPLER_STATES);
}
- si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFER,
+ si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS,
4, SI_NUM_VERTEX_BUFFERS);
/* Set pipe_context functions. */
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index ac13407e2a1..c2ca94339ac 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -340,6 +340,13 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_CLEAR_TEXTURE:
+ case PIPE_CAP_DRAW_PARAMETERS:
+ case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
+ case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+ case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
return 0;
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
@@ -512,6 +519,8 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
return 1;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
+ case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ return 0;
}
return 0;
}
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 0e98784d51b..1db3e484915 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -82,11 +82,11 @@ struct si_shader_context
int param_es2gs_offset;
LLVMTargetMachineRef tm;
LLVMValueRef const_md;
- LLVMValueRef const_resource[SI_NUM_CONST_BUFFERS];
+ LLVMValueRef const_buffers[SI_NUM_CONST_BUFFERS];
LLVMValueRef lds;
LLVMValueRef *constants[SI_NUM_CONST_BUFFERS];
- LLVMValueRef resources[SI_NUM_SAMPLER_VIEWS];
- LLVMValueRef samplers[SI_NUM_SAMPLER_STATES];
+ LLVMValueRef sampler_views[SI_NUM_SAMPLER_VIEWS];
+ LLVMValueRef sampler_states[SI_NUM_SAMPLER_STATES];
LLVMValueRef so_buffers[4];
LLVMValueRef esgs_ring;
LLVMValueRef gsvs_ring[4];
@@ -394,7 +394,7 @@ static void declare_input_vs(
LLVMValueRef input;
/* Load the T list */
- t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_VERTEX_BUFFER);
+ t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_VERTEX_BUFFERS);
t_offset = lp_build_const_int32(gallivm, input_index);
@@ -1065,7 +1065,7 @@ static LLVMValueRef load_sample_position(struct radeon_llvm_context *radeon_bld,
struct lp_build_context *uint_bld = &radeon_bld->soa.bld_base.uint_bld;
struct gallivm_state *gallivm = &radeon_bld->gallivm;
LLVMBuilderRef builder = gallivm->builder;
- LLVMValueRef desc = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
+ LLVMValueRef desc = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST_BUFFERS);
LLVMValueRef buf_index = lp_build_const_int32(gallivm, SI_DRIVER_STATE_CONST_BUF);
LLVMValueRef resource = build_indexed_load_const(si_shader_ctx, desc, buf_index);
@@ -1233,13 +1233,13 @@ static LLVMValueRef fetch_constant(
}
if (reg->Register.Dimension && reg->Dimension.Indirect) {
- LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
+ LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST_BUFFERS);
LLVMValueRef index;
index = get_indirect_index(si_shader_ctx, &reg->DimIndirect,
reg->Dimension.Index);
bufp = build_indexed_load_const(si_shader_ctx, ptr, index);
} else
- bufp = si_shader_ctx->const_resource[buf];
+ bufp = si_shader_ctx->const_buffers[buf];
addr = si_shader_ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle];
addr = LLVMBuildLoad(base->gallivm->builder, addr, "load addr reg");
@@ -1260,7 +1260,7 @@ static LLVMValueRef fetch_constant(
addr2 = lp_build_add(&bld_base->uint_bld, addr2,
lp_build_const_int32(base->gallivm, idx * 4));
- result2 = buffer_load_const(base->gallivm->builder, si_shader_ctx->const_resource[buf],
+ result2 = buffer_load_const(base->gallivm->builder, si_shader_ctx->const_buffers[buf],
addr2, bld_base->base.elem_type);
result = radeon_llvm_emit_fetch_double(bld_base,
@@ -1302,18 +1302,8 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
int cbuf = target - V_008DFC_SQ_EXP_MRT;
- if (cbuf >= 0 && cbuf < 8) {
+ if (cbuf >= 0 && cbuf < 8)
compressed = (si_shader_ctx->shader->key.ps.export_16bpc >> cbuf) & 0x1;
-
- if (compressed)
- si_shader_ctx->shader->spi_shader_col_format |=
- V_028714_SPI_SHADER_FP16_ABGR << (4 * cbuf);
- else
- si_shader_ctx->shader->spi_shader_col_format |=
- V_028714_SPI_SHADER_32_ABGR << (4 * cbuf);
-
- si_shader_ctx->shader->cb_shader_mask |= 0xf << (4 * cbuf);
- }
}
/* Set COMPR flag */
@@ -1333,34 +1323,19 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
LLVMInt32TypeInContext(base->gallivm->context),
pack_args, 2,
LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
- args[chan + 7] = args[chan + 5] =
+ args[chan + 5] =
LLVMBuildBitCast(base->gallivm->builder,
packed,
LLVMFloatTypeInContext(base->gallivm->context),
"");
+ args[chan + 7] = base->undef;
}
} else
memcpy(&args[5], values, sizeof(values[0]) * 4);
}
-/* Load from output pointers and initialize arguments for the shader export intrinsic */
-static void si_llvm_init_export_args_load(struct lp_build_tgsi_context *bld_base,
- LLVMValueRef *out_ptr,
- unsigned target,
- LLVMValueRef *args)
-{
- struct gallivm_state *gallivm = bld_base->base.gallivm;
- LLVMValueRef values[4];
- int i;
-
- for (i = 0; i < 4; i++)
- values[i] = LLVMBuildLoad(gallivm->builder, out_ptr[i], "");
-
- si_llvm_init_export_args(bld_base, values, target, args);
-}
-
static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
- LLVMValueRef alpha_ptr)
+ LLVMValueRef alpha)
{
struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
@@ -1372,8 +1347,7 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
LLVMValueRef alpha_pass =
lp_build_cmp(&bld_base->base,
si_shader_ctx->shader->key.ps.alpha_func,
- LLVMBuildLoad(gallivm->builder, alpha_ptr, ""),
- alpha_ref);
+ alpha, alpha_ref);
LLVMValueRef arg =
lp_build_select(&bld_base->base,
alpha_pass,
@@ -1390,16 +1364,14 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
LLVMVoidTypeInContext(gallivm->context),
NULL, 0, 0);
}
-
- si_shader_ctx->shader->db_shader_control |= S_02880C_KILL_ENABLE(1);
}
-static void si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base,
- LLVMValueRef alpha_ptr)
+static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base,
+ LLVMValueRef alpha)
{
struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
- LLVMValueRef coverage, alpha;
+ LLVMValueRef coverage;
/* alpha = alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES */
coverage = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
@@ -1417,9 +1389,7 @@ static void si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base
lp_build_const_float(gallivm,
1.0 / SI_NUM_SMOOTH_AA_SAMPLES), "");
- alpha = LLVMBuildLoad(gallivm->builder, alpha_ptr, "");
- alpha = LLVMBuildFMul(gallivm->builder, alpha, coverage, "");
- LLVMBuildStore(gallivm->builder, alpha, alpha_ptr);
+ return LLVMBuildFMul(gallivm->builder, alpha, coverage, "");
}
static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context * bld_base,
@@ -1432,7 +1402,7 @@ static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context * bld_base,
unsigned chan;
unsigned const_chan;
LLVMValueRef base_elt;
- LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
+ LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST_BUFFERS);
LLVMValueRef constbuf_index = lp_build_const_int32(base->gallivm, SI_DRIVER_STATE_CONST_BUF);
LLVMValueRef const_resource = build_indexed_load_const(si_shader_ctx, ptr, constbuf_index);
@@ -2112,197 +2082,193 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context * bld_base)
FREE(outputs);
}
-static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base)
+static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base,
+ LLVMValueRef depth, LLVMValueRef stencil,
+ LLVMValueRef samplemask)
{
- struct si_shader_context * si_shader_ctx = si_shader_context(bld_base);
- struct si_shader * shader = si_shader_ctx->shader;
- struct lp_build_context * base = &bld_base->base;
- struct lp_build_context * uint = &bld_base->uint_bld;
- struct tgsi_shader_info *info = &shader->selector->info;
- LLVMBuilderRef builder = base->gallivm->builder;
+ struct si_screen *sscreen = si_shader_context(bld_base)->screen;
+ struct lp_build_context *base = &bld_base->base;
+ struct lp_build_context *uint = &bld_base->uint_bld;
LLVMValueRef args[9];
- LLVMValueRef last_args[9] = { 0 };
- int depth_index = -1, stencil_index = -1, samplemask_index = -1;
- int i;
+ unsigned mask = 0;
- for (i = 0; i < info->num_outputs; i++) {
- unsigned semantic_name = info->output_semantic_name[i];
- unsigned semantic_index = info->output_semantic_index[i];
- unsigned target;
- LLVMValueRef alpha_ptr;
+ assert(depth || stencil || samplemask);
- /* Select the correct target */
- switch (semantic_name) {
- case TGSI_SEMANTIC_POSITION:
- depth_index = i;
- continue;
- case TGSI_SEMANTIC_STENCIL:
- stencil_index = i;
- continue;
- case TGSI_SEMANTIC_SAMPLEMASK:
- samplemask_index = i;
- continue;
- case TGSI_SEMANTIC_COLOR:
- target = V_008DFC_SQ_EXP_MRT + semantic_index;
- alpha_ptr = si_shader_ctx->radeon_bld.soa.outputs[i][3];
+ args[1] = uint->one; /* whether the EXEC mask is valid */
+ args[2] = uint->one; /* DONE bit */
- if (si_shader_ctx->shader->key.ps.clamp_color) {
- for (int j = 0; j < 4; j++) {
- LLVMValueRef ptr = si_shader_ctx->radeon_bld.soa.outputs[i][j];
- LLVMValueRef result = LLVMBuildLoad(builder, ptr, "");
+ /* Specify the target we are exporting */
+ args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRTZ);
- result = radeon_llvm_saturate(bld_base, result);
- LLVMBuildStore(builder, result, ptr);
- }
- }
+ args[4] = uint->zero; /* COMP flag */
+ args[5] = base->undef; /* R, depth */
+ args[6] = base->undef; /* G, stencil test value[0:7], stencil op value[8:15] */
+ args[7] = base->undef; /* B, sample mask */
+ args[8] = base->undef; /* A, alpha to mask */
- if (si_shader_ctx->shader->key.ps.alpha_to_one)
- LLVMBuildStore(base->gallivm->builder,
- base->one, alpha_ptr);
+ if (depth) {
+ args[5] = depth;
+ mask |= 0x1;
+ }
- if (semantic_index == 0 &&
- si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS)
- si_alpha_test(bld_base, alpha_ptr);
+ if (stencil) {
+ args[6] = stencil;
+ mask |= 0x2;
+ }
- if (si_shader_ctx->shader->key.ps.poly_line_smoothing)
- si_scale_alpha_by_sample_mask(bld_base, alpha_ptr);
+ if (samplemask) {
+ args[7] = samplemask;
+ mask |= 0x4;
+ }
- break;
- default:
- target = 0;
- fprintf(stderr,
- "Warning: SI unhandled fs output type:%d\n",
- semantic_name);
- }
+ /* SI (except OLAND) has a bug that it only looks
+ * at the X writemask component. */
+ if (sscreen->b.chip_class == SI &&
+ sscreen->b.family != CHIP_OLAND)
+ mask |= 0x1;
- si_llvm_init_export_args_load(bld_base,
- si_shader_ctx->radeon_bld.soa.outputs[i],
- target, args);
-
- if (semantic_name == TGSI_SEMANTIC_COLOR) {
- /* If there is an export instruction waiting to be emitted, do so now. */
- if (last_args[0]) {
- lp_build_intrinsic(base->gallivm->builder,
- "llvm.SI.export",
- LLVMVoidTypeInContext(base->gallivm->context),
- last_args, 9, 0);
- }
+ /* Specify which components to enable */
+ args[0] = lp_build_const_int32(base->gallivm, mask);
- /* This instruction will be emitted at the end of the shader. */
- memcpy(last_args, args, sizeof(args));
-
- /* Handle FS_COLOR0_WRITES_ALL_CBUFS. */
- if (shader->selector->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
- semantic_index == 0 &&
- si_shader_ctx->shader->key.ps.last_cbuf > 0) {
- for (int c = 1; c <= si_shader_ctx->shader->key.ps.last_cbuf; c++) {
- si_llvm_init_export_args_load(bld_base,
- si_shader_ctx->radeon_bld.soa.outputs[i],
- V_008DFC_SQ_EXP_MRT + c, args);
- lp_build_intrinsic(base->gallivm->builder,
- "llvm.SI.export",
- LLVMVoidTypeInContext(base->gallivm->context),
- args, 9, 0);
- }
- }
- } else {
- lp_build_intrinsic(base->gallivm->builder,
- "llvm.SI.export",
+ lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
+ LLVMVoidTypeInContext(base->gallivm->context),
+ args, 9, 0);
+}
+
+static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
+ LLVMValueRef *color, unsigned index,
+ bool is_last)
+{
+ struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+ struct lp_build_context *base = &bld_base->base;
+ LLVMValueRef args[9];
+ int i;
+
+ /* Clamp color */
+ if (si_shader_ctx->shader->key.ps.clamp_color)
+ for (i = 0; i < 4; i++)
+ color[i] = radeon_llvm_saturate(bld_base, color[i]);
+
+ /* Alpha to one */
+ if (si_shader_ctx->shader->key.ps.alpha_to_one)
+ color[3] = base->one;
+
+ /* Alpha test */
+ if (index == 0 &&
+ si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS)
+ si_alpha_test(bld_base, color[3]);
+
+ /* Line & polygon smoothing */
+ if (si_shader_ctx->shader->key.ps.poly_line_smoothing)
+ color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3]);
+
+ /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
+ if (index == 0 &&
+ si_shader_ctx->shader->key.ps.last_cbuf > 0) {
+ for (int c = 1; c <= si_shader_ctx->shader->key.ps.last_cbuf; c++) {
+ si_llvm_init_export_args(bld_base, color,
+ V_008DFC_SQ_EXP_MRT + c, args);
+ lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
LLVMVoidTypeInContext(base->gallivm->context),
args, 9, 0);
}
}
- if (depth_index >= 0 || stencil_index >= 0 || samplemask_index >= 0) {
- LLVMValueRef out_ptr;
- unsigned mask = 0;
-
- /* Specify the target we are exporting */
- args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRTZ);
-
- args[5] = base->zero; /* R, depth */
- args[6] = base->zero; /* G, stencil test value[0:7], stencil op value[8:15] */
- args[7] = base->zero; /* B, sample mask */
- args[8] = base->zero; /* A, alpha to mask */
-
- if (depth_index >= 0) {
- out_ptr = si_shader_ctx->radeon_bld.soa.outputs[depth_index][2];
- args[5] = LLVMBuildLoad(base->gallivm->builder, out_ptr, "");
- mask |= 0x1;
- si_shader_ctx->shader->db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1);
- }
-
- if (stencil_index >= 0) {
- out_ptr = si_shader_ctx->radeon_bld.soa.outputs[stencil_index][1];
- args[6] = LLVMBuildLoad(base->gallivm->builder, out_ptr, "");
- mask |= 0x2;
- si_shader_ctx->shader->db_shader_control |=
- S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(1);
- }
-
- if (samplemask_index >= 0) {
- out_ptr = si_shader_ctx->radeon_bld.soa.outputs[samplemask_index][0];
- args[7] = LLVMBuildLoad(base->gallivm->builder, out_ptr, "");
- mask |= 0x4;
- si_shader_ctx->shader->db_shader_control |= S_02880C_MASK_EXPORT_ENABLE(1);
- }
-
- /* SI (except OLAND) has a bug that it only looks
- * at the X writemask component. */
- if (si_shader_ctx->screen->b.chip_class == SI &&
- si_shader_ctx->screen->b.family != CHIP_OLAND)
- mask |= 0x1;
+ /* Export */
+ si_llvm_init_export_args(bld_base, color, V_008DFC_SQ_EXP_MRT + index,
+ args);
+ if (is_last) {
+ args[1] = bld_base->uint_bld.one; /* whether the EXEC mask is valid */
+ args[2] = bld_base->uint_bld.one; /* DONE bit */
+ }
+ lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
+ LLVMVoidTypeInContext(base->gallivm->context),
+ args, 9, 0);
+}
- if (samplemask_index >= 0)
- si_shader_ctx->shader->spi_shader_z_format = V_028710_SPI_SHADER_32_ABGR;
- else if (stencil_index >= 0)
- si_shader_ctx->shader->spi_shader_z_format = V_028710_SPI_SHADER_32_GR;
- else
- si_shader_ctx->shader->spi_shader_z_format = V_028710_SPI_SHADER_32_R;
+static void si_export_null(struct lp_build_tgsi_context *bld_base)
+{
+ struct lp_build_context *base = &bld_base->base;
+ struct lp_build_context *uint = &bld_base->uint_bld;
+ LLVMValueRef args[9];
- /* Specify which components to enable */
- args[0] = lp_build_const_int32(base->gallivm, mask);
+ args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled channels */
+ args[1] = uint->one; /* whether the EXEC mask is valid */
+ args[2] = uint->one; /* DONE bit */
+ args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_NULL);
+ args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */
+ args[5] = uint->undef; /* R */
+ args[6] = uint->undef; /* G */
+ args[7] = uint->undef; /* B */
+ args[8] = uint->undef; /* A */
+
+ lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
+ LLVMVoidTypeInContext(base->gallivm->context),
+ args, 9, 0);
+}
- args[1] =
- args[2] =
- args[4] = uint->zero;
+static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base)
+{
+ struct si_shader_context * si_shader_ctx = si_shader_context(bld_base);
+ struct si_shader * shader = si_shader_ctx->shader;
+ struct lp_build_context * base = &bld_base->base;
+ struct tgsi_shader_info *info = &shader->selector->info;
+ LLVMBuilderRef builder = base->gallivm->builder;
+ LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
+ int last_color_export = -1;
+ int i;
- if (last_args[0])
- lp_build_intrinsic(base->gallivm->builder,
- "llvm.SI.export",
- LLVMVoidTypeInContext(base->gallivm->context),
- args, 9, 0);
- else
- memcpy(last_args, args, sizeof(args));
+ /* If there are no outputs, add a dummy export. */
+ if (!info->num_outputs) {
+ si_export_null(bld_base);
+ return;
}
- if (!last_args[0]) {
- /* Specify which components to enable */
- last_args[0] = lp_build_const_int32(base->gallivm, 0x0);
+ /* Determine the last export. If MRTZ is present, it's always last.
+ * Otherwise, find the last color export.
+ */
+ if (!info->writes_z && !info->writes_stencil && !info->writes_samplemask)
+ for (i = 0; i < info->num_outputs; i++)
+ if (info->output_semantic_name[i] == TGSI_SEMANTIC_COLOR)
+ last_color_export = i;
- /* Specify the target we are exporting */
- last_args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRT);
+ for (i = 0; i < info->num_outputs; i++) {
+ unsigned semantic_name = info->output_semantic_name[i];
+ unsigned semantic_index = info->output_semantic_index[i];
+ unsigned j;
+ LLVMValueRef color[4] = {};
- /* Set COMPR flag to zero to export data as 32-bit */
- last_args[4] = uint->zero;
+ /* Select the correct target */
+ switch (semantic_name) {
+ case TGSI_SEMANTIC_POSITION:
+ depth = LLVMBuildLoad(builder,
+ si_shader_ctx->radeon_bld.soa.outputs[i][2], "");
+ break;
+ case TGSI_SEMANTIC_STENCIL:
+ stencil = LLVMBuildLoad(builder,
+ si_shader_ctx->radeon_bld.soa.outputs[i][1], "");
+ break;
+ case TGSI_SEMANTIC_SAMPLEMASK:
+ samplemask = LLVMBuildLoad(builder,
+ si_shader_ctx->radeon_bld.soa.outputs[i][0], "");
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ for (j = 0; j < 4; j++)
+ color[j] = LLVMBuildLoad(builder,
+ si_shader_ctx->radeon_bld.soa.outputs[i][j], "");
- /* dummy bits */
- last_args[5]= uint->zero;
- last_args[6]= uint->zero;
- last_args[7]= uint->zero;
- last_args[8]= uint->zero;
+ si_export_mrt_color(bld_base, color, semantic_index,
+ last_color_export == i);
+ break;
+ default:
+ fprintf(stderr,
+ "Warning: SI unhandled fs output type:%d\n",
+ semantic_name);
+ }
}
- /* Specify whether the EXEC mask represents the valid mask */
- last_args[1] = uint->one;
-
- /* Specify that this is the last export */
- last_args[2] = lp_build_const_int32(base->gallivm, 1);
-
- lp_build_intrinsic(base->gallivm->builder,
- "llvm.SI.export",
- LLVMVoidTypeInContext(base->gallivm->context),
- last_args, 9, 0);
+ if (depth || stencil || samplemask)
+ si_export_mrt_z(bld_base, depth, stencil, samplemask);
}
static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
@@ -2390,10 +2356,10 @@ static void tex_fetch_ptrs(
ind_index = get_indirect_index(si_shader_ctx, &reg->Indirect, reg->Register.Index);
- *res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
+ *res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_VIEWS);
*res_ptr = build_indexed_load_const(si_shader_ctx, *res_ptr, ind_index);
- *samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER);
+ *samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_STATES);
*samp_ptr = build_indexed_load_const(si_shader_ctx, *samp_ptr, ind_index);
if (target == TGSI_TEXTURE_2D_MSAA ||
@@ -2401,13 +2367,13 @@ static void tex_fetch_ptrs(
ind_index = LLVMBuildAdd(gallivm->builder, ind_index,
lp_build_const_int32(gallivm,
SI_FMASK_TEX_OFFSET), "");
- *fmask_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
+ *fmask_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_VIEWS);
*fmask_ptr = build_indexed_load_const(si_shader_ctx, *fmask_ptr, ind_index);
}
} else {
- *res_ptr = si_shader_ctx->resources[sampler_index];
- *samp_ptr = si_shader_ctx->samplers[sampler_index];
- *fmask_ptr = si_shader_ctx->resources[SI_FMASK_TEX_OFFSET + sampler_index];
+ *res_ptr = si_shader_ctx->sampler_views[sampler_index];
+ *samp_ptr = si_shader_ctx->sampler_states[sampler_index];
+ *fmask_ptr = si_shader_ctx->sampler_views[SI_FMASK_TEX_OFFSET + sampler_index];
}
}
@@ -2487,7 +2453,7 @@ static void tex_fetch_args(
emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
emit_data->args[0] = res;
emit_data->args[1] = bld_base->uint_bld.zero;
- emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 0);
+ emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
emit_data->arg_count = 3;
return;
}
@@ -2536,12 +2502,12 @@ static void tex_fetch_args(
if (opcode == TGSI_OPCODE_TXB)
address[count++] = coords[3];
if (opcode == TGSI_OPCODE_TXB2)
- address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 0);
+ address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
/* Pack depth comparison value */
if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) {
if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
- address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 0);
+ address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
} else {
assert(ref_pos >= 0);
address[count++] = coords[ref_pos];
@@ -2612,7 +2578,7 @@ static void tex_fetch_args(
if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXF)
address[count++] = coords[3];
else if (opcode == TGSI_OPCODE_TXL2)
- address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 0);
+ address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
if (count > 16) {
assert(!"Cannot handle more than 16 texture address parameters");
@@ -3105,10 +3071,10 @@ static void interp_fetch_args(
/* offset is in second src, first two channels */
emit_data->args[0] = lp_build_emit_fetch(bld_base,
emit_data->inst, 1,
- 0);
+ TGSI_CHAN_X);
emit_data->args[1] = lp_build_emit_fetch(bld_base,
emit_data->inst, 1,
- 1);
+ TGSI_CHAN_Y);
emit_data->arg_count = 2;
} else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
LLVMValueRef sample_position;
@@ -3119,7 +3085,7 @@ static void interp_fetch_args(
* and place into first two channels.
*/
sample_id = lp_build_emit_fetch(bld_base,
- emit_data->inst, 1, 0);
+ emit_data->inst, 1, TGSI_CHAN_X);
sample_id = LLVMBuildBitCast(gallivm->builder, sample_id,
LLVMInt32TypeInContext(gallivm->context),
"");
@@ -3432,15 +3398,15 @@ static void create_function(struct si_shader_context *si_shader_ctx)
v16i8 = LLVMVectorType(i8, 16);
params[SI_PARAM_RW_BUFFERS] = const_array(v16i8, SI_NUM_RW_BUFFERS);
- params[SI_PARAM_CONST] = const_array(v16i8, SI_NUM_CONST_BUFFERS);
- params[SI_PARAM_SAMPLER] = const_array(v4i32, SI_NUM_SAMPLER_STATES);
- params[SI_PARAM_RESOURCE] = const_array(v8i32, SI_NUM_SAMPLER_VIEWS);
- last_array_pointer = SI_PARAM_RESOURCE;
+ params[SI_PARAM_CONST_BUFFERS] = const_array(v16i8, SI_NUM_CONST_BUFFERS);
+ params[SI_PARAM_SAMPLER_STATES] = const_array(v4i32, SI_NUM_SAMPLER_STATES);
+ params[SI_PARAM_SAMPLER_VIEWS] = const_array(v8i32, SI_NUM_SAMPLER_VIEWS);
+ last_array_pointer = SI_PARAM_SAMPLER_VIEWS;
switch (si_shader_ctx->type) {
case TGSI_PROCESSOR_VERTEX:
- params[SI_PARAM_VERTEX_BUFFER] = const_array(v16i8, SI_NUM_VERTEX_BUFFERS);
- last_array_pointer = SI_PARAM_VERTEX_BUFFER;
+ params[SI_PARAM_VERTEX_BUFFERS] = const_array(v16i8, SI_NUM_VERTEX_BUFFERS);
+ last_array_pointer = SI_PARAM_VERTEX_BUFFERS;
params[SI_PARAM_BASE_VERTEX] = i32;
params[SI_PARAM_START_INSTANCE] = i32;
num_params = SI_PARAM_START_INSTANCE+1;
@@ -3452,8 +3418,8 @@ static void create_function(struct si_shader_context *si_shader_ctx)
num_params = SI_PARAM_LS_OUT_LAYOUT+1;
} else {
if (shader->is_gs_copy_shader) {
- last_array_pointer = SI_PARAM_CONST;
- num_params = SI_PARAM_CONST+1;
+ last_array_pointer = SI_PARAM_CONST_BUFFERS;
+ num_params = SI_PARAM_CONST_BUFFERS+1;
} else {
params[SI_PARAM_VS_STATE_BITS] = i32;
num_params = SI_PARAM_VS_STATE_BITS+1;
@@ -3610,7 +3576,7 @@ static void preload_constants(struct si_shader_context *si_shader_ctx)
struct gallivm_state * gallivm = bld_base->base.gallivm;
const struct tgsi_shader_info * info = bld_base->info;
unsigned buf;
- LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
+ LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST_BUFFERS);
for (buf = 0; buf < SI_NUM_CONST_BUFFERS; buf++) {
unsigned i, num_const = info->const_file_max[buf] + 1;
@@ -3622,14 +3588,14 @@ static void preload_constants(struct si_shader_context *si_shader_ctx)
si_shader_ctx->constants[buf] = CALLOC(num_const * 4, sizeof(LLVMValueRef));
/* Load the resource descriptor */
- si_shader_ctx->const_resource[buf] =
+ si_shader_ctx->const_buffers[buf] =
build_indexed_load_const(si_shader_ctx, ptr, lp_build_const_int32(gallivm, buf));
/* Load the constants, we rely on the code sinking to do the rest */
for (i = 0; i < num_const * 4; ++i) {
si_shader_ctx->constants[buf][i] =
buffer_load_const(gallivm->builder,
- si_shader_ctx->const_resource[buf],
+ si_shader_ctx->const_buffers[buf],
lp_build_const_int32(gallivm, i * 4),
bld_base->base.elem_type);
}
@@ -3650,23 +3616,23 @@ static void preload_samplers(struct si_shader_context *si_shader_ctx)
if (num_samplers == 0)
return;
- res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
- samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER);
+ res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_VIEWS);
+ samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_STATES);
/* Load the resources and samplers, we rely on the code sinking to do the rest */
for (i = 0; i < num_samplers; ++i) {
/* Resource */
offset = lp_build_const_int32(gallivm, i);
- si_shader_ctx->resources[i] = build_indexed_load_const(si_shader_ctx, res_ptr, offset);
+ si_shader_ctx->sampler_views[i] = build_indexed_load_const(si_shader_ctx, res_ptr, offset);
/* Sampler */
offset = lp_build_const_int32(gallivm, i);
- si_shader_ctx->samplers[i] = build_indexed_load_const(si_shader_ctx, samp_ptr, offset);
+ si_shader_ctx->sampler_states[i] = build_indexed_load_const(si_shader_ctx, samp_ptr, offset);
/* FMASK resource */
if (info->is_msaa_sampler[i]) {
offset = lp_build_const_int32(gallivm, SI_FMASK_TEX_OFFSET + i);
- si_shader_ctx->resources[SI_FMASK_TEX_OFFSET + i] =
+ si_shader_ctx->sampler_views[SI_FMASK_TEX_OFFSET + i] =
build_indexed_load_const(si_shader_ctx, res_ptr, offset);
}
}
@@ -3741,20 +3707,19 @@ static void preload_ring_buffers(struct si_shader_context *si_shader_ctx)
}
}
-void si_shader_binary_read_config(const struct si_screen *sscreen,
- struct si_shader *shader,
- unsigned symbol_offset)
+void si_shader_binary_read_config(struct radeon_shader_binary *binary,
+ struct si_shader_config *conf,
+ unsigned symbol_offset)
{
unsigned i;
const unsigned char *config =
- radeon_shader_binary_config_start(&shader->binary,
- symbol_offset);
+ radeon_shader_binary_config_start(binary, symbol_offset);
/* XXX: We may be able to emit some of these values directly rather than
* extracting fields to be emitted later.
*/
- for (i = 0; i < shader->binary.config_size_per_symbol; i+= 8) {
+ for (i = 0; i < binary->config_size_per_symbol; i+= 8) {
unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i));
unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4));
switch (reg) {
@@ -3762,25 +3727,25 @@ void si_shader_binary_read_config(const struct si_screen *sscreen,
case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
case R_00B848_COMPUTE_PGM_RSRC1:
- shader->num_sgprs = MAX2(shader->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
- shader->num_vgprs = MAX2(shader->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
- shader->float_mode = G_00B028_FLOAT_MODE(value);
- shader->rsrc1 = value;
+ conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
+ conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
+ conf->float_mode = G_00B028_FLOAT_MODE(value);
+ conf->rsrc1 = value;
break;
case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
- shader->lds_size = MAX2(shader->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
+ conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
break;
case R_00B84C_COMPUTE_PGM_RSRC2:
- shader->lds_size = MAX2(shader->lds_size, G_00B84C_LDS_SIZE(value));
- shader->rsrc2 = value;
+ conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value));
+ conf->rsrc2 = value;
break;
case R_0286CC_SPI_PS_INPUT_ENA:
- shader->spi_ps_input_ena = value;
+ conf->spi_ps_input_ena = value;
break;
case R_0286E8_SPI_TMPRING_SIZE:
case R_00B860_COMPUTE_TMPRING_SIZE:
/* WAVESIZE is in units of 256 dwords. */
- shader->scratch_bytes_per_wave =
+ conf->scratch_bytes_per_wave =
G_00B860_WAVESIZE(value) * 256 * 4 * 1;
break;
default:
@@ -3799,7 +3764,7 @@ void si_shader_apply_scratch_relocs(struct si_context *sctx,
uint32_t scratch_rsrc_dword0 = scratch_va;
uint32_t scratch_rsrc_dword1 =
S_008F04_BASE_ADDRESS_HI(scratch_va >> 32)
- | S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64);
+ | S_008F04_STRIDE(shader->config.scratch_bytes_per_wave / 64);
for (i = 0 ; i < shader->binary.reloc_count; i++) {
const struct radeon_shader_reloc *reloc =
@@ -3840,80 +3805,124 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
return 0;
}
-int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader)
+static void si_shader_dump_disassembly(const struct radeon_shader_binary *binary,
+ struct pipe_debug_callback *debug)
{
- const struct radeon_shader_binary *binary = &shader->binary;
- unsigned i;
- int r;
- bool dump = r600_can_dump_shader(&sscreen->b,
- shader->selector ? shader->selector->tokens : NULL);
+ char *line, *p;
+ unsigned i, count;
+
+ if (binary->disasm_string) {
+ fprintf(stderr, "\nShader Disassembly:\n\n");
+ fprintf(stderr, "%s\n", binary->disasm_string);
+
+ if (debug && debug->debug_message) {
+ /* Very long debug messages are cut off, so send the
+ * disassembly one line at a time. This causes more
+ * overhead, but on the plus side it simplifies
+ * parsing of resulting logs.
+ */
+ pipe_debug_message(debug, SHADER_INFO,
+ "Shader Disassembly Begin");
- si_shader_binary_read_config(sscreen, shader, 0);
- r = si_shader_binary_upload(sscreen, shader);
- if (r)
- return r;
-
- if (dump) {
- if (!(sscreen->b.debug_flags & DBG_NO_ASM)) {
- if (binary->disasm_string) {
- fprintf(stderr, "\nShader Disassembly:\n\n");
- fprintf(stderr, "%s\n", binary->disasm_string);
- } else {
- fprintf(stderr, "SI CODE:\n");
- for (i = 0; i < binary->code_size; i+=4 ) {
- fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i, binary->code[i + 3],
- binary->code[i + 2], binary->code[i + 1],
- binary->code[i]);
+ line = binary->disasm_string;
+ while (*line) {
+ p = strchrnul(line, '\n');
+ count = p - line;
+
+ if (count) {
+ pipe_debug_message(debug, SHADER_INFO,
+ "%.*s", count, line);
}
+
+ if (!*p)
+ break;
+ line = p + 1;
}
+
+ pipe_debug_message(debug, SHADER_INFO,
+ "Shader Disassembly End");
+ }
+ } else {
+ fprintf(stderr, "SI CODE:\n");
+ for (i = 0; i < binary->code_size; i += 4) {
+ fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i,
+ binary->code[i + 3], binary->code[i + 2],
+ binary->code[i + 1], binary->code[i]);
}
+ }
+}
+static void si_shader_dump_stats(struct si_screen *sscreen,
+ struct si_shader_config *conf,
+ unsigned code_size,
+ struct pipe_debug_callback *debug,
+ unsigned processor)
+{
+ if (r600_can_dump_shader(&sscreen->b, processor)) {
fprintf(stderr, "*** SHADER STATS ***\n"
"SGPRS: %d\nVGPRS: %d\nCode Size: %d bytes\nLDS: %d blocks\n"
"Scratch: %d bytes per wave\n********************\n",
- shader->num_sgprs, shader->num_vgprs, binary->code_size,
- shader->lds_size, shader->scratch_bytes_per_wave);
+ conf->num_sgprs, conf->num_vgprs, code_size,
+ conf->lds_size, conf->scratch_bytes_per_wave);
}
- return 0;
+
+ pipe_debug_message(debug, SHADER_INFO,
+ "Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d LDS: %d Scratch: %d",
+ conf->num_sgprs, conf->num_vgprs, code_size,
+ conf->lds_size, conf->scratch_bytes_per_wave);
+}
+
+void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
+ struct pipe_debug_callback *debug, unsigned processor)
+{
+ if (r600_can_dump_shader(&sscreen->b, processor))
+ if (!(sscreen->b.debug_flags & DBG_NO_ASM))
+ si_shader_dump_disassembly(&shader->binary, debug);
+
+ si_shader_dump_stats(sscreen, &shader->config,
+ shader->binary.code_size, debug, processor);
}
-int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
- LLVMTargetMachineRef tm, LLVMModuleRef mod)
+int si_compile_llvm(struct si_screen *sscreen,
+ struct radeon_shader_binary *binary,
+ struct si_shader_config *conf,
+ LLVMTargetMachineRef tm,
+ LLVMModuleRef mod,
+ struct pipe_debug_callback *debug,
+ unsigned processor)
{
int r = 0;
- bool dump_asm = r600_can_dump_shader(&sscreen->b,
- shader->selector ? shader->selector->tokens : NULL);
- bool dump_ir = dump_asm && !(sscreen->b.debug_flags & DBG_NO_IR);
unsigned count = p_atomic_inc_return(&sscreen->b.num_compilations);
- if (dump_ir || dump_asm)
+ if (r600_can_dump_shader(&sscreen->b, processor)) {
fprintf(stderr, "radeonsi: Compiling shader %d\n", count);
- if (!si_replace_shader(count, &shader->binary)) {
- r = radeon_llvm_compile(mod, &shader->binary,
- r600_get_llvm_processor_name(sscreen->b.family), dump_ir, dump_asm, tm);
+ if (!(sscreen->b.debug_flags & DBG_NO_IR))
+ LLVMDumpModule(mod);
+ }
+
+ if (!si_replace_shader(count, binary)) {
+ r = radeon_llvm_compile(mod, binary,
+ r600_get_llvm_processor_name(sscreen->b.family), tm,
+ debug);
if (r)
return r;
}
- r = si_shader_binary_read(sscreen, shader);
+ si_shader_binary_read_config(binary, conf, 0);
- FREE(shader->binary.config);
- FREE(shader->binary.rodata);
- FREE(shader->binary.global_symbol_offsets);
- if (shader->scratch_bytes_per_wave == 0) {
- FREE(shader->binary.code);
- FREE(shader->binary.relocs);
- memset(&shader->binary, 0,
- offsetof(struct radeon_shader_binary, disasm_string));
- }
+ FREE(binary->config);
+ FREE(binary->global_symbol_offsets);
+ binary->config = NULL;
+ binary->global_symbol_offsets = NULL;
return r;
}
/* Generate code for the hardware VS shader stage to go with a geometry shader */
static int si_generate_gs_copy_shader(struct si_screen *sscreen,
struct si_shader_context *si_shader_ctx,
- struct si_shader *gs, bool dump)
+ struct si_shader *gs, bool dump,
+ struct pipe_debug_callback *debug)
{
struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
@@ -3979,8 +3988,15 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen,
if (dump)
fprintf(stderr, "Copy Vertex Shader for Geometry Shader:\n\n");
- r = si_compile_llvm(sscreen, si_shader_ctx->shader,
- si_shader_ctx->tm, bld_base->base.gallivm->module);
+ r = si_compile_llvm(sscreen, &si_shader_ctx->shader->binary,
+ &si_shader_ctx->shader->config, si_shader_ctx->tm,
+ bld_base->base.gallivm->module,
+ debug, TGSI_PROCESSOR_GEOMETRY);
+ if (!r) {
+ si_shader_dump(sscreen, si_shader_ctx->shader, debug,
+ TGSI_PROCESSOR_GEOMETRY);
+ r = si_shader_binary_upload(sscreen, si_shader_ctx->shader);
+ }
radeon_llvm_dispose(&si_shader_ctx->radeon_bld);
@@ -4034,7 +4050,8 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f)
}
int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
- struct si_shader *shader)
+ struct si_shader *shader,
+ struct pipe_debug_callback *debug)
{
struct si_shader_selector *sel = shader->selector;
struct tgsi_token *tokens = sel->tokens;
@@ -4045,11 +4062,12 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
int r = 0;
bool poly_stipple = sel->type == PIPE_SHADER_FRAGMENT &&
shader->key.ps.poly_stipple;
- bool dump = r600_can_dump_shader(&sscreen->b, sel->tokens);
+ bool dump = r600_can_dump_shader(&sscreen->b, sel->info.processor);
if (poly_stipple) {
tokens = util_pstipple_create_fragment_shader(tokens, NULL,
- SI_POLY_STIPPLE_SAMPLER);
+ SI_POLY_STIPPLE_SAMPLER,
+ TGSI_FILE_INPUT);
tgsi_scan_shader(tokens, &stipple_shader_info);
}
@@ -4070,9 +4088,6 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
if (sel->type != PIPE_SHADER_COMPUTE)
shader->dx10_clamp_mode = true;
- if (sel->info.uses_kill)
- shader->db_shader_control |= S_02880C_KILL_ENABLE(1);
-
shader->uses_instanceid = sel->info.uses_instanceid;
bld_base->info = poly_stipple ? &stipple_shader_info : &sel->info;
bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
@@ -4147,17 +4162,6 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
case TGSI_PROCESSOR_FRAGMENT:
si_shader_ctx.radeon_bld.load_input = declare_input_fs;
bld_base->emit_epilogue = si_llvm_emit_fs_epilogue;
-
- switch (sel->info.properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT]) {
- case TGSI_FS_DEPTH_LAYOUT_GREATER:
- shader->db_shader_control |=
- S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z);
- break;
- case TGSI_FS_DEPTH_LAYOUT_LESS:
- shader->db_shader_control |=
- S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z);
- break;
- }
break;
default:
assert(!"Unsupported shader type");
@@ -4188,12 +4192,21 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);
mod = bld_base->base.gallivm->module;
- r = si_compile_llvm(sscreen, shader, tm, mod);
+ r = si_compile_llvm(sscreen, &shader->binary, &shader->config, tm,
+ mod, debug, si_shader_ctx.type);
if (r) {
fprintf(stderr, "LLVM failed to compile shader\n");
goto out;
}
+ si_shader_dump(sscreen, shader, debug, si_shader_ctx.type);
+
+ r = si_shader_binary_upload(sscreen, shader);
+ if (r) {
+ fprintf(stderr, "LLVM failed to upload shader\n");
+ goto out;
+ }
+
radeon_llvm_dispose(&si_shader_ctx.radeon_bld);
if (si_shader_ctx.type == TGSI_PROCESSOR_GEOMETRY) {
@@ -4202,7 +4215,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
shader->gs_copy_shader->key = shader->key;
si_shader_ctx.shader = shader->gs_copy_shader;
if ((r = si_generate_gs_copy_shader(sscreen, &si_shader_ctx,
- shader, dump))) {
+ shader, dump, debug))) {
free(shader->gs_copy_shader);
shader->gs_copy_shader = NULL;
goto out;
@@ -4217,6 +4230,14 @@ out:
return r;
}
+void si_shader_destroy_binary(struct radeon_shader_binary *binary)
+{
+ FREE(binary->code);
+ FREE(binary->rodata);
+ FREE(binary->relocs);
+ FREE(binary->disasm_string);
+}
+
void si_shader_destroy(struct si_shader *shader)
{
if (shader->gs_copy_shader) {
@@ -4228,8 +4249,5 @@ void si_shader_destroy(struct si_shader *shader)
r600_resource_reference(&shader->scratch_bo, NULL);
r600_resource_reference(&shader->bo, NULL);
-
- FREE(shader->binary.code);
- FREE(shader->binary.relocs);
- FREE(shader->binary.disasm_string);
+ si_shader_destroy_binary(&shader->binary);
}
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index b0c8680ecb3..1635358d505 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -76,10 +76,10 @@ struct radeon_shader_binary;
struct radeon_shader_reloc;
#define SI_SGPR_RW_BUFFERS 0 /* rings (& stream-out, VS only) */
-#define SI_SGPR_CONST 2
-#define SI_SGPR_SAMPLER 4
-#define SI_SGPR_RESOURCE 6
-#define SI_SGPR_VERTEX_BUFFER 8 /* VS only */
+#define SI_SGPR_CONST_BUFFERS 2
+#define SI_SGPR_SAMPLER_STATES 4
+#define SI_SGPR_SAMPLER_VIEWS 6
+#define SI_SGPR_VERTEX_BUFFERS 8 /* VS only */
#define SI_SGPR_BASE_VERTEX 10 /* VS only */
#define SI_SGPR_START_INSTANCE 11 /* VS only */
#define SI_SGPR_VS_STATE_BITS 12 /* VS(VS) only */
@@ -101,12 +101,12 @@ struct radeon_shader_reloc;
/* LLVM function parameter indices */
#define SI_PARAM_RW_BUFFERS 0
-#define SI_PARAM_CONST 1
-#define SI_PARAM_SAMPLER 2
-#define SI_PARAM_RESOURCE 3
+#define SI_PARAM_CONST_BUFFERS 1
+#define SI_PARAM_SAMPLER_STATES 2
+#define SI_PARAM_SAMPLER_VIEWS 3
/* VS only parameters */
-#define SI_PARAM_VERTEX_BUFFER 4
+#define SI_PARAM_VERTEX_BUFFERS 4
#define SI_PARAM_BASE_VERTEX 5
#define SI_PARAM_START_INSTANCE 6
/* [0] = clamp vertex color */
@@ -201,6 +201,7 @@ struct si_shader_selector {
bool forces_persample_interp_for_persp;
bool forces_persample_interp_for_linear;
+ /* GS parameters. */
unsigned esgs_itemsize;
unsigned gs_input_verts_per_prim;
unsigned gs_output_prim;
@@ -210,6 +211,9 @@ struct si_shader_selector {
unsigned gsvs_vertex_size;
unsigned max_gsvs_emit_size;
+ /* PS parameters. */
+ unsigned db_shader_control;
+
/* masks of "get_unique_index" bits */
uint64_t outputs_written;
uint32_t patch_outputs_written;
@@ -258,6 +262,17 @@ union si_shader_key {
} tes; /* tessellation evaluation shader */
};
+struct si_shader_config {
+ unsigned num_sgprs;
+ unsigned num_vgprs;
+ unsigned lds_size;
+ unsigned spi_ps_input_ena;
+ unsigned float_mode;
+ unsigned scratch_bytes_per_wave;
+ unsigned rsrc1;
+ unsigned rsrc2;
+};
+
struct si_shader {
struct si_shader_selector *selector;
struct si_shader *next_variant;
@@ -266,18 +281,9 @@ struct si_shader {
struct si_pm4_state *pm4;
struct r600_resource *bo;
struct r600_resource *scratch_bo;
- struct radeon_shader_binary binary;
- unsigned num_sgprs;
- unsigned num_vgprs;
- unsigned lds_size;
- unsigned spi_ps_input_ena;
- unsigned float_mode;
- unsigned scratch_bytes_per_wave;
- unsigned spi_shader_col_format;
- unsigned spi_shader_z_format;
- unsigned db_shader_control;
- unsigned cb_shader_mask;
union si_shader_key key;
+ struct radeon_shader_binary binary;
+ struct si_shader_config config;
unsigned nparam;
unsigned vs_output_param_offset[PIPE_MAX_SHADER_OUTPUTS];
@@ -288,9 +294,6 @@ struct si_shader {
unsigned nr_param_exports;
bool is_gs_copy_shader;
bool dx10_clamp_mode; /* convert NaNs to 0 */
-
- unsigned rsrc1;
- unsigned rsrc2;
};
static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx)
@@ -327,19 +330,27 @@ static inline bool si_vs_exports_prim_id(struct si_shader *shader)
/* radeonsi_shader.c */
int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
- struct si_shader *shader);
+ struct si_shader *shader,
+ struct pipe_debug_callback *debug);
void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f);
-int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
- LLVMTargetMachineRef tm, LLVMModuleRef mod);
+int si_compile_llvm(struct si_screen *sscreen,
+ struct radeon_shader_binary *binary,
+ struct si_shader_config *conf,
+ LLVMTargetMachineRef tm,
+ LLVMModuleRef mod,
+ struct pipe_debug_callback *debug,
+ unsigned processor);
void si_shader_destroy(struct si_shader *shader);
+void si_shader_destroy_binary(struct radeon_shader_binary *binary);
unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
-int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader);
+void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
+ struct pipe_debug_callback *debug, unsigned processor);
void si_shader_apply_scratch_relocs(struct si_context *sctx,
struct si_shader *shader,
uint64_t scratch_va);
-void si_shader_binary_read_config(const struct si_screen *sscreen,
- struct si_shader *shader,
- unsigned symbol_offset);
+void si_shader_binary_read_config(struct radeon_shader_binary *binary,
+ struct si_shader_config *conf,
+ unsigned symbol_offset);
#endif
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index e5500111f43..91ccd073267 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -163,7 +163,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size;
lds_size = output_patch0_offset + output_patch_size * *num_patches;
- ls_rsrc2 = ls->current->rsrc2;
+ ls_rsrc2 = ls->current->config.rsrc2;
if (sctx->b.chip_class >= CIK) {
assert(lds_size <= 65536);
@@ -178,7 +178,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
if (sctx->b.chip_class == CIK && sctx->b.family != CHIP_HAWAII)
radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
- radeon_emit(cs, ls->current->rsrc1);
+ radeon_emit(cs, ls->current->config.rsrc1);
radeon_emit(cs, ls_rsrc2);
/* Compute userdata SGPRs. */
@@ -818,7 +818,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
si_get_draw_start_count(sctx, info, &start, &count);
start_offset = start * ib.index_size;
- u_upload_alloc(sctx->b.uploader, start_offset, count * 2,
+ u_upload_alloc(sctx->b.uploader, start_offset, count * 2, 256,
&out_offset, &out_buffer, &ptr);
if (!out_buffer) {
pipe_resource_reference(&ib.buffer, NULL);
@@ -842,7 +842,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
start_offset = start * ib.index_size;
u_upload_data(sctx->b.uploader, start_offset, count * ib.index_size,
- (char*)ib.user_buffer + start_offset,
+ 256, (char*)ib.user_buffer + start_offset,
&ib.offset, &ib.buffer);
if (!ib.buffer)
return;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 8700590435f..64adf699604 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -111,7 +111,7 @@ static void si_shader_ls(struct si_shader *shader)
vgpr_comp_cnt = shader->uses_instanceid ? 3 : 1;
num_user_sgprs = SI_LS_NUM_USER_SGPR;
- num_sgprs = shader->num_sgprs;
+ num_sgprs = shader->config.num_sgprs;
if (num_user_sgprs > num_sgprs) {
/* Last 2 reserved SGPRs are used for VCC */
num_sgprs = num_user_sgprs + 2;
@@ -121,12 +121,12 @@ static void si_shader_ls(struct si_shader *shader)
si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, va >> 40);
- shader->rsrc1 = S_00B528_VGPRS((shader->num_vgprs - 1) / 4) |
+ shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B528_SGPRS((num_sgprs - 1) / 8) |
S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) |
S_00B528_DX10_CLAMP(shader->dx10_clamp_mode);
- shader->rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) |
- S_00B52C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0);
+ shader->config.rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) |
+ S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
}
static void si_shader_hs(struct si_shader *shader)
@@ -143,7 +143,7 @@ static void si_shader_hs(struct si_shader *shader)
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
num_user_sgprs = SI_TCS_NUM_USER_SGPR;
- num_sgprs = shader->num_sgprs;
+ num_sgprs = shader->config.num_sgprs;
/* One SGPR after user SGPRs is pre-loaded with tessellation factor
* buffer offset. */
if ((num_user_sgprs + 1) > num_sgprs) {
@@ -155,12 +155,12 @@ static void si_shader_hs(struct si_shader *shader)
si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40);
si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
- S_00B428_VGPRS((shader->num_vgprs - 1) / 4) |
+ S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B428_SGPRS((num_sgprs - 1) / 8) |
S_00B428_DX10_CLAMP(shader->dx10_clamp_mode));
si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
S_00B42C_USER_SGPR(num_user_sgprs) |
- S_00B42C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
+ S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
}
static void si_shader_es(struct si_shader *shader)
@@ -187,7 +187,7 @@ static void si_shader_es(struct si_shader *shader)
} else
unreachable("invalid shader selector type");
- num_sgprs = shader->num_sgprs;
+ num_sgprs = shader->config.num_sgprs;
/* One SGPR after user SGPRs is pre-loaded with es2gs_offset */
if ((num_user_sgprs + 1) > num_sgprs) {
/* Last 2 reserved SGPRs are used for VCC */
@@ -200,13 +200,13 @@ static void si_shader_es(struct si_shader *shader)
si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, va >> 40);
si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES,
- S_00B328_VGPRS((shader->num_vgprs - 1) / 4) |
+ S_00B328_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B328_SGPRS((num_sgprs - 1) / 8) |
S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) |
S_00B328_DX10_CLAMP(shader->dx10_clamp_mode));
si_pm4_set_reg(pm4, R_00B32C_SPI_SHADER_PGM_RSRC2_ES,
S_00B32C_USER_SGPR(num_user_sgprs) |
- S_00B32C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
+ S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
si_set_tesseval_regs(shader, pm4);
@@ -272,7 +272,7 @@ static void si_shader_gs(struct si_shader *shader)
si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, va >> 40);
num_user_sgprs = SI_GS_NUM_USER_SGPR;
- num_sgprs = shader->num_sgprs;
+ num_sgprs = shader->config.num_sgprs;
/* Two SGPRs after user SGPRs are pre-loaded with gs2vs_offset, gs_wave_id */
if ((num_user_sgprs + 2) > num_sgprs) {
/* Last 2 reserved SGPRs are used for VCC */
@@ -281,12 +281,12 @@ static void si_shader_gs(struct si_shader *shader)
assert(num_sgprs <= 104);
si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
- S_00B228_VGPRS((shader->num_vgprs - 1) / 4) |
+ S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B228_SGPRS((num_sgprs - 1) / 8) |
S_00B228_DX10_CLAMP(shader->dx10_clamp_mode));
si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
S_00B22C_USER_SGPR(num_user_sgprs) |
- S_00B22C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
+ S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
}
static void si_shader_vs(struct si_shader *shader)
@@ -329,7 +329,7 @@ static void si_shader_vs(struct si_shader *shader)
} else
unreachable("invalid shader selector type");
- num_sgprs = shader->num_sgprs;
+ num_sgprs = shader->config.num_sgprs;
if (num_user_sgprs > num_sgprs) {
/* Last 2 reserved SGPRs are used for VCC */
num_sgprs = num_user_sgprs + 2;
@@ -356,7 +356,7 @@ static void si_shader_vs(struct si_shader *shader)
si_pm4_set_reg(pm4, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8);
si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, va >> 40);
si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS,
- S_00B128_VGPRS((shader->num_vgprs - 1) / 4) |
+ S_00B128_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B128_SGPRS((num_sgprs - 1) / 8) |
S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) |
S_00B128_DX10_CLAMP(shader->dx10_clamp_mode));
@@ -367,7 +367,7 @@ static void si_shader_vs(struct si_shader *shader)
S_00B12C_SO_BASE2_EN(!!shader->selector->so.stride[2]) |
S_00B12C_SO_BASE3_EN(!!shader->selector->so.stride[3]) |
S_00B12C_SO_EN(!!shader->selector->so.num_outputs) |
- S_00B12C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
+ S_00B12C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
if (window_space)
si_pm4_set_reg(pm4, R_028818_PA_CL_VTE_CNTL,
S_028818_VTX_XY_FMT(1) | S_028818_VTX_Z_FMT(1));
@@ -387,6 +387,8 @@ static void si_shader_ps(struct si_shader *shader)
struct tgsi_shader_info *info = &shader->selector->info;
struct si_pm4_state *pm4;
unsigned i, spi_ps_in_control;
+ unsigned spi_shader_col_format = 0, cb_shader_mask = 0;
+ unsigned colors_written, export_16bpc;
unsigned num_sgprs, num_user_sgprs;
unsigned spi_baryc_cntl = 0;
uint64_t va;
@@ -422,19 +424,43 @@ static void si_shader_ps(struct si_shader *shader)
}
}
- has_centroid = G_0286CC_PERSP_CENTROID_ENA(shader->spi_ps_input_ena) ||
- G_0286CC_LINEAR_CENTROID_ENA(shader->spi_ps_input_ena);
+ /* Find out what SPI_SHADER_COL_FORMAT and CB_SHADER_MASK should be. */
+ colors_written = info->colors_written;
+ export_16bpc = shader->key.ps.export_16bpc;
+
+ if (info->colors_written == 0x1 &&
+ info->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) {
+ colors_written |= (1 << (shader->key.ps.last_cbuf + 1)) - 1;
+ }
+
+ while (colors_written) {
+ i = u_bit_scan(&colors_written);
+ if (export_16bpc & (1 << i))
+ spi_shader_col_format |= V_028714_SPI_SHADER_FP16_ABGR << (4 * i);
+ else
+ spi_shader_col_format |= V_028714_SPI_SHADER_32_ABGR << (4 * i);
+ cb_shader_mask |= 0xf << (4 * i);
+ }
+
+ /* Set interpolation controls. */
+ has_centroid = G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena) ||
+ G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena);
spi_ps_in_control = S_0286D8_NUM_INTERP(shader->nparam) |
S_0286D8_BC_OPTIMIZE_DISABLE(has_centroid);
+ /* Set registers. */
si_pm4_set_reg(pm4, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl);
si_pm4_set_reg(pm4, R_0286D8_SPI_PS_IN_CONTROL, spi_ps_in_control);
- si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT, shader->spi_shader_z_format);
- si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT,
- shader->spi_shader_col_format);
- si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, shader->cb_shader_mask);
+ si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT,
+ info->writes_samplemask ? V_028710_SPI_SHADER_32_ABGR :
+ info->writes_stencil ? V_028710_SPI_SHADER_32_GR :
+ info->writes_z ? V_028710_SPI_SHADER_32_R :
+ V_028710_SPI_SHADER_ZERO);
+
+ si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT, spi_shader_col_format);
+ si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, cb_shader_mask);
va = shader->bo->gpu_address;
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
@@ -442,7 +468,7 @@ static void si_shader_ps(struct si_shader *shader)
si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40);
num_user_sgprs = SI_PS_NUM_USER_SGPR;
- num_sgprs = shader->num_sgprs;
+ num_sgprs = shader->config.num_sgprs;
/* One SGPR after user SGPRs is pre-loaded with {prim_mask, lds_offset} */
if ((num_user_sgprs + 1) > num_sgprs) {
/* Last 2 reserved SGPRs are used for VCC */
@@ -451,13 +477,13 @@ static void si_shader_ps(struct si_shader *shader)
assert(num_sgprs <= 104);
si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
- S_00B028_VGPRS((shader->num_vgprs - 1) / 4) |
+ S_00B028_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B028_SGPRS((num_sgprs - 1) / 8) |
S_00B028_DX10_CLAMP(shader->dx10_clamp_mode));
si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
- S_00B02C_EXTRA_LDS_SIZE(shader->lds_size) |
+ S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) |
S_00B02C_USER_SGPR(num_user_sgprs) |
- S_00B32C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
+ S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
}
static void si_shader_init_pm4_state(struct si_shader *shader)
@@ -496,6 +522,16 @@ static void si_shader_init_pm4_state(struct si_shader *shader)
}
}
+static unsigned si_get_alpha_test_func(struct si_context *sctx)
+{
+ /* Alpha-test should be disabled if colorbuffer 0 is integer. */
+ if (sctx->queued.named.dsa &&
+ !sctx->framebuffer.cb0_is_integer)
+ return sctx->queued.named.dsa->alpha_func;
+
+ return PIPE_FUNC_ALWAYS;
+}
+
/* Compute the key for the hw shader variant */
static inline void si_shader_selector_key(struct pipe_context *ctx,
struct si_shader_selector *sel,
@@ -537,8 +573,10 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
case PIPE_SHADER_FRAGMENT: {
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
- if (sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
+ if (sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
+ sel->info.colors_written == 0x1)
key->ps.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1;
+
key->ps.export_16bpc = sctx->framebuffer.export_16bpc;
if (rs) {
@@ -562,11 +600,7 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
key->ps.clamp_color = rs->clamp_fragment_color;
}
- key->ps.alpha_func = PIPE_FUNC_ALWAYS;
- /* Alpha-test should be disabled if colorbuffer 0 is integer. */
- if (sctx->queued.named.dsa &&
- !sctx->framebuffer.cb0_is_integer)
- key->ps.alpha_func = sctx->queued.named.dsa->alpha_func;
+ key->ps.alpha_func = si_get_alpha_test_func(sctx);
break;
}
default:
@@ -616,7 +650,7 @@ static int si_shader_select(struct pipe_context *ctx,
shader->selector = sel;
shader->key = key;
- r = si_shader_create(sctx->screen, sctx->tm, shader);
+ r = si_shader_create(sctx->screen, sctx->tm, shader, &sctx->b.debug);
if (unlikely(r)) {
R600_ERR("Failed to build shader variant (type=%u) %d\n",
sel->type, r);
@@ -731,6 +765,25 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
break;
}
+ /* DB_SHADER_CONTROL */
+ sel->db_shader_control =
+ S_02880C_Z_EXPORT_ENABLE(sel->info.writes_z) |
+ S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(sel->info.writes_stencil) |
+ S_02880C_MASK_EXPORT_ENABLE(sel->info.writes_samplemask) |
+ S_02880C_KILL_ENABLE(sel->info.uses_kill);
+
+ switch (sel->info.properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT]) {
+ case TGSI_FS_DEPTH_LAYOUT_GREATER:
+ sel->db_shader_control |=
+ S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z);
+ break;
+ case TGSI_FS_DEPTH_LAYOUT_LESS:
+ sel->db_shader_control |=
+ S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z);
+ break;
+ }
+
+ /* Pre-compilation. */
if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
struct si_shader_ctx_state state = {sel};
@@ -987,7 +1040,7 @@ static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom
if (!ps)
return;
- input_ena = ps->spi_ps_input_ena;
+ input_ena = ps->config.spi_ps_input_ena;
/* we need to enable at least one of them, otherwise we hang the GPU */
assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) ||
@@ -1216,7 +1269,7 @@ static int si_update_scratch_buffer(struct si_context *sctx,
return 0;
/* This shader doesn't need a scratch buffer */
- if (shader->scratch_bytes_per_wave == 0)
+ if (shader->config.scratch_bytes_per_wave == 0)
return 0;
/* This shader is already configured to use the current
@@ -1248,7 +1301,7 @@ static unsigned si_get_current_scratch_buffer_size(struct si_context *sctx)
static unsigned si_get_scratch_buffer_bytes_per_wave(struct si_shader *shader)
{
- return shader ? shader->scratch_bytes_per_wave : 0;
+ return shader ? shader->config.scratch_bytes_per_wave : 0;
}
static unsigned si_get_max_scratch_bytes_per_wave(struct si_context *sctx)
@@ -1549,6 +1602,10 @@ bool si_update_shaders(struct si_context *sctx)
si_update_vgt_shader_config(sctx);
if (sctx->ps_shader.cso) {
+ unsigned db_shader_control =
+ sctx->ps_shader.cso->db_shader_control |
+ S_02880C_KILL_ENABLE(si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS);
+
r = si_shader_select(ctx, &sctx->ps_shader);
if (r)
return false;
@@ -1568,8 +1625,8 @@ bool si_update_shaders(struct si_context *sctx)
si_mark_atom_dirty(sctx, &sctx->spi_ps_input);
}
- if (sctx->ps_db_shader_control != sctx->ps_shader.current->db_shader_control) {
- sctx->ps_db_shader_control = sctx->ps_shader.current->db_shader_control;
+ if (sctx->ps_db_shader_control != db_shader_control) {
+ sctx->ps_db_shader_control = db_shader_control;
si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h
index 8e5e24217a5..d5c4aaae638 100644
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -37,6 +37,7 @@
#include "draw/draw_vertex.h"
#include "sp_quad_pipe.h"
+#include "sp_setup.h"
/** Do polygon stipple in the draw module? */
@@ -117,17 +118,17 @@ struct softpipe_context {
unsigned const_buffer_size[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS];
/** Vertex format */
+ struct sp_setup_info setup_info;
struct vertex_info vertex_info;
- struct vertex_info vertex_info_vbuf;
/** Which vertex shader output slot contains point size */
- int psize_slot;
+ int8_t psize_slot;
/** Which vertex shader output slot contains viewport index */
- int viewport_index_slot;
+ int8_t viewport_index_slot;
/** Which vertex shader output slot contains layer */
- int layer_slot;
+ int8_t layer_slot;
/** The reduced version of the primitive supplied by the state tracker */
unsigned reduced_api_prim;
diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
index f8a3eacdb37..95d1ac1514f 100644
--- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c
+++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
@@ -161,7 +161,7 @@ sp_vbuf_draw_elements(struct vbuf_render *vbr, const ushort *indices, uint nr)
{
struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
struct softpipe_context *softpipe = cvbr->softpipe;
- const unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float);
+ const unsigned stride = softpipe->vertex_info.size * sizeof(float);
const void *vertex_buffer = cvbr->vertex_buffer;
struct setup_context *setup = cvbr->setup;
const boolean flatshade_first = softpipe->rasterizer->flatshade_first;
@@ -358,7 +358,7 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
struct softpipe_context *softpipe = cvbr->softpipe;
struct setup_context *setup = cvbr->setup;
- const unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float);
+ const unsigned stride = softpipe->vertex_info.size * sizeof(float);
const void *vertex_buffer =
(void *) get_vert(cvbr->vertex_buffer, start, stride);
const boolean flatshade_first = softpipe->rasterizer->flatshade_first;
diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c
index 76105b4c0ec..c28d28d5f5d 100644
--- a/src/gallium/drivers/softpipe/sp_query.c
+++ b/src/gallium/drivers/softpipe/sp_query.c
@@ -223,7 +223,7 @@ softpipe_get_query_result(struct pipe_context *pipe,
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
memcpy(vresult, &sq->stats,
- sizeof(struct pipe_query_data_pipeline_statistics));;
+ sizeof(struct pipe_query_data_pipeline_statistics));
break;
case PIPE_QUERY_GPU_FINISHED:
vresult->b = TRUE;
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 9939720e259..29e392b94e8 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -251,6 +251,13 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_CLEAR_TEXTURE:
+ case PIPE_CAP_DRAW_PARAMETERS:
+ case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
+ case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+ case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
return 0;
}
/* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c
index ac2d97825ce..ffe49260b9a 100644
--- a/src/gallium/drivers/softpipe/sp_setup.c
+++ b/src/gallium/drivers/softpipe/sp_setup.c
@@ -38,7 +38,6 @@
#include "sp_setup.h"
#include "sp_state.h"
#include "draw/draw_context.h"
-#include "draw/draw_vertex.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_math.h"
#include "util/u_memory.h"
@@ -599,10 +598,12 @@ setup_tri_coefficients(struct setup_context *setup)
{
struct softpipe_context *softpipe = setup->softpipe;
const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info;
- const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
+ const struct sp_setup_info *sinfo = &softpipe->setup_info;
uint fragSlot;
float v[3];
+ assert(sinfo->valid);
+
/* z and w are done by linear interpolation:
*/
v[0] = setup->vmin[0][2];
@@ -618,15 +619,16 @@ setup_tri_coefficients(struct setup_context *setup)
/* setup interpolation for all the remaining attributes:
*/
for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) {
- const uint vertSlot = vinfo->attrib[fragSlot].src_index;
+ const uint vertSlot = sinfo->attrib[fragSlot].src_index;
uint j;
- switch (vinfo->attrib[fragSlot].interp_mode) {
- case INTERP_CONSTANT:
- for (j = 0; j < TGSI_NUM_CHANNELS; j++)
+ switch (sinfo->attrib[fragSlot].interp) {
+ case SP_INTERP_CONSTANT:
+ for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
+ }
break;
- case INTERP_LINEAR:
+ case SP_INTERP_LINEAR:
for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
tri_apply_cylindrical_wrap(setup->vmin[vertSlot][j],
setup->vmid[vertSlot][j],
@@ -636,7 +638,7 @@ setup_tri_coefficients(struct setup_context *setup)
tri_linear_coeff(setup, &setup->coef[fragSlot], j, v);
}
break;
- case INTERP_PERSPECTIVE:
+ case SP_INTERP_PERSPECTIVE:
for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
tri_apply_cylindrical_wrap(setup->vmin[vertSlot][j],
setup->vmid[vertSlot][j],
@@ -646,7 +648,7 @@ setup_tri_coefficients(struct setup_context *setup)
tri_persp_coeff(setup, &setup->coef[fragSlot], j, v);
}
break;
- case INTERP_POS:
+ case SP_INTERP_POS:
setup_fragcoord_coeff(setup, fragSlot);
break;
default:
@@ -966,11 +968,13 @@ setup_line_coefficients(struct setup_context *setup,
{
struct softpipe_context *softpipe = setup->softpipe;
const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info;
- const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
+ const struct sp_setup_info *sinfo = &softpipe->setup_info;
uint fragSlot;
float area;
float v[2];
+ assert(sinfo->valid);
+
/* use setup->vmin, vmax to point to vertices */
if (softpipe->rasterizer->flatshade_first)
setup->vprovoke = v0;
@@ -1001,15 +1005,15 @@ setup_line_coefficients(struct setup_context *setup,
/* setup interpolation for all the remaining attributes:
*/
for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) {
- const uint vertSlot = vinfo->attrib[fragSlot].src_index;
+ const uint vertSlot = sinfo->attrib[fragSlot].src_index;
uint j;
- switch (vinfo->attrib[fragSlot].interp_mode) {
- case INTERP_CONSTANT:
+ switch (sinfo->attrib[fragSlot].interp) {
+ case SP_INTERP_CONSTANT:
for (j = 0; j < TGSI_NUM_CHANNELS; j++)
const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
break;
- case INTERP_LINEAR:
+ case SP_INTERP_LINEAR:
for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
line_apply_cylindrical_wrap(setup->vmin[vertSlot][j],
setup->vmax[vertSlot][j],
@@ -1018,7 +1022,7 @@ setup_line_coefficients(struct setup_context *setup,
line_linear_coeff(setup, &setup->coef[fragSlot], j, v);
}
break;
- case INTERP_PERSPECTIVE:
+ case SP_INTERP_PERSPECTIVE:
for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
line_apply_cylindrical_wrap(setup->vmin[vertSlot][j],
setup->vmax[vertSlot][j],
@@ -1027,7 +1031,7 @@ setup_line_coefficients(struct setup_context *setup,
line_persp_coeff(setup, &setup->coef[fragSlot], j, v);
}
break;
- case INTERP_POS:
+ case SP_INTERP_POS:
setup_fragcoord_coeff(setup, fragSlot);
break;
default:
@@ -1236,7 +1240,7 @@ sp_setup_point(struct setup_context *setup,
const boolean round = (boolean) setup->softpipe->rasterizer->point_smooth;
const float x = v0[0][0]; /* Note: data[0] is always position */
const float y = v0[0][1];
- const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
+ const struct sp_setup_info *sinfo = &softpipe->setup_info;
uint fragSlot;
uint layer = 0;
unsigned viewport_index = 0;
@@ -1245,6 +1249,8 @@ sp_setup_point(struct setup_context *setup,
print_vertex(setup, v0);
#endif
+ assert(sinfo->valid);
+
if (setup->softpipe->no_rast || setup->softpipe->rasterizer->rasterizer_discard)
return;
@@ -1285,22 +1291,22 @@ sp_setup_point(struct setup_context *setup,
const_coeff(setup, &setup->posCoef, 0, 3);
for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) {
- const uint vertSlot = vinfo->attrib[fragSlot].src_index;
+ const uint vertSlot = sinfo->attrib[fragSlot].src_index;
uint j;
- switch (vinfo->attrib[fragSlot].interp_mode) {
- case INTERP_CONSTANT:
+ switch (sinfo->attrib[fragSlot].interp) {
+ case SP_INTERP_CONSTANT:
/* fall-through */
- case INTERP_LINEAR:
+ case SP_INTERP_LINEAR:
for (j = 0; j < TGSI_NUM_CHANNELS; j++)
const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
break;
- case INTERP_PERSPECTIVE:
+ case SP_INTERP_PERSPECTIVE:
for (j = 0; j < TGSI_NUM_CHANNELS; j++)
point_persp_coeff(setup, setup->vprovoke,
&setup->coef[fragSlot], vertSlot, j);
break;
- case INTERP_POS:
+ case SP_INTERP_POS:
setup_fragcoord_coeff(setup, fragSlot);
break;
default:
diff --git a/src/gallium/drivers/softpipe/sp_setup.h b/src/gallium/drivers/softpipe/sp_setup.h
index 191494acbb8..a54dc5dad0c 100644
--- a/src/gallium/drivers/softpipe/sp_setup.h
+++ b/src/gallium/drivers/softpipe/sp_setup.h
@@ -30,11 +30,30 @@
struct setup_context;
struct softpipe_context;
+/**
+ * Attribute interpolation mode
+ */
+enum sp_interp_mode {
+ SP_INTERP_POS, /**< special case for frag position */
+ SP_INTERP_CONSTANT,
+ SP_INTERP_LINEAR,
+ SP_INTERP_PERSPECTIVE
+};
+
+
+struct sp_setup_info {
+ unsigned valid;
+ struct {
+ unsigned interp:8; /**< SP_INTERP_X */
+ int src_index:8;
+ } attrib[PIPE_MAX_SHADER_OUTPUTS];
+};
+
void
-sp_setup_tri( struct setup_context *setup,
- const float (*v0)[4],
- const float (*v1)[4],
- const float (*v2)[4] );
+sp_setup_tri(struct setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4]);
void
sp_setup_line(struct setup_context *setup,
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h
index c35534c931d..16a2897f526 100644
--- a/src/gallium/drivers/softpipe/sp_state.h
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -175,9 +175,6 @@ softpipe_unmap_texture_surfaces(struct softpipe_context *sp);
struct vertex_info *
-softpipe_get_vertex_info(struct softpipe_context *softpipe);
-
-struct vertex_info *
softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe);
diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c
index 7e998af1325..d4d03f1be50 100644
--- a/src/gallium/drivers/softpipe/sp_state_derived.c
+++ b/src/gallium/drivers/softpipe/sp_state_derived.c
@@ -48,7 +48,7 @@
static void
invalidate_vertex_layout(struct softpipe_context *softpipe)
{
- softpipe->vertex_info.num_attribs = 0;
+ softpipe->setup_info.valid = 0;
}
@@ -57,50 +57,64 @@ invalidate_vertex_layout(struct softpipe_context *softpipe)
* (simple float[][4]) used by the 'draw' module into vertices for
* rasterization.
*
- * This function validates the vertex layout and returns a pointer to a
- * vertex_info object.
+ * This function validates the vertex layout.
*/
-struct vertex_info *
-softpipe_get_vertex_info(struct softpipe_context *softpipe)
+static void
+softpipe_compute_vertex_info(struct softpipe_context *softpipe)
{
- struct vertex_info *vinfo = &softpipe->vertex_info;
- int vs_index;
+ struct sp_setup_info *sinfo = &softpipe->setup_info;
- if (vinfo->num_attribs == 0) {
- /* compute vertex layout now */
+ if (sinfo->valid == 0) {
const struct tgsi_shader_info *fsInfo = &softpipe->fs_variant->info;
- struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf;
- const uint num = draw_num_shader_outputs(softpipe->draw);
+ struct vertex_info *vinfo = &softpipe->vertex_info;
uint i;
-
- /* Tell draw_vbuf to simply emit the whole post-xform vertex
- * as-is. No longer any need to try and emit draw vertex_header
- * info.
+ int vs_index;
+ /*
+ * This doesn't quite work right (wrt face injection, prim id,
+ * wide points) - hit a couple assertions, misrenderings plus
+ * memory corruption. Albeit could fix (the former two) by calling
+ * this "more often" (rasterizer changes etc.). (The latter would
+ * need to be included in draw_prepare_shader_outputs, but it looks
+ * like that would potentially allocate quite some unused additional
+ * vertex outputs.)
+ * draw_prepare_shader_outputs(softpipe->draw);
*/
- vinfo_vbuf->num_attribs = 0;
- for (i = 0; i < num; i++) {
- draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i);
- }
- draw_compute_vertex_size(vinfo_vbuf);
/*
- * Loop over fragment shader inputs, searching for the matching output
- * from the vertex shader.
+ * Those can't actually be 0 (because pos is always at 0).
+ * But use ints anyway to avoid confusion (in vs outputs, they
+ * can very well be at pos 0).
*/
+ softpipe->viewport_index_slot = -1;
+ softpipe->layer_slot = -1;
+ softpipe->psize_slot = -1;
+
vinfo->num_attribs = 0;
+
+ /*
+ * Put position always first (setup needs it there).
+ */
+ vs_index = draw_find_shader_output(softpipe->draw,
+ TGSI_SEMANTIC_POSITION, 0);
+
+ draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
+
+ /*
+ * Match FS inputs against VS outputs, emitting the necessary
+ * attributes.
+ */
for (i = 0; i < fsInfo->num_inputs; i++) {
- int src;
- enum interp_mode interp = INTERP_LINEAR;
+ enum sp_interp_mode interp = SP_INTERP_LINEAR;
switch (fsInfo->input_interpolate[i]) {
case TGSI_INTERPOLATE_CONSTANT:
- interp = INTERP_CONSTANT;
+ interp = SP_INTERP_CONSTANT;
break;
case TGSI_INTERPOLATE_LINEAR:
- interp = INTERP_LINEAR;
+ interp = SP_INTERP_LINEAR;
break;
case TGSI_INTERPOLATE_PERSPECTIVE:
- interp = INTERP_PERSPECTIVE;
+ interp = SP_INTERP_PERSPECTIVE;
break;
case TGSI_INTERPOLATE_COLOR:
assert(fsInfo->input_semantic_name[i] == TGSI_SEMANTIC_COLOR);
@@ -111,88 +125,121 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe)
switch (fsInfo->input_semantic_name[i]) {
case TGSI_SEMANTIC_POSITION:
- interp = INTERP_POS;
+ interp = SP_INTERP_POS;
break;
case TGSI_SEMANTIC_COLOR:
if (fsInfo->input_interpolate[i] == TGSI_INTERPOLATE_COLOR) {
if (softpipe->rasterizer->flatshade)
- interp = INTERP_CONSTANT;
+ interp = SP_INTERP_CONSTANT;
else
- interp = INTERP_PERSPECTIVE;
+ interp = SP_INTERP_PERSPECTIVE;
}
break;
}
- /* this includes texcoords and varying vars */
- src = draw_find_shader_output(softpipe->draw,
- fsInfo->input_semantic_name[i],
- fsInfo->input_semantic_index[i]);
- if (fsInfo->input_semantic_name[i] == TGSI_SEMANTIC_COLOR && src == -1)
- /* try and find a bcolor */
- src = draw_find_shader_output(softpipe->draw,
- TGSI_SEMANTIC_BCOLOR, fsInfo->input_semantic_index[i]);
+ /*
+ * Search for each input in current vs output:
+ */
+ vs_index = draw_find_shader_output(softpipe->draw,
+ fsInfo->input_semantic_name[i],
+ fsInfo->input_semantic_index[i]);
+
+ if (fsInfo->input_semantic_name[i] == TGSI_SEMANTIC_COLOR &&
+ vs_index == -1) {
+ /*
+ * try and find a bcolor.
+ * Note that if there's both front and back color, draw will
+ * have copied back to front color already.
+ */
+ vs_index = draw_find_shader_output(softpipe->draw,
+ TGSI_SEMANTIC_BCOLOR,
+ fsInfo->input_semantic_index[i]);
+ }
- draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
+ sinfo->attrib[i].interp = interp;
+ /* extremely pointless index map */
+ sinfo->attrib[i].src_index = i + 1;
+ /*
+ * For vp index and layer, if the fs requires them but the vs doesn't
+ * provide them, draw (vbuf) will give us the required 0 (slot -1).
+ * (This means in this case we'll also use those slots in setup, which
+ * isn't necessary but they'll contain the correct (0) value.)
+ */
+ if (fsInfo->input_semantic_name[i] ==
+ TGSI_SEMANTIC_VIEWPORT_INDEX) {
+ softpipe->viewport_index_slot = (int)vinfo->num_attribs;
+ draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
+ } else if (fsInfo->input_semantic_name[i] == TGSI_SEMANTIC_LAYER) {
+ softpipe->layer_slot = (int)vinfo->num_attribs;
+ draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
+ /*
+ * Note that we'd actually want to skip position (as we won't use
+ * the attribute in the fs) but can't. The reason is that we don't
+ * actually have a input/output map for setup (even though it looks
+ * like we do...). Could adjust for this though even without a map.
+ */
+ } else {
+ /*
+ * Note that we'd actually want to skip position (as we won't use
+ * the attribute in the fs) but can't. The reason is that we don't
+ * actually have a input/output map for setup (even though it looks
+ * like we do...). Could adjust for this though even without a map.
+ */
+ draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
+ }
}
- /* Figure out if we need pointsize as well. */
+ /* Figure out if we need pointsize as well.
+ */
vs_index = draw_find_shader_output(softpipe->draw,
TGSI_SEMANTIC_PSIZE, 0);
if (vs_index >= 0) {
- softpipe->psize_slot = vinfo->num_attribs;
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
+ softpipe->psize_slot = (int)vinfo->num_attribs;
+ draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
}
- /* Figure out if we need viewport index */
- vs_index = draw_find_shader_output(softpipe->draw,
- TGSI_SEMANTIC_VIEWPORT_INDEX,
- 0);
- if (vs_index >= 0) {
- softpipe->viewport_index_slot = vinfo->num_attribs;
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
- } else {
- softpipe->viewport_index_slot = 0;
+ /* Figure out if we need viewport index (if it wasn't already in fs input) */
+ if (softpipe->viewport_index_slot < 0) {
+ vs_index = draw_find_shader_output(softpipe->draw,
+ TGSI_SEMANTIC_VIEWPORT_INDEX,
+ 0);
+ if (vs_index >= 0) {
+ softpipe->viewport_index_slot =(int)vinfo->num_attribs;
+ draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
+ }
}
- /* Figure out if we need layer */
- vs_index = draw_find_shader_output(softpipe->draw,
- TGSI_SEMANTIC_LAYER,
- 0);
- if (vs_index >= 0) {
- softpipe->layer_slot = vinfo->num_attribs;
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
- } else {
- softpipe->layer_slot = 0;
+ /* Figure out if we need layer (if it wasn't already in fs input) */
+ if (softpipe->layer_slot < 0) {
+ vs_index = draw_find_shader_output(softpipe->draw,
+ TGSI_SEMANTIC_LAYER,
+ 0);
+ if (vs_index >= 0) {
+ softpipe->layer_slot = (int)vinfo->num_attribs;
+ draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
+ }
}
draw_compute_vertex_size(vinfo);
+ softpipe->setup_info.valid = 1;
}
-
- return vinfo;
+ return;
}
/**
* Called from vbuf module.
*
- * Note that there's actually two different vertex layouts in softpipe.
- *
- * The normal one is computed in softpipe_get_vertex_info() above and is
- * used by the point/line/tri "setup" code.
- *
- * The other one (this one) is only used by the vbuf module (which is
- * not normally used by default but used in testing). For the vbuf module,
- * we basically want to pass-through the draw module's vertex layout as-is.
- * When the softpipe vbuf code begins drawing, the normal vertex layout
- * will come into play again.
+ * This will trigger validation of the vertex layout (and also compute
+ * the required information for setup).
*/
struct vertex_info *
softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe)
{
- (void) softpipe_get_vertex_info(softpipe);
- return &softpipe->vertex_info_vbuf;
+ softpipe_compute_vertex_info(softpipe);
+ return &softpipe->vertex_info;
}
diff --git a/src/gallium/drivers/softpipe/sp_state_shader.c b/src/gallium/drivers/softpipe/sp_state_shader.c
index dce0404de5b..f0d66a53ec6 100644
--- a/src/gallium/drivers/softpipe/sp_state_shader.c
+++ b/src/gallium/drivers/softpipe/sp_state_shader.c
@@ -64,7 +64,8 @@ create_fs_variant(struct softpipe_context *softpipe,
/* get new shader that implements polygon stippling */
var->tokens =
util_pstipple_create_fragment_shader(curfs->tokens,
- &var->stipple_sampler_unit, 0);
+ &var->stipple_sampler_unit, 0,
+ TGSI_FILE_INPUT);
}
else
#endif
diff --git a/src/gallium/drivers/svga/svga_cmd.c b/src/gallium/drivers/svga/svga_cmd.c
index 10442cb46e7..e45b3e72aeb 100644
--- a/src/gallium/drivers/svga/svga_cmd.c
+++ b/src/gallium/drivers/svga/svga_cmd.c
@@ -337,7 +337,7 @@ SVGA3D_DefineSurface2D(struct svga_winsys_context *swc, // IN
mipSizes[0].height = height;
mipSizes[0].depth = 1;
- swc->commit(swc);;
+ swc->commit(swc);
return PIPE_OK;
}
@@ -372,7 +372,7 @@ SVGA3D_DestroySurface(struct svga_winsys_context *swc,
swc->surface_relocation(swc, &cmd->sid, NULL, sid,
SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL);
- swc->commit(swc);;
+ swc->commit(swc);
return PIPE_OK;
}
@@ -473,6 +473,7 @@ SVGA3D_SurfaceDMA(struct svga_winsys_context *swc,
pSuffix->flags = flags;
swc->commit(swc);
+ swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH;
return PIPE_OK;
}
@@ -543,6 +544,7 @@ SVGA3D_BufferDMA(struct svga_winsys_context *swc,
pSuffix->flags = flags;
swc->commit(swc);
+ swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH;
return PIPE_OK;
}
@@ -1016,7 +1018,7 @@ SVGA3D_BeginDrawPrimitives(struct svga_winsys_context *swc,
*decls = declArray;
*ranges = rangeArray;
- swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED;
+ swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH;
return PIPE_OK;
}
@@ -1720,6 +1722,7 @@ SVGA3D_UpdateGBImage(struct svga_winsys_context *swc,
cmd->box = *box;
swc->commit(swc);
+ swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH;
return PIPE_OK;
}
@@ -1746,6 +1749,7 @@ SVGA3D_UpdateGBSurface(struct svga_winsys_context *swc,
SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL);
swc->commit(swc);
+ swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH;
return PIPE_OK;
}
@@ -1775,6 +1779,7 @@ SVGA3D_ReadbackGBImage(struct svga_winsys_context *swc,
cmd->image.mipmap = mipLevel;
swc->commit(swc);
+ swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH;
return PIPE_OK;
}
@@ -1801,6 +1806,7 @@ SVGA3D_ReadbackGBSurface(struct svga_winsys_context *swc,
SVGA_RELOC_READ | SVGA_RELOC_INTERNAL);
swc->commit(swc);
+ swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH;
return PIPE_OK;
}
@@ -1829,6 +1835,7 @@ SVGA3D_ReadbackGBImagePartial(struct svga_winsys_context *swc,
cmd->invertBox = invertBox;
swc->commit(swc);
+ swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH;
return PIPE_OK;
}
diff --git a/src/gallium/drivers/svga/svga_cmd_vgpu10.c b/src/gallium/drivers/svga/svga_cmd_vgpu10.c
index 5c121089f91..4cd9d5b9d1e 100644
--- a/src/gallium/drivers/svga/svga_cmd_vgpu10.c
+++ b/src/gallium/drivers/svga/svga_cmd_vgpu10.c
@@ -535,7 +535,7 @@ SVGA3D_vgpu10_Draw(struct svga_winsys_context *swc,
SVGA3D_COPY_BASIC_2(vertexCount, startVertexLocation);
- swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED;
+ swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH;
swc->commit(swc);
return PIPE_OK;
}
@@ -551,7 +551,7 @@ SVGA3D_vgpu10_DrawIndexed(struct svga_winsys_context *swc,
SVGA3D_COPY_BASIC_3(indexCount, startIndexLocation,
baseVertexLocation);
- swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED;
+ swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH;
swc->commit(swc);
return PIPE_OK;
}
@@ -568,7 +568,7 @@ SVGA3D_vgpu10_DrawInstanced(struct svga_winsys_context *swc,
SVGA3D_COPY_BASIC_4(vertexCountPerInstance, instanceCount,
startVertexLocation, startInstanceLocation);
- swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED;
+ swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH;
swc->commit(swc);
return PIPE_OK;
}
@@ -588,7 +588,7 @@ SVGA3D_vgpu10_DrawIndexedInstanced(struct svga_winsys_context *swc,
startInstanceLocation);
- swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED;
+ swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH;
swc->commit(swc);
return PIPE_OK;
}
@@ -598,7 +598,7 @@ SVGA3D_vgpu10_DrawAuto(struct svga_winsys_context *swc)
{
SVGA3D_CREATE_COMMAND(DrawAuto, DRAW_AUTO);
- swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED;
+ swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH;
swc->commit(swc);
return PIPE_OK;
}
diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c
index d407785ddd9..b10eb45e548 100644
--- a/src/gallium/drivers/svga/svga_context.c
+++ b/src/gallium/drivers/svga/svga_context.c
@@ -46,7 +46,6 @@
#include "svga_winsys.h"
#define CONST0_UPLOAD_DEFAULT_SIZE 65536
-#define CONST0_UPLOAD_ALIGNMENT 256
DEBUG_GET_ONCE_BOOL_OPTION(no_swtnl, "SVGA_NO_SWTNL", FALSE)
DEBUG_GET_ONCE_BOOL_OPTION(force_swtnl, "SVGA_FORCE_SWTNL", FALSE);
@@ -220,8 +219,8 @@ struct pipe_context *svga_context_create(struct pipe_screen *screen,
svga->const0_upload = u_upload_create(&svga->pipe,
CONST0_UPLOAD_DEFAULT_SIZE,
- CONST0_UPLOAD_ALIGNMENT,
- PIPE_BIND_CONSTANT_BUFFER);
+ PIPE_BIND_CONSTANT_BUFFER,
+ PIPE_USAGE_STREAM);
if (!svga->const0_upload)
goto cleanup;
diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
index 78e346a92b9..e4f29b8497e 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -74,6 +74,8 @@
*/
#define SVGA_MAX_CONST_BUF_SIZE (4096 * 4 * sizeof(int))
+#define CONST0_UPLOAD_ALIGNMENT 256
+
struct draw_vertex_shader;
struct draw_fragment_shader;
struct svga_shader_variant;
@@ -312,7 +314,7 @@ struct svga_hw_view_state
struct svga_sampler_view *v;
unsigned min_lod;
unsigned max_lod;
- int dirty;
+ boolean dirty;
};
/* Updated by calling svga_update_state( SVGA_STATE_HW_DRAW )
@@ -343,6 +345,11 @@ struct svga_hw_draw_state
SVGA3dElementLayoutId layout_id;
SVGA3dPrimitiveType topology;
+ /** Vertex buffer state */
+ SVGA3dVertexBuffer vbuffers[PIPE_MAX_ATTRIBS];
+ struct svga_winsys_surface *vbuffer_handles[PIPE_MAX_ATTRIBS];
+ unsigned num_vbuffers;
+
struct svga_winsys_surface *ib; /**< index buffer for drawing */
SVGA3dSurfaceFormat ib_format;
unsigned ib_offset;
diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c
index 2d3631d6f9c..80526ed4d15 100644
--- a/src/gallium/drivers/svga/svga_draw.c
+++ b/src/gallium/drivers/svga/svga_draw.c
@@ -517,11 +517,27 @@ draw_vgpu10(struct svga_hwtnl *hwtnl,
buffers[i].offset = hwtnl->cmd.vbufs[i].buffer_offset;
}
if (vbuf_count > 0) {
- ret = SVGA3D_vgpu10_SetVertexBuffers(svga->swc, vbuf_count,
- 0, /* startBuffer */
- buffers, vb_handle);
- if (ret != PIPE_OK)
- return ret;
+ /* If we haven't yet emitted a drawing command or if any
+ * vertex buffer state is changing, issue that state now.
+ */
+ if (((hwtnl->cmd.swc->hints & SVGA_HINT_FLAG_CAN_PRE_FLUSH) == 0) ||
+ vbuf_count != svga->state.hw_draw.num_vbuffers ||
+ memcmp(buffers, svga->state.hw_draw.vbuffers,
+ vbuf_count * sizeof(buffers[0])) ||
+ memcmp(vb_handle, svga->state.hw_draw.vbuffer_handles,
+ vbuf_count * sizeof(vb_handle[0]))) {
+ ret = SVGA3D_vgpu10_SetVertexBuffers(svga->swc, vbuf_count,
+ 0, /* startBuffer */
+ buffers, vb_handle);
+ if (ret != PIPE_OK)
+ return ret;
+
+ svga->state.hw_draw.num_vbuffers = vbuf_count;
+ memcpy(svga->state.hw_draw.vbuffers, buffers,
+ vbuf_count * sizeof(buffers[0]));
+ memcpy(svga->state.hw_draw.vbuffer_handles, vb_handle,
+ vbuf_count * sizeof(vb_handle[0]));
+ }
}
}
diff --git a/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/src/gallium/drivers/svga/svga_pipe_rasterizer.c
index fa1744fc33e..8e0db539574 100644
--- a/src/gallium/drivers/svga/svga_pipe_rasterizer.c
+++ b/src/gallium/drivers/svga/svga_pipe_rasterizer.c
@@ -368,13 +368,16 @@ static void svga_bind_rasterizer_state( struct pipe_context *pipe,
struct svga_context *svga = svga_context(pipe);
struct svga_rasterizer_state *raster = (struct svga_rasterizer_state *)state;
+ if (!raster ||
+ !svga->curr.rast ||
+ raster->templ.poly_stipple_enable !=
+ svga->curr.rast->templ.poly_stipple_enable) {
+ svga->dirty |= SVGA_NEW_STIPPLE;
+ }
+
svga->curr.rast = raster;
svga->dirty |= SVGA_NEW_RAST;
-
- if (raster && raster->templ.poly_stipple_enable) {
- svga->dirty |= SVGA_NEW_STIPPLE;
- }
}
static void
diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c
index 95241176510..3e778f0a087 100644
--- a/src/gallium/drivers/svga/svga_pipe_sampler.c
+++ b/src/gallium/drivers/svga/svga_pipe_sampler.c
@@ -287,6 +287,7 @@ svga_bind_sampler_states(struct pipe_context *pipe,
{
struct svga_context *svga = svga_context(pipe);
unsigned i;
+ boolean any_change = FALSE;
assert(shader < PIPE_SHADER_TYPES);
assert(start + num <= PIPE_MAX_SAMPLERS);
@@ -295,8 +296,15 @@ svga_bind_sampler_states(struct pipe_context *pipe,
if (!svga_have_vgpu10(svga) && shader != PIPE_SHADER_FRAGMENT)
return;
- for (i = 0; i < num; i++)
+ for (i = 0; i < num; i++) {
+ if (svga->curr.sampler[shader][start + i] != samplers[i])
+ any_change = TRUE;
svga->curr.sampler[shader][start + i] = samplers[i];
+ }
+
+ if (!any_change) {
+ return;
+ }
/* find highest non-null sampler[] entry */
{
@@ -405,6 +413,7 @@ svga_set_sampler_views(struct pipe_context *pipe,
unsigned flag_1d = 0;
unsigned flag_srgb = 0;
uint i;
+ boolean any_change = FALSE;
assert(shader < PIPE_SHADER_TYPES);
assert(start + num <= Elements(svga->curr.sampler_views[shader]));
@@ -422,6 +431,7 @@ svga_set_sampler_views(struct pipe_context *pipe,
pipe_sampler_view_release(pipe, &svga->curr.sampler_views[shader][start + i]);
pipe_sampler_view_reference(&svga->curr.sampler_views[shader][start + i],
views[i]);
+ any_change = TRUE;
}
if (!views[i])
@@ -434,6 +444,10 @@ svga_set_sampler_views(struct pipe_context *pipe,
flag_1d |= 1 << (start + i);
}
+ if (!any_change) {
+ return;
+ }
+
/* find highest non-null sampler_views[] entry */
{
unsigned j = MAX2(svga->curr.num_sampler_views[shader], start + num);
diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/src/gallium/drivers/svga/svga_resource_buffer_upload.c
index 8c5cff5abc1..7f7ceab0aa5 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c
@@ -221,7 +221,7 @@ svga_buffer_upload_gb_command(struct svga_context *svga,
struct svga_3d_update_gb_image *whole_update_cmd = NULL;
uint32 numBoxes = sbuf->map.num_ranges;
struct pipe_resource *dummy;
- unsigned int i;
+ unsigned i;
assert(numBoxes);
assert(sbuf->dma.updates == NULL);
@@ -308,6 +308,7 @@ svga_buffer_upload_gb_command(struct svga_context *svga,
pipe_resource_reference(&dummy, &sbuf->b.b);
SVGA_FIFOCommitAll(swc);
+ swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH;
sbuf->dma.flags.discard = FALSE;
return PIPE_OK;
@@ -381,6 +382,7 @@ svga_buffer_upload_command(struct svga_context *svga,
SVGA_FIFOCommitAll(swc);
+ swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH;
sbuf->dma.flags.discard = FALSE;
return PIPE_OK;
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index fca501bc47d..0f41e4ea254 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -342,6 +342,8 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
case PIPE_CAP_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_SAMPLER_VIEW_TARGET:
@@ -349,6 +351,8 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
+ case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
return 0;
case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
return 64;
@@ -384,6 +388,9 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_CLEAR_TEXTURE:
+ case PIPE_CAP_DRAW_PARAMETERS:
+ case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+ case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
return 0;
}
@@ -457,6 +464,7 @@ vgpu9_get_shader_param(struct pipe_screen *screen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
+ case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
@@ -515,6 +523,7 @@ vgpu9_get_shader_param(struct pipe_screen *screen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
+ case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
@@ -606,6 +615,7 @@ vgpu10_get_shader_param(struct pipe_screen *screen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
+ case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
diff --git a/src/gallium/drivers/svga/svga_state_constants.c b/src/gallium/drivers/svga/svga_state_constants.c
index 2cf41134bd6..8ab1693088a 100644
--- a/src/gallium/drivers/svga/svga_state_constants.c
+++ b/src/gallium/drivers/svga/svga_state_constants.c
@@ -613,7 +613,8 @@ emit_constbuf_vgpu10(struct svga_context *svga, unsigned shader)
*/
new_buf_size = align(new_buf_size, 16);
- u_upload_alloc(svga->const0_upload, 0, new_buf_size, &offset,
+ u_upload_alloc(svga->const0_upload, 0, new_buf_size,
+ CONST0_UPLOAD_ALIGNMENT, &offset,
&dst_buffer, &dst_map);
if (!dst_map) {
if (src_map)
diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c
index e392778c2fb..bac91669be1 100644
--- a/src/gallium/drivers/svga/svga_state_fs.c
+++ b/src/gallium/drivers/svga/svga_state_fs.c
@@ -452,6 +452,7 @@ struct svga_tracked_state svga_hw_fs =
SVGA_NEW_TEXTURE_BINDING |
SVGA_NEW_NEED_SWTNL |
SVGA_NEW_RAST |
+ SVGA_NEW_STIPPLE |
SVGA_NEW_REDUCED_PRIMITIVE |
SVGA_NEW_SAMPLER |
SVGA_NEW_FRAME_BUFFER |
diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c
index 24574c1bf85..a103dab25fe 100644
--- a/src/gallium/drivers/svga/svga_state_vs.c
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@@ -173,8 +173,11 @@ make_vs_key(struct svga_context *svga, struct svga_compile_key *key)
return;
}
+ /* SVGA_NEW_PRESCALE */
key->vs.need_prescale = svga->state.hw_clear.prescale.enabled &&
(svga->curr.gs == NULL);
+
+ /* SVGA_NEW_RAST */
key->vs.allow_psiz = svga->curr.rast->templ.point_size_per_vertex;
/* SVGA_NEW_FS */
diff --git a/src/gallium/drivers/svga/svga_swtnl_state.c b/src/gallium/drivers/svga/svga_swtnl_state.c
index 79dc0bf580c..4d21f4f0e60 100644
--- a/src/gallium/drivers/svga/svga_swtnl_state.c
+++ b/src/gallium/drivers/svga/svga_swtnl_state.c
@@ -220,8 +220,6 @@ svga_swtnl_update_vdecl( struct svga_context *svga )
struct draw_context *draw = svga->swtnl.draw;
struct vertex_info *vinfo = &svga_render->vertex_info;
SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS];
- const enum interp_mode colorInterp =
- svga->curr.rast->templ.flatshade ? INTERP_CONSTANT : INTERP_LINEAR;
struct svga_fragment_shader *fs = svga->curr.fs;
int offset = 0;
int nr_decls = 0;
@@ -236,7 +234,7 @@ svga_swtnl_update_vdecl( struct svga_context *svga )
/* always add position */
src = draw_find_shader_output(draw, TGSI_SEMANTIC_POSITION, 0);
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, src);
vinfo->attrib[0].emit = EMIT_4F;
vdecl[0].array.offset = offset;
vdecl[0].identity.method = SVGA3D_DECLMETHOD_DEFAULT;
@@ -257,14 +255,14 @@ svga_swtnl_update_vdecl( struct svga_context *svga )
switch (sem_name) {
case TGSI_SEMANTIC_COLOR:
- draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, src);
vdecl[nr_decls].identity.usage = SVGA3D_DECLUSAGE_COLOR;
vdecl[nr_decls].identity.type = SVGA3D_DECLTYPE_FLOAT4;
offset += 16;
nr_decls++;
break;
case TGSI_SEMANTIC_GENERIC:
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, src);
vdecl[nr_decls].identity.usage = SVGA3D_DECLUSAGE_TEXCOORD;
vdecl[nr_decls].identity.type = SVGA3D_DECLTYPE_FLOAT4;
vdecl[nr_decls].identity.usageIndex =
@@ -273,7 +271,7 @@ svga_swtnl_update_vdecl( struct svga_context *svga )
nr_decls++;
break;
case TGSI_SEMANTIC_FOG:
- draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src);
+ draw_emit_vertex_attr(vinfo, EMIT_1F, src);
vdecl[nr_decls].identity.usage = SVGA3D_DECLUSAGE_TEXCOORD;
vdecl[nr_decls].identity.type = SVGA3D_DECLTYPE_FLOAT1;
assert(vdecl[nr_decls].identity.usageIndex == 0);
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
index dbb90f7654e..489e68f88e8 100644
--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -166,7 +166,7 @@ scalar(struct src_register src, unsigned comp)
static boolean
svga_arl_needs_adjustment( const struct svga_shader_emitter *emit )
{
- int i;
+ unsigned i;
for (i = 0; i < emit->num_arl_consts; ++i) {
if (emit->arl_consts[i].arl_num == emit->current_arl)
@@ -179,7 +179,7 @@ svga_arl_needs_adjustment( const struct svga_shader_emitter *emit )
static int
svga_arl_adjustment( const struct svga_shader_emitter *emit )
{
- int i;
+ unsigned i;
for (i = 0; i < emit->num_arl_consts; ++i) {
if (emit->arl_consts[i].arl_num == emit->current_arl)
@@ -1175,7 +1175,7 @@ emit_div(struct svga_shader_emitter *emit,
const struct src_register src1 =
translate_src_register(emit, &insn->Src[1] );
SVGA3dShaderDestToken temp = get_temp( emit );
- int i;
+ unsigned i;
/* For each enabled element, perform a RCP instruction. Note that
* RCP is scalar in SVGA3D:
@@ -1822,7 +1822,7 @@ emit_tex_swizzle(struct svga_shader_emitter *emit,
const unsigned swizzleIn[4] = {swizzle_x, swizzle_y, swizzle_z, swizzle_w};
unsigned srcSwizzle[4];
unsigned srcWritemask = 0x0, zeroWritemask = 0x0, oneWritemask = 0x0;
- int i;
+ unsigned i;
/* build writemasks and srcSwizzle terms */
for (i = 0; i < 4; i++) {
@@ -3371,7 +3371,7 @@ emit_light_twoside(struct svga_shader_emitter *emit)
struct src_register back[2];
SVGA3dShaderDestToken color[2];
int count = emit->internal_color_count;
- int i;
+ unsigned i;
SVGA3dShaderInstToken if_token;
if (count == 0)
@@ -3698,7 +3698,7 @@ static boolean
pre_parse_add_indirect( struct svga_shader_emitter *emit,
int num, int current_arl)
{
- int i;
+ unsigned i;
assert(num < 0);
for (i = 0; i < emit->num_arl_consts; ++i) {
@@ -3844,7 +3844,8 @@ svga_shader_emit_instructions(struct svga_shader_emitter *emit,
if (emit->unit == PIPE_SHADER_FRAGMENT && emit->key.fs.pstipple) {
unsigned unit;
- new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0);
+ new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
+ TGSI_FILE_INPUT);
if (new_tokens) {
/* Setup texture state for stipple */
diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
index c979f4a8a56..1223e446055 100644
--- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
+++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
@@ -2298,11 +2298,13 @@ emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
return TRUE;
+#if 0
case TGSI_FILE_RESOURCE:
/*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/
/* XXX more, VGPU10_RETURN_TYPE_FLOAT */
assert(!"TGSI_FILE_RESOURCE not handled yet");
return FALSE;
+#endif
case TGSI_FILE_ADDRESS:
emit->num_address_regs = MAX2(emit->num_address_regs,
@@ -6170,6 +6172,11 @@ emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
while (adjust_mask) {
unsigned index = u_bit_scan(&adjust_mask);
+
+ /* skip the instruction if this vertex attribute is not being used */
+ if (emit->info.input_usage_mask[index] == 0)
+ continue;
+
unsigned tmp = emit->vs.adjusted_input[index];
struct tgsi_full_src_register input_src =
make_src_reg(TGSI_FILE_INPUT, index);
@@ -6604,7 +6611,8 @@ transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
tgsi_dump(tokens,0);
}
- new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0);
+ new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
+ TGSI_FILE_INPUT);
emit->fs.pstipple_sampler_unit = unit;
diff --git a/src/gallium/drivers/svga/svga_winsys.h b/src/gallium/drivers/svga/svga_winsys.h
index 3129e46ed06..562c6690fc1 100644
--- a/src/gallium/drivers/svga/svga_winsys.h
+++ b/src/gallium/drivers/svga/svga_winsys.h
@@ -85,7 +85,7 @@ struct winsys_handle;
#define SVGA_QUERY_FLAG_SET (1 << 0)
#define SVGA_QUERY_FLAG_REF (1 << 1)
-#define SVGA_HINT_FLAG_DRAW_EMITTED (1 << 0)
+#define SVGA_HINT_FLAG_CAN_PRE_FLUSH (1 << 0) /* Can preemptively flush */
/** Opaque surface handle */
struct svga_winsys_surface;
diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c
index 312b006f96e..a0888f23265 100644
--- a/src/gallium/drivers/vc4/vc4_context.c
+++ b/src/gallium/drivers/vc4/vc4_context.c
@@ -254,8 +254,9 @@ vc4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
if (!vc4->primconvert)
goto fail;
- vc4->uploader = u_upload_create(pctx, 16 * 1024, 4,
- PIPE_BIND_INDEX_BUFFER);
+ vc4->uploader = u_upload_create(pctx, 16 * 1024,
+ PIPE_BIND_INDEX_BUFFER,
+ PIPE_USAGE_STREAM);
vc4_debug |= saved_shaderdb_flag;
diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c
index c00855698b8..9b0b540d3fc 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -319,7 +319,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
if (vc4->indexbuf.user_buffer) {
prsc = NULL;
u_upload_data(vc4->uploader, 0,
- info->count * index_size,
+ info->count * index_size, 4,
vc4->indexbuf.user_buffer,
&offset, &prsc);
} else {
diff --git a/src/gallium/drivers/vc4/vc4_opt_algebraic.c b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
index aea2b9dbe87..b8ce377ff6b 100644
--- a/src/gallium/drivers/vc4/vc4_opt_algebraic.c
+++ b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
@@ -145,43 +145,6 @@ qir_opt_algebraic(struct vc4_compile *c)
list_for_each_entry(struct qinst, inst, &c->instructions, link) {
switch (inst->op) {
- case QOP_SEL_X_Y_ZS:
- case QOP_SEL_X_Y_ZC:
- case QOP_SEL_X_Y_NS:
- case QOP_SEL_X_Y_NC:
- case QOP_SEL_X_Y_CS:
- case QOP_SEL_X_Y_CC:
- if (is_zero(c, inst->src[1])) {
- /* Replace references to a 0 uniform value
- * with the SEL_X_0 equivalent.
- */
- dump_from(c, inst);
- inst->op -= (QOP_SEL_X_Y_ZS - QOP_SEL_X_0_ZS);
- inst->src[1] = c->undef;
- progress = true;
- dump_to(c, inst);
- break;
- }
-
- if (is_zero(c, inst->src[0])) {
- /* Replace references to a 0 uniform value
- * with the SEL_X_0 equivalent, flipping the
- * condition being evaluated since the operand
- * order is flipped.
- */
- dump_from(c, inst);
- inst->op -= QOP_SEL_X_Y_ZS;
- inst->op ^= 1;
- inst->op += QOP_SEL_X_0_ZS;
- inst->src[0] = inst->src[1];
- inst->src[1] = c->undef;
- progress = true;
- dump_to(c, inst);
- break;
- }
-
- break;
-
case QOP_FMIN:
if (is_1f(c, inst->src[1]) &&
inst->src[0].pack >= QPU_UNPACK_8D_REP &&
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index da0d21111a0..3e402d048ba 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -89,7 +89,7 @@ indirect_uniform_load(struct vc4_compile *c, nir_intrinsic_instr *intr)
range->dst_offset = c->next_ubo_dst_offset;
c->next_ubo_dst_offset += range->size;
c->num_ubo_ranges++;
- };
+ }
offset -= range->src_offset;
@@ -204,27 +204,6 @@ ntq_get_alu_src(struct vc4_compile *c, nir_alu_instr *instr,
return r;
};
-static struct qreg
-get_swizzled_channel(struct vc4_compile *c,
- struct qreg *srcs, int swiz)
-{
- switch (swiz) {
- default:
- case UTIL_FORMAT_SWIZZLE_NONE:
- fprintf(stderr, "warning: unknown swizzle\n");
- /* FALLTHROUGH */
- case UTIL_FORMAT_SWIZZLE_0:
- return qir_uniform_f(c, 0.0);
- case UTIL_FORMAT_SWIZZLE_1:
- return qir_uniform_f(c, 1.0);
- case UTIL_FORMAT_SWIZZLE_X:
- case UTIL_FORMAT_SWIZZLE_Y:
- case UTIL_FORMAT_SWIZZLE_Z:
- case UTIL_FORMAT_SWIZZLE_W:
- return srcs[swiz];
- }
-}
-
static inline struct qreg
qir_SAT(struct vc4_compile *c, struct qreg val)
{
@@ -275,7 +254,7 @@ qir_srgb_decode(struct vc4_compile *c, struct qreg srgb)
qir_uniform_f(c, 2.4));
qir_SF(c, qir_FSUB(c, srgb, qir_uniform_f(c, 0.04045)));
- return qir_SEL_X_Y_NS(c, low, high);
+ return qir_SEL(c, QPU_COND_NS, low, high);
}
static struct qreg
@@ -338,30 +317,20 @@ ntq_emit_txf(struct vc4_compile *c, nir_tex_instr *instr)
struct qreg tex = qir_TEX_RESULT(c);
c->num_texture_samples++;
- struct qreg texture_output[4];
+ struct qreg *dest = ntq_get_dest(c, &instr->dest);
enum pipe_format format = c->key->tex[unit].format;
if (util_format_is_depth_or_stencil(format)) {
struct qreg scaled = ntq_scale_depth_texture(c, tex);
for (int i = 0; i < 4; i++)
- texture_output[i] = scaled;
+ dest[i] = scaled;
} else {
- struct qreg tex_result_unpacked[4];
for (int i = 0; i < 4; i++)
- tex_result_unpacked[i] = qir_UNPACK_8_F(c, tex, i);
-
- const uint8_t *format_swiz =
- vc4_get_format_swizzle(c->key->tex[unit].format);
- for (int i = 0; i < 4; i++) {
- texture_output[i] =
- get_swizzled_channel(c, tex_result_unpacked,
- format_swiz[i]);
- }
+ dest[i] = qir_UNPACK_8_F(c, tex, i);
}
- struct qreg *dest = ntq_get_dest(c, &instr->dest);
for (int i = 0; i < 4; i++) {
- dest[i] = get_swizzled_channel(c, texture_output,
- c->key->tex[unit].swizzle[i]);
+ if (c->tex_srgb_decode[unit] & (1 << i))
+ dest[i] = qir_srgb_decode(c, dest[i]);
}
}
@@ -470,12 +439,13 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
enum pipe_format format = c->key->tex[unit].format;
- struct qreg unpacked[4];
+ struct qreg *dest = ntq_get_dest(c, &instr->dest);
if (util_format_is_depth_or_stencil(format)) {
struct qreg normalized = ntq_scale_depth_texture(c, tex);
struct qreg depth_output;
- struct qreg one = qir_uniform_f(c, 1.0f);
+ struct qreg u0 = qir_uniform_f(c, 0.0f);
+ struct qreg u1 = qir_uniform_f(c, 1.0f);
if (c->key->tex[unit].compare_mode) {
if (has_proj)
compare = qir_FMUL(c, compare, proj);
@@ -485,31 +455,31 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
depth_output = qir_uniform_f(c, 0.0f);
break;
case PIPE_FUNC_ALWAYS:
- depth_output = one;
+ depth_output = u1;
break;
case PIPE_FUNC_EQUAL:
qir_SF(c, qir_FSUB(c, compare, normalized));
- depth_output = qir_SEL_X_0_ZS(c, one);
+ depth_output = qir_SEL(c, QPU_COND_ZS, u1, u0);
break;
case PIPE_FUNC_NOTEQUAL:
qir_SF(c, qir_FSUB(c, compare, normalized));
- depth_output = qir_SEL_X_0_ZC(c, one);
+ depth_output = qir_SEL(c, QPU_COND_ZC, u1, u0);
break;
case PIPE_FUNC_GREATER:
qir_SF(c, qir_FSUB(c, compare, normalized));
- depth_output = qir_SEL_X_0_NC(c, one);
+ depth_output = qir_SEL(c, QPU_COND_NC, u1, u0);
break;
case PIPE_FUNC_GEQUAL:
qir_SF(c, qir_FSUB(c, normalized, compare));
- depth_output = qir_SEL_X_0_NS(c, one);
+ depth_output = qir_SEL(c, QPU_COND_NS, u1, u0);
break;
case PIPE_FUNC_LESS:
qir_SF(c, qir_FSUB(c, compare, normalized));
- depth_output = qir_SEL_X_0_NS(c, one);
+ depth_output = qir_SEL(c, QPU_COND_NS, u1, u0);
break;
case PIPE_FUNC_LEQUAL:
qir_SF(c, qir_FSUB(c, normalized, compare));
- depth_output = qir_SEL_X_0_NC(c, one);
+ depth_output = qir_SEL(c, QPU_COND_NC, u1, u0);
break;
}
} else {
@@ -517,29 +487,15 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
}
for (int i = 0; i < 4; i++)
- unpacked[i] = depth_output;
+ dest[i] = depth_output;
} else {
for (int i = 0; i < 4; i++)
- unpacked[i] = qir_UNPACK_8_F(c, tex, i);
- }
-
- const uint8_t *format_swiz = vc4_get_format_swizzle(format);
- struct qreg texture_output[4];
- for (int i = 0; i < 4; i++) {
- texture_output[i] = get_swizzled_channel(c, unpacked,
- format_swiz[i]);
- }
-
- if (util_format_is_srgb(format)) {
- for (int i = 0; i < 3; i++)
- texture_output[i] = qir_srgb_decode(c,
- texture_output[i]);
+ dest[i] = qir_UNPACK_8_F(c, tex, i);
}
- struct qreg *dest = ntq_get_dest(c, &instr->dest);
for (int i = 0; i < 4; i++) {
- dest[i] = get_swizzled_channel(c, texture_output,
- c->key->tex[unit].swizzle[i]);
+ if (c->tex_srgb_decode[unit] & (1 << i))
+ dest[i] = qir_srgb_decode(c, dest[i]);
}
}
@@ -553,9 +509,8 @@ ntq_ffract(struct vc4_compile *c, struct qreg src)
struct qreg trunc = qir_ITOF(c, qir_FTOI(c, src));
struct qreg diff = qir_FSUB(c, src, trunc);
qir_SF(c, diff);
- return qir_SEL_X_Y_NS(c,
- qir_FADD(c, diff, qir_uniform_f(c, 1.0)),
- diff);
+ return qir_SEL(c, QPU_COND_NS,
+ qir_FADD(c, diff, qir_uniform_f(c, 1.0)), diff);
}
/**
@@ -572,9 +527,8 @@ ntq_ffloor(struct vc4_compile *c, struct qreg src)
*/
qir_SF(c, qir_FSUB(c, src, trunc));
- return qir_SEL_X_Y_NS(c,
- qir_FSUB(c, trunc, qir_uniform_f(c, 1.0)),
- trunc);
+ return qir_SEL(c, QPU_COND_NS,
+ qir_FSUB(c, trunc, qir_uniform_f(c, 1.0)), trunc);
}
/**
@@ -591,9 +545,8 @@ ntq_fceil(struct vc4_compile *c, struct qreg src)
*/
qir_SF(c, qir_FSUB(c, trunc, src));
- return qir_SEL_X_Y_NS(c,
- qir_FADD(c, trunc, qir_uniform_f(c, 1.0)),
- trunc);
+ return qir_SEL(c, QPU_COND_NS,
+ qir_FADD(c, trunc, qir_uniform_f(c, 1.0)), trunc);
}
static struct qreg
@@ -668,10 +621,13 @@ ntq_fcos(struct vc4_compile *c, struct qreg src)
static struct qreg
ntq_fsign(struct vc4_compile *c, struct qreg src)
{
+ struct qreg t = qir_get_temp(c);
+
qir_SF(c, src);
- return qir_SEL_X_Y_NC(c,
- qir_SEL_X_0_ZC(c, qir_uniform_f(c, 1.0)),
- qir_uniform_f(c, -1.0));
+ qir_MOV_dest(c, t, qir_uniform_f(c, 0.0));
+ qir_MOV_dest(c, t, qir_uniform_f(c, 1.0))->cond = QPU_COND_ZC;
+ qir_MOV_dest(c, t, qir_uniform_f(c, -1.0))->cond = QPU_COND_NS;
+ return t;
}
static void
@@ -888,6 +844,100 @@ ntq_emit_ubfe(struct vc4_compile *c, struct qreg base, struct qreg offset,
return qir_UNPACK_8_I(c, base, offset_bit / 8);
}
+/**
+ * If compare_instr is a valid comparison instruction, emits the
+ * compare_instr's comparison and returns the sel_instr's return value based
+ * on the compare_instr's result.
+ */
+static bool
+ntq_emit_comparison(struct vc4_compile *c, struct qreg *dest,
+ nir_alu_instr *compare_instr,
+ nir_alu_instr *sel_instr)
+{
+ enum qpu_cond cond;
+
+ switch (compare_instr->op) {
+ case nir_op_feq:
+ case nir_op_ieq:
+ case nir_op_seq:
+ cond = QPU_COND_ZS;
+ break;
+ case nir_op_fne:
+ case nir_op_ine:
+ case nir_op_sne:
+ cond = QPU_COND_ZC;
+ break;
+ case nir_op_fge:
+ case nir_op_ige:
+ case nir_op_uge:
+ case nir_op_sge:
+ cond = QPU_COND_NC;
+ break;
+ case nir_op_flt:
+ case nir_op_ilt:
+ case nir_op_slt:
+ cond = QPU_COND_NS;
+ break;
+ default:
+ return false;
+ }
+
+ struct qreg src0 = ntq_get_alu_src(c, compare_instr, 0);
+ struct qreg src1 = ntq_get_alu_src(c, compare_instr, 1);
+
+ if (nir_op_infos[compare_instr->op].input_types[0] == nir_type_float)
+ qir_SF(c, qir_FSUB(c, src0, src1));
+ else
+ qir_SF(c, qir_SUB(c, src0, src1));
+
+ switch (sel_instr->op) {
+ case nir_op_seq:
+ case nir_op_sne:
+ case nir_op_sge:
+ case nir_op_slt:
+ *dest = qir_SEL(c, cond,
+ qir_uniform_f(c, 1.0), qir_uniform_f(c, 0.0));
+ break;
+
+ case nir_op_bcsel:
+ *dest = qir_SEL(c, cond,
+ ntq_get_alu_src(c, sel_instr, 1),
+ ntq_get_alu_src(c, sel_instr, 2));
+ break;
+
+ default:
+ *dest = qir_SEL(c, cond,
+ qir_uniform_ui(c, ~0), qir_uniform_ui(c, 0));
+ break;
+ }
+
+ return true;
+}
+
+/**
+ * Attempts to fold a comparison generating a boolean result into the
+ * condition code for selecting between two values, instead of comparing the
+ * boolean result against 0 to generate the condition code.
+ */
+static struct qreg ntq_emit_bcsel(struct vc4_compile *c, nir_alu_instr *instr,
+ struct qreg *src)
+{
+ if (!instr->src[0].src.is_ssa)
+ goto out;
+ nir_alu_instr *compare =
+ nir_instr_as_alu(instr->src[0].src.ssa->parent_instr);
+ if (!compare)
+ goto out;
+
+ struct qreg dest;
+ if (ntq_emit_comparison(c, &dest, compare, instr))
+ return dest;
+
+out:
+ qir_SF(c, src[0]);
+ return qir_SEL(c, QPU_COND_NS, src[1], src[2]);
+}
+
static void
ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
{
@@ -974,7 +1024,9 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
case nir_op_i2b:
case nir_op_f2b:
qir_SF(c, src[0]);
- *dest = qir_SEL_X_0_ZC(c, qir_uniform_ui(c, ~0));
+ *dest = qir_SEL(c, QPU_COND_ZC,
+ qir_uniform_ui(c, ~0),
+ qir_uniform_ui(c, 0));
break;
case nir_op_iadd:
@@ -1016,65 +1068,29 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
break;
case nir_op_seq:
- qir_SF(c, qir_FSUB(c, src[0], src[1]));
- *dest = qir_SEL_X_0_ZS(c, qir_uniform_f(c, 1.0));
- break;
case nir_op_sne:
- qir_SF(c, qir_FSUB(c, src[0], src[1]));
- *dest = qir_SEL_X_0_ZC(c, qir_uniform_f(c, 1.0));
- break;
case nir_op_sge:
- qir_SF(c, qir_FSUB(c, src[0], src[1]));
- *dest = qir_SEL_X_0_NC(c, qir_uniform_f(c, 1.0));
- break;
case nir_op_slt:
- qir_SF(c, qir_FSUB(c, src[0], src[1]));
- *dest = qir_SEL_X_0_NS(c, qir_uniform_f(c, 1.0));
- break;
case nir_op_feq:
- qir_SF(c, qir_FSUB(c, src[0], src[1]));
- *dest = qir_SEL_X_0_ZS(c, qir_uniform_ui(c, ~0));
- break;
case nir_op_fne:
- qir_SF(c, qir_FSUB(c, src[0], src[1]));
- *dest = qir_SEL_X_0_ZC(c, qir_uniform_ui(c, ~0));
- break;
case nir_op_fge:
- qir_SF(c, qir_FSUB(c, src[0], src[1]));
- *dest = qir_SEL_X_0_NC(c, qir_uniform_ui(c, ~0));
- break;
case nir_op_flt:
- qir_SF(c, qir_FSUB(c, src[0], src[1]));
- *dest = qir_SEL_X_0_NS(c, qir_uniform_ui(c, ~0));
- break;
case nir_op_ieq:
- qir_SF(c, qir_SUB(c, src[0], src[1]));
- *dest = qir_SEL_X_0_ZS(c, qir_uniform_ui(c, ~0));
- break;
case nir_op_ine:
- qir_SF(c, qir_SUB(c, src[0], src[1]));
- *dest = qir_SEL_X_0_ZC(c, qir_uniform_ui(c, ~0));
- break;
case nir_op_ige:
- qir_SF(c, qir_SUB(c, src[0], src[1]));
- *dest = qir_SEL_X_0_NC(c, qir_uniform_ui(c, ~0));
- break;
case nir_op_uge:
- qir_SF(c, qir_SUB(c, src[0], src[1]));
- *dest = qir_SEL_X_0_CC(c, qir_uniform_ui(c, ~0));
- break;
case nir_op_ilt:
- qir_SF(c, qir_SUB(c, src[0], src[1]));
- *dest = qir_SEL_X_0_NS(c, qir_uniform_ui(c, ~0));
+ if (!ntq_emit_comparison(c, dest, instr, instr)) {
+ fprintf(stderr, "Bad comparison instruction\n");
+ }
break;
case nir_op_bcsel:
- qir_SF(c, src[0]);
- *dest = qir_SEL_X_Y_NS(c, src[1], src[2]);
+ *dest = ntq_emit_bcsel(c, instr, src);
break;
case nir_op_fcsel:
qir_SF(c, src[0]);
- *dest = qir_SEL_X_Y_ZC(c, src[1], src[2]);
+ *dest = qir_SEL(c, QPU_COND_ZC, src[1], src[2]);
break;
case nir_op_frcp:
@@ -1789,6 +1805,56 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
if (stage == QSTAGE_FRAG)
vc4_nir_lower_blend(c);
+ struct nir_lower_tex_options tex_options = {
+ /* We would need to implement txs, but we don't want the
+ * int/float conversions
+ */
+ .lower_rect = false,
+
+ /* We want to use this, but we don't want to newton-raphson
+ * its rcp.
+ */
+ .lower_txp = false,
+
+ /* Apply swizzles to all samplers. */
+ .swizzle_result = ~0,
+ };
+
+ /* Lower the format swizzle and ARB_texture_swizzle-style swizzle.
+ * The format swizzling applies before sRGB decode, and
+ * ARB_texture_swizzle is the last thing before returning the sample.
+ */
+ for (int i = 0; i < ARRAY_SIZE(key->tex); i++) {
+ enum pipe_format format = c->key->tex[i].format;
+
+ if (!format)
+ continue;
+
+ const uint8_t *format_swizzle = vc4_get_format_swizzle(format);
+
+ for (int j = 0; j < 4; j++) {
+ uint8_t arb_swiz = c->key->tex[i].swizzle[j];
+
+ if (arb_swiz <= 3) {
+ tex_options.swizzles[i][j] =
+ format_swizzle[arb_swiz];
+ } else {
+ tex_options.swizzles[i][j] = arb_swiz;
+ }
+
+ /* If ARB_texture_swizzle is reading from the R, G, or
+ * B channels of an sRGB texture, then we need to
+ * apply sRGB decode to this channel at sample time.
+ */
+ if (arb_swiz < 3 && util_format_is_srgb(format)) {
+ c->tex_srgb_decode[i] |= (1 << j);
+ }
+
+ }
+ }
+
+ nir_lower_tex(c->s, &tex_options);
+
if (c->fs_key && c->fs_key->light_twoside)
nir_lower_two_sided_color(c->s);
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index c6916c48e7e..efbb69b71a7 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -65,19 +65,6 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_XOR] = { "xor", 1, 2 },
[QOP_NOT] = { "not", 1, 1 },
- [QOP_SEL_X_0_NS] = { "fsel_x_0_ns", 1, 1, false, true },
- [QOP_SEL_X_0_NC] = { "fsel_x_0_nc", 1, 1, false, true },
- [QOP_SEL_X_0_ZS] = { "fsel_x_0_zs", 1, 1, false, true },
- [QOP_SEL_X_0_ZC] = { "fsel_x_0_zc", 1, 1, false, true },
- [QOP_SEL_X_0_CS] = { "fsel_x_0_cs", 1, 1, false, true },
- [QOP_SEL_X_0_CC] = { "fsel_x_0_cc", 1, 1, false, true },
- [QOP_SEL_X_Y_NS] = { "fsel_x_y_ns", 1, 2, false, true },
- [QOP_SEL_X_Y_NC] = { "fsel_x_y_nc", 1, 2, false, true },
- [QOP_SEL_X_Y_ZS] = { "fsel_x_y_zs", 1, 2, false, true },
- [QOP_SEL_X_Y_ZC] = { "fsel_x_y_zc", 1, 2, false, true },
- [QOP_SEL_X_Y_CS] = { "fsel_x_y_cs", 1, 2, false, true },
- [QOP_SEL_X_Y_CC] = { "fsel_x_y_cc", 1, 2, false, true },
-
[QOP_RCP] = { "rcp", 1, 1, false, true },
[QOP_RSQ] = { "rsq", 1, 1, false, true },
[QOP_EXP2] = { "exp2", 1, 2, false, true },
@@ -219,23 +206,8 @@ qir_is_tex(struct qinst *inst)
bool
qir_depends_on_flags(struct qinst *inst)
{
- switch (inst->op) {
- case QOP_SEL_X_0_NS:
- case QOP_SEL_X_0_NC:
- case QOP_SEL_X_0_ZS:
- case QOP_SEL_X_0_ZC:
- case QOP_SEL_X_0_CS:
- case QOP_SEL_X_0_CC:
- case QOP_SEL_X_Y_NS:
- case QOP_SEL_X_Y_NC:
- case QOP_SEL_X_Y_ZS:
- case QOP_SEL_X_Y_ZC:
- case QOP_SEL_X_Y_CS:
- case QOP_SEL_X_Y_CC:
- return true;
- default:
- return false;
- }
+ return (inst->cond != QPU_COND_ALWAYS &&
+ inst->cond != QPU_COND_NEVER);
}
bool
@@ -292,8 +264,19 @@ qir_print_reg(struct vc4_compile *c, struct qreg reg, bool write)
void
qir_dump_inst(struct vc4_compile *c, struct qinst *inst)
{
- fprintf(stderr, "%s%s ",
+ static const char *conditions[] = {
+ [QPU_COND_ALWAYS] = "",
+ [QPU_COND_NEVER] = ".never",
+ [QPU_COND_ZS] = ".zs",
+ [QPU_COND_ZC] = ".zc",
+ [QPU_COND_NS] = ".ns",
+ [QPU_COND_NC] = ".nc",
+ [QPU_COND_CS] = ".cs",
+ [QPU_COND_CC] = ".cc",
+ };
+ fprintf(stderr, "%s%s%s ",
qir_get_op_name(inst->op),
+ conditions[inst->cond],
inst->sf ? ".sf" : "");
qir_print_reg(c, inst->dst, true);
@@ -352,6 +335,7 @@ qir_inst(enum qop op, struct qreg dst, struct qreg src0, struct qreg src1)
inst->src = calloc(2, sizeof(inst->src[0]));
inst->src[0] = src0;
inst->src[1] = src1;
+ inst->cond = QPU_COND_ALWAYS;
return inst;
}
@@ -503,9 +487,9 @@ qir_SF(struct vc4_compile *c, struct qreg src)
if (!list_empty(&c->instructions))
last_inst = (struct qinst *)c->instructions.prev;
- if (!last_inst ||
- last_inst->dst.file != src.file ||
- last_inst->dst.index != src.index ||
+ if (src.file != QFILE_TEMP ||
+ !c->defs[src.index] ||
+ last_inst != c->defs[src.index] ||
qir_is_multi_instruction(last_inst)) {
src = qir_MOV(c, src);
last_inst = (struct qinst *)c->instructions.prev;
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index b0fbb4c1db2..4ab4d35d0ca 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -93,23 +93,6 @@ enum qop {
QOP_XOR,
QOP_NOT,
- /* Note: Orderings of these compares must be the same as in
- * qpu_defines.h. Selects the src[0] if the ns flag bit is set,
- * otherwise 0. */
- QOP_SEL_X_0_ZS,
- QOP_SEL_X_0_ZC,
- QOP_SEL_X_0_NS,
- QOP_SEL_X_0_NC,
- QOP_SEL_X_0_CS,
- QOP_SEL_X_0_CC,
- /* Selects the src[0] if the ns flag bit is set, otherwise src[1]. */
- QOP_SEL_X_Y_ZS,
- QOP_SEL_X_Y_ZC,
- QOP_SEL_X_Y_NS,
- QOP_SEL_X_Y_NC,
- QOP_SEL_X_Y_CS,
- QOP_SEL_X_Y_CC,
-
QOP_FTOI,
QOP_ITOF,
QOP_RCP,
@@ -170,6 +153,7 @@ struct qinst {
struct qreg dst;
struct qreg *src;
bool sf;
+ uint8_t cond;
};
enum qstage {
@@ -385,6 +369,11 @@ struct vc4_compile {
uint8_t vattr_sizes[8];
+ /* Bitfield for whether a given channel of a sampler needs sRGB
+ * decode.
+ */
+ uint8_t tex_srgb_decode[VC4_MAX_TEXTURE_SAMPLERS];
+
/**
* Array of the VARYING_SLOT_* of all FS QFILE_VARY reads.
*
@@ -463,9 +452,11 @@ void qir_schedule_instructions(struct vc4_compile *c);
void qir_reorder_uniforms(struct vc4_compile *c);
void qir_emit(struct vc4_compile *c, struct qinst *inst);
-static inline void qir_emit_nodef(struct vc4_compile *c, struct qinst *inst)
+static inline struct qinst *
+qir_emit_nodef(struct vc4_compile *c, struct qinst *inst)
{
list_addtail(&inst->link, &c->instructions);
+ return inst;
}
struct qreg qir_get_temp(struct vc4_compile *c);
@@ -536,11 +527,12 @@ qir_##name(struct vc4_compile *c, struct qreg a) \
qir_emit(c, qir_inst(QOP_##name, t, a, c->undef)); \
return t; \
} \
-static inline void \
+static inline struct qinst * \
qir_##name##_dest(struct vc4_compile *c, struct qreg dest, \
struct qreg a) \
{ \
- qir_emit_nodef(c, qir_inst(QOP_##name, dest, a, c->undef)); \
+ return qir_emit_nodef(c, qir_inst(QOP_##name, dest, a, \
+ c->undef)); \
}
#define QIR_ALU2(name) \
@@ -592,18 +584,6 @@ QIR_ALU2(V8MAX)
QIR_ALU2(V8ADDS)
QIR_ALU2(V8SUBS)
QIR_ALU2(MUL24)
-QIR_ALU1(SEL_X_0_ZS)
-QIR_ALU1(SEL_X_0_ZC)
-QIR_ALU1(SEL_X_0_NS)
-QIR_ALU1(SEL_X_0_NC)
-QIR_ALU1(SEL_X_0_CS)
-QIR_ALU1(SEL_X_0_CC)
-QIR_ALU2(SEL_X_Y_ZS)
-QIR_ALU2(SEL_X_Y_ZC)
-QIR_ALU2(SEL_X_Y_NS)
-QIR_ALU2(SEL_X_Y_NC)
-QIR_ALU2(SEL_X_Y_CS)
-QIR_ALU2(SEL_X_Y_CC)
QIR_ALU2(FMIN)
QIR_ALU2(FMAX)
QIR_ALU2(FMINABS)
@@ -648,6 +628,17 @@ QIR_NODST_1(TLB_STENCIL_SETUP)
QIR_NODST_1(MS_MASK)
static inline struct qreg
+qir_SEL(struct vc4_compile *c, uint8_t cond, struct qreg src0, struct qreg src1)
+{
+ struct qreg t = qir_get_temp(c);
+ struct qinst *a = qir_MOV_dest(c, t, src0);
+ struct qinst *b = qir_MOV_dest(c, t, src1);
+ a->cond = cond;
+ b->cond = cond ^ 1;
+ return t;
+}
+
+static inline struct qreg
qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i)
{
struct qreg t = qir_FMOV(c, src);
diff --git a/src/gallium/drivers/vc4/vc4_qir_schedule.c b/src/gallium/drivers/vc4/vc4_qir_schedule.c
index d20815f055e..2f280c54523 100644
--- a/src/gallium/drivers/vc4/vc4_qir_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qir_schedule.c
@@ -250,12 +250,11 @@ calculate_deps(struct schedule_setup_state *state, struct schedule_node *n)
else if (inst->dst.file == QFILE_TEMP)
add_write_dep(dir, &state->last_temp_write[inst->dst.index], n);
+ if (qir_depends_on_flags(inst))
+ add_dep(dir, state->last_sf, n);
+
if (inst->sf)
add_write_dep(dir, &state->last_sf, n);
-
- if (qir_depends_on_flags(inst)) {
- add_dep(dir, state->last_sf, n);
- }
}
static void
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index cb4e0cfcc3f..b06702afea2 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -64,6 +64,12 @@ set_last_cond_add(struct vc4_compile *c, uint32_t cond)
*last_inst(c) = qpu_set_cond_add(*last_inst(c), cond);
}
+static void
+set_last_cond_mul(struct vc4_compile *c, uint32_t cond)
+{
+ *last_inst(c) = qpu_set_cond_mul(*last_inst(c), cond);
+}
+
/**
* Some special registers can be read from either file, which lets us resolve
* raddr conflicts without extra MOVs.
@@ -306,42 +312,9 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
break;
}
- switch (qinst->op) {
- case QOP_SEL_X_0_ZS:
- case QOP_SEL_X_0_ZC:
- case QOP_SEL_X_0_NS:
- case QOP_SEL_X_0_NC:
- case QOP_SEL_X_0_CS:
- case QOP_SEL_X_0_CC:
- queue(c, qpu_a_MOV(dst, src[0]) | unpack);
- set_last_cond_add(c, qinst->op - QOP_SEL_X_0_ZS +
- QPU_COND_ZS);
-
- queue(c, qpu_a_XOR(dst, qpu_r0(), qpu_r0()));
- set_last_cond_add(c, ((qinst->op - QOP_SEL_X_0_ZS) ^
- 1) + QPU_COND_ZS);
- break;
-
- case QOP_SEL_X_Y_ZS:
- case QOP_SEL_X_Y_ZC:
- case QOP_SEL_X_Y_NS:
- case QOP_SEL_X_Y_NC:
- case QOP_SEL_X_Y_CS:
- case QOP_SEL_X_Y_CC:
- queue(c, qpu_a_MOV(dst, src[0]));
- if (qinst->src[0].pack)
- *(last_inst(c)) |= unpack;
- set_last_cond_add(c, qinst->op - QOP_SEL_X_Y_ZS +
- QPU_COND_ZS);
-
- queue(c, qpu_a_MOV(dst, src[1]));
- if (qinst->src[1].pack)
- *(last_inst(c)) |= unpack;
- set_last_cond_add(c, ((qinst->op - QOP_SEL_X_Y_ZS) ^
- 1) + QPU_COND_ZS);
-
- break;
+ bool handled_qinst_cond = true;
+ switch (qinst->op) {
case QOP_RCP:
case QOP_RSQ:
case QOP_EXP2:
@@ -497,16 +470,22 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
queue(c, qpu_m_alu2(translate[qinst->op].op,
dst,
src[0], src[1]) | unpack);
+ set_last_cond_mul(c, qinst->cond);
} else {
queue(c, qpu_a_alu2(translate[qinst->op].op,
dst,
src[0], src[1]) | unpack);
+ set_last_cond_add(c, qinst->cond);
}
+ handled_qinst_cond = true;
set_last_dst_pack(c, qinst);
break;
}
+ assert(qinst->cond == QPU_COND_ALWAYS ||
+ handled_qinst_cond);
+
if (qinst->sf) {
assert(!qir_is_multi_instruction(qinst));
*last_inst(c) |= QPU_SF;
diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c
index 9e6678a0625..036da329987 100644
--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -921,7 +921,7 @@ vc4_get_shadow_index_buffer(struct pipe_context *pctx,
void *data;
struct pipe_resource *shadow_rsc = NULL;
- u_upload_alloc(vc4->uploader, 0, count * 2,
+ u_upload_alloc(vc4->uploader, 0, count * 2, 4,
shadow_offset, &shadow_rsc, &data);
uint16_t *dst = data;
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
index 8ddf0865d21..0e289432bbe 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -100,6 +100,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TWO_SIDED_STENCIL:
case PIPE_CAP_USER_INDEX_BUFFERS:
case PIPE_CAP_TEXTURE_MULTISAMPLE:
+ case PIPE_CAP_TEXTURE_SWIZZLE:
return 1;
/* lying for GL 2.0 */
@@ -128,7 +129,6 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
case PIPE_CAP_CUBE_MAP_ARRAY:
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
- case PIPE_CAP_TEXTURE_SWIZZLE:
case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
case PIPE_CAP_SEAMLESS_CUBE_MAP:
@@ -171,6 +171,8 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_MAX_TEXEL_OFFSET:
case PIPE_CAP_MAX_VERTEX_STREAMS:
case PIPE_CAP_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_SAMPLER_VIEW_TARGET:
@@ -180,15 +182,20 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
- case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
- case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
- case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
- case PIPE_CAP_DEPTH_BOUNDS_TEST:
- case PIPE_CAP_TGSI_TXQS:
- case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
- case PIPE_CAP_SHAREABLE_SHADERS:
- case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
- case PIPE_CAP_CLEAR_TEXTURE:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
+ case PIPE_CAP_TGSI_TXQS:
+ case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+ case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+ case PIPE_CAP_CLEAR_TEXTURE:
+ case PIPE_CAP_DRAW_PARAMETERS:
+ case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+ case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+ case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
return 0;
/* Stream output. */
@@ -345,6 +352,8 @@ vc4_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
return PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
+ case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ return 0;
default:
fprintf(stderr, "unknown shader param %d\n", param);
return 0;
diff --git a/src/gallium/drivers/virgl/virgl_context.c b/src/gallium/drivers/virgl/virgl_context.c
index 527f7637cb6..c322503d816 100644
--- a/src/gallium/drivers/virgl/virgl_context.c
+++ b/src/gallium/drivers/virgl/virgl_context.c
@@ -605,7 +605,7 @@ static void virgl_draw_vbo(struct pipe_context *ctx,
ib.offset = vctx->index_buffer.offset + info.start * ib.index_size;
if (ib.user_buffer) {
- u_upload_data(vctx->uploader, 0, info.count * ib.index_size,
+ u_upload_data(vctx->uploader, 0, info.count * ib.index_size, 256,
ib.user_buffer, &ib.offset, &ib.buffer);
ib.user_buffer = NULL;
}
@@ -948,8 +948,8 @@ struct pipe_context *virgl_context_create(struct pipe_screen *pscreen,
16, UTIL_SLAB_SINGLETHREADED);
vctx->primconvert = util_primconvert_create(&vctx->base, rs->caps.caps.v1.prim_mask);
- vctx->uploader = u_upload_create(&vctx->base, 1024 * 1024, 256,
- PIPE_BIND_INDEX_BUFFER);
+ vctx->uploader = u_upload_create(&vctx->base, 1024 * 1024,
+ PIPE_BIND_INDEX_BUFFER, PIPE_USAGE_STREAM);
if (!vctx->uploader)
goto fail;
diff --git a/src/gallium/drivers/virgl/virgl_screen.c b/src/gallium/drivers/virgl/virgl_screen.c
index 26a4f7736e3..e8d82b37c0f 100644
--- a/src/gallium/drivers/virgl/virgl_screen.c
+++ b/src/gallium/drivers/virgl/virgl_screen.c
@@ -201,6 +201,8 @@ virgl_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
case PIPE_CAP_MAX_VERTEX_STREAMS:
case PIPE_CAP_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
@@ -219,6 +221,11 @@ virgl_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_CLEAR_TEXTURE:
+ case PIPE_CAP_DRAW_PARAMETERS:
+ case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+ case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+ case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
return 0;
case PIPE_CAP_VENDOR_ID:
return 0x1af4;