summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/auxiliary/Makefile.am18
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.c4
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.h1
-rw-r--r--src/gallium/auxiliary/draw/draw_context.c18
-rw-r--r--src/gallium/auxiliary/draw/draw_context.h6
-rw-r--r--src/gallium/auxiliary/draw/draw_gs.c4
-rw-r--r--src/gallium/auxiliary/draw/draw_private.h3
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_exec.c5
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.c116
-rw-r--r--src/gallium/auxiliary/hud/hud_context.c1
-rw-r--r--src/gallium/auxiliary/nir/tgsi_to_nir.c2
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer.h6
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c5
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c7
-rw-r--r--src/gallium/auxiliary/postprocess/pp_run.c1
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_x86sse.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c217
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.h37
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_lowering.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_parse.c2
-rw-r--r--src/gallium/auxiliary/util/u_blit.c1
-rw-r--r--src/gallium/auxiliary/util/u_blitter.c4
-rw-r--r--src/gallium/auxiliary/util/u_math.h6
-rw-r--r--src/gallium/docs/source/context.rst3
-rw-r--r--src/gallium/docs/source/screen.rst5
-rw-r--r--src/gallium/docs/source/tgsi.rst2
-rw-r--r--src/gallium/drivers/ddebug/dd_context.c9
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_draw.c17
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_query.c7
-rw-r--r--src/gallium/drivers/freedreno/freedreno_context.h1
-rw-r--r--src/gallium/drivers/freedreno/freedreno_draw.c13
-rw-r--r--src/gallium/drivers/freedreno/freedreno_query.c6
-rw-r--r--src/gallium/drivers/freedreno/freedreno_query_sw.c2
-rw-r--r--src/gallium/drivers/freedreno/freedreno_screen.c12
-rw-r--r--src/gallium/drivers/freedreno/freedreno_state.c3
-rw-r--r--src/gallium/drivers/freedreno/freedreno_util.h1
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3.h12
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c12
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_cp.c31
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_legalize.c8
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_nir.c3
-rw-r--r--src/gallium/drivers/i915/i915_query.c6
-rw-r--r--src/gallium/drivers/i915/i915_screen.c1
-rw-r--r--src/gallium/drivers/ilo/ilo_query.c6
-rw-r--r--src/gallium/drivers/ilo/ilo_screen.c1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_query.c6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_screen.c1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_arit.c3
-rw-r--r--src/gallium/drivers/noop/noop_pipe.c6
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp4
-rw-r--r--src/gallium/drivers/nouveau/nouveau_buffer.c33
-rw-r--r--src/gallium/drivers/nouveau/nouveau_buffer.h4
-rw-r--r--src/gallium/drivers/nouveau/nv30/nv30_query.c6
-rw-r--r--src/gallium/drivers/nouveau/nv30/nv30_screen.c2
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_context.c25
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query.c6
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_resource.c8
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_resource.h3
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_screen.c5
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_context.c25
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_program.c7
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query.c6
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_resource.c1
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_screen.c13
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c36
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_surface.c4
-rw-r--r--src/gallium/drivers/r300/r300_query.c6
-rw-r--r--src/gallium/drivers/r300/r300_screen.c1
-rw-r--r--src/gallium/drivers/r300/r300_texture_desc.c3
-rw-r--r--src/gallium/drivers/r600/evergreen_state.c108
-rw-r--r--src/gallium/drivers/r600/evergreend.h16
-rw-r--r--src/gallium/drivers/r600/r600_blit.c9
-rw-r--r--src/gallium/drivers/r600/r600_hw_context.c25
-rw-r--r--src/gallium/drivers/r600/r600_pipe.c1
-rw-r--r--src/gallium/drivers/r600/r600_pipe.h24
-rw-r--r--src/gallium/drivers/r600/r600_state.c103
-rw-r--r--src/gallium/drivers/r600/r600_state_common.c106
-rw-r--r--src/gallium/drivers/r600/r600d.h11
-rw-r--r--src/gallium/drivers/radeon/Makefile.sources1
-rw-r--r--src/gallium/drivers/radeon/r600_buffer_common.c15
-rw-r--r--src/gallium/drivers/radeon/r600_perfcounter.c1
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.c28
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.h117
-rw-r--r--src/gallium/drivers/radeon/r600_query.c69
-rw-r--r--src/gallium/drivers/radeon/r600_query.h3
-rw-r--r--src/gallium/drivers/radeon/r600_streamout.c6
-rw-r--r--src/gallium/drivers/radeon/r600_texture.c29
-rw-r--r--src/gallium/drivers/radeon/r600_viewport.c350
-rw-r--r--src/gallium/drivers/radeon/r600d_common.h21
-rw-r--r--src/gallium/drivers/radeon/radeon_uvd.c2
-rw-r--r--src/gallium/drivers/radeon/radeon_winsys.h5
-rw-r--r--src/gallium/drivers/radeonsi/si_blit.c7
-rw-r--r--src/gallium/drivers/radeonsi/si_cp_dma.c6
-rw-r--r--src/gallium/drivers/radeonsi/si_descriptors.c108
-rw-r--r--src/gallium/drivers/radeonsi/si_hw_context.c11
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.c11
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.h17
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c381
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.h114
-rw-r--r--src/gallium/drivers/radeonsi/si_state.c257
-rw-r--r--src/gallium/drivers/radeonsi/si_state.h2
-rw-r--r--src/gallium/drivers/radeonsi/si_state_draw.c13
-rw-r--r--src/gallium/drivers/radeonsi/si_state_shaders.c25
-rw-r--r--src/gallium/drivers/radeonsi/sid.h8
-rw-r--r--src/gallium/drivers/rbug/rbug_context.c12
-rw-r--r--src/gallium/drivers/softpipe/Makefile.sources2
-rw-r--r--src/gallium/drivers/softpipe/sp_buffer.c371
-rw-r--r--src/gallium/drivers/softpipe/sp_buffer.h37
-rw-r--r--src/gallium/drivers/softpipe/sp_context.c17
-rw-r--r--src/gallium/drivers/softpipe/sp_context.h2
-rw-r--r--src/gallium/drivers/softpipe/sp_fs_exec.c7
-rw-r--r--src/gallium/drivers/softpipe/sp_image.c8
-rw-r--r--src/gallium/drivers/softpipe/sp_query.c7
-rw-r--r--src/gallium/drivers/softpipe/sp_screen.c7
-rw-r--r--src/gallium/drivers/softpipe/sp_state.h4
-rw-r--r--src/gallium/drivers/softpipe/sp_state_derived.c3
-rw-r--r--src/gallium/drivers/softpipe/sp_state_image.c28
-rw-r--r--src/gallium/drivers/softpipe/sp_tile_cache.c4
-rw-r--r--src/gallium/drivers/svga/svga_pipe_depthstencil.c6
-rw-r--r--src/gallium/drivers/svga/svga_pipe_query.c7
-rw-r--r--src/gallium/drivers/svga/svga_screen.c1
-rw-r--r--src/gallium/drivers/svga/svga_state_rss.c4
-rw-r--r--src/gallium/drivers/svga/svga_state_tss.c4
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/os.h10
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/api.cpp100
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/arena.h232
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/backend.cpp39
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/backend.h2
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/clip.cpp4
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/clip.h4
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/context.h47
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/depthstencil.h40
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/frontend.cpp10
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/knobs.h3
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/pa.h14
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp16
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/state.h1
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/threads.cpp141
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/threads.h4
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp22
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/tilemgr.h8
-rw-r--r--src/gallium/drivers/swr/rasterizer/scripts/knob_defs.py19
-rw-r--r--src/gallium/drivers/swr/swr_query.cpp7
-rw-r--r--src/gallium/drivers/swr/swr_screen.cpp5
-rw-r--r--src/gallium/drivers/swr/swr_shader.cpp102
-rw-r--r--src/gallium/drivers/swr/swr_shader.h42
-rw-r--r--src/gallium/drivers/swr/swr_state.cpp153
-rw-r--r--src/gallium/drivers/swr/swr_state.h6
-rw-r--r--src/gallium/drivers/swr/swr_tex_sample.cpp33
-rw-r--r--src/gallium/drivers/swr/swr_tex_sample.h2
-rw-r--r--src/gallium/drivers/trace/tr_context.c19
-rw-r--r--src/gallium/drivers/vc4/vc4_draw.c110
-rw-r--r--src/gallium/drivers/vc4/vc4_nir_lower_blend.c4
-rw-r--r--src/gallium/drivers/vc4/vc4_nir_lower_io.c30
-rw-r--r--src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c4
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_vpm.c4
-rw-r--r--src/gallium/drivers/vc4/vc4_program.c124
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.c74
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.h40
-rw-r--r--src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c2
-rw-r--r--src/gallium/drivers/vc4/vc4_qir_schedule.c34
-rw-r--r--src/gallium/drivers/vc4/vc4_qpu_emit.c100
-rw-r--r--src/gallium/drivers/vc4/vc4_query.c6
-rw-r--r--src/gallium/drivers/vc4/vc4_screen.c1
-rw-r--r--src/gallium/drivers/virgl/virgl_query.c6
-rw-r--r--src/gallium/drivers/virgl/virgl_screen.c1
-rw-r--r--src/gallium/include/pipe/p_context.h6
-rw-r--r--src/gallium/include/pipe/p_defines.h1
-rw-r--r--src/gallium/state_trackers/clover/llvm/invocation.cpp14
-rw-r--r--src/gallium/state_trackers/va/context.c4
-rw-r--r--src/gallium/state_trackers/va/image.c4
-rw-r--r--src/gallium/targets/libgl-xlib/SConscript12
-rw-r--r--src/gallium/winsys/amdgpu/drm/amdgpu_bo.c39
-rw-r--r--src/gallium/winsys/amdgpu/drm/amdgpu_surface.c2
-rw-r--r--src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c4
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_bo.c21
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_winsys.c24
-rw-r--r--src/gallium/winsys/svga/drm/pb_buffer_simple_fenced.c5
-rw-r--r--src/gallium/winsys/svga/drm/vmw_buffer.c4
179 files changed, 3606 insertions, 1543 deletions
diff --git a/src/gallium/auxiliary/Makefile.am b/src/gallium/auxiliary/Makefile.am
index 82c2869b99b..296ed59317b 100644
--- a/src/gallium/auxiliary/Makefile.am
+++ b/src/gallium/auxiliary/Makefile.am
@@ -1,10 +1,11 @@
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
-noinst_LTLIBRARIES = libgallium_nir.la
+noinst_LTLIBRARIES = libgallium.la
AM_CFLAGS = \
-I$(top_srcdir)/src/loader \
+ -I$(top_builddir)/src/compiler/nir \
-I$(top_srcdir)/src/gallium/auxiliary/util \
$(GALLIUM_CFLAGS) \
$(VISIBILITY_CFLAGS) \
@@ -14,24 +15,11 @@ AM_CXXFLAGS = \
$(VISIBILITY_CXXFLAGS) \
$(MSVC2013_COMPAT_CXXFLAGS)
-libgallium_nir_la_SOURCES = \
- $(NIR_SOURCES)
-
-libgallium_nir_la_CFLAGS = \
- -I$(top_builddir)/src/compiler/nir \
- $(GALLIUM_CFLAGS) \
- $(VISIBILITY_CFLAGS) \
- $(MSVC2013_COMPAT_CFLAGS)
-
-noinst_LTLIBRARIES += libgallium.la
-
libgallium_la_SOURCES = \
$(C_SOURCES) \
+ $(NIR_SOURCES) \
$(GENERATED_SOURCES)
-libgallium_la_LIBADD = \
- libgallium_nir.la
-
if HAVE_MESA_LLVM
AM_CFLAGS += \
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index 790e1211898..4e0cbdd8f9a 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -1539,6 +1539,8 @@ cso_save_state(struct cso_context *cso, unsigned state_mask)
cso_save_vertex_shader(cso);
if (state_mask & CSO_BIT_VIEWPORT)
cso_save_viewport(cso);
+ if (state_mask & CSO_BIT_PAUSE_QUERIES)
+ cso->pipe->set_active_query_state(cso->pipe, false);
}
@@ -1590,6 +1592,8 @@ cso_restore_state(struct cso_context *cso)
cso_restore_vertex_shader(cso);
if (state_mask & CSO_BIT_VIEWPORT)
cso_restore_viewport(cso);
+ if (state_mask & CSO_BIT_PAUSE_QUERIES)
+ cso->pipe->set_active_query_state(cso->pipe, true);
cso->saved_state = 0;
}
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h
index a3563d83a02..e27cbe9f721 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.h
+++ b/src/gallium/auxiliary/cso_cache/cso_context.h
@@ -170,6 +170,7 @@ void cso_set_render_condition(struct cso_context *cso,
#define CSO_BIT_VERTEX_ELEMENTS 0x10000
#define CSO_BIT_VERTEX_SHADER 0x20000
#define CSO_BIT_VIEWPORT 0x40000
+#define CSO_BIT_PAUSE_QUERIES 0x80000
#define CSO_BITS_ALL_SHADERS (CSO_BIT_VERTEX_SHADER | \
CSO_BIT_FRAGMENT_SHADER | \
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index 2ba9b099664..75551fbe2dd 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -749,7 +749,23 @@ draw_image(struct draw_context *draw,
}
}
-
+/**
+ * Provide TGSI buffer objects for vertex/geometry shaders that use
+ * load/store/atomic ops. This state only needs to be set once per context.
+ * This might only be used by software drivers for the time being.
+ */
+void
+draw_buffer(struct draw_context *draw,
+ uint shader,
+ struct tgsi_buffer *buffer)
+{
+ if (shader == PIPE_SHADER_VERTEX) {
+ draw->vs.tgsi.buffer = buffer;
+ } else {
+ debug_assert(shader == PIPE_SHADER_GEOMETRY);
+ draw->gs.tgsi.buffer = buffer;
+ }
+}
void draw_set_render( struct draw_context *draw,
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index 5d9870b115c..3e6722fcb7e 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -49,6 +49,7 @@ struct draw_geometry_shader;
struct draw_fragment_shader;
struct tgsi_sampler;
struct tgsi_image;
+struct tgsi_buffer;
/*
* structure to contain driver internal information
@@ -161,6 +162,11 @@ draw_image(struct draw_context *draw,
struct tgsi_image *image);
void
+draw_buffer(struct draw_context *draw,
+ uint shader_type,
+ struct tgsi_buffer *buffer);
+
+void
draw_set_sampler_views(struct draw_context *draw,
unsigned shader_stage,
struct pipe_sampler_view **views,
diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c
index 14db2d6f39d..ef217fa5ceb 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -681,7 +681,9 @@ void draw_geometry_shader_prepare(struct draw_geometry_shader *shader,
if (!use_llvm && shader && shader->machine->Tokens != shader->state.tokens) {
tgsi_exec_machine_bind_shader(shader->machine,
shader->state.tokens,
- draw->gs.tgsi.sampler, draw->gs.tgsi.image);
+ draw->gs.tgsi.sampler,
+ draw->gs.tgsi.image,
+ draw->gs.tgsi.buffer);
}
}
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 211bd6f7e70..a18f6632124 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -67,6 +67,7 @@ struct vbuf_render;
struct tgsi_exec_machine;
struct tgsi_sampler;
struct tgsi_image;
+struct tgsi_buffer;
struct draw_pt_front_end;
struct draw_assembler;
struct draw_llvm;
@@ -269,6 +270,7 @@ struct draw_context
struct tgsi_sampler *sampler;
struct tgsi_image *image;
+ struct tgsi_buffer *buffer;
} tgsi;
struct translate *fetch;
@@ -289,6 +291,7 @@ struct draw_context
struct tgsi_sampler *sampler;
struct tgsi_image *image;
+ struct tgsi_buffer *buffer;
} tgsi;
} gs;
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index 5b53cff29f0..da0d1a7f9a8 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -70,7 +70,9 @@ vs_exec_prepare( struct draw_vertex_shader *shader,
if (evs->machine->Tokens != shader->state.tokens) {
tgsi_exec_machine_bind_shader(evs->machine,
shader->state.tokens,
- draw->vs.tgsi.sampler, draw->vs.tgsi.image);
+ draw->vs.tgsi.sampler,
+ draw->vs.tgsi.image,
+ draw->vs.tgsi.buffer);
}
}
@@ -159,6 +161,7 @@ vs_exec_run_linear( struct draw_vertex_shader *shader,
input = (const float (*)[4])((const char *)input + input_stride);
}
+ machine->NonHelperMask = (1 << max_vertices) - 1;
/* run interpreter */
tgsi_exec_machine_run( machine );
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 0c43617d531..beff4143fd8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1492,9 +1492,20 @@ lp_build_abs(struct lp_build_context *bld,
return a;
if(type.floating) {
- char intrinsic[32];
- lp_format_intrinsic(intrinsic, sizeof intrinsic, "llvm.fabs", vec_type);
- return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a);
+ if (0x0306 <= HAVE_LLVM && HAVE_LLVM < 0x0309) {
+ /* Workaround llvm.org/PR27332 */
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type);
+ unsigned long long absMask = ~(1ULL << (type.width - 1));
+ LLVMValueRef mask = lp_build_const_int_vec(bld->gallivm, type, ((unsigned long long) absMask));
+ a = LLVMBuildBitCast(builder, a, int_vec_type, "");
+ a = LLVMBuildAnd(builder, a, mask, "");
+ a = LLVMBuildBitCast(builder, a, vec_type, "");
+ return a;
+ } else {
+ char intrinsic[32];
+ lp_format_intrinsic(intrinsic, sizeof intrinsic, "llvm.fabs", vec_type);
+ return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a);
+ }
}
if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) {
@@ -1663,99 +1674,6 @@ enum lp_build_round_mode
LP_BUILD_ROUND_TRUNCATE = 3
};
-/**
- * Helper for SSE4.1's ROUNDxx instructions.
- *
- * NOTE: In the SSE4.1's nearest mode, if two values are equally close, the
- * result is the even value. That is, rounding 2.5 will be 2.0, and not 3.0.
- */
-static inline LLVMValueRef
-lp_build_nearest_sse41(struct lp_build_context *bld,
- LLVMValueRef a)
-{
- LLVMBuilderRef builder = bld->gallivm->builder;
- const struct lp_type type = bld->type;
- LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
- LLVMValueRef mode = LLVMConstNull(i32t);
- const char *intrinsic;
- LLVMValueRef res;
-
- assert(type.floating);
-
- assert(lp_check_value(type, a));
- assert(util_cpu_caps.has_sse4_1);
-
- if (type.length == 1) {
- LLVMTypeRef vec_type;
- LLVMValueRef undef;
- LLVMValueRef args[3];
- LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
-
- switch(type.width) {
- case 32:
- intrinsic = "llvm.x86.sse41.round.ss";
- break;
- case 64:
- intrinsic = "llvm.x86.sse41.round.sd";
- break;
- default:
- assert(0);
- return bld->undef;
- }
-
- vec_type = LLVMVectorType(bld->elem_type, 4);
-
- undef = LLVMGetUndef(vec_type);
-
- args[0] = undef;
- args[1] = LLVMBuildInsertElement(builder, undef, a, index0, "");
- args[2] = mode;
-
- res = lp_build_intrinsic(builder, intrinsic,
- vec_type, args, Elements(args), 0);
-
- res = LLVMBuildExtractElement(builder, res, index0, "");
- }
- else {
- if (type.width * type.length == 128) {
- switch(type.width) {
- case 32:
- intrinsic = "llvm.x86.sse41.round.ps";
- break;
- case 64:
- intrinsic = "llvm.x86.sse41.round.pd";
- break;
- default:
- assert(0);
- return bld->undef;
- }
- }
- else {
- assert(type.width * type.length == 256);
- assert(util_cpu_caps.has_avx);
-
- switch(type.width) {
- case 32:
- intrinsic = "llvm.x86.avx.round.ps.256";
- break;
- case 64:
- intrinsic = "llvm.x86.avx.round.pd.256";
- break;
- default:
- assert(0);
- return bld->undef;
- }
- }
-
- res = lp_build_intrinsic_binary(builder, intrinsic,
- bld->vec_type, a,
- mode);
- }
-
- return res;
-}
-
-
static inline LLVMValueRef
lp_build_iround_nearest_sse2(struct lp_build_context *bld,
LLVMValueRef a)
@@ -1863,11 +1781,7 @@ lp_build_round_arch(struct lp_build_context *bld,
switch (mode) {
case LP_BUILD_ROUND_NEAREST:
- if (HAVE_LLVM >= 0x0304) {
- intrinsic_root = "llvm.round";
- } else {
- return lp_build_nearest_sse41(bld, a);
- }
+ intrinsic_root = "llvm.nearbyint";
break;
case LP_BUILD_ROUND_FLOOR:
intrinsic_root = "llvm.floor";
diff --git a/src/gallium/auxiliary/hud/hud_context.c b/src/gallium/auxiliary/hud/hud_context.c
index 4673458171e..40017c8614f 100644
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -477,6 +477,7 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
CSO_BIT_VERTEX_SHADER |
CSO_BIT_VERTEX_ELEMENTS |
CSO_BIT_AUX_VERTEX_BUFFER_SLOT |
+ CSO_BIT_PAUSE_QUERIES |
CSO_BIT_RENDER_CONDITION));
cso_save_constant_buffer_slot0(cso, PIPE_SHADER_VERTEX);
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 7ec8b662200..d76b6d900ce 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -454,7 +454,7 @@ ttn_emit_immediate(struct ttn_compile *c)
nir_load_const_instr *load_const;
int i;
- load_const = nir_load_const_instr_create(b->shader, 4);
+ load_const = nir_load_const_instr_create(b->shader, 4, 32);
c->imm_defs[c->next_imm] = &load_const->def;
c->next_imm++;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
index 803c1d39192..33c23068c27 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
@@ -87,9 +87,9 @@ struct pb_desc
/**
- * Size. Regular (32bit) unsigned for now.
+ * 64-bit type for GPU buffer sizes and offsets.
*/
-typedef unsigned pb_size;
+typedef uint64_t pb_size;
/**
@@ -98,8 +98,8 @@ typedef unsigned pb_size;
struct pb_buffer
{
struct pipe_reference reference;
- unsigned size;
unsigned alignment;
+ pb_size size;
unsigned usage;
/**
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
index fbbe8d11eb0..64af321558e 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
@@ -40,6 +40,7 @@
#include <unistd.h>
#include <sched.h>
#endif
+#include <inttypes.h>
#include "pipe/p_compiler.h"
#include "pipe/p_defines.h"
@@ -208,7 +209,7 @@ fenced_manager_dump_locked(struct fenced_manager *fenced_mgr)
while (curr != &fenced_mgr->unfenced) {
fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
assert(!fenced_buf->fence);
- debug_printf("%10p %7u %8u %7s\n",
+ debug_printf("%10p %"PRIu64" %8u %7s\n",
(void *) fenced_buf,
fenced_buf->base.size,
p_atomic_read(&fenced_buf->base.reference.count),
@@ -224,7 +225,7 @@ fenced_manager_dump_locked(struct fenced_manager *fenced_mgr)
fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
assert(fenced_buf->buffer);
signaled = ops->fence_signalled(ops, fenced_buf->fence, 0);
- debug_printf("%10p %7u %8u %7s %10p %s\n",
+ debug_printf("%10p %"PRIu64" %8u %7s %10p %s\n",
(void *) fenced_buf,
fenced_buf->base.size,
p_atomic_read(&fenced_buf->base.reference.count),
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
index 3d3a7aba7fb..4e36866e08c 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
@@ -41,6 +41,7 @@
#include "util/list.h"
#include "util/u_time.h"
#include "util/u_debug_stack.h"
+#include <inttypes.h>
#include "pb_buffer.h"
#include "pb_bufmgr.h"
@@ -190,7 +191,7 @@ pb_debug_buffer_check(struct pb_debug_buffer *buf)
underflow = !check_random_pattern(map, buf->underflow_size,
&min_ofs, &max_ofs);
if(underflow) {
- debug_printf("buffer underflow (offset -%u%s to -%u bytes) detected\n",
+ debug_printf("buffer underflow (offset -%"PRIu64"%s to -%"PRIu64" bytes) detected\n",
buf->underflow_size - min_ofs,
min_ofs == 0 ? "+" : "",
buf->underflow_size - max_ofs);
@@ -200,7 +201,7 @@ pb_debug_buffer_check(struct pb_debug_buffer *buf)
buf->overflow_size,
&min_ofs, &max_ofs);
if(overflow) {
- debug_printf("buffer overflow (size %u plus offset %u to %u%s bytes) detected\n",
+ debug_printf("buffer overflow (size %"PRIu64" plus offset %"PRIu64" to %"PRIu64"%s bytes) detected\n",
buf->base.size,
min_ofs,
max_ofs,
@@ -349,7 +350,7 @@ pb_debug_manager_dump_locked(struct pb_debug_manager *mgr)
buf = LIST_ENTRY(struct pb_debug_buffer, curr, head);
debug_printf("buffer = %p\n", (void *) buf);
- debug_printf(" .size = 0x%x\n", buf->base.size);
+ debug_printf(" .size = 0x%"PRIx64"\n", buf->base.size);
debug_backtrace_dump(buf->create_backtrace, PB_DEBUG_CREATE_BACKTRACE);
curr = next;
diff --git a/src/gallium/auxiliary/postprocess/pp_run.c b/src/gallium/auxiliary/postprocess/pp_run.c
index 9dc8fb51ae2..bc79c5aab6e 100644
--- a/src/gallium/auxiliary/postprocess/pp_run.c
+++ b/src/gallium/auxiliary/postprocess/pp_run.c
@@ -133,6 +133,7 @@ pp_run(struct pp_queue_t *ppq, struct pipe_resource *in,
CSO_BIT_VERTEX_SHADER |
CSO_BIT_VIEWPORT |
CSO_BIT_AUX_VERTEX_BUFFER_SLOT |
+ CSO_BIT_PAUSE_QUERIES |
CSO_BIT_RENDER_CONDITION));
cso_save_constant_buffer_slot0(cso, PIPE_SHADER_VERTEX);
cso_save_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT);
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index 27ee8f1242a..3e7d699627a 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -2203,7 +2203,7 @@ voidptr_to_x86_func(void *v)
void *v;
x86_func f;
} u;
- assert(sizeof(u.v) == sizeof(u.f));
+ STATIC_ASSERT(sizeof(u.v) == sizeof(u.f));
u.v = v;
return u.f;
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index a595bbbc6d3..41dd0f0466a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -854,7 +854,8 @@ tgsi_exec_machine_bind_shader(
struct tgsi_exec_machine *mach,
const struct tgsi_token *tokens,
struct tgsi_sampler *sampler,
- struct tgsi_image *image)
+ struct tgsi_image *image,
+ struct tgsi_buffer *buffer)
{
uint k;
struct tgsi_parse_context parse;
@@ -873,6 +874,7 @@ tgsi_exec_machine_bind_shader(
mach->Tokens = tokens;
mach->Sampler = sampler;
mach->Image = image;
+ mach->Buffer = buffer;
if (!tokens) {
/* unbind and free all */
@@ -3758,8 +3760,8 @@ get_image_coord_sample(unsigned tgsi_tex)
}
static void
-exec_load(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
+exec_load_img(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
{
union tgsi_exec_channel r[4], sample_r;
uint unit;
@@ -3805,8 +3807,51 @@ exec_load(struct tgsi_exec_machine *mach,
}
static void
-exec_store(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
+exec_load_buf(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ union tgsi_exec_channel r[4];
+ uint unit;
+ int j;
+ uint chan;
+ float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
+ struct tgsi_buffer_params params;
+ int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
+
+ unit = fetch_sampler_unit(mach, inst, 0);
+
+ params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
+ params.unit = unit;
+ IFETCH(&r[0], 1, TGSI_CHAN_X);
+
+ mach->Buffer->load(mach->Buffer, &params,
+ r[0].i, rgba);
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ r[0].f[j] = rgba[0][j];
+ r[1].f[j] = rgba[1][j];
+ r[2].f[j] = rgba[2][j];
+ r[3].f[j] = rgba[3][j];
+ }
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+ }
+ }
+}
+
+static void
+exec_load(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
+ exec_load_img(mach, inst);
+ else
+ exec_load_buf(mach, inst);
+}
+
+static void
+exec_store_img(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
{
union tgsi_exec_channel r[3], sample_r;
union tgsi_exec_channel value[4];
@@ -3850,8 +3895,53 @@ exec_store(struct tgsi_exec_machine *mach,
}
static void
-exec_atomop(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
+exec_store_buf(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ union tgsi_exec_channel r[3];
+ union tgsi_exec_channel value[4];
+ float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
+ struct tgsi_buffer_params params;
+ int i, j;
+ uint unit;
+ int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
+
+ unit = inst->Dst[0].Register.Index;
+
+ params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
+ params.unit = unit;
+ params.writemask = inst->Dst[0].Register.WriteMask;
+
+ IFETCH(&r[0], 0, TGSI_CHAN_X);
+ for (i = 0; i < 4; i++) {
+ FETCH(&value[i], 1, TGSI_CHAN_X + i);
+ }
+
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ rgba[0][j] = value[0].f[j];
+ rgba[1][j] = value[1].f[j];
+ rgba[2][j] = value[2].f[j];
+ rgba[3][j] = value[3].f[j];
+ }
+
+ mach->Buffer->store(mach->Buffer, &params,
+ r[0].i,
+ rgba);
+}
+
+static void
+exec_store(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE)
+ exec_store_img(mach, inst);
+ else
+ exec_store_buf(mach, inst);
+}
+
+static void
+exec_atomop_img(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
{
union tgsi_exec_channel r[4], sample_r;
union tgsi_exec_channel value[4], value2[4];
@@ -3918,8 +4008,77 @@ exec_atomop(struct tgsi_exec_machine *mach,
}
static void
-exec_resq(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
+exec_atomop_buf(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ union tgsi_exec_channel r[4];
+ union tgsi_exec_channel value[4], value2[4];
+ float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
+ float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
+ struct tgsi_buffer_params params;
+ int i, j;
+ uint unit, chan;
+ int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
+
+ unit = fetch_sampler_unit(mach, inst, 0);
+
+ params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
+ params.unit = unit;
+ params.writemask = inst->Dst[0].Register.WriteMask;
+
+ IFETCH(&r[0], 1, TGSI_CHAN_X);
+
+ for (i = 0; i < 4; i++) {
+ FETCH(&value[i], 2, TGSI_CHAN_X + i);
+ if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
+ FETCH(&value2[i], 3, TGSI_CHAN_X + i);
+ }
+
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ rgba[0][j] = value[0].f[j];
+ rgba[1][j] = value[1].f[j];
+ rgba[2][j] = value[2].f[j];
+ rgba[3][j] = value[3].f[j];
+ }
+ if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ rgba2[0][j] = value2[0].f[j];
+ rgba2[1][j] = value2[1].f[j];
+ rgba2[2][j] = value2[2].f[j];
+ rgba2[3][j] = value2[3].f[j];
+ }
+ }
+
+ mach->Buffer->op(mach->Buffer, &params, inst->Instruction.Opcode,
+ r[0].i,
+ rgba, rgba2);
+
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ r[0].f[j] = rgba[0][j];
+ r[1].f[j] = rgba[1][j];
+ r[2].f[j] = rgba[2][j];
+ r[3].f[j] = rgba[3][j];
+ }
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+ }
+ }
+}
+
+static void
+exec_atomop(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
+ exec_atomop_img(mach, inst);
+ else
+ exec_atomop_buf(mach, inst);
+}
+
+static void
+exec_resq_img(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
{
int result[4];
union tgsi_exec_channel r[4];
@@ -3952,6 +4111,46 @@ exec_resq(struct tgsi_exec_machine *mach,
}
static void
+exec_resq_buf(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ int result;
+ union tgsi_exec_channel r[4];
+ uint unit;
+ int i, chan;
+ struct tgsi_buffer_params params;
+ int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
+
+ unit = fetch_sampler_unit(mach, inst, 0);
+
+ params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
+ params.unit = unit;
+
+ mach->Buffer->get_dims(mach->Buffer, &params, &result);
+
+ for (i = 0; i < TGSI_QUAD_SIZE; i++) {
+ r[0].i[i] = result;
+ }
+
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
+ TGSI_EXEC_DATA_INT);
+ }
+ }
+}
+
+static void
+exec_resq(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
+ exec_resq_img(mach, inst);
+ else
+ exec_resq_buf(mach, inst);
+}
+
+static void
micro_i2f(union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src)
{
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 45fb8d43c88..42fb922baa5 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -138,6 +138,36 @@ struct tgsi_image {
int dims[4]);
};
+struct tgsi_buffer_params {
+ unsigned unit;
+ unsigned execmask;
+ unsigned writemask;
+};
+
+struct tgsi_buffer {
+ /* buffer interfaces */
+ void (*load)(const struct tgsi_buffer *buffer,
+ const struct tgsi_buffer_params *params,
+ const int s[TGSI_QUAD_SIZE],
+ float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
+
+ void (*store)(const struct tgsi_buffer *buffer,
+ const struct tgsi_buffer_params *params,
+ const int s[TGSI_QUAD_SIZE],
+ float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
+
+ void (*op)(const struct tgsi_buffer *buffer,
+ const struct tgsi_buffer_params *params,
+ unsigned opcode,
+ const int s[TGSI_QUAD_SIZE],
+ float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
+ float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
+
+ void (*get_dims)(const struct tgsi_buffer *buffer,
+ const struct tgsi_buffer_params *params,
+ int *dim);
+};
+
/**
* Information for sampling textures, which must be implemented
* by code outside the TGSI executor.
@@ -334,6 +364,7 @@ struct tgsi_exec_machine
struct tgsi_sampler *Sampler;
struct tgsi_image *Image;
+ struct tgsi_buffer *Buffer;
unsigned ImmLimit;
const void *Consts[PIPE_MAX_CONSTANT_BUFFERS];
@@ -424,7 +455,8 @@ tgsi_exec_machine_bind_shader(
struct tgsi_exec_machine *mach,
const struct tgsi_token *tokens,
struct tgsi_sampler *sampler,
- struct tgsi_image *image);
+ struct tgsi_image *image,
+ struct tgsi_buffer *buffer);
uint
tgsi_exec_machine_run(
@@ -496,8 +528,9 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
return 1;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
- case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
return 0;
+ case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ return PIPE_MAX_SHADER_BUFFERS;
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return PIPE_MAX_SHADER_IMAGES;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.c b/src/gallium/auxiliary/tgsi/tgsi_lowering.c
index a3b90bdb509..0ffd855793a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_lowering.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.c
@@ -1430,7 +1430,7 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
int newlen, numtmp;
/* sanity check in case limit is ever increased: */
- assert((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS);
+ STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS);
memset(&ctx, 0, sizeof(ctx));
ctx.base.transform_instruction = transform_instr;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index ae95ebd82a4..16564ddf301 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -313,7 +313,7 @@ tgsi_dump_tokens(const struct tgsi_token *tokens)
int nr = tgsi_num_tokens(tokens);
int i;
- assert(sizeof(*tokens) == sizeof(unsigned));
+ STATIC_ASSERT(sizeof(*tokens) == sizeof(unsigned));
debug_printf("const unsigned tokens[%d] = {\n", nr);
for (i = 0; i < nr; i++)
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index 22c40d1382d..3677515423c 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -551,6 +551,7 @@ util_blit_pixels_tex(struct blit_state *ctx,
CSO_BIT_STREAM_OUTPUTS |
CSO_BIT_VIEWPORT |
CSO_BIT_FRAMEBUFFER |
+ CSO_BIT_PAUSE_QUERIES |
CSO_BIT_FRAGMENT_SHADER |
CSO_BIT_VERTEX_SHADER |
CSO_BIT_TESSCTRL_SHADER |
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index 43fbd8e6452..3ca2c48c4c7 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -529,6 +529,8 @@ static void blitter_set_running_flag(struct blitter_context_priv *ctx)
__LINE__);
}
ctx->base.running = TRUE;
+
+ ctx->base.pipe->set_active_query_state(ctx->base.pipe, false);
}
static void blitter_unset_running_flag(struct blitter_context_priv *ctx)
@@ -538,6 +540,8 @@ static void blitter_unset_running_flag(struct blitter_context_priv *ctx)
__LINE__);
}
ctx->base.running = FALSE;
+
+ ctx->base.pipe->set_active_query_state(ctx->base.pipe, true);
}
static void blitter_check_saved_vertex_states(struct blitter_context_priv *ctx)
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index e92f83a8109..b4ac0db3c50 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -792,6 +792,12 @@ align(int value, int alignment)
return (value + alignment - 1) & ~(alignment - 1);
}
+static inline uint64_t
+align64(uint64_t value, unsigned alignment)
+{
+ return (value + alignment - 1) & ~(alignment - 1);
+}
+
/**
* Works like align but on npot alignments.
*/
diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst
index 904e1ff04e7..3a45f402cd8 100644
--- a/src/gallium/docs/source/context.rst
+++ b/src/gallium/docs/source/context.rst
@@ -330,6 +330,9 @@ a resource without synchronizing with the CPU. This write will optionally
wait for the query to complete, and will optionally write whether the value
is available instead of the value itself.
+``set_active_query_state`` Set whether all current non-driver queries except
+TIME_ELAPSED are active or paused.
+
The interface currently includes the following types of queries:
``PIPE_QUERY_OCCLUSION_COUNTER`` counts the number of fragments which
diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst
index 824f580ed44..94510757254 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -331,6 +331,11 @@ The integer capabilities:
primitive on a layer is obtained from ``PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS``
even though it can be larger than the number of layers supported by either
rendering or textures.
+* ``PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR``: Implementation uses bounds
+ checking on resource accesses by shader if the context is created with
+ PIPE_CONTEXT_ROBUST_BUFFER_ACCESS. See the ARB_robust_buffer_access_behavior
+ extension for information on the required behavior for out of bounds accesses
+ and accesses to unbound resources.
.. _pipe_capf:
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index ac6052a244a..85c302f0dc3 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -2710,7 +2710,7 @@ TGSI_SEMANTIC_COLOR
"""""""""""""""""""
For vertex shader outputs or fragment shader inputs/outputs, this
-label indicates that the resister contains an R,G,B,A color.
+label indicates that the register contains an R,G,B,A color.
Several shader inputs/outputs may contain colors so the semantic index
is used to distinguish them. For example, color[0] may be the diffuse
diff --git a/src/gallium/drivers/ddebug/dd_context.c b/src/gallium/drivers/ddebug/dd_context.c
index 9dfaa0af289..72a950a456a 100644
--- a/src/gallium/drivers/ddebug/dd_context.c
+++ b/src/gallium/drivers/ddebug/dd_context.c
@@ -124,6 +124,14 @@ dd_context_get_query_result(struct pipe_context *_pipe,
}
static void
+dd_context_set_active_query_state(struct pipe_context *_pipe, boolean enable)
+{
+ struct pipe_context *pipe = dd_context(_pipe)->pipe;
+
+ pipe->set_active_query_state(pipe, enable);
+}
+
+static void
dd_context_render_condition(struct pipe_context *_pipe,
struct pipe_query *query, boolean condition,
uint mode)
@@ -667,6 +675,7 @@ dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe)
CTX_INIT(begin_query);
CTX_INIT(end_query);
CTX_INIT(get_query_result);
+ CTX_INIT(set_active_query_state);
CTX_INIT(create_blend_state);
CTX_INIT(bind_blend_state);
CTX_INIT(delete_blend_state);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
index c34f9441c7b..e874d223187 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
@@ -157,7 +157,24 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
emit.dirty = dirty;
emit.vp = NULL; /* we changed key so need to refetch vp */
emit.fp = NULL;
+
+ if (ctx->rasterizer->rasterizer_discard) {
+ fd_wfi(ctx, ctx->ring);
+ OUT_PKT3(ctx->ring, CP_REG_RMW, 3);
+ OUT_RING(ctx->ring, REG_A4XX_RB_RENDER_CONTROL);
+ OUT_RING(ctx->ring, ~A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
+ OUT_RING(ctx->ring, A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
+ }
+
draw_impl(ctx, ctx->ring, &emit);
+
+ if (ctx->rasterizer->rasterizer_discard) {
+ fd_wfi(ctx, ctx->ring);
+ OUT_PKT3(ctx->ring, CP_REG_RMW, 3);
+ OUT_RING(ctx->ring, REG_A4XX_RB_RENDER_CONTROL);
+ OUT_RING(ctx->ring, ~A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
+ OUT_RING(ctx->ring, 0);
+ }
}
/* clear operations ignore viewport state, so we need to reset it
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_query.c b/src/gallium/drivers/freedreno/a4xx/fd4_query.c
index 77e203f6c56..69decbcb251 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_query.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_query.c
@@ -82,12 +82,7 @@ static uint64_t
count_samples(const struct fd_rb_samp_ctrs *start,
const struct fd_rb_samp_ctrs *end)
{
- uint64_t n = 0;
-
- for (unsigned i = 0; i < 16; i += 4)
- n += end->ctr[i] - start->ctr[i];
-
- return n / 2;
+ return end->ctr[0] - start->ctr[0];
}
static void
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index 85ce97c16b7..86992960960 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -241,6 +241,7 @@ struct fd_context {
*/
struct {
uint64_t prims_emitted;
+ uint64_t prims_generated;
uint64_t draw_calls;
uint64_t batch_total, batch_sysmem, batch_gmem, batch_restore;
} stats;
diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c
index bf803cc77bc..66bb1163df2 100644
--- a/src/gallium/drivers/freedreno/freedreno_draw.c
+++ b/src/gallium/drivers/freedreno/freedreno_draw.c
@@ -174,7 +174,16 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
prims = u_reduced_prims_for_vertices(info->mode, info->count);
ctx->stats.draw_calls++;
- ctx->stats.prims_emitted += prims;
+
+ /* TODO prims_emitted should be clipped when the stream-out buffer is
+ * not large enough. See max_tf_vtx().. probably need to move that
+ * into common code. Although a bit more annoying since a2xx doesn't
+ * use ir3 so no common way to get at the pipe_stream_output_info
+ * which is needed for this calculation.
+ */
+ if (ctx->streamout.num_targets > 0)
+ ctx->stats.prims_emitted += prims;
+ ctx->stats.prims_generated += prims;
/* any buffers that haven't been cleared yet, we need to restore: */
ctx->restore |= buffers & (FD_BUFFER_ALL & ~ctx->cleared);
@@ -189,7 +198,7 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
ctx->draw_vbo(ctx, info);
for (i = 0; i < ctx->streamout.num_targets; i++)
- ctx->streamout.offsets[i] += prims;
+ ctx->streamout.offsets[i] += info->count;
if (fd_mesa_debug & FD_DBG_DDRAW)
ctx->dirty = 0xffffffff;
diff --git a/src/gallium/drivers/freedreno/freedreno_query.c b/src/gallium/drivers/freedreno/freedreno_query.c
index b87e8250719..a9427058579 100644
--- a/src/gallium/drivers/freedreno/freedreno_query.c
+++ b/src/gallium/drivers/freedreno/freedreno_query.c
@@ -114,6 +114,11 @@ fd_get_driver_query_info(struct pipe_screen *pscreen,
return 1;
}
+static void
+fd_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{
+}
+
void
fd_query_screen_init(struct pipe_screen *pscreen)
{
@@ -128,5 +133,6 @@ fd_query_context_init(struct pipe_context *pctx)
pctx->begin_query = fd_begin_query;
pctx->end_query = fd_end_query;
pctx->get_query_result = fd_get_query_result;
+ pctx->set_active_query_state = fd_set_active_query_state;
pctx->render_condition = fd_render_condition;
}
diff --git a/src/gallium/drivers/freedreno/freedreno_query_sw.c b/src/gallium/drivers/freedreno/freedreno_query_sw.c
index 514df145fa8..4af6a125e03 100644
--- a/src/gallium/drivers/freedreno/freedreno_query_sw.c
+++ b/src/gallium/drivers/freedreno/freedreno_query_sw.c
@@ -54,7 +54,7 @@ read_counter(struct fd_context *ctx, int type)
{
switch (type) {
case PIPE_QUERY_PRIMITIVES_GENERATED:
- /* for now same thing as _PRIMITIVES_EMITTED */
+ return ctx->stats.prims_generated;
case PIPE_QUERY_PRIMITIVES_EMITTED:
return ctx->stats.prims_emitted;
case FD_QUERY_DRAW_CALLS:
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 707be17513b..05100186495 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -71,6 +71,7 @@ static const struct debug_named_value debug_options[] = {
{"glsl120", FD_DBG_GLSL120,"Temporary flag to force GLSL 1.20 (rather than 1.30) on a3xx+"},
{"shaderdb", FD_DBG_SHADERDB, "Enable shaderdb output"},
{"flush", FD_DBG_FLUSH, "Force flush after every draw"},
+ {"deqp", FD_DBG_DEQP, "Enable dEQP hacks"},
DEBUG_NAMED_VALUE_END
};
@@ -256,6 +257,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
+ case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
return 0;
case PIPE_CAP_MAX_VIEWPORTS:
@@ -352,6 +354,16 @@ fd_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
switch (param) {
case PIPE_CAPF_MAX_LINE_WIDTH:
case PIPE_CAPF_MAX_LINE_WIDTH_AA:
+ /* NOTE: actual value is 127.0f, but this is working around a deqp
+ * bug.. dEQP-GLES3.functional.rasterization.primitives.lines_wide
+ * uses too small of a render target size, and gets confused when
+ * the lines start going offscreen.
+ *
+ * See: https://code.google.com/p/android/issues/detail?id=206513
+ */
+ if (fd_mesa_debug & FD_DBG_DEQP)
+ return 63.0f;
+ return 127.0f;
case PIPE_CAPF_MAX_POINT_WIDTH:
case PIPE_CAPF_MAX_POINT_WIDTH_AA:
return 4092.0f;
diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c
index 685d3a75659..6c472d19815 100644
--- a/src/gallium/drivers/freedreno/freedreno_state.c
+++ b/src/gallium/drivers/freedreno/freedreno_state.c
@@ -359,7 +359,8 @@ fd_set_stream_output_targets(struct pipe_context *pctx,
if (!changed && append)
continue;
- so->offsets[i] = 0;
+ if (!append)
+ so->offsets[i] = offsets[i];
pipe_so_target_reference(&so->targets[i], targets[i]);
}
diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
index 47dd467f498..85dac982314 100644
--- a/src/gallium/drivers/freedreno/freedreno_util.h
+++ b/src/gallium/drivers/freedreno/freedreno_util.h
@@ -73,6 +73,7 @@ enum adreno_stencil_op fd_stencil_op(unsigned op);
#define FD_DBG_GLSL120 0x0400
#define FD_DBG_SHADERDB 0x0800
#define FD_DBG_FLUSH 0x1000
+#define FD_DBG_DEQP 0x2000
extern int fd_mesa_debug;
extern bool fd_binning_enabled;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index 3859f6a39f3..f68275e568c 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -308,8 +308,14 @@ struct ir3_instruction {
static inline struct ir3_instruction *
ir3_neighbor_first(struct ir3_instruction *instr)
{
- while (instr->cp.left)
+ int cnt = 0;
+ while (instr->cp.left) {
instr = instr->cp.left;
+ if (++cnt > 0xffff) {
+ debug_assert(0);
+ break;
+ }
+ }
return instr;
}
@@ -322,6 +328,10 @@ static inline int ir3_neighbor_count(struct ir3_instruction *instr)
while (instr->cp.right) {
num++;
instr = instr->cp.right;
+ if (num > 0xffff) {
+ debug_assert(0);
+ break;
+ }
}
return num;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 245b61f31e5..940ca7744a2 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -103,6 +103,11 @@ struct ir3_compile {
*/
bool unminify_coords;
+ /* on a4xx, for array textures we need to add 0.5 to the array
+ * index coordinate:
+ */
+ bool array_index_add_half;
+
/* for looking up which system value is which */
unsigned sysval_semantics[8];
@@ -128,11 +133,13 @@ compile_init(struct ir3_compiler *compiler,
ctx->flat_bypass = true;
ctx->levels_add_one = false;
ctx->unminify_coords = false;
+ ctx->array_index_add_half = true;
} else {
/* no special handling for "flat" */
ctx->flat_bypass = false;
ctx->levels_add_one = true;
ctx->unminify_coords = true;
+ ctx->array_index_add_half = false;
}
ctx->compiler = compiler;
@@ -1447,9 +1454,8 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
}
/* the array coord for cube arrays needs 0.5 added to it */
- if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array &&
- opc != OPC_ISAML)
- coord[3] = ir3_ADD_F(b, coord[3], 0, create_immed(b, fui(0.5)), 0);
+ if (ctx->array_index_add_half && tex->is_array && (opc != OPC_ISAML))
+ coord[coords] = ir3_ADD_F(b, coord[coords], 0, create_immed(b, fui(0.5)), 0);
/*
* lay out the first argument in the proper order:
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c
index 6037becf22f..e8a2f099391 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c
@@ -442,6 +442,37 @@ instr_cp(struct ir3_instruction *instr)
instr_cp(instr->address);
ir3_instr_set_address(instr, eliminate_output_mov(instr->address));
}
+
+ /* we can end up with extra cmps.s from frontend, which uses a
+ *
+ * cmps.s p0.x, cond, 0
+ *
+ * as a way to mov into the predicate register. But frequently 'cond'
+ * is itself a cmps.s/cmps.f/cmps.u. So detect this special case and
+ * just re-write the instruction writing predicate register to get rid
+ * of the double cmps.
+ */
+ if ((instr->opc == OPC_CMPS_S) &&
+ (instr->regs[0]->num == regid(REG_P0, 0)) &&
+ ssa(instr->regs[1]) &&
+ (instr->regs[2]->flags & IR3_REG_IMMED) &&
+ (instr->regs[2]->iim_val == 0)) {
+ struct ir3_instruction *cond = ssa(instr->regs[1]);
+ switch (cond->opc) {
+ case OPC_CMPS_S:
+ case OPC_CMPS_F:
+ case OPC_CMPS_U:
+ instr->opc = cond->opc;
+ instr->flags = cond->flags;
+ instr->cat2 = cond->cat2;
+ instr->address = cond->address;
+ instr->regs[1] = cond->regs[1];
+ instr->regs[2] = cond->regs[2];
+ break;
+ default:
+ break;
+ }
+ }
}
void
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c
index 77cd0e622f0..7a49f4c371c 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c
@@ -183,7 +183,13 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
ctx->has_samp = true;
regmask_set(&needs_sy, n->regs[0]);
} else if (is_load(n)) {
- regmask_set(&needs_sy, n->regs[0]);
+ /* seems like ldlv needs (ss) bit instead?? which is odd but
+ * makes a bunch of flat-varying tests start working on a4xx.
+ */
+ if (n->opc == OPC_LDLV)
+ regmask_set(&needs_ss, n->regs[0]);
+ else
+ regmask_set(&needs_sy, n->regs[0]);
}
/* both tex/sfu appear to not always immediately consume
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
index 73c65d6ad27..897b3b963be 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
@@ -45,6 +45,7 @@ ir3_tgsi_to_nir(const struct tgsi_token *tokens)
.lower_flrp = true,
.lower_ffract = true,
.native_integers = true,
+ .vertex_id_zero_based = true,
.lower_extract_byte = true,
.lower_extract_word = true,
};
@@ -141,7 +142,7 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
} while (progress);
- OPT_V(s, nir_remove_dead_variables);
+ OPT_V(s, nir_remove_dead_variables, nir_var_local);
if (fd_mesa_debug & FD_DBG_DISASM) {
debug_printf("----------------------\n");
diff --git a/src/gallium/drivers/i915/i915_query.c b/src/gallium/drivers/i915/i915_query.c
index 78d67cea2c9..fa1b01d1804 100644
--- a/src/gallium/drivers/i915/i915_query.c
+++ b/src/gallium/drivers/i915/i915_query.c
@@ -76,6 +76,11 @@ static boolean i915_get_query_result(struct pipe_context *ctx,
return TRUE;
}
+static void
+i915_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{
+}
+
void
i915_init_query_functions(struct i915_context *i915)
{
@@ -84,5 +89,6 @@ i915_init_query_functions(struct i915_context *i915)
i915->base.begin_query = i915_begin_query;
i915->base.end_query = i915_end_query;
i915->base.get_query_result = i915_get_query_result;
+ i915->base.set_active_query_state = i915_set_active_query_state;
}
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index 68e32e51c34..9b6a6604965 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -270,6 +270,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
case PIPE_CAP_PCI_DEVICE:
case PIPE_CAP_PCI_FUNCTION:
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
+ case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
return 0;
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
diff --git a/src/gallium/drivers/ilo/ilo_query.c b/src/gallium/drivers/ilo/ilo_query.c
index 106bd42a335..8a42f58a87f 100644
--- a/src/gallium/drivers/ilo/ilo_query.c
+++ b/src/gallium/drivers/ilo/ilo_query.c
@@ -222,6 +222,11 @@ ilo_get_query_result(struct pipe_context *pipe, struct pipe_query *query,
return true;
}
+static void
+ilo_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{
+}
+
/**
* Initialize query-related functions.
*/
@@ -233,4 +238,5 @@ ilo_init_query_functions(struct ilo_context *ilo)
ilo->base.begin_query = ilo_begin_query;
ilo->base.end_query = ilo_end_query;
ilo->base.get_query_result = ilo_get_query_result;
+ ilo->base.set_active_query_state = ilo_set_active_query_state;
}
diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c
index 142d6f1fa21..538f817e242 100644
--- a/src/gallium/drivers/ilo/ilo_screen.c
+++ b/src/gallium/drivers/ilo/ilo_screen.c
@@ -499,6 +499,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_PCI_DEVICE:
case PIPE_CAP_PCI_FUNCTION:
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
+ case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
return 0;
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c
index fc593670671..2fddc90503f 100644
--- a/src/gallium/drivers/llvmpipe/lp_query.c
+++ b/src/gallium/drivers/llvmpipe/lp_query.c
@@ -320,6 +320,11 @@ llvmpipe_check_render_cond(struct llvmpipe_context *lp)
return TRUE;
}
+static void
+llvmpipe_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{
+}
+
void llvmpipe_init_query_funcs(struct llvmpipe_context *llvmpipe )
{
llvmpipe->pipe.create_query = llvmpipe_create_query;
@@ -327,6 +332,7 @@ void llvmpipe_init_query_funcs(struct llvmpipe_context *llvmpipe )
llvmpipe->pipe.begin_query = llvmpipe_begin_query;
llvmpipe->pipe.end_query = llvmpipe_end_query;
llvmpipe->pipe.get_query_result = llvmpipe_get_query_result;
+ llvmpipe->pipe.set_active_query_state = llvmpipe_set_active_query_state;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 6a5f906adc6..cb681bac939 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -320,6 +320,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_PCI_DEVICE:
case PIPE_CAP_PCI_FUNCTION:
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
+ case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
return 0;
}
/* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/llvmpipe/lp_test_arit.c b/src/gallium/drivers/llvmpipe/lp_test_arit.c
index a0f2db780bb..ba831f37c05 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_arit.c
@@ -218,6 +218,7 @@ const float round_values[] = {
-10.0, -1, 0.0, 12.0,
-1.49, -0.25, 1.25, 2.51,
-0.99, -0.01, 0.01, 0.99,
+ -1.5, -0.5, 0.5, 1.5,
1.401298464324817e-45f, // smallest denormal
-1.401298464324817e-45f,
1.62981451e-08f,
@@ -283,7 +284,7 @@ unary_tests[] = {
{"sin", &lp_build_sin, &sinf, sincos_values, Elements(sincos_values), 20.0 },
{"cos", &lp_build_cos, &cosf, sincos_values, Elements(sincos_values), 20.0 },
{"sgn", &lp_build_sgn, &sgnf, exp2_values, Elements(exp2_values), 20.0 },
- {"round", &lp_build_round, &roundf, round_values, Elements(round_values), 24.0 },
+ {"round", &lp_build_round, &nearbyintf, round_values, Elements(round_values), 24.0 },
{"trunc", &lp_build_trunc, &truncf, round_values, Elements(round_values), 24.0 },
{"floor", &lp_build_floor, &floorf, round_values, Elements(round_values), 24.0 },
{"ceil", &lp_build_ceil, &ceilf, round_values, Elements(round_values), 24.0 },
diff --git a/src/gallium/drivers/noop/noop_pipe.c b/src/gallium/drivers/noop/noop_pipe.c
index fd0a5d0f830..55aca74628e 100644
--- a/src/gallium/drivers/noop/noop_pipe.c
+++ b/src/gallium/drivers/noop/noop_pipe.c
@@ -78,6 +78,11 @@ static boolean noop_get_query_result(struct pipe_context *ctx,
return TRUE;
}
+static void
+noop_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{
+}
+
/*
* resource
@@ -284,6 +289,7 @@ static struct pipe_context *noop_create_context(struct pipe_screen *screen,
ctx->begin_query = noop_begin_query;
ctx->end_query = noop_end_query;
ctx->get_query_result = noop_get_query_result;
+ ctx->set_active_query_state = noop_set_active_query_state;
ctx->transfer_map = noop_transfer_map;
ctx->transfer_flush_region = noop_transfer_flush_region;
ctx->transfer_unmap = noop_transfer_unmap;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 500ab8915de..1b595aec364 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -1327,7 +1327,11 @@ GCRA::simplify()
bestScore = score;
}
}
+#if __cplusplus >= 201103L
+ if (std::isinf(bestScore)) {
+#else
if (isinf(bestScore)) {
+#endif
ERROR("no viable spill candidates left\n");
break;
}
diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c
index 1695553d793..ba43a614b90 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -843,6 +843,39 @@ nouveau_user_buffer_upload(struct nouveau_context *nv,
return true;
}
+/* Invalidate underlying buffer storage, reset fences, reallocate to non-busy
+ * buffer.
+ */
+void
+nouveau_buffer_invalidate(struct pipe_context *pipe,
+ struct pipe_resource *resource)
+{
+ struct nouveau_context *nv = nouveau_context(pipe);
+ struct nv04_resource *buf = nv04_resource(resource);
+ int ref = buf->base.reference.count - 1;
+
+ /* Shared buffers shouldn't get reallocated */
+ if (unlikely(buf->base.bind & PIPE_BIND_SHARED))
+ return;
+
+ /* We can't touch persistent/coherent buffers */
+ if (buf->base.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
+ PIPE_RESOURCE_FLAG_MAP_COHERENT))
+ return;
+
+ /* If the buffer is sub-allocated and not currently being written, just
+ * wipe the valid buffer range. Otherwise we have to create fresh
+ * storage. (We don't keep track of fences for non-sub-allocated BO's.)
+ */
+ if (buf->mm && !nouveau_buffer_busy(buf, PIPE_TRANSFER_WRITE)) {
+ util_range_set_empty(&buf->valid_buffer_range);
+ } else {
+ nouveau_buffer_reallocate(nv->screen, buf, buf->domain);
+ if (ref > 0) /* any references inside context possible ? */
+ nv->invalidate_resource_storage(nv, &buf->base, ref);
+ }
+}
+
/* Scratch data allocation. */
diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.h b/src/gallium/drivers/nouveau/nouveau_buffer.h
index d45bf7aebcf..3a33fae9ce2 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.h
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.h
@@ -99,6 +99,10 @@ bool
nouveau_user_buffer_upload(struct nouveau_context *, struct nv04_resource *,
unsigned base, unsigned size);
+void
+nouveau_buffer_invalidate(struct pipe_context *pipe,
+ struct pipe_resource *resource);
+
/* Copy data to a scratch buffer and return address & bo the data resides in.
* Returns 0 on failure.
*/
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_query.c b/src/gallium/drivers/nouveau/nv30/nv30_query.c
index 75a4b0446fe..cb53a3663e5 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_query.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_query.c
@@ -263,6 +263,11 @@ nv40_query_render_condition(struct pipe_context *pipe,
PUSH_DATA (push, 0x02000000 | q->qo[1]->hw->start);
}
+static void
+nv30_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{
+}
+
void
nv30_query_init(struct pipe_context *pipe)
{
@@ -273,6 +278,7 @@ nv30_query_init(struct pipe_context *pipe)
pipe->begin_query = nv30_query_begin;
pipe->end_query = nv30_query_end;
pipe->get_query_result = nv30_query_result;
+ pipe->set_active_query_state = nv30_set_active_query_state;
if (eng3d->oclass >= NV40_3D_CLASS)
pipe->render_condition = nv40_query_render_condition;
}
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index db7c2d15fb1..400e9f5c04d 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -193,6 +193,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_PCI_DEVICE:
case PIPE_CAP_PCI_FUNCTION:
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
+ case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
return 0;
case PIPE_CAP_VENDOR_ID:
@@ -324,6 +325,7 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
case PIPE_SHADER_CAP_SUBROUTINES:
+ case PIPE_SHADER_CAP_INTEGERS:
case PIPE_SHADER_CAP_DOUBLES:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c
index 61a52c4b366..5af0e9b3a27 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -93,6 +93,30 @@ nv50_memory_barrier(struct pipe_context *pipe, unsigned flags)
}
}
+static void
+nv50_emit_string_marker(struct pipe_context *pipe, const char *str, int len)
+{
+ struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf;
+ int string_words = len / 4;
+ int data_words;
+
+ if (len <= 0)
+ return;
+ string_words = MIN2(string_words, NV04_PFIFO_MAX_PACKET_LEN);
+ if (string_words == NV04_PFIFO_MAX_PACKET_LEN)
+ data_words = string_words;
+ else
+ data_words = string_words + !!(len & 3);
+ BEGIN_NI04(push, SUBC_3D(NV04_GRAPH_NOP), data_words);
+ if (string_words)
+ PUSH_DATAp(push, str, string_words);
+ if (string_words != data_words) {
+ int data = 0;
+ memcpy(&data, &str[string_words * 4], len & 3);
+ PUSH_DATA (push, data);
+ }
+}
+
void
nv50_default_kick_notify(struct nouveau_pushbuf *push)
{
@@ -309,6 +333,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
pipe->texture_barrier = nv50_texture_barrier;
pipe->memory_barrier = nv50_memory_barrier;
pipe->get_sample_position = nv50_context_get_sample_position;
+ pipe->emit_string_marker = nv50_emit_string_marker;
if (!screen->cur_ctx) {
/* Restore the last context's state here, normally handled during
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index 4cd3b615606..fa70fb6950e 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -143,6 +143,11 @@ nv50_render_condition(struct pipe_context *pipe,
PUSH_DATA (push, hq->bo->offset + hq->offset);
}
+static void
+nv50_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{
+}
+
void
nv50_init_query_functions(struct nv50_context *nv50)
{
@@ -153,6 +158,7 @@ nv50_init_query_functions(struct nv50_context *nv50)
pipe->begin_query = nv50_begin_query;
pipe->end_query = nv50_end_query;
pipe->get_query_result = nv50_get_query_result;
+ pipe->set_active_query_state = nv50_set_active_query_state;
pipe->render_condition = nv50_render_condition;
nv50->cond_condmode = NV50_3D_COND_MODE_ALWAYS;
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_resource.c b/src/gallium/drivers/nouveau/nv50/nv50_resource.c
index ad5f3b814db..b090a30aed6 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_resource.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_resource.c
@@ -85,6 +85,13 @@ nv50_surface_destroy(struct pipe_context *pipe, struct pipe_surface *ps)
}
void
+nv50_invalidate_resource(struct pipe_context *pipe, struct pipe_resource *res)
+{
+ if (res->target == PIPE_BUFFER)
+ nouveau_buffer_invalidate(pipe, res);
+}
+
+void
nv50_init_resource_functions(struct pipe_context *pcontext)
{
pcontext->transfer_map = u_transfer_map_vtbl;
@@ -93,6 +100,7 @@ nv50_init_resource_functions(struct pipe_context *pcontext)
pcontext->transfer_inline_write = u_transfer_inline_write_vtbl;
pcontext->create_surface = nv50_surface_create;
pcontext->surface_destroy = nv50_surface_destroy;
+ pcontext->invalidate_resource = nv50_invalidate_resource;
}
void
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_resource.h b/src/gallium/drivers/nouveau/nv50/nv50_resource.h
index b40370a1d78..5d03925b0d0 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_resource.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_resource.h
@@ -152,6 +152,9 @@ void
nv50_surface_destroy(struct pipe_context *, struct pipe_surface *);
void
+nv50_invalidate_resource(struct pipe_context *, struct pipe_resource *);
+
+void
nv50_clear_texture(struct pipe_context *pipe,
struct pipe_resource *res,
unsigned level,
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 20fb61b51f4..ef114e50529 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -193,6 +193,8 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_CLEAR_TEXTURE:
case PIPE_CAP_COMPUTE:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+ case PIPE_CAP_INVALIDATE_BUFFER:
+ case PIPE_CAP_STRING_MARKER:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP:
return 1; /* class_3d >= NVA0_3D_CLASS; */
@@ -234,9 +236,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
- case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
- case PIPE_CAP_STRING_MARKER:
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
case PIPE_CAP_QUERY_BUFFER_OBJECT:
@@ -246,6 +246,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_PCI_DEVICE:
case PIPE_CAP_PCI_FUNCTION:
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
+ case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
return 0;
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
index 007cccfd10b..fcb8289beda 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -97,6 +97,30 @@ nvc0_memory_barrier(struct pipe_context *pipe, unsigned flags)
}
static void
+nvc0_emit_string_marker(struct pipe_context *pipe, const char *str, int len)
+{
+ struct nouveau_pushbuf *push = nvc0_context(pipe)->base.pushbuf;
+ int string_words = len / 4;
+ int data_words;
+
+ if (len <= 0)
+ return;
+ string_words = MIN2(string_words, NV04_PFIFO_MAX_PACKET_LEN);
+ if (string_words == NV04_PFIFO_MAX_PACKET_LEN)
+ data_words = string_words;
+ else
+ data_words = string_words + !!(len & 3);
+ BEGIN_NIC0(push, SUBC_3D(NV04_GRAPH_NOP), data_words);
+ if (string_words)
+ PUSH_DATAp(push, str, string_words);
+ if (string_words != data_words) {
+ int data = 0;
+ memcpy(&data, &str[string_words * 4], len & 3);
+ PUSH_DATA (push, data);
+ }
+}
+
+static void
nvc0_context_unreference_resources(struct nvc0_context *nvc0)
{
unsigned s, i;
@@ -333,6 +357,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
pipe->texture_barrier = nvc0_texture_barrier;
pipe->memory_barrier = nvc0_memory_barrier;
pipe->get_sample_position = nvc0_context_get_sample_position;
+ pipe->emit_string_marker = nvc0_emit_string_marker;
nouveau_context_init(&nvc0->base);
nvc0_init_query_functions(nvc0);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index db02fa2df5c..d3024f9fa06 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -456,6 +456,13 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
fp->hdr[18] |= 0xf << info->out[i].slot[0];
}
+ /* There are no "regular" attachments, but the shader still needs to be
+ * executed. It seems like it wants to think that it has some color
+ * outputs in order to actually run.
+ */
+ if (info->prop.fp.numColourResults == 0 && !info->prop.fp.writesDepth)
+ fp->hdr[18] |= 0xf;
+
fp->fp.early_z = info->prop.fp.earlyFragTests;
return 0;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index 92ca613cda1..b34271c4911 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -254,6 +254,11 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
return 0;
}
+static void
+nvc0_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{
+}
+
void
nvc0_init_query_functions(struct nvc0_context *nvc0)
{
@@ -265,6 +270,7 @@ nvc0_init_query_functions(struct nvc0_context *nvc0)
pipe->end_query = nvc0_end_query;
pipe->get_query_result = nvc0_get_query_result;
pipe->get_query_result_resource = nvc0_get_query_result_resource;
+ pipe->set_active_query_state = nvc0_set_active_query_state;
pipe->render_condition = nvc0_render_condition;
nvc0->cond_condmode = NVC0_3D_COND_MODE_ALWAYS;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c
index c034d0fd011..0aee5890fd8 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c
@@ -52,6 +52,7 @@ nvc0_init_resource_functions(struct pipe_context *pcontext)
pcontext->transfer_inline_write = u_transfer_inline_write_vtbl;
pcontext->create_surface = nvc0_surface_create;
pcontext->surface_destroy = nv50_surface_destroy;
+ pcontext->invalidate_resource = nv50_invalidate_resource;
}
void
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index c41912a6037..9a34007c6e5 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -52,6 +52,12 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */
return false;
+ /* Short-circuit the rest of the logic -- this is used by the state tracker
+ * to determine valid MS levels in a no-attachments scenario.
+ */
+ if (format == PIPE_FORMAT_NONE && bindings & PIPE_BIND_RENDER_TARGET)
+ return true;
+
if (!util_format_is_supported(format, bindings))
return false;
@@ -216,6 +222,9 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_INVALIDATE_BUFFER:
+ case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
@@ -241,9 +250,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
- case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
- case PIPE_CAP_STRING_MARKER:
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
case PIPE_CAP_QUERY_MEMORY_INFO:
@@ -251,7 +258,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_PCI_BUS:
case PIPE_CAP_PCI_DEVICE:
case PIPE_CAP_PCI_FUNCTION:
- case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
+ case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
return 0;
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index 9c64482f2e2..d0d9315dd2b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -56,15 +56,18 @@ nvc0_validate_zcull(struct nvc0_context *nvc0)
#endif
static inline void
-nvc0_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i)
+nvc0_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i, unsigned layers)
{
- BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(i)), 6);
- PUSH_DATA (push, 0);
- PUSH_DATA (push, 0);
- PUSH_DATA (push, 64);
- PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(i)), 9);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
+ PUSH_DATA (push, 64); // width
+ PUSH_DATA (push, 0); // height
+ PUSH_DATA (push, 0); // format
+ PUSH_DATA (push, 0); // tile mode
+ PUSH_DATA (push, layers); // layers
+ PUSH_DATA (push, 0); // layer stride
+ PUSH_DATA (push, 0); // base layer
}
static void
@@ -75,12 +78,11 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
struct nvc0_screen *screen = nvc0->screen;
unsigned i, ms;
unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1;
+ unsigned nr_cbufs = fb->nr_cbufs;
bool serialize = false;
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB);
- BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
- PUSH_DATA (push, (076543210 << 4) | fb->nr_cbufs);
BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2);
PUSH_DATA (push, fb->width << 16);
PUSH_DATA (push, fb->height << 16);
@@ -91,7 +93,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
struct nouveau_bo *bo;
if (!fb->cbufs[i]) {
- nvc0_fb_set_null_rt(push, i);
+ nvc0_fb_set_null_rt(push, i, 0);
continue;
}
@@ -179,6 +181,19 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
PUSH_DATA (push, 0);
}
+ if (nr_cbufs == 0 && !fb->zsbuf) {
+ assert(util_is_power_of_two(fb->samples));
+ assert(fb->samples <= 8);
+
+ nvc0_fb_set_null_rt(push, 0, fb->layers);
+
+ if (fb->samples > 1)
+ ms_mode = ffs(fb->samples) - 1;
+ nr_cbufs = 1;
+ }
+
+ BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
+ PUSH_DATA (push, (076543210 << 4) | nr_cbufs);
IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), ms_mode);
ms = 1 << ms_mode;
@@ -592,8 +607,9 @@ nvc0_validate_derived_2(struct nvc0_context *nvc0)
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
if (nvc0->zsa && nvc0->zsa->pipe.alpha.enabled &&
+ nvc0->framebuffer.zsbuf &&
nvc0->framebuffer.nr_cbufs == 0) {
- nvc0_fb_set_null_rt(push, 0);
+ nvc0_fb_set_null_rt(push, 0, 0);
BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
PUSH_DATA (push, (076543210 << 4) | 1);
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index e657204128e..e108590e215 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -1043,6 +1043,8 @@ nvc0_blitctx_pre_blit(struct nvc0_blitctx *ctx)
ctx->saved.fb.width = nvc0->framebuffer.width;
ctx->saved.fb.height = nvc0->framebuffer.height;
+ ctx->saved.fb.samples = nvc0->framebuffer.samples;
+ ctx->saved.fb.layers = nvc0->framebuffer.layers;
ctx->saved.fb.nr_cbufs = nvc0->framebuffer.nr_cbufs;
ctx->saved.fb.cbufs[0] = nvc0->framebuffer.cbufs[0];
ctx->saved.fb.zsbuf = nvc0->framebuffer.zsbuf;
@@ -1110,6 +1112,8 @@ nvc0_blitctx_post_blit(struct nvc0_blitctx *blit)
nvc0->framebuffer.width = blit->saved.fb.width;
nvc0->framebuffer.height = blit->saved.fb.height;
+ nvc0->framebuffer.samples = blit->saved.fb.samples;
+ nvc0->framebuffer.layers = blit->saved.fb.layers;
nvc0->framebuffer.nr_cbufs = blit->saved.fb.nr_cbufs;
nvc0->framebuffer.cbufs[0] = blit->saved.fb.cbufs[0];
nvc0->framebuffer.zsbuf = blit->saved.fb.zsbuf;
diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c
index 6414e80828e..7603985b14b 100644
--- a/src/gallium/drivers/r300/r300_query.c
+++ b/src/gallium/drivers/r300/r300_query.c
@@ -200,6 +200,11 @@ static void r300_render_condition(struct pipe_context *pipe,
}
}
+static void
+r300_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{
+}
+
void r300_init_query_functions(struct r300_context* r300)
{
r300->context.create_query = r300_create_query;
@@ -207,5 +212,6 @@ void r300_init_query_functions(struct r300_context* r300)
r300->context.begin_query = r300_begin_query;
r300->context.end_query = r300_end_query;
r300->context.get_query_result = r300_get_query_result;
+ r300->context.set_active_query_state = r300_set_active_query_state;
r300->context.render_condition = r300_render_condition;
}
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index b3a7f049e10..eae53e16a54 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -215,6 +215,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_QUERY_BUFFER_OBJECT:
case PIPE_CAP_QUERY_MEMORY_INFO:
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
+ case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
return 0;
/* SWTCL-only features. */
diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c
index 8fa98c5804e..2442d726cd1 100644
--- a/src/gallium/drivers/r300/r300_texture_desc.c
+++ b/src/gallium/drivers/r300/r300_texture_desc.c
@@ -25,6 +25,7 @@
#include "r300_context.h"
#include "util/u_format.h"
+#include <inttypes.h>
/* Returns the number of pixels that the texture should be aligned to
* in the given dimension. */
@@ -614,7 +615,7 @@ void r300_texture_desc_init(struct r300_screen *rscreen,
"r300: I got a pre-allocated buffer to use it as a texture "
"storage, but the buffer is too small. I'll use the buffer "
"anyway, because I can't crash here, but it's dangerous. "
- "This can be a DDX bug. Got: %iB, Need: %iB, Info:\n",
+ "This can be a DDX bug. Got: %"PRIu64"B, Need: %uB, Info:\n",
tex->buf->size, tex->tex.size_in_bytes);
r300_tex_print_info(tex, "texture_desc_init");
/* Ooops, what now. Apps will break if we fail this,
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 65952676987..2ad9e3eb1ab 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -472,6 +472,7 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
r600_init_command_buffer(&rs->buffer, 30);
+ rs->scissor_enable = state->scissor;
rs->flatshade = state->flatshade;
rs->sprite_coord_enable = state->sprite_coord_enable;
rs->two_side = state->light_twoside;
@@ -528,7 +529,7 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
r600_store_context_reg(&rs->buffer, R_0286D4_SPI_INTERP_CONTROL_0, spi_interp);
r600_store_context_reg(&rs->buffer, R_028A48_PA_SC_MODE_CNTL_0,
S_028A48_MSAA_ENABLE(state->multisample) |
- S_028A48_VPORT_SCISSOR_ENABLE(state->scissor) |
+ S_028A48_VPORT_SCISSOR_ENABLE(1) |
S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable));
if (rctx->b.chip_class == CAYMAN) {
@@ -560,8 +561,11 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
static void *evergreen_create_sampler_state(struct pipe_context *ctx,
const struct pipe_sampler_state *state)
{
+ struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen;
struct r600_pipe_sampler_state *ss = CALLOC_STRUCT(r600_pipe_sampler_state);
- unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 2 : 0;
+ unsigned max_aniso = rscreen->force_aniso >= 0 ? rscreen->force_aniso
+ : state->max_anisotropy;
+ unsigned max_aniso_ratio = r600_tex_aniso_filter(max_aniso);
if (!ss) {
return NULL;
@@ -574,10 +578,10 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx,
S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) |
S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) |
S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) |
- S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter) | aniso_flag_offset) |
- S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter) | aniso_flag_offset) |
+ S_03C000_XY_MAG_FILTER(eg_tex_filter(state->mag_img_filter, max_aniso)) |
+ S_03C000_XY_MIN_FILTER(eg_tex_filter(state->min_img_filter, max_aniso)) |
S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) |
- S_03C000_MAX_ANISO(r600_tex_aniso_filter(state->max_anisotropy)) |
+ S_03C000_MAX_ANISO_RATIO(max_aniso_ratio) |
S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) |
S_03C000_BORDER_COLOR_TYPE(ss->border_color_use ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0);
/* R_03C004_SQ_TEX_SAMPLER_WORD1_0 */
@@ -849,10 +853,12 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
view->tex_resource_words[5] |= S_030014_LAST_LEVEL(log_samples);
view->tex_resource_words[6] |= S_030018_FMASK_BANK_HEIGHT(fmask_bankh);
} else {
+ bool no_mip = first_level == last_level;
+
view->tex_resource_words[4] |= S_030010_BASE_LEVEL(first_level);
view->tex_resource_words[5] |= S_030014_LAST_LEVEL(last_level);
/* aniso max 16 samples */
- view->tex_resource_words[6] |= S_030018_MAX_ANISO(4);
+ view->tex_resource_words[6] |= S_030018_MAX_ANISO_RATIO(no_mip ? 0 : 4);
}
view->tex_resource_words[7] = S_03001C_DATA_FORMAT(format) |
@@ -919,60 +925,12 @@ static void evergreen_get_scissor_rect(struct r600_context *rctx,
unsigned tl_x, unsigned tl_y, unsigned br_x, unsigned br_y,
uint32_t *tl, uint32_t *br)
{
- /* EG hw workaround */
- if (br_x == 0)
- tl_x = 1;
- if (br_y == 0)
- tl_y = 1;
+ struct pipe_scissor_state scissor = {tl_x, tl_y, br_x, br_y};
- /* cayman hw workaround */
- if (rctx->b.chip_class == CAYMAN) {
- if (br_x == 1 && br_y == 1)
- br_x = 2;
- }
+ evergreen_apply_scissor_bug_workaround(&rctx->b, &scissor);
- *tl = S_028240_TL_X(tl_x) | S_028240_TL_Y(tl_y);
- *br = S_028244_BR_X(br_x) | S_028244_BR_Y(br_y);
-}
-
-static void evergreen_set_scissor_states(struct pipe_context *ctx,
- unsigned start_slot,
- unsigned num_scissors,
- const struct pipe_scissor_state *state)
-{
- struct r600_context *rctx = (struct r600_context *)ctx;
- struct r600_scissor_state *rstate = &rctx->scissor;
- int i;
-
- for (i = start_slot; i < start_slot + num_scissors; i++)
- rstate->scissor[i] = state[i - start_slot];
- rstate->dirty_mask |= ((1 << num_scissors) - 1) << start_slot;
- rstate->atom.num_dw = util_bitcount(rstate->dirty_mask) * 4;
- r600_mark_atom_dirty(rctx, &rstate->atom);
-}
-
-static void evergreen_emit_scissor_state(struct r600_context *rctx, struct r600_atom *atom)
-{
- struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
- struct r600_scissor_state *rstate = &rctx->scissor;
- struct pipe_scissor_state *state;
- uint32_t dirty_mask;
- unsigned i, offset;
- uint32_t tl, br;
-
- dirty_mask = rstate->dirty_mask;
- while (dirty_mask != 0) {
- i = u_bit_scan(&dirty_mask);
- state = &rstate->scissor[i];
- evergreen_get_scissor_rect(rctx, state->minx, state->miny, state->maxx, state->maxy, &tl, &br);
-
- offset = i * 4 * 2;
- radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset, 2);
- radeon_emit(cs, tl);
- radeon_emit(cs, br);
- }
- rstate->dirty_mask = 0;
- rstate->atom.num_dw = 0;
+ *tl = S_028240_TL_X(scissor.minx) | S_028240_TL_Y(scissor.miny);
+ *br = S_028244_BR_X(scissor.maxx) | S_028244_BR_Y(scissor.maxy);
}
/**
@@ -1802,12 +1760,15 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
- if (a->occlusion_query_enabled) {
+ if (rctx->b.num_occlusion_queries > 0 &&
+ !a->occlusion_queries_disabled) {
db_count_control |= S_028004_PERFECT_ZPASS_COUNTS(1);
if (rctx->b.chip_class == CAYMAN) {
db_count_control |= S_028004_SAMPLE_RATE(a->log_samples);
}
db_render_override |= S_02800C_NOOP_CULL_DISABLE(1);
+ } else {
+ db_count_control |= S_028004_ZPASS_INCREMENT_DISABLE(1);
}
/* This is to fix a lockup when hyperz and alpha test are enabled at
@@ -2392,6 +2353,12 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0));
r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+ /* This enables pipeline stat & streamout queries.
+ * They are only disabled by blits.
+ */
+ r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_START) | EVENT_INDEX(0));
+
cayman_init_common_regs(cb, rctx->b.chip_class,
rctx->b.family, rctx->screen->b.info.drm_minor);
@@ -2474,12 +2441,6 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
r600_store_context_reg(cb, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
r600_store_context_reg(cb, R_028820_PA_CL_NANINF_CNTL, 0);
- r600_store_context_reg_seq(cb, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4);
- r600_store_value(cb, fui(1.0)); /* CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ */
- r600_store_value(cb, fui(1.0)); /* CM_R_028BEC_PA_CL_GB_VERT_DISC_ADJ */
- r600_store_value(cb, fui(1.0)); /* CM_R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */
- r600_store_value(cb, fui(1.0)); /* CM_R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */
-
r600_store_context_reg_seq(cb, R_028240_PA_SC_GENERIC_SCISSOR_TL, 2);
r600_store_value(cb, 0); /* R_028240_PA_SC_GENERIC_SCISSOR_TL */
r600_store_value(cb, S_028244_BR_X(16384) | S_028244_BR_Y(16384)); /* R_028244_PA_SC_GENERIC_SCISSOR_BR */
@@ -2645,6 +2606,12 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0));
r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+ /* This enables pipeline stat & streamout queries.
+ * They are only disabled by blits.
+ */
+ r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_START) | EVENT_INDEX(0));
+
evergreen_init_common_regs(rctx, cb, rctx->b.chip_class,
rctx->b.family, rctx->screen->b.info.drm_minor);
@@ -2889,12 +2856,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */
r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */
- r600_store_context_reg_seq(cb, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 4);
- r600_store_value(cb, fui(1.0)); /* R_028C0C_PA_CL_GB_VERT_CLIP_ADJ */
- r600_store_value(cb, fui(1.0)); /* R_028C10_PA_CL_GB_VERT_DISC_ADJ */
- r600_store_value(cb, fui(1.0)); /* R_028C14_PA_CL_GB_HORZ_CLIP_ADJ */
- r600_store_value(cb, fui(1.0)); /* R_028C18_PA_CL_GB_HORZ_DISC_ADJ */
-
r600_store_context_reg_seq(cb, R_028240_PA_SC_GENERIC_SCISSOR_TL, 2);
r600_store_value(cb, 0); /* R_028240_PA_SC_GENERIC_SCISSOR_TL */
r600_store_value(cb, S_028244_BR_X(16384) | S_028244_BR_Y(16384)); /* R_028244_PA_SC_GENERIC_SCISSOR_BR */
@@ -3696,8 +3657,8 @@ void evergreen_init_state_functions(struct r600_context *rctx)
r600_init_atom(rctx, &rctx->dsa_state.atom, id++, r600_emit_cso_state, 0);
r600_init_atom(rctx, &rctx->poly_offset_state.atom, id++, evergreen_emit_polygon_offset, 6);
r600_init_atom(rctx, &rctx->rasterizer_state.atom, id++, r600_emit_cso_state, 0);
- r600_init_atom(rctx, &rctx->scissor.atom, id++, evergreen_emit_scissor_state, 0);
- r600_init_atom(rctx, &rctx->viewport.atom, id++, r600_emit_viewport_state, 0);
+ r600_add_atom(rctx, &rctx->b.scissors.atom, id++);
+ r600_add_atom(rctx, &rctx->b.viewports.atom, id++);
r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4);
r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, evergreen_emit_vertex_fetch_shader, 5);
r600_add_atom(rctx, &rctx->b.render_cond_atom, id++);
@@ -3716,7 +3677,6 @@ void evergreen_init_state_functions(struct r600_context *rctx)
rctx->b.b.set_framebuffer_state = evergreen_set_framebuffer_state;
rctx->b.b.set_polygon_stipple = evergreen_set_polygon_stipple;
rctx->b.b.set_min_samples = evergreen_set_min_samples;
- rctx->b.b.set_scissor_states = evergreen_set_scissor_states;
rctx->b.b.set_tess_state = evergreen_set_tess_state;
if (rctx->b.chip_class == EVERGREEN)
rctx->b.b.get_sample_position = evergreen_get_sample_position;
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index ebe8c4a65ba..ece421e3d33 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -1202,11 +1202,11 @@
#define G_030014_LAST_ARRAY(x) (((x) >> 17) & 0x1FFF)
#define C_030014_LAST_ARRAY 0xC001FFFF
#define R_030018_SQ_TEX_RESOURCE_WORD6_0 0x030018
-/* FMASK_BANK_HEIGHT and MAX_ANISO share the first two bits.
+/* FMASK_BANK_HEIGHT and MAX_ANISO_RATIO share the first two bits.
* The former is only used with MSAA textures. */
-#define S_030018_MAX_ANISO(x) (((x) & 0x7) << 0)
-#define G_030018_MAX_ANISO(x) (((x) >> 0) & 0x7)
-#define C_030018_MAX_ANISO 0xFFFFFFF8
+#define S_030018_MAX_ANISO_RATIO(x) (((x) & 0x7) << 0)
+#define G_030018_MAX_ANISO_RATIO(x) (((x) >> 0) & 0x7)
+#define C_030018_MAX_ANISO_RATIO 0xFFFFFFF8
#define S_030018_FMASK_BANK_HEIGHT(x) (((x) & 0x3) << 0)
#define S_030018_PERF_MODULATION(x) (((x) & 0x7) << 3)
#define G_030018_PERF_MODULATION(x) (((x) >> 3) & 0x7)
@@ -1344,9 +1344,9 @@
#define S_03C000_MIP_FILTER(x) (((x) & 0x3) << 15)
#define G_03C000_MIP_FILTER(x) (((x) >> 15) & 0x3)
#define C_03C000_MIP_FILTER 0xFFFE7FFF
-#define S_03C000_MAX_ANISO(x) (((x) & 0x7) << 17)
-#define G_03C000_MAX_ANISO(x) (((x) >> 17) & 0x7)
-#define C_03C000_MAX_ANISO 0xFFF1FFFF
+#define S_03C000_MAX_ANISO_RATIO(x) (((x) & 0x7) << 17)
+#define G_03C000_MAX_ANISO_RATIO(x) (((x) >> 17) & 0x7)
+#define C_03C000_MAX_ANISO_RATIO 0xFFF1FFFF
#define S_03C000_BORDER_COLOR_TYPE(x) (((x) & 0x3) << 20)
#define G_03C000_BORDER_COLOR_TYPE(x) (((x) >> 20) & 0x3)
#define C_03C000_BORDER_COLOR_TYPE 0xFFCFFFFF
@@ -1735,7 +1735,7 @@
#define S_028000_COPY_SAMPLE(x) (((x) & 0x7) << 8)
#define S_028000_COLOR_DISABLE(x) (((x) & 0x1) << 12)
#define R_028004_DB_COUNT_CONTROL 0x00028004
-#define S_028004_ZPASS_INCREMENT_DISABLE (((x) & 0x1) << 0)
+#define S_028004_ZPASS_INCREMENT_DISABLE(x) (((x) & 0x1) << 0)
#define S_028004_PERFECT_ZPASS_COUNTS(x) (((x) & 0x1) << 1)
#define S_028004_SAMPLE_RATE(x) (((x) & 0x7) << 4) /* cayman only */
#define R_028008_DB_DEPTH_VIEW 0x00028008
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index c52d5a9bad0..1a4cc425394 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -54,8 +54,6 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
{
struct r600_context *rctx = (struct r600_context *)ctx;
- r600_suspend_nontimer_queries(&rctx->b);
-
util_blitter_save_vertex_buffer_slot(rctx->blitter, rctx->vertex_buffer_state.vb);
util_blitter_save_vertex_elements(rctx->blitter, rctx->vertex_fetch_shader.cso);
util_blitter_save_vertex_shader(rctx->blitter, rctx->vs_shader);
@@ -67,8 +65,8 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
util_blitter_save_rasterizer(rctx->blitter, rctx->rasterizer_state.cso);
if (op & R600_SAVE_FRAGMENT_STATE) {
- util_blitter_save_viewport(rctx->blitter, &rctx->viewport.state[0]);
- util_blitter_save_scissor(rctx->blitter, &rctx->scissor.scissor[0]);
+ util_blitter_save_viewport(rctx->blitter, &rctx->b.viewports.states[0]);
+ util_blitter_save_scissor(rctx->blitter, &rctx->b.scissors.states[0]);
util_blitter_save_fragment_shader(rctx->blitter, rctx->ps_shader);
util_blitter_save_blend(rctx->blitter, rctx->blend_state.cso);
util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->dsa_state.cso);
@@ -98,7 +96,6 @@ static void r600_blitter_end(struct pipe_context *ctx)
struct r600_context *rctx = (struct r600_context *)ctx;
rctx->b.render_cond_force_off = false;
- r600_resume_nontimer_queries(&rctx->b);
}
static unsigned u_max_sample(struct pipe_resource *r)
@@ -584,7 +581,7 @@ static void r600_copy_global_buffer(struct pipe_context *ctx,
}
static void r600_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
- unsigned offset, unsigned size, unsigned value,
+ uint64_t offset, uint64_t size, unsigned value,
bool is_framebuffer)
{
struct r600_context *rctx = (struct r600_context*)ctx;
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 7a6f957945b..2bc6d3ffce4 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -64,9 +64,8 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
num_dw += R600_MAX_FLUSH_CS_DWORDS + R600_MAX_DRAW_CS_DWORDS;
}
- /* Count in queries_suspend. */
- num_dw += ctx->b.num_cs_dw_nontimer_queries_suspend +
- ctx->b.num_cs_dw_timer_queries_suspend;
+ /* Count in r600_suspend_queries. */
+ num_dw += ctx->b.num_cs_dw_queries_suspend;
/* Count in streamout_end at the end of CS. */
if (ctx->b.streamout.begin_emitted) {
@@ -223,6 +222,16 @@ void r600_flush_emit(struct r600_context *rctx)
cs->buf[cs->cdw++] = 0x0000000A; /* POLL_INTERVAL */
}
+ if (rctx->b.flags & R600_CONTEXT_START_PIPELINE_STATS) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_START) |
+ EVENT_INDEX(0));
+ } else if (rctx->b.flags & R600_CONTEXT_STOP_PIPELINE_STATS) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_STOP) |
+ EVENT_INDEX(0));
+ }
+
if (wait_until) {
/* Use of WAIT_UNTIL is deprecated on Cayman+ */
if (rctx->b.family < CHIP_CAYMAN) {
@@ -295,12 +304,10 @@ void r600_begin_new_cs(struct r600_context *ctx)
r600_mark_atom_dirty(ctx, &ctx->poly_offset_state.atom);
r600_mark_atom_dirty(ctx, &ctx->vgt_state.atom);
r600_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
- ctx->scissor.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
- ctx->scissor.atom.num_dw = R600_MAX_VIEWPORTS * 4;
- r600_mark_atom_dirty(ctx, &ctx->scissor.atom);
- ctx->viewport.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
- ctx->viewport.atom.num_dw = R600_MAX_VIEWPORTS * 8;
- r600_mark_atom_dirty(ctx, &ctx->viewport.atom);
+ ctx->b.scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
+ r600_mark_atom_dirty(ctx, &ctx->b.scissors.atom);
+ ctx->b.viewports.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
+ r600_mark_atom_dirty(ctx, &ctx->b.viewports.atom);
if (ctx->b.chip_class <= EVERGREEN) {
r600_mark_atom_dirty(ctx, &ctx->config_state.atom);
}
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 36b808fbbca..c594f5cb18b 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -365,6 +365,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
return 0;
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index cd0052a519f..6c2a48ca412 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -38,8 +38,6 @@
#define R600_NUM_ATOMS 52
-#define R600_MAX_VIEWPORTS 16
-
/* read caches */
#define R600_CONTEXT_INV_VERTEX_CACHE (R600_CONTEXT_PRIVATE_FLAG << 0)
#define R600_CONTEXT_INV_TEX_CACHE (R600_CONTEXT_PRIVATE_FLAG << 1)
@@ -56,7 +54,7 @@
#define R600_CONTEXT_WAIT_CP_DMA_IDLE (R600_CONTEXT_PRIVATE_FLAG << 10)
/* the number of CS dwords for flushing and drawing */
-#define R600_MAX_FLUSH_CS_DWORDS 16
+#define R600_MAX_FLUSH_CS_DWORDS 18
#define R600_MAX_DRAW_CS_DWORDS 58
#define R600_MAX_USER_CONST_BUFFERS 13
@@ -120,7 +118,7 @@ struct r600_db_state {
struct r600_db_misc_state {
struct r600_atom atom;
- bool occlusion_query_enabled;
+ bool occlusion_queries_disabled;
bool flush_depthstencil_through_cb;
bool flush_depth_inplace;
bool flush_stencil_inplace;
@@ -221,12 +219,6 @@ struct r600_stencil_ref_state {
struct pipe_stencil_ref pipe_state;
};
-struct r600_viewport_state {
- struct r600_atom atom;
- struct pipe_viewport_state state[R600_MAX_VIEWPORTS];
- uint32_t dirty_mask;
-};
-
struct r600_shader_stages_state {
struct r600_atom atom;
unsigned geom_enable;
@@ -412,14 +404,6 @@ struct r600_cso_state
struct r600_command_buffer *cb;
};
-struct r600_scissor_state
-{
- struct r600_atom atom;
- struct pipe_scissor_state scissor[R600_MAX_VIEWPORTS];
- uint32_t dirty_mask;
- bool enable; /* r6xx only */
-};
-
struct r600_fetch_shader {
struct r600_resource *buffer;
unsigned offset;
@@ -480,12 +464,10 @@ struct r600_context {
struct r600_poly_offset_state poly_offset_state;
struct r600_cso_state rasterizer_state;
struct r600_sample_mask sample_mask;
- struct r600_scissor_state scissor;
struct r600_seamless_cube_map seamless_cube_map;
struct r600_config_state config_state;
struct r600_stencil_ref_state stencil_ref;
struct r600_vgt_state vgt_state;
- struct r600_viewport_state viewport;
/* Shaders and shader resources. */
struct r600_cso_state vertex_fetch_shader;
struct r600_shader_state hw_shader_stages[EG_NUM_HW_STAGES];
@@ -730,7 +712,6 @@ void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom);
void r600_emit_vgt_state(struct r600_context *rctx, struct r600_atom *atom);
void r600_emit_clip_misc_state(struct r600_context *rctx, struct r600_atom *atom);
void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom);
-void r600_emit_viewport_state(struct r600_context *rctx, struct r600_atom *atom);
void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a);
void r600_add_atom(struct r600_context *rctx, struct r600_atom *atom, unsigned id);
void r600_init_atom(struct r600_context *rctx, struct r600_atom *atom, unsigned id,
@@ -746,7 +727,6 @@ void r600_set_sample_locations_constant_buffer(struct r600_context *rctx);
uint32_t r600_translate_stencil_op(int s_op);
uint32_t r600_translate_fill(uint32_t func);
unsigned r600_tex_wrap(unsigned wrap);
-unsigned r600_tex_filter(unsigned filter);
unsigned r600_tex_mipfilter(unsigned filter);
unsigned r600_tex_compare(unsigned compare);
bool sampler_state_needs_border_color(const struct pipe_sampler_state *state);
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 3189a1360b1..91e747fa937 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -457,6 +457,7 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
r600_init_command_buffer(&rs->buffer, 30);
+ rs->scissor_enable = state->scissor;
rs->flatshade = state->flatshade;
rs->sprite_coord_enable = state->sprite_coord_enable;
rs->two_side = state->light_twoside;
@@ -501,10 +502,9 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
if (rctx->b.chip_class >= R700) {
sc_mode_cntl |= S_028A4C_FORCE_EOV_REZ_ENABLE(1) |
S_028A4C_R700_ZMM_LINE_OFFSET(1) |
- S_028A4C_R700_VPORT_SCISSOR_ENABLE(state->scissor);
+ S_028A4C_R700_VPORT_SCISSOR_ENABLE(1);
} else {
sc_mode_cntl |= S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1);
- rs->scissor_enable = state->scissor;
}
spi_interp = S_0286D4_FLAT_SHADE_ENA(1);
@@ -558,11 +558,24 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
return rs;
}
+static unsigned r600_tex_filter(unsigned filter, unsigned max_aniso)
+{
+ if (filter == PIPE_TEX_FILTER_LINEAR)
+ return max_aniso > 1 ? V_03C000_SQ_TEX_XY_FILTER_ANISO_BILINEAR
+ : V_03C000_SQ_TEX_XY_FILTER_BILINEAR;
+ else
+ return max_aniso > 1 ? V_03C000_SQ_TEX_XY_FILTER_ANISO_POINT
+ : V_03C000_SQ_TEX_XY_FILTER_POINT;
+}
+
static void *r600_create_sampler_state(struct pipe_context *ctx,
const struct pipe_sampler_state *state)
{
+ struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen;
struct r600_pipe_sampler_state *ss = CALLOC_STRUCT(r600_pipe_sampler_state);
- unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 4 : 0;
+ unsigned max_aniso = rscreen->force_aniso >= 0 ? rscreen->force_aniso
+ : state->max_anisotropy;
+ unsigned max_aniso_ratio = r600_tex_aniso_filter(max_aniso);
if (!ss) {
return NULL;
@@ -576,10 +589,10 @@ static void *r600_create_sampler_state(struct pipe_context *ctx,
S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) |
S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) |
S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) |
- S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter) | aniso_flag_offset) |
- S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter) | aniso_flag_offset) |
+ S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter, max_aniso)) |
+ S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter, max_aniso)) |
S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) |
- S_03C000_MAX_ANISO(r600_tex_aniso_filter(state->max_anisotropy)) |
+ S_03C000_MAX_ANISO_RATIO(max_aniso_ratio) |
S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) |
S_03C000_BORDER_COLOR_TYPE(ss->border_color_use ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0);
/* R_03C004_SQ_TEX_SAMPLER_WORD1_0 */
@@ -777,61 +790,6 @@ static void r600_set_polygon_stipple(struct pipe_context *ctx,
{
}
-static void r600_emit_scissor_state(struct r600_context *rctx, struct r600_atom *atom)
-{
- struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
- struct r600_scissor_state *rstate = &rctx->scissor;
- struct pipe_scissor_state *state;
- bool do_disable_workaround = false;
- uint32_t dirty_mask;
- unsigned i, offset;
- uint32_t tl, br;
-
- if (rctx->b.chip_class == R600 && !rctx->scissor.enable) {
- tl = S_028240_TL_X(0) | S_028240_TL_Y(0) | S_028240_WINDOW_OFFSET_DISABLE(1);
- br = S_028244_BR_X(8192) | S_028244_BR_Y(8192);
- do_disable_workaround = true;
- }
-
- dirty_mask = rstate->dirty_mask;
- while (dirty_mask != 0)
- {
- i = u_bit_scan(&dirty_mask);
- offset = i * 4 * 2;
- radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset, 2);
- if (!do_disable_workaround) {
- state = &rstate->scissor[i];
- tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) |
- S_028240_WINDOW_OFFSET_DISABLE(1);
- br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy);
- }
- radeon_emit(cs, tl);
- radeon_emit(cs, br);
- }
- rstate->dirty_mask = 0;
- rstate->atom.num_dw = 0;
-}
-
-static void r600_set_scissor_states(struct pipe_context *ctx,
- unsigned start_slot,
- unsigned num_scissors,
- const struct pipe_scissor_state *state)
-{
- struct r600_context *rctx = (struct r600_context *)ctx;
- struct r600_scissor_state *rstate = &rctx->scissor;
- int i;
-
- for (i = start_slot ; i < start_slot + num_scissors; i++)
- rstate->scissor[i] = state[i - start_slot];
- rstate->dirty_mask |= ((1 << num_scissors) - 1) << start_slot;
- rstate->atom.num_dw = util_bitcount(rstate->dirty_mask) * 4;
-
- if (rctx->b.chip_class == R600 && !rstate->enable)
- return;
-
- r600_mark_atom_dirty(rctx, &rstate->atom);
-}
-
static struct r600_resource *r600_buffer_create_helper(struct r600_screen *rscreen,
unsigned size, unsigned alignment)
{
@@ -1644,12 +1602,16 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
}
}
- if (a->occlusion_query_enabled) {
+ if (rctx->b.num_occlusion_queries > 0 &&
+ !a->occlusion_queries_disabled) {
if (rctx->b.chip_class >= R700) {
db_render_control |= S_028D0C_R700_PERFECT_ZPASS_COUNTS(1);
}
db_render_override |= S_028D10_NOOP_CULL_DISABLE(1);
+ } else {
+ db_render_control |= S_028D0C_ZPASS_INCREMENT_DISABLE(1);
}
+
if (rctx->db_state.rsurf && rctx->db_state.rsurf->db_htile_surface) {
/* FORCE_OFF means HiZ/HiS are determined by DB_SHADER_CONTROL */
db_render_override |= S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_OFF);
@@ -2173,6 +2135,12 @@ void r600_init_atom_start_cs(struct r600_context *rctx)
r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0));
r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+ /* This enables pipeline stat & streamout queries.
+ * They are only disabled by blits.
+ */
+ r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_START) | EVENT_INDEX(0));
+
family = rctx->b.family;
ps_prio = 0;
vs_prio = 1;
@@ -2424,12 +2392,6 @@ void r600_init_atom_start_cs(struct r600_context *rctx)
r600_store_context_reg(cb, R_028820_PA_CL_NANINF_CNTL, 0);
r600_store_context_reg(cb, R_028A48_PA_SC_MPASS_PS_CNTL, 0);
- r600_store_context_reg_seq(cb, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 4);
- r600_store_value(cb, fui(1.0)); /* R_028C0C_PA_CL_GB_VERT_CLIP_ADJ */
- r600_store_value(cb, fui(1.0)); /* R_028C10_PA_CL_GB_VERT_DISC_ADJ */
- r600_store_value(cb, fui(1.0)); /* R_028C14_PA_CL_GB_HORZ_CLIP_ADJ */
- r600_store_value(cb, fui(1.0)); /* R_028C18_PA_CL_GB_HORZ_DISC_ADJ */
-
r600_store_context_reg_seq(cb, R_0282D0_PA_SC_VPORT_ZMIN_0, 2 * R600_MAX_VIEWPORTS);
for (tmp = 0; tmp < R600_MAX_VIEWPORTS; tmp++) {
r600_store_value(cb, 0); /* R_0282D0_PA_SC_VPORT_ZMIN_0 */
@@ -3132,8 +3094,8 @@ void r600_init_state_functions(struct r600_context *rctx)
r600_init_atom(rctx, &rctx->dsa_state.atom, id++, r600_emit_cso_state, 0);
r600_init_atom(rctx, &rctx->poly_offset_state.atom, id++, r600_emit_polygon_offset, 6);
r600_init_atom(rctx, &rctx->rasterizer_state.atom, id++, r600_emit_cso_state, 0);
- r600_init_atom(rctx, &rctx->scissor.atom, id++, r600_emit_scissor_state, 0);
- r600_init_atom(rctx, &rctx->viewport.atom, id++, r600_emit_viewport_state, 0);
+ r600_add_atom(rctx, &rctx->b.scissors.atom, id++);
+ r600_add_atom(rctx, &rctx->b.viewports.atom, id++);
r600_init_atom(rctx, &rctx->config_state.atom, id++, r600_emit_config_state, 3);
r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4);
r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, r600_emit_vertex_fetch_shader, 5);
@@ -3153,7 +3115,6 @@ void r600_init_state_functions(struct r600_context *rctx)
rctx->b.b.set_framebuffer_state = r600_set_framebuffer_state;
rctx->b.b.set_polygon_stipple = r600_set_polygon_stipple;
rctx->b.b.set_min_samples = r600_set_min_samples;
- rctx->b.b.set_scissor_states = r600_set_scissor_states;
rctx->b.b.get_sample_position = r600_get_sample_position;
rctx->b.dma_copy = r600_dma_copy;
}
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index df41d3f028d..cb40c20a7dd 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -364,14 +364,7 @@ static void r600_bind_rs_state(struct pipe_context *ctx, void *state)
r600_mark_atom_dirty(rctx, &rctx->clip_misc_state.atom);
}
- /* Workaround for a missing scissor enable on r600. */
- if (rctx->b.chip_class == R600 &&
- rs->scissor_enable != rctx->scissor.enable) {
- rctx->scissor.enable = rs->scissor_enable;
- rctx->scissor.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
- rctx->scissor.atom.num_dw = R600_MAX_VIEWPORTS * 4;
- r600_mark_atom_dirty(rctx, &rctx->scissor.atom);
- }
+ r600_set_scissor_enable(&rctx->b, rs->scissor_enable);
/* Re-emit PA_SC_LINE_STIPPLE. */
rctx->last_primitive_type = -1;
@@ -713,47 +706,6 @@ static void r600_update_compressed_colortex_mask(struct r600_samplerview_state *
}
}
-static void r600_set_viewport_states(struct pipe_context *ctx,
- unsigned start_slot,
- unsigned num_viewports,
- const struct pipe_viewport_state *state)
-{
- struct r600_context *rctx = (struct r600_context *)ctx;
- struct r600_viewport_state *rstate = &rctx->viewport;
- int i;
-
- for (i = start_slot; i < start_slot + num_viewports; i++)
- rstate->state[i] = state[i - start_slot];
- rstate->dirty_mask |= ((1 << num_viewports) - 1) << start_slot;
- rstate->atom.num_dw = util_bitcount(rstate->dirty_mask) * 8;
- r600_mark_atom_dirty(rctx, &rctx->viewport.atom);
-}
-
-void r600_emit_viewport_state(struct r600_context *rctx, struct r600_atom *atom)
-{
- struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
- struct r600_viewport_state *rstate = &rctx->viewport;
- struct pipe_viewport_state *state;
- uint32_t dirty_mask;
- unsigned i, offset;
-
- dirty_mask = rstate->dirty_mask;
- while (dirty_mask != 0) {
- i = u_bit_scan(&dirty_mask);
- offset = i * 6 * 4;
- radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE_0 + offset, 6);
- state = &rstate->state[i];
- radeon_emit(cs, fui(state->scale[0])); /* R_02843C_PA_CL_VPORT_XSCALE_0 */
- radeon_emit(cs, fui(state->translate[0])); /* R_028440_PA_CL_VPORT_XOFFSET_0 */
- radeon_emit(cs, fui(state->scale[1])); /* R_028444_PA_CL_VPORT_YSCALE_0 */
- radeon_emit(cs, fui(state->translate[1])); /* R_028448_PA_CL_VPORT_YOFFSET_0 */
- radeon_emit(cs, fui(state->scale[2])); /* R_02844C_PA_CL_VPORT_ZSCALE_0 */
- radeon_emit(cs, fui(state->translate[2])); /* R_028450_PA_CL_VPORT_ZOFFSET_0 */
- }
- rstate->dirty_mask = 0;
- rstate->atom.num_dw = 0;
-}
-
/* Compute the key for the hw shader variant */
static inline union r600_shader_key r600_shader_selector_key(struct pipe_context * ctx,
struct r600_pipe_shader_selector * sel)
@@ -961,6 +913,18 @@ static void r600_bind_ps_state(struct pipe_context *ctx, void *state)
rctx->ps_shader = (struct r600_pipe_shader_selector *)state;
}
+static struct tgsi_shader_info *r600_get_vs_info(struct r600_context *rctx)
+{
+ if (rctx->gs_shader)
+ return &rctx->gs_shader->info;
+ else if (rctx->tes_shader)
+ return &rctx->tes_shader->info;
+ else if (rctx->vs_shader)
+ return &rctx->vs_shader->info;
+ else
+ return NULL;
+}
+
static void r600_bind_vs_state(struct pipe_context *ctx, void *state)
{
struct r600_context *rctx = (struct r600_context *)ctx;
@@ -969,6 +933,7 @@ static void r600_bind_vs_state(struct pipe_context *ctx, void *state)
return;
rctx->vs_shader = (struct r600_pipe_shader_selector *)state;
+ r600_update_vs_writes_viewport_index(&rctx->b, r600_get_vs_info(rctx));
rctx->b.streamout.stride_in_dw = rctx->vs_shader->so.stride;
}
@@ -977,6 +942,7 @@ static void r600_bind_gs_state(struct pipe_context *ctx, void *state)
struct r600_context *rctx = (struct r600_context *)ctx;
rctx->gs_shader = (struct r600_pipe_shader_selector *)state;
+ r600_update_vs_writes_viewport_index(&rctx->b, r600_get_vs_info(rctx));
if (!state)
return;
@@ -995,6 +961,7 @@ static void r600_bind_tes_state(struct pipe_context *ctx, void *state)
struct r600_context *rctx = (struct r600_context *)ctx;
rctx->tes_shader = (struct r600_pipe_shader_selector *)state;
+ r600_update_vs_writes_viewport_index(&rctx->b, r600_get_vs_info(rctx));
if (!state)
return;
@@ -1841,8 +1808,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
ia_switch_on_eop = true;
}
- if (rctx->b.streamout.streamout_enabled ||
- rctx->b.streamout.prims_gen_query_enabled)
+ if (r600_get_strmout_en(&rctx->b))
partial_vs_wave = true;
radeon_set_context_reg(cs, CM_R_028AA8_IA_MULTI_VGT_PARAM,
@@ -2018,7 +1984,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
rctx->b.family == CHIP_RV635) {
/* if we have gs shader or streamout
we need to do a wait idle after every draw */
- if (rctx->gs_shader || rctx->b.streamout.streamout_enabled) {
+ if (rctx->gs_shader || r600_get_strmout_en(&rctx->b)) {
radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
}
}
@@ -2123,17 +2089,6 @@ unsigned r600_tex_wrap(unsigned wrap)
}
}
-unsigned r600_tex_filter(unsigned filter)
-{
- switch (filter) {
- default:
- case PIPE_TEX_FILTER_NEAREST:
- return V_03C000_SQ_TEX_XY_FILTER_POINT;
- case PIPE_TEX_FILTER_LINEAR:
- return V_03C000_SQ_TEX_XY_FILTER_BILINEAR;
- }
-}
-
unsigned r600_tex_mipfilter(unsigned filter)
{
switch (filter) {
@@ -2861,16 +2816,33 @@ static void r600_invalidate_buffer(struct pipe_context *ctx, struct pipe_resourc
}
}
-static void r600_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
+static void r600_set_active_query_state(struct pipe_context *ctx, boolean enable)
{
struct r600_context *rctx = (struct r600_context*)ctx;
- if (rctx->db_misc_state.occlusion_query_enabled != enable) {
- rctx->db_misc_state.occlusion_query_enabled = enable;
+ /* Pipeline stat & streamout queries. */
+ if (enable) {
+ rctx->b.flags &= ~R600_CONTEXT_STOP_PIPELINE_STATS;
+ rctx->b.flags |= R600_CONTEXT_START_PIPELINE_STATS;
+ } else {
+ rctx->b.flags &= ~R600_CONTEXT_START_PIPELINE_STATS;
+ rctx->b.flags |= R600_CONTEXT_STOP_PIPELINE_STATS;
+ }
+
+ /* Occlusion queries. */
+ if (rctx->db_misc_state.occlusion_queries_disabled != !enable) {
+ rctx->db_misc_state.occlusion_queries_disabled = !enable;
r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
}
+static void r600_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
+{
+ struct r600_context *rctx = (struct r600_context*)ctx;
+
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
+}
+
static void r600_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
bool include_draw_vbo)
{
@@ -2911,13 +2883,13 @@ void r600_init_common_state_functions(struct r600_context *rctx)
rctx->b.b.set_constant_buffer = r600_set_constant_buffer;
rctx->b.b.set_sample_mask = r600_set_sample_mask;
rctx->b.b.set_stencil_ref = r600_set_pipe_stencil_ref;
- rctx->b.b.set_viewport_states = r600_set_viewport_states;
rctx->b.b.set_vertex_buffers = r600_set_vertex_buffers;
rctx->b.b.set_index_buffer = r600_set_index_buffer;
rctx->b.b.set_sampler_views = r600_set_sampler_views;
rctx->b.b.sampler_view_destroy = r600_sampler_view_destroy;
rctx->b.b.texture_barrier = r600_texture_barrier;
rctx->b.b.set_stream_output_targets = r600_set_streamout_targets;
+ rctx->b.b.set_active_query_state = r600_set_active_query_state;
rctx->b.b.draw_vbo = r600_draw_vbo;
rctx->b.invalidate_buffer = r600_invalidate_buffer;
rctx->b.set_occlusion_query_state = r600_set_occlusion_query_state;
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index 3d223edb5f4..ecabb340a9c 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -780,7 +780,8 @@
#define S_028D0C_STENCIL_COMPRESS_DISABLE(x) (((x) & 0x1) << 5)
#define S_028D0C_DEPTH_COMPRESS_DISABLE(x) (((x) & 0x1) << 6)
#define S_028D0C_COPY_CENTROID(x) (((x) & 0x1) << 7)
-#define S_028D0C_COPY_SAMPLE(x) (((x) & 0x1) << 8)
+#define S_028D0C_COPY_SAMPLE(x) (((x) & 0x03) << 8)
+#define S_028D0C_ZPASS_INCREMENT_DISABLE(x) (((x) & 0x1) << 11)
#define S_028D0C_R700_PERFECT_ZPASS_COUNTS(x) (((x) & 0x1) << 15)
#define S_028D0C_CONSERVATIVE_Z_EXPORT(x) (((x) & 0x03) << 13)
#define G_028D0C_CONSERVATIVE_Z_EXPORT(x) (((x) >> 13) & 0x03)
@@ -1266,6 +1267,8 @@
#define V_03C000_SQ_TEX_XY_FILTER_POINT 0x00000000
#define V_03C000_SQ_TEX_XY_FILTER_BILINEAR 0x00000001
#define V_03C000_SQ_TEX_XY_FILTER_BICUBIC 0x00000002
+#define V_03C000_SQ_TEX_XY_FILTER_ANISO_POINT 0x00000004
+#define V_03C000_SQ_TEX_XY_FILTER_ANISO_BILINEAR 0x00000005
#define S_03C000_XY_MIN_FILTER(x) (((x) & 0x7) << 12)
#define G_03C000_XY_MIN_FILTER(x) (((x) >> 12) & 0x7)
#define C_03C000_XY_MIN_FILTER 0xFFFF8FFF
@@ -1278,9 +1281,9 @@
#define S_03C000_MIP_FILTER(x) (((x) & 0x3) << 17)
#define G_03C000_MIP_FILTER(x) (((x) >> 17) & 0x3)
#define C_03C000_MIP_FILTER 0xFFF9FFFF
-#define S_03C000_MAX_ANISO(x) (((x) & 0x7) << 19)
-#define G_03C000_MAX_ANISO(x) (((x) >> 19) & 0x7)
-#define C_03C000_MAX_ANISO 0xFFB7FFFF
+#define S_03C000_MAX_ANISO_RATIO(x) (((x) & 0x7) << 19)
+#define G_03C000_MAX_ANISO_RATIO(x) (((x) >> 19) & 0x7)
+#define C_03C000_MAX_ANISO_RATIO 0xFFB7FFFF
#define S_03C000_BORDER_COLOR_TYPE(x) (((x) & 0x3) << 22)
#define G_03C000_BORDER_COLOR_TYPE(x) (((x) >> 22) & 0x3)
#define C_03C000_BORDER_COLOR_TYPE 0xFF3FFFFF
diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources
index eb171f7da5f..f993d75a6ad 100644
--- a/src/gallium/drivers/radeon/Makefile.sources
+++ b/src/gallium/drivers/radeon/Makefile.sources
@@ -11,6 +11,7 @@ C_SOURCES := \
r600_query.h \
r600_streamout.c \
r600_texture.c \
+ r600_viewport.c \
radeon_uvd.c \
radeon_uvd.h \
radeon_vce_40_2_2.c \
diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
index 33ba0fbca9b..47514e91d23 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -102,7 +102,7 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
bool r600_init_resource(struct r600_common_screen *rscreen,
struct r600_resource *res,
- unsigned size, unsigned alignment,
+ uint64_t size, unsigned alignment,
bool use_reusable_pool)
{
struct r600_texture *rtex = (struct r600_texture*)res;
@@ -160,9 +160,18 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D) {
res->domains = RADEON_DOMAIN_VRAM;
flags &= ~RADEON_FLAG_CPU_ACCESS;
- flags |= RADEON_FLAG_NO_CPU_ACCESS;
+ flags |= RADEON_FLAG_NO_CPU_ACCESS |
+ RADEON_FLAG_GTT_WC;
}
+ /* If VRAM is just stolen system memory, allow both VRAM and GTT,
+ * whichever has free space. If a buffer is evicted from VRAM to GTT,
+ * it will stay there.
+ */
+ if (!rscreen->info.has_dedicated_vram &&
+ res->domains == RADEON_DOMAIN_VRAM)
+ res->domains = RADEON_DOMAIN_VRAM_GTT;
+
if (rscreen->debug_flags & DBG_NO_WC)
flags &= ~RADEON_FLAG_GTT_WC;
@@ -192,7 +201,7 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
res->TC_L2_dirty = false;
if (rscreen->debug_flags & DBG_VM && res->b.b.target == PIPE_BUFFER) {
- fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Buffer %u bytes\n",
+ fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Buffer %"PRIu64" bytes\n",
res->gpu_address, res->gpu_address + res->buf->size,
res->buf->size);
}
diff --git a/src/gallium/drivers/radeon/r600_perfcounter.c b/src/gallium/drivers/radeon/r600_perfcounter.c
index f3529a1fe0f..9ab17d9e04c 100644
--- a/src/gallium/drivers/radeon/r600_perfcounter.c
+++ b/src/gallium/drivers/radeon/r600_perfcounter.c
@@ -310,7 +310,6 @@ struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
query->b.b.ops = &batch_query_ops;
query->b.ops = &batch_query_hw_ops;
- query->b.flags = R600_QUERY_HW_FLAG_TIMER;
query->num_counters = num_queries;
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 32bd6e40d32..a7477abea34 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -156,14 +156,8 @@ static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags)
void r600_preflush_suspend_features(struct r600_common_context *ctx)
{
/* suspend queries */
- if (ctx->num_cs_dw_nontimer_queries_suspend) {
- /* Since non-timer queries are suspended during blits,
- * we have to guard against double-suspends. */
- r600_suspend_nontimer_queries(ctx);
- ctx->nontimer_queries_suspended_by_flush = true;
- }
- if (!LIST_IS_EMPTY(&ctx->active_timer_queries))
- r600_suspend_timer_queries(ctx);
+ if (!LIST_IS_EMPTY(&ctx->active_queries))
+ r600_suspend_queries(ctx);
ctx->streamout.suspended = false;
if (ctx->streamout.begin_emitted) {
@@ -180,12 +174,8 @@ void r600_postflush_resume_features(struct r600_common_context *ctx)
}
/* resume queries */
- if (!LIST_IS_EMPTY(&ctx->active_timer_queries))
- r600_resume_timer_queries(ctx);
- if (ctx->nontimer_queries_suspended_by_flush) {
- ctx->nontimer_queries_suspended_by_flush = false;
- r600_resume_nontimer_queries(ctx);
- }
+ if (!LIST_IS_EMPTY(&ctx->active_queries))
+ r600_resume_queries(ctx);
}
static void r600_flush_from_st(struct pipe_context *ctx,
@@ -296,6 +286,7 @@ bool r600_common_context_init(struct r600_common_context *rctx,
LIST_INITHEAD(&rctx->texture_buffers);
r600_init_context_texture_functions(rctx);
+ r600_init_viewport_functions(rctx);
r600_streamout_init(rctx);
r600_query_init(rctx);
cayman_init_msaa(&rctx->b);
@@ -898,6 +889,13 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
rscreen->chip_class = rscreen->info.chip_class;
rscreen->debug_flags = debug_get_flags_option("R600_DEBUG", common_debug_options, 0);
+ rscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1));
+ if (rscreen->force_aniso >= 0) {
+ printf("radeon: Forcing anisotropy filter to %ix\n",
+ /* round down to a power of two */
+ 1 << util_logbase2(rscreen->force_aniso));
+ }
+
util_format_s3tc_init();
pipe_mutex_init(rscreen->aux_context_lock);
pipe_mutex_init(rscreen->gpu_load_mutex);
@@ -973,7 +971,7 @@ bool r600_can_dump_shader(struct r600_common_screen *rscreen,
}
void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
- unsigned offset, unsigned size, unsigned value,
+ uint64_t offset, uint64_t size, unsigned value,
bool is_framebuffer)
{
struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context;
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 062c3193947..a6abe09d438 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -50,7 +50,10 @@
#define R600_RESOURCE_FLAG_FORCE_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
#define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0)
-#define R600_CONTEXT_PRIVATE_FLAG (1u << 1)
+/* Pipeline & streamout query controls. */
+#define R600_CONTEXT_START_PIPELINE_STATS (1u << 1)
+#define R600_CONTEXT_STOP_PIPELINE_STATS (1u << 2)
+#define R600_CONTEXT_PRIVATE_FLAG (1u << 3)
/* special primitive types */
#define R600_PRIM_RECTANGLE_LIST PIPE_PRIM_MAX
@@ -94,9 +97,11 @@
#define DBG_MONOLITHIC_SHADERS (1llu << 47)
#define R600_MAP_BUFFER_ALIGNMENT 64
+#define R600_MAX_VIEWPORTS 16
struct r600_common_context;
struct r600_perfcounters;
+struct tgsi_shader_info;
struct radeon_shader_reloc {
char name[32];
@@ -137,6 +142,9 @@ struct radeon_shader_binary {
void radeon_shader_binary_init(struct radeon_shader_binary *b);
void radeon_shader_binary_clean(struct radeon_shader_binary *b);
+/* Only 32-bit buffer allocations are supported, gallium doesn't support more
+ * at the moment.
+ */
struct r600_resource {
struct u_resource b;
@@ -181,8 +189,8 @@ struct r600_transfer {
};
struct r600_fmask_info {
- unsigned offset;
- unsigned size;
+ uint64_t offset;
+ uint64_t size;
unsigned alignment;
unsigned pitch_in_pixels;
unsigned bank_height;
@@ -191,8 +199,8 @@ struct r600_fmask_info {
};
struct r600_cmask_info {
- unsigned offset;
- unsigned size;
+ uint64_t offset;
+ uint64_t size;
unsigned alignment;
unsigned pitch;
unsigned height;
@@ -212,7 +220,7 @@ struct r600_htile_info {
struct r600_texture {
struct r600_resource resource;
- unsigned size;
+ uint64_t size;
bool is_depth;
unsigned dirty_level_mask; /* each bit says if that mipmap is compressed */
unsigned stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */
@@ -224,7 +232,7 @@ struct r600_texture {
struct r600_fmask_info fmask;
struct r600_cmask_info cmask;
struct r600_resource *cmask_buffer;
- unsigned dcc_offset; /* 0 = disabled */
+ uint64_t dcc_offset; /* 0 = disabled */
unsigned cb_color_info; /* fast clear enable bit */
unsigned color_clear_value[2];
@@ -298,6 +306,9 @@ struct r600_common_screen {
bool has_cp_dma;
bool has_streamout;
+ /* Texture filter settings. */
+ int force_aniso; /* -1 = disabled */
+
/* Auxiliary context. Mainly used to initialize resources.
* It must be locked prior to using and flushed before unlocking. */
struct pipe_context *aux_context;
@@ -388,6 +399,26 @@ struct r600_streamout {
int num_prims_gen_queries;
};
+struct r600_signed_scissor {
+ int minx;
+ int miny;
+ int maxx;
+ int maxy;
+};
+
+struct r600_scissors {
+ struct r600_atom atom;
+ unsigned dirty_mask;
+ struct pipe_scissor_state states[R600_MAX_VIEWPORTS];
+};
+
+struct r600_viewports {
+ struct r600_atom atom;
+ unsigned dirty_mask;
+ struct pipe_viewport_state states[R600_MAX_VIEWPORTS];
+ struct r600_signed_scissor as_scissor[R600_MAX_VIEWPORTS];
+};
+
struct r600_ring {
struct radeon_winsys_cs *cs;
void (*flush)(void *ctx, unsigned flags,
@@ -420,23 +451,20 @@ struct r600_common_context {
/* States. */
struct r600_streamout streamout;
+ struct r600_scissors scissors;
+ struct r600_viewports viewports;
+ bool scissor_enabled;
+ bool vs_writes_viewport_index;
/* Additional context states. */
unsigned flags; /* flush flags */
/* Queries. */
- /* The list of active queries. */
+ /* Maintain the list of active queries for pausing between IBs. */
int num_occlusion_queries;
int num_perfect_occlusion_queries;
- /* Keep track of non-timer queries, because they should be suspended
- * during context flushing.
- * The timer queries (TIME_ELAPSED) shouldn't be suspended for blits,
- * but they should be suspended between IBs. */
- struct list_head active_nontimer_queries;
- struct list_head active_timer_queries;
- unsigned num_cs_dw_nontimer_queries_suspend;
- bool nontimer_queries_suspended_by_flush;
- unsigned num_cs_dw_timer_queries_suspend;
+ struct list_head active_queries;
+ unsigned num_cs_dw_queries_suspend;
/* Additional hardware info. */
unsigned backend_mask;
unsigned max_db; /* for OQ */
@@ -476,7 +504,7 @@ struct r600_common_context {
const struct pipe_box *src_box);
void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
- unsigned offset, unsigned size, unsigned value,
+ uint64_t offset, uint64_t size, unsigned value,
bool is_framebuffer);
void (*blit_decompress_depth)(struct pipe_context *ctx,
@@ -513,7 +541,7 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
unsigned usage);
bool r600_init_resource(struct r600_common_screen *rscreen,
struct r600_resource *res,
- unsigned size, unsigned alignment,
+ uint64_t size, unsigned alignment,
bool use_reusable_pool);
struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
const struct pipe_resource *templ,
@@ -548,7 +576,7 @@ void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resour
bool r600_can_dump_shader(struct r600_common_screen *rscreen,
unsigned processor);
void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
- unsigned offset, unsigned size, unsigned value,
+ uint64_t offset, uint64_t size, unsigned value,
bool is_framebuffer);
struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
const struct pipe_resource *templ);
@@ -566,10 +594,8 @@ void r600_perfcounters_destroy(struct r600_common_screen *rscreen);
/* r600_query.c */
void r600_init_screen_query_functions(struct r600_common_screen *rscreen);
void r600_query_init(struct r600_common_context *rctx);
-void r600_suspend_nontimer_queries(struct r600_common_context *ctx);
-void r600_resume_nontimer_queries(struct r600_common_context *ctx);
-void r600_suspend_timer_queries(struct r600_common_context *ctx);
-void r600_resume_timer_queries(struct r600_common_context *ctx);
+void r600_suspend_queries(struct r600_common_context *ctx);
+void r600_resume_queries(struct r600_common_context *ctx);
void r600_query_init_backend_mask(struct r600_common_context *ctx);
/* r600_streamout.c */
@@ -612,6 +638,14 @@ void r600_texture_disable_dcc(struct r600_common_screen *rscreen,
void r600_init_screen_texture_functions(struct r600_common_screen *rscreen);
void r600_init_context_texture_functions(struct r600_common_context *rctx);
+/* r600_viewport.c */
+void evergreen_apply_scissor_bug_workaround(struct r600_common_context *rctx,
+ struct pipe_scissor_state *scissor);
+void r600_set_scissor_enable(struct r600_common_context *rctx, bool enable);
+void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx,
+ struct tgsi_shader_info *info);
+void r600_init_viewport_functions(struct r600_common_context *rctx);
+
/* cayman_msaa.c */
extern const uint32_t eg_sample_locs_2x[4];
extern const unsigned eg_max_dist_2x;
@@ -639,13 +673,38 @@ r600_resource_reference(struct r600_resource **ptr, struct r600_resource *res)
(struct pipe_resource *)res);
}
+static inline bool r600_get_strmout_en(struct r600_common_context *rctx)
+{
+ return rctx->streamout.streamout_enabled ||
+ rctx->streamout.prims_gen_query_enabled;
+}
+
+#define SQ_TEX_XY_FILTER_POINT 0x00
+#define SQ_TEX_XY_FILTER_BILINEAR 0x01
+#define SQ_TEX_XY_FILTER_ANISO_POINT 0x02
+#define SQ_TEX_XY_FILTER_ANISO_BILINEAR 0x03
+
+static inline unsigned eg_tex_filter(unsigned filter, unsigned max_aniso)
+{
+ if (filter == PIPE_TEX_FILTER_LINEAR)
+ return max_aniso > 1 ? SQ_TEX_XY_FILTER_ANISO_BILINEAR
+ : SQ_TEX_XY_FILTER_BILINEAR;
+ else
+ return max_aniso > 1 ? SQ_TEX_XY_FILTER_ANISO_POINT
+ : SQ_TEX_XY_FILTER_POINT;
+}
+
static inline unsigned r600_tex_aniso_filter(unsigned filter)
{
- if (filter <= 1) return 0;
- if (filter <= 2) return 1;
- if (filter <= 4) return 2;
- if (filter <= 8) return 3;
- /* else */ return 4;
+ if (filter < 2)
+ return 0;
+ if (filter < 4)
+ return 1;
+ if (filter < 8)
+ return 2;
+ if (filter < 16)
+ return 3;
+ return 4;
}
static inline unsigned r600_wavefront_size(enum radeon_family family)
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 7a2d2ee7f31..de6e37b9f62 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -369,13 +369,11 @@ static struct pipe_query *r600_query_hw_create(struct r600_common_context *rctx,
query->result_size = 16;
query->num_cs_dw_begin = 8;
query->num_cs_dw_end = 8;
- query->flags = R600_QUERY_HW_FLAG_TIMER;
break;
case PIPE_QUERY_TIMESTAMP:
query->result_size = 8;
query->num_cs_dw_end = 8;
- query->flags = R600_QUERY_HW_FLAG_TIMER |
- R600_QUERY_HW_FLAG_NO_START;
+ query->flags = R600_QUERY_HW_FLAG_NO_START;
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
@@ -516,10 +514,7 @@ static void r600_query_hw_emit_start(struct r600_common_context *ctx,
query->ops->emit_start(ctx, query, query->buffer.buf, va);
- if (query->flags & R600_QUERY_HW_FLAG_TIMER)
- ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw_end;
- else
- ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw_end;
+ ctx->num_cs_dw_queries_suspend += query->num_cs_dw_end;
}
static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
@@ -590,12 +585,8 @@ static void r600_query_hw_emit_stop(struct r600_common_context *ctx,
query->buffer.results_end += query->result_size;
- if (!(query->flags & R600_QUERY_HW_FLAG_NO_START)) {
- if (query->flags & R600_QUERY_HW_FLAG_TIMER)
- ctx->num_cs_dw_timer_queries_suspend -= query->num_cs_dw_end;
- else
- ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw_end;
- }
+ if (!(query->flags & R600_QUERY_HW_FLAG_NO_START))
+ ctx->num_cs_dw_queries_suspend -= query->num_cs_dw_end;
r600_update_occlusion_query_state(ctx, query->b.type, -1);
r600_update_prims_generated_query_state(ctx, query->b.type, -1);
@@ -730,11 +721,8 @@ boolean r600_query_hw_begin(struct r600_common_context *rctx,
r600_query_hw_emit_start(rctx, query);
- if (query->flags & R600_QUERY_HW_FLAG_TIMER)
- LIST_ADDTAIL(&query->list, &rctx->active_timer_queries);
- else
- LIST_ADDTAIL(&query->list, &rctx->active_nontimer_queries);
- return true;
+ LIST_ADDTAIL(&query->list, &rctx->active_queries);
+ return true;
}
static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
@@ -973,28 +961,14 @@ static void r600_render_condition(struct pipe_context *ctx,
rctx->set_atom_dirty(rctx, atom, query != NULL);
}
-static void r600_suspend_queries(struct r600_common_context *ctx,
- struct list_head *query_list,
- unsigned *num_cs_dw_queries_suspend)
+void r600_suspend_queries(struct r600_common_context *ctx)
{
struct r600_query_hw *query;
- LIST_FOR_EACH_ENTRY(query, query_list, list) {
+ LIST_FOR_EACH_ENTRY(query, &ctx->active_queries, list) {
r600_query_hw_emit_stop(ctx, query);
}
- assert(*num_cs_dw_queries_suspend == 0);
-}
-
-void r600_suspend_nontimer_queries(struct r600_common_context *ctx)
-{
- r600_suspend_queries(ctx, &ctx->active_nontimer_queries,
- &ctx->num_cs_dw_nontimer_queries_suspend);
-}
-
-void r600_suspend_timer_queries(struct r600_common_context *ctx)
-{
- r600_suspend_queries(ctx, &ctx->active_timer_queries,
- &ctx->num_cs_dw_timer_queries_suspend);
+ assert(ctx->num_cs_dw_queries_suspend == 0);
}
static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *ctx,
@@ -1022,35 +996,21 @@ static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *
return num_dw;
}
-static void r600_resume_queries(struct r600_common_context *ctx,
- struct list_head *query_list,
- unsigned *num_cs_dw_queries_suspend)
+void r600_resume_queries(struct r600_common_context *ctx)
{
struct r600_query_hw *query;
- unsigned num_cs_dw = r600_queries_num_cs_dw_for_resuming(ctx, query_list);
+ unsigned num_cs_dw = r600_queries_num_cs_dw_for_resuming(ctx, &ctx->active_queries);
- assert(*num_cs_dw_queries_suspend == 0);
+ assert(ctx->num_cs_dw_queries_suspend == 0);
/* Check CS space here. Resuming must not be interrupted by flushes. */
ctx->need_gfx_cs_space(&ctx->b, num_cs_dw, TRUE);
- LIST_FOR_EACH_ENTRY(query, query_list, list) {
+ LIST_FOR_EACH_ENTRY(query, &ctx->active_queries, list) {
r600_query_hw_emit_start(ctx, query);
}
}
-void r600_resume_nontimer_queries(struct r600_common_context *ctx)
-{
- r600_resume_queries(ctx, &ctx->active_nontimer_queries,
- &ctx->num_cs_dw_nontimer_queries_suspend);
-}
-
-void r600_resume_timer_queries(struct r600_common_context *ctx)
-{
- r600_resume_queries(ctx, &ctx->active_timer_queries,
- &ctx->num_cs_dw_timer_queries_suspend);
-}
-
/* Get backends mask */
void r600_query_init_backend_mask(struct r600_common_context *ctx)
{
@@ -1274,8 +1234,7 @@ void r600_query_init(struct r600_common_context *rctx)
if (((struct r600_common_screen*)rctx->b.screen)->info.num_render_backends > 0)
rctx->b.render_condition = r600_render_condition;
- LIST_INITHEAD(&rctx->active_nontimer_queries);
- LIST_INITHEAD(&rctx->active_timer_queries);
+ LIST_INITHEAD(&rctx->active_queries);
}
void r600_init_screen_query_functions(struct r600_common_screen *rscreen)
diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h
index 8b2c4e3fe93..9f3a917d727 100644
--- a/src/gallium/drivers/radeon/r600_query.h
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -84,8 +84,7 @@ struct r600_query {
enum {
R600_QUERY_HW_FLAG_NO_START = (1 << 0),
- R600_QUERY_HW_FLAG_TIMER = (1 << 1),
- R600_QUERY_HW_FLAG_PREDICATE = (1 << 2),
+ R600_QUERY_HW_FLAG_PREDICATE = (1 << 1),
};
struct r600_query_hw_ops {
diff --git a/src/gallium/drivers/radeon/r600_streamout.c b/src/gallium/drivers/radeon/r600_streamout.c
index e977ed9fa10..fc9ec4859f6 100644
--- a/src/gallium/drivers/radeon/r600_streamout.c
+++ b/src/gallium/drivers/radeon/r600_streamout.c
@@ -311,12 +311,6 @@ void r600_emit_streamout_end(struct r600_common_context *rctx)
* are no buffers bound.
*/
-static bool r600_get_strmout_en(struct r600_common_context *rctx)
-{
- return rctx->streamout.streamout_enabled ||
- rctx->streamout.prims_gen_query_enabled;
-}
-
static void r600_emit_streamout_enable(struct r600_common_context *rctx,
struct r600_atom *atom)
{
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index 4850b73f291..72af5344b70 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -222,10 +222,6 @@ static int r600_setup_surface(struct pipe_screen *screen,
rtex->surface.level[0].nblk_x = pitch_in_bytes_override / rtex->surface.bpe;
rtex->surface.level[0].pitch_bytes = pitch_in_bytes_override;
rtex->surface.level[0].slice_size = pitch_in_bytes_override * rtex->surface.level[0].nblk_y;
- if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
- rtex->surface.stencil_offset =
- rtex->surface.stencil_level[0].offset = rtex->surface.level[0].slice_size;
- }
}
if (offset) {
@@ -482,7 +478,7 @@ static void r600_texture_allocate_fmask(struct r600_common_screen *rscreen,
r600_texture_get_fmask_info(rscreen, rtex,
rtex->resource.b.b.nr_samples, &rtex->fmask);
- rtex->fmask.offset = align(rtex->size, rtex->fmask.alignment);
+ rtex->fmask.offset = align64(rtex->size, rtex->fmask.alignment);
rtex->size = rtex->fmask.offset + rtex->fmask.size;
}
@@ -585,7 +581,7 @@ static void r600_texture_allocate_cmask(struct r600_common_screen *rscreen,
r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
}
- rtex->cmask.offset = align(rtex->size, rtex->cmask.alignment);
+ rtex->cmask.offset = align64(rtex->size, rtex->cmask.alignment);
rtex->size = rtex->cmask.offset + rtex->cmask.size;
if (rscreen->chip_class >= SI)
@@ -747,14 +743,14 @@ void r600_print_texture_info(struct r600_texture *rtex, FILE *f)
(rtex->surface.flags & RADEON_SURF_SCANOUT) != 0);
if (rtex->fmask.size)
- fprintf(f, " FMask: offset=%u, size=%u, alignment=%u, pitch_in_pixels=%u, "
+ fprintf(f, " FMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, pitch_in_pixels=%u, "
"bankh=%u, slice_tile_max=%u, tile_mode_index=%u\n",
rtex->fmask.offset, rtex->fmask.size, rtex->fmask.alignment,
rtex->fmask.pitch_in_pixels, rtex->fmask.bank_height,
rtex->fmask.slice_tile_max, rtex->fmask.tile_mode_index);
if (rtex->cmask.size)
- fprintf(f, " CMask: offset=%u, size=%u, alignment=%u, pitch=%u, "
+ fprintf(f, " CMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, pitch=%u, "
"height=%u, xalign=%u, yalign=%u, slice_tile_max=%u\n",
rtex->cmask.offset, rtex->cmask.size, rtex->cmask.alignment,
rtex->cmask.pitch, rtex->cmask.height, rtex->cmask.xalign,
@@ -768,7 +764,7 @@ void r600_print_texture_info(struct r600_texture *rtex, FILE *f)
rtex->htile.height, rtex->htile.xalign, rtex->htile.yalign);
if (rtex->dcc_offset) {
- fprintf(f, " DCC: offset=%u, size=%"PRIu64", alignment=%"PRIu64"\n",
+ fprintf(f, " DCC: offset=%"PRIu64", size=%"PRIu64", alignment=%"PRIu64"\n",
rtex->dcc_offset, rtex->surface.dcc_size,
rtex->surface.dcc_alignment);
for (i = 0; i <= rtex->surface.last_level; i++)
@@ -873,7 +869,7 @@ r600_texture_create_object(struct pipe_screen *screen,
if (!buf && rtex->surface.dcc_size &&
!(rscreen->debug_flags & DBG_NO_DCC)) {
/* Reserve space for the DCC buffer. */
- rtex->dcc_offset = align(rtex->size, rtex->surface.dcc_alignment);
+ rtex->dcc_offset = align64(rtex->size, rtex->surface.dcc_alignment);
rtex->size = rtex->dcc_offset + rtex->surface.dcc_size;
rtex->cb_color_info |= VI_S_028C70_DCC_ENABLE(1);
}
@@ -947,13 +943,12 @@ static unsigned r600_choose_tiling(struct r600_common_screen *rscreen,
force_tiling = true;
/* Handle common candidates for the linear mode.
- * Compressed textures must always be tiled. */
- if (!force_tiling && !util_format_is_compressed(templ->format)) {
- /* Not everything can be linear, so we cannot enforce it
- * for all textures. */
- if ((rscreen->debug_flags & DBG_NO_TILING) &&
- (!util_format_is_depth_or_stencil(templ->format) ||
- !(templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH)))
+ * Compressed textures and DB surfaces must always be tiled.
+ */
+ if (!force_tiling && !util_format_is_compressed(templ->format) &&
+ (!util_format_is_depth_or_stencil(templ->format) ||
+ templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH)) {
+ if (rscreen->debug_flags & DBG_NO_TILING)
return RADEON_SURF_MODE_LINEAR_ALIGNED;
/* Tiling doesn't work with the 422 (SUBSAMPLED) formats on R600+. */
diff --git a/src/gallium/drivers/radeon/r600_viewport.c b/src/gallium/drivers/radeon/r600_viewport.c
new file mode 100644
index 00000000000..ea558cd22de
--- /dev/null
+++ b/src/gallium/drivers/radeon/r600_viewport.c
@@ -0,0 +1,350 @@
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "r600_cs.h"
+#include "tgsi/tgsi_scan.h"
+
+#define GET_MAX_SCISSOR(rctx) (rctx->chip_class >= EVERGREEN ? 16384 : 8192)
+
+static void r600_set_scissor_states(struct pipe_context *ctx,
+ unsigned start_slot,
+ unsigned num_scissors,
+ const struct pipe_scissor_state *state)
+{
+ struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+ int i;
+
+ for (i = 0; i < num_scissors; i++)
+ rctx->scissors.states[start_slot + i] = state[i];
+
+ if (!rctx->scissor_enabled)
+ return;
+
+ rctx->scissors.dirty_mask |= ((1 << num_scissors) - 1) << start_slot;
+ rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
+}
+
+/* Since the guard band disables clipping, we have to clip per-pixel
+ * using a scissor.
+ */
+static void r600_get_scissor_from_viewport(struct r600_common_context *rctx,
+ const struct pipe_viewport_state *vp,
+ struct r600_signed_scissor *scissor)
+{
+ int tmp;
+
+ /* Convert (-1, -1) and (1, 1) from clip space into window space. */
+ scissor->minx = -vp->scale[0] + vp->translate[0];
+ scissor->miny = -vp->scale[1] + vp->translate[1];
+ scissor->maxx = vp->scale[0] + vp->translate[0];
+ scissor->maxy = vp->scale[1] + vp->translate[1];
+
+ /* r600_draw_rectangle sets this. Disable the scissor. */
+ if (scissor->minx == -1 && scissor->miny == -1 &&
+ scissor->maxx == 1 && scissor->maxy == 1) {
+ scissor->minx = scissor->miny = 0;
+ scissor->maxx = scissor->maxy = GET_MAX_SCISSOR(rctx);
+ }
+
+ /* Handle inverted viewports. */
+ if (scissor->minx > scissor->maxx) {
+ tmp = scissor->minx;
+ scissor->minx = scissor->maxx;
+ scissor->maxx = tmp;
+ }
+ if (scissor->miny > scissor->maxy) {
+ tmp = scissor->miny;
+ scissor->miny = scissor->maxy;
+ scissor->maxy = tmp;
+ }
+}
+
+static void r600_clamp_scissor(struct r600_common_context *rctx,
+ struct pipe_scissor_state *out,
+ struct r600_signed_scissor *scissor)
+{
+ unsigned max_scissor = GET_MAX_SCISSOR(rctx);
+ out->minx = CLAMP(scissor->minx, 0, max_scissor);
+ out->miny = CLAMP(scissor->miny, 0, max_scissor);
+ out->maxx = CLAMP(scissor->maxx, 0, max_scissor);
+ out->maxy = CLAMP(scissor->maxy, 0, max_scissor);
+}
+
+static void r600_clip_scissor(struct pipe_scissor_state *out,
+ struct pipe_scissor_state *clip)
+{
+ out->minx = MAX2(out->minx, clip->minx);
+ out->miny = MAX2(out->miny, clip->miny);
+ out->maxx = MIN2(out->maxx, clip->maxx);
+ out->maxy = MIN2(out->maxy, clip->maxy);
+}
+
+static void r600_scissor_make_union(struct r600_signed_scissor *out,
+ struct r600_signed_scissor *in)
+{
+ out->minx = MIN2(out->minx, in->minx);
+ out->miny = MIN2(out->miny, in->miny);
+ out->maxx = MAX2(out->maxx, in->maxx);
+ out->maxy = MAX2(out->maxy, in->maxy);
+}
+
+void evergreen_apply_scissor_bug_workaround(struct r600_common_context *rctx,
+ struct pipe_scissor_state *scissor)
+{
+ if (rctx->chip_class == EVERGREEN || rctx->chip_class == CAYMAN) {
+ if (scissor->maxx == 0)
+ scissor->minx = 1;
+ if (scissor->maxy == 0)
+ scissor->miny = 1;
+
+ if (rctx->chip_class == CAYMAN &&
+ scissor->maxx == 1 && scissor->maxy == 1)
+ scissor->maxx = 2;
+ }
+}
+
+static void r600_emit_one_scissor(struct r600_common_context *rctx,
+ struct radeon_winsys_cs *cs,
+ struct r600_signed_scissor *vp_scissor,
+ struct pipe_scissor_state *scissor)
+{
+ struct pipe_scissor_state final;
+
+ r600_clamp_scissor(rctx, &final, vp_scissor);
+
+ if (scissor)
+ r600_clip_scissor(&final, scissor);
+
+ evergreen_apply_scissor_bug_workaround(rctx, &final);
+
+ radeon_emit(cs, S_028250_TL_X(final.minx) |
+ S_028250_TL_Y(final.miny) |
+ S_028250_WINDOW_OFFSET_DISABLE(1));
+ radeon_emit(cs, S_028254_BR_X(final.maxx) |
+ S_028254_BR_Y(final.maxy));
+}
+
+/* the range is [-MAX, MAX] */
+#define GET_MAX_VIEWPORT_RANGE(rctx) (rctx->chip_class >= EVERGREEN ? 32768 : 16384)
+
+static void r600_emit_guardband(struct r600_common_context *rctx,
+ struct r600_signed_scissor *vp_as_scissor)
+{
+ struct radeon_winsys_cs *cs = rctx->gfx.cs;
+ struct pipe_viewport_state vp;
+ float left, top, right, bottom, max_range, guardband_x, guardband_y;
+
+ /* Reconstruct the viewport transformation from the scissor. */
+ vp.translate[0] = (vp_as_scissor->minx + vp_as_scissor->maxx) / 2.0;
+ vp.translate[1] = (vp_as_scissor->miny + vp_as_scissor->maxy) / 2.0;
+ vp.scale[0] = vp_as_scissor->maxx - vp.translate[0];
+ vp.scale[1] = vp_as_scissor->maxy - vp.translate[1];
+
+ /* Treat a 0x0 viewport as 1x1 to prevent division by zero. */
+ if (vp_as_scissor->minx == vp_as_scissor->maxx)
+ vp.scale[0] = 0.5;
+ if (vp_as_scissor->miny == vp_as_scissor->maxy)
+ vp.scale[1] = 0.5;
+
+ /* Find the biggest guard band that is inside the supported viewport
+ * range. The guard band is specified as a horizontal and vertical
+ * distance from (0,0) in clip space.
+ *
+ * This is done by applying the inverse viewport transformation
+ * on the viewport limits to get those limits in clip space.
+ *
+ * Use a limit one pixel smaller to allow for some precision error.
+ */
+ max_range = GET_MAX_VIEWPORT_RANGE(rctx) - 1;
+ left = (-max_range - vp.translate[0]) / vp.scale[0];
+ right = ( max_range - vp.translate[0]) / vp.scale[0];
+ top = (-max_range - vp.translate[1]) / vp.scale[1];
+ bottom = ( max_range - vp.translate[1]) / vp.scale[1];
+
+ assert(left <= -1 && top <= -1 && right >= 1 && bottom >= 1);
+
+ guardband_x = MIN2(-left, right);
+ guardband_y = MIN2(-top, bottom);
+
+ /* If any of the GB registers is updated, all of them must be updated. */
+ if (rctx->chip_class >= CAYMAN)
+ radeon_set_context_reg_seq(cs, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4);
+ else
+ radeon_set_context_reg_seq(cs, R600_R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 4);
+
+ radeon_emit(cs, fui(guardband_y)); /* R_028BE8_PA_CL_GB_VERT_CLIP_ADJ */
+ radeon_emit(cs, fui(1.0)); /* R_028BEC_PA_CL_GB_VERT_DISC_ADJ */
+ radeon_emit(cs, fui(guardband_x)); /* R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */
+ radeon_emit(cs, fui(1.0)); /* R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */
+}
+
+static void r600_emit_scissors(struct r600_common_context *rctx, struct r600_atom *atom)
+{
+ struct radeon_winsys_cs *cs = rctx->gfx.cs;
+ struct pipe_scissor_state *states = rctx->scissors.states;
+ unsigned mask = rctx->scissors.dirty_mask;
+ bool scissor_enabled = rctx->scissor_enabled;
+ struct r600_signed_scissor max_vp_scissor;
+ int i;
+
+ /* The simple case: Only 1 viewport is active. */
+ if (!rctx->vs_writes_viewport_index) {
+ struct r600_signed_scissor *vp = &rctx->viewports.as_scissor[0];
+
+ if (!(mask & 1))
+ return;
+
+ radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
+ r600_emit_one_scissor(rctx, cs, vp, scissor_enabled ? &states[0] : NULL);
+ r600_emit_guardband(rctx, vp);
+ rctx->scissors.dirty_mask &= ~1; /* clear one bit */
+ return;
+ }
+
+ /* Shaders can draw to any viewport. Make a union of all viewports. */
+ max_vp_scissor = rctx->viewports.as_scissor[0];
+ for (i = 1; i < R600_MAX_VIEWPORTS; i++)
+ r600_scissor_make_union(&max_vp_scissor,
+ &rctx->viewports.as_scissor[i]);
+
+ while (mask) {
+ int start, count, i;
+
+ u_bit_scan_consecutive_range(&mask, &start, &count);
+
+ radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL +
+ start * 4 * 2, count * 2);
+ for (i = start; i < start+count; i++) {
+ r600_emit_one_scissor(rctx, cs, &rctx->viewports.as_scissor[i],
+ scissor_enabled ? &states[i] : NULL);
+ }
+ }
+ r600_emit_guardband(rctx, &max_vp_scissor);
+ rctx->scissors.dirty_mask = 0;
+}
+
+static void r600_set_viewport_states(struct pipe_context *ctx,
+ unsigned start_slot,
+ unsigned num_viewports,
+ const struct pipe_viewport_state *state)
+{
+ struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+ int i;
+
+ for (i = 0; i < num_viewports; i++) {
+ unsigned index = start_slot + i;
+
+ rctx->viewports.states[index] = state[i];
+ r600_get_scissor_from_viewport(rctx, &state[i],
+ &rctx->viewports.as_scissor[index]);
+ }
+
+ rctx->viewports.dirty_mask |= ((1 << num_viewports) - 1) << start_slot;
+ rctx->scissors.dirty_mask |= ((1 << num_viewports) - 1) << start_slot;
+ rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
+ rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
+}
+
+static void r600_emit_viewports(struct r600_common_context *rctx, struct r600_atom *atom)
+{
+ struct radeon_winsys_cs *cs = rctx->gfx.cs;
+ struct pipe_viewport_state *states = rctx->viewports.states;
+ unsigned mask = rctx->viewports.dirty_mask;
+
+ /* The simple case: Only 1 viewport is active. */
+ if (!rctx->vs_writes_viewport_index) {
+ if (!(mask & 1))
+ return;
+
+ radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6);
+ radeon_emit(cs, fui(states[0].scale[0]));
+ radeon_emit(cs, fui(states[0].translate[0]));
+ radeon_emit(cs, fui(states[0].scale[1]));
+ radeon_emit(cs, fui(states[0].translate[1]));
+ radeon_emit(cs, fui(states[0].scale[2]));
+ radeon_emit(cs, fui(states[0].translate[2]));
+ rctx->viewports.dirty_mask &= ~1; /* clear one bit */
+ return;
+ }
+
+ while (mask) {
+ int start, count, i;
+
+ u_bit_scan_consecutive_range(&mask, &start, &count);
+
+ radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE +
+ start * 4 * 6, count * 6);
+ for (i = start; i < start+count; i++) {
+ radeon_emit(cs, fui(states[i].scale[0]));
+ radeon_emit(cs, fui(states[i].translate[0]));
+ radeon_emit(cs, fui(states[i].scale[1]));
+ radeon_emit(cs, fui(states[i].translate[1]));
+ radeon_emit(cs, fui(states[i].scale[2]));
+ radeon_emit(cs, fui(states[i].translate[2]));
+ }
+ }
+ rctx->viewports.dirty_mask = 0;
+}
+
+void r600_set_scissor_enable(struct r600_common_context *rctx, bool enable)
+{
+ if (rctx->scissor_enabled != enable) {
+ rctx->scissor_enabled = enable;
+ rctx->scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
+ rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
+ }
+}
+
+/**
+ * Normally, we only emit 1 viewport and 1 scissor if no shader is using
+ * the VIEWPORT_INDEX output, and emitting the other viewports and scissors
+ * is delayed. When a shader with VIEWPORT_INDEX appears, this should be
+ * called to emit the rest.
+ */
+void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx,
+ struct tgsi_shader_info *info)
+{
+ if (!info)
+ return;
+
+ rctx->vs_writes_viewport_index = info->writes_viewport_index;
+ if (!rctx->vs_writes_viewport_index)
+ return;
+
+ if (rctx->scissors.dirty_mask)
+ rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
+ if (rctx->viewports.dirty_mask)
+ rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
+}
+
+void r600_init_viewport_functions(struct r600_common_context *rctx)
+{
+ rctx->scissors.atom.emit = r600_emit_scissors;
+ rctx->viewports.atom.emit = r600_emit_viewports;
+
+ rctx->scissors.atom.num_dw = (2 + 16 * 2) + 6;
+ rctx->viewports.atom.num_dw = 2 + 16 * 6;
+
+ rctx->b.set_scissor_states = r600_set_scissor_states;
+ rctx->b.set_viewport_states = r600_set_viewport_states;
+}
diff --git a/src/gallium/drivers/radeon/r600d_common.h b/src/gallium/drivers/radeon/r600d_common.h
index eeec6ef7385..c8cb5e217e8 100644
--- a/src/gallium/drivers/radeon/r600d_common.h
+++ b/src/gallium/drivers/radeon/r600d_common.h
@@ -220,4 +220,25 @@
/*CIK+*/
#define R_0300FC_CP_STRMOUT_CNTL 0x0300FC
+#define R600_R_028C0C_PA_CL_GB_VERT_CLIP_ADJ 0x028C0C
+#define CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ 0x28be8
+#define R_02843C_PA_CL_VPORT_XSCALE 0x02843C
+
+#define R_028250_PA_SC_VPORT_SCISSOR_0_TL 0x028250
+#define S_028250_TL_X(x) (((x) & 0x7FFF) << 0)
+#define G_028250_TL_X(x) (((x) >> 0) & 0x7FFF)
+#define C_028250_TL_X 0xFFFF8000
+#define S_028250_TL_Y(x) (((x) & 0x7FFF) << 16)
+#define G_028250_TL_Y(x) (((x) >> 16) & 0x7FFF)
+#define C_028250_TL_Y 0x8000FFFF
+#define S_028250_WINDOW_OFFSET_DISABLE(x) (((x) & 0x1) << 31)
+#define G_028250_WINDOW_OFFSET_DISABLE(x) (((x) >> 31) & 0x1)
+#define C_028250_WINDOW_OFFSET_DISABLE 0x7FFFFFFF
+#define S_028254_BR_X(x) (((x) & 0x7FFF) << 0)
+#define G_028254_BR_X(x) (((x) >> 0) & 0x7FFF)
+#define C_028254_BR_X 0xFFFF8000
+#define S_028254_BR_Y(x) (((x) & 0x7FFF) << 16)
+#define G_028254_BR_Y(x) (((x) >> 16) & 0x7FFF)
+#define C_028254_BR_Y 0x8000FFFF
+
#endif
diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c
index 233f46091a4..098baf20797 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.c
+++ b/src/gallium/drivers/radeon/radeon_uvd.c
@@ -1003,7 +1003,7 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size;
dec->msg->body.decode.bsd_size = bs_size;
- dec->msg->body.decode.db_pitch = dec->base.width;
+ dec->msg->body.decode.db_pitch = align(dec->base.width, 16);
dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target);
if (((struct r600_common_screen*)dec->screen)->family >= CHIP_STONEY)
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index baecca72383..0c03652081c 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -245,6 +245,7 @@ struct radeon_info {
enum chip_class chip_class;
uint64_t gart_size;
uint64_t vram_size;
+ bool has_dedicated_vram;
boolean has_virtual_memory;
bool gfx_ib_pad_with_type2;
boolean has_sdma;
@@ -449,7 +450,7 @@ struct radeon_winsys {
* \return The created buffer object.
*/
struct pb_buffer *(*buffer_create)(struct radeon_winsys *ws,
- unsigned size,
+ uint64_t size,
unsigned alignment,
boolean use_reusable_pool,
enum radeon_bo_domain domain,
@@ -528,7 +529,7 @@ struct radeon_winsys {
* \param Size Size in bytes for the new buffer.
*/
struct pb_buffer *(*buffer_from_ptr)(struct radeon_winsys *ws,
- void *pointer, unsigned size);
+ void *pointer, uint64_t size);
/**
* Whether the buffer was created from a user pointer.
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index c5ea8b17119..54da7a20203 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -52,8 +52,6 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
{
struct si_context *sctx = (struct si_context *)ctx;
- r600_suspend_nontimer_queries(&sctx->b);
-
util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer);
util_blitter_save_vertex_elements(sctx->blitter, sctx->vertex_elements);
util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader.cso);
@@ -70,8 +68,8 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref.state);
util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader.cso);
util_blitter_save_sample_mask(sctx->blitter, sctx->sample_mask.sample_mask);
- util_blitter_save_viewport(sctx->blitter, &sctx->viewports.states[0]);
- util_blitter_save_scissor(sctx->blitter, &sctx->scissors.states[0]);
+ util_blitter_save_viewport(sctx->blitter, &sctx->b.viewports.states[0]);
+ util_blitter_save_scissor(sctx->blitter, &sctx->b.scissors.states[0]);
}
if (op & SI_SAVE_FRAMEBUFFER)
@@ -95,7 +93,6 @@ static void si_blitter_end(struct pipe_context *ctx)
struct si_context *sctx = (struct si_context *)ctx;
sctx->b.render_cond_force_off = false;
- r600_resume_nontimer_queries(&sctx->b);
}
static unsigned u_max_sample(struct pipe_resource *r)
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index dc62415823e..001ddd4bfae 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -124,7 +124,7 @@ static unsigned get_tc_l2_flag(struct si_context *sctx, bool is_framebuffer)
static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst,
struct pipe_resource *src, unsigned byte_count,
- unsigned remaining_size, unsigned *flags)
+ uint64_t remaining_size, unsigned *flags)
{
si_need_cs_space(sctx);
@@ -158,7 +158,7 @@ static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst
#define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - CP_DMA_ALIGNMENT)
static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
- unsigned offset, unsigned size, unsigned value,
+ uint64_t offset, uint64_t size, unsigned value,
bool is_framebuffer)
{
struct si_context *sctx = (struct si_context*)ctx;
@@ -180,7 +180,7 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
sctx->b.gfx.cs,
PIPE_TRANSFER_WRITE);
map += offset;
- for (unsigned i = 0; i < size; i++) {
+ for (uint64_t i = 0; i < size; i++) {
unsigned byte_within_dword = (offset + i) % 4;
*map++ = (value >> (byte_within_dword * 8)) & 0xff;
}
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 6dd2e4fd89d..b5557d800c7 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -746,6 +746,55 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s
buffers->desc.list_dirty = true;
}
+/* SHADER BUFFERS */
+
+static void si_set_shader_buffers(struct pipe_context *ctx, unsigned shader,
+ unsigned start_slot, unsigned count,
+ struct pipe_shader_buffer *sbuffers)
+{
+ struct si_context *sctx = (struct si_context *)ctx;
+ struct si_buffer_resources *buffers = &sctx->shader_buffers[shader];
+ unsigned i;
+
+ assert(start_slot + count <= SI_NUM_SHADER_BUFFERS);
+
+ for (i = 0; i < count; ++i) {
+ struct pipe_shader_buffer *sbuffer = sbuffers ? &sbuffers[i] : NULL;
+ struct r600_resource *buf;
+ unsigned slot = start_slot + i;
+ uint32_t *desc = buffers->desc.list + slot * 4;
+ uint64_t va;
+
+ if (!sbuffer || !sbuffer->buffer) {
+ pipe_resource_reference(&buffers->buffers[slot], NULL);
+ memset(desc, 0, sizeof(uint32_t) * 4);
+ buffers->desc.enabled_mask &= ~(1llu << slot);
+ continue;
+ }
+
+ buf = (struct r600_resource *)sbuffer->buffer;
+ va = buf->gpu_address + sbuffer->buffer_offset;
+
+ desc[0] = va;
+ desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
+ S_008F04_STRIDE(0);
+ desc[2] = sbuffer->buffer_size;
+ desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+ S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+
+ pipe_resource_reference(&buffers->buffers[slot], &buf->b.b);
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, buf,
+ buffers->shader_usage, buffers->priority);
+ buffers->desc.enabled_mask |= 1llu << slot;
+ }
+
+ buffers->desc.list_dirty = true;
+}
+
/* RING BUFFERS */
void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
@@ -883,6 +932,12 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
SI_CONTEXT_VS_PARTIAL_FLUSH;
}
+ /* All readers of the streamout targets need to be finished before we can
+ * start writing to the targets.
+ */
+ if (num_targets)
+ sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
+
/* Streamout buffers must be bound in 2 places:
* 1) in VGT by setting the VGT_STRMOUT registers
* 2) as shader resources
@@ -977,6 +1032,30 @@ void si_update_compressed_colortex_masks(struct si_context *sctx)
/* BUFFER DISCARD/INVALIDATION */
+/** Reset descriptors of buffer resources after \p buf has been invalidated. */
+static void si_reset_buffer_resources(struct si_context *sctx,
+ struct si_buffer_resources *buffers,
+ struct pipe_resource *buf,
+ uint64_t old_va)
+{
+ uint64_t mask = buffers->desc.enabled_mask;
+
+ while (mask) {
+ unsigned i = u_bit_scan64(&mask);
+ if (buffers->buffers[i] == buf) {
+ si_desc_reset_buffer_offset(&sctx->b.b,
+ buffers->desc.list + i*4,
+ old_va, buf);
+ buffers->desc.list_dirty = true;
+
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
+ (struct r600_resource *)buf,
+ buffers->shader_usage,
+ buffers->priority);
+ }
+ }
+}
+
/* Reallocate a buffer a update all resource bindings where the buffer is
* bound.
*
@@ -1048,23 +1127,12 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
}
}
- /* Constant buffers. */
+ /* Constant and shader buffers. */
for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
- struct si_buffer_resources *buffers = &sctx->const_buffers[shader];
- uint64_t mask = buffers->desc.enabled_mask;
-
- while (mask) {
- unsigned i = u_bit_scan64(&mask);
- if (buffers->buffers[i] == buf) {
- si_desc_reset_buffer_offset(ctx, buffers->desc.list + i*4,
- old_va, buf);
- buffers->desc.list_dirty = true;
-
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
- rbuffer, buffers->shader_usage,
- buffers->priority);
- }
- }
+ si_reset_buffer_resources(sctx, &sctx->const_buffers[shader],
+ buf, old_va);
+ si_reset_buffer_resources(sctx, &sctx->shader_buffers[shader],
+ buf, old_va);
}
/* Texture buffers - update virtual addresses in sampler view descriptors. */
@@ -1244,6 +1312,7 @@ void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom)
si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc, base, false);
si_emit_shader_pointer(sctx, &sctx->const_buffers[i].desc, base, false);
+ si_emit_shader_pointer(sctx, &sctx->shader_buffers[i].desc, base, false);
si_emit_shader_pointer(sctx, &sctx->samplers[i].views.desc, base, false);
si_emit_shader_pointer(sctx, &sctx->images[i].desc, base, false);
}
@@ -1263,6 +1332,9 @@ void si_init_all_descriptors(struct si_context *sctx)
si_init_buffer_resources(&sctx->rw_buffers[i],
SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
RADEON_USAGE_READWRITE, RADEON_PRIO_RINGS_STREAMOUT);
+ si_init_buffer_resources(&sctx->shader_buffers[i],
+ SI_NUM_SHADER_BUFFERS, SI_SGPR_SHADER_BUFFERS,
+ RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RW_BUFFER);
si_init_descriptors(&sctx->samplers[i].views.desc,
SI_SGPR_SAMPLERS, 16, SI_NUM_SAMPLERS,
@@ -1280,6 +1352,7 @@ void si_init_all_descriptors(struct si_context *sctx)
sctx->b.b.bind_sampler_states = si_bind_sampler_states;
sctx->b.b.set_shader_images = si_set_shader_images;
sctx->b.b.set_constant_buffer = si_set_constant_buffer;
+ sctx->b.b.set_shader_buffers = si_set_shader_buffers;
sctx->b.b.set_sampler_views = si_set_sampler_views;
sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
sctx->b.invalidate_buffer = si_invalidate_buffer;
@@ -1302,6 +1375,7 @@ bool si_upload_shader_descriptors(struct si_context *sctx)
for (i = 0; i < SI_NUM_SHADERS; i++) {
if (!si_upload_descriptors(sctx, &sctx->const_buffers[i].desc) ||
!si_upload_descriptors(sctx, &sctx->rw_buffers[i].desc) ||
+ !si_upload_descriptors(sctx, &sctx->shader_buffers[i].desc) ||
!si_upload_descriptors(sctx, &sctx->samplers[i].views.desc) ||
!si_upload_descriptors(sctx, &sctx->images[i].desc))
return false;
@@ -1316,6 +1390,7 @@ void si_release_all_descriptors(struct si_context *sctx)
for (i = 0; i < SI_NUM_SHADERS; i++) {
si_release_buffer_resources(&sctx->const_buffers[i]);
si_release_buffer_resources(&sctx->rw_buffers[i]);
+ si_release_buffer_resources(&sctx->shader_buffers[i]);
si_release_sampler_views(&sctx->samplers[i].views);
si_release_image_views(&sctx->images[i]);
}
@@ -1329,6 +1404,7 @@ void si_all_descriptors_begin_new_cs(struct si_context *sctx)
for (i = 0; i < SI_NUM_SHADERS; i++) {
si_buffer_resources_begin_new_cs(sctx, &sctx->const_buffers[i]);
si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers[i]);
+ si_buffer_resources_begin_new_cs(sctx, &sctx->shader_buffers[i]);
si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i].views);
si_image_views_begin_new_cs(sctx, &sctx->images[i]);
}
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 8c900a4ecb6..b621b55abd3 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -155,7 +155,8 @@ void si_begin_new_cs(struct si_context *ctx)
SI_CONTEXT_INV_VMEM_L1 |
SI_CONTEXT_INV_GLOBAL_L2 |
SI_CONTEXT_INV_SMEM_L1 |
- SI_CONTEXT_INV_ICACHE;
+ SI_CONTEXT_INV_ICACHE |
+ R600_CONTEXT_START_PIPELINE_STATS;
/* set all valid group as dirty so they get reemited on
* next draw command
@@ -185,10 +186,10 @@ void si_begin_new_cs(struct si_context *ctx)
si_mark_atom_dirty(ctx, &ctx->b.render_cond_atom);
si_all_descriptors_begin_new_cs(ctx);
- ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
- ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
- si_mark_atom_dirty(ctx, &ctx->scissors.atom);
- si_mark_atom_dirty(ctx, &ctx->viewports.atom);
+ ctx->b.scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
+ ctx->b.viewports.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
+ si_mark_atom_dirty(ctx, &ctx->b.scissors.atom);
+ si_mark_atom_dirty(ctx, &ctx->b.viewports.atom);
r600_postflush_resume_features(&ctx->b);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 41bb84d68df..6a990ed64c3 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -308,6 +308,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_QUERY_MEMORY_INFO:
case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
+ case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
return 1;
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
@@ -332,9 +333,12 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
return 4;
+ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+ return HAVE_LLVM >= 0x0309 ? 4 : 0;
case PIPE_CAP_GLSL_FEATURE_LEVEL:
- return HAVE_LLVM >= 0x0307 ? 410 : 330;
+ return HAVE_LLVM >= 0x0309 ? 420 :
+ HAVE_LLVM >= 0x0307 ? 410 : 330;
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
return MIN2(sscreen->b.info.vram_size, 0xFFFFFFFF);
@@ -353,7 +357,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_DRAW_PARAMETERS:
case PIPE_CAP_MULTI_DRAW_INDIRECT:
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
- case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
case PIPE_CAP_QUERY_BUFFER_OBJECT:
@@ -401,7 +404,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
return 8;
case PIPE_CAP_MAX_VIEWPORTS:
- return SI_MAX_VIEWPORTS;
+ return R600_MAX_VIEWPORTS;
/* Timer queries, present when the clock frequency is non zero. */
case PIPE_CAP_QUERY_TIMESTAMP:
@@ -539,7 +542,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
- return 0;
+ return HAVE_LLVM >= 0x0309 ? SI_NUM_SHADER_BUFFERS : 0;
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return HAVE_LLVM >= 0x0309 ? SI_NUM_IMAGES : 0;
}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 4158fc5461e..0398b1df61e 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -76,7 +76,6 @@
#define SI_IS_TRACE_POINT(x) (((x) & 0xcafe0000) == 0xcafe0000)
#define SI_GET_TRACE_POINT_ID(x) ((x) & 0xffff)
-#define SI_MAX_VIEWPORTS 16
#define SI_MAX_BORDER_COLORS 4096
struct si_compute;
@@ -173,18 +172,6 @@ struct si_sample_mask {
uint16_t sample_mask;
};
-struct si_scissors {
- struct r600_atom atom;
- unsigned dirty_mask;
- struct pipe_scissor_state states[SI_MAX_VIEWPORTS];
-};
-
-struct si_viewports {
- struct r600_atom atom;
- unsigned dirty_mask;
- struct pipe_viewport_state states[SI_MAX_VIEWPORTS];
-};
-
/* A shader state consists of the shader selector, which is a constant state
* object shared by multiple contexts and shouldn't be modified, and
* the current shader variant selected for this context.
@@ -228,8 +215,6 @@ struct si_context {
struct r600_atom clip_regs;
struct si_clip_state clip_state;
struct si_shader_data shader_userdata;
- struct si_scissors scissors;
- struct si_viewports viewports;
struct si_stencil_ref stencil_ref;
struct r600_atom spi_map;
@@ -256,6 +241,7 @@ struct si_context {
struct si_descriptors vertex_buffers;
struct si_buffer_resources const_buffers[SI_NUM_SHADERS];
struct si_buffer_resources rw_buffers[SI_NUM_SHADERS];
+ struct si_buffer_resources shader_buffers[SI_NUM_SHADERS];
struct si_textures_info samplers[SI_NUM_SHADERS];
struct si_images_info images[SI_NUM_SHADERS];
@@ -289,6 +275,7 @@ struct si_context {
bool db_stencil_clear;
bool db_stencil_disable_expclear;
unsigned ps_db_shader_control;
+ bool occlusion_queries_disabled;
/* Emitted draw state. */
int last_base_vertex;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 08da3e37550..c58467ddcb0 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -98,6 +98,7 @@ struct si_shader_context
LLVMValueRef const_buffers[SI_NUM_CONST_BUFFERS];
LLVMValueRef lds;
LLVMValueRef *constants[SI_NUM_CONST_BUFFERS];
+ LLVMValueRef shader_buffers[SI_NUM_SHADER_BUFFERS];
LLVMValueRef sampler_views[SI_NUM_SAMPLERS];
LLVMValueRef sampler_states[SI_NUM_SAMPLERS];
LLVMValueRef fmasks[SI_NUM_USER_SAMPLERS];
@@ -2775,6 +2776,24 @@ static void membar_emit(
emit_optimization_barrier(ctx);
}
+static LLVMValueRef
+shader_buffer_fetch_rsrc(struct si_shader_context *ctx,
+ const struct tgsi_full_src_register *reg)
+{
+ LLVMValueRef ind_index;
+ LLVMValueRef rsrc_ptr;
+
+ if (!reg->Register.Indirect)
+ return ctx->shader_buffers[reg->Register.Index];
+
+ ind_index = get_bounded_indirect_index(ctx, &reg->Indirect,
+ reg->Register.Index,
+ SI_NUM_SHADER_BUFFERS);
+
+ rsrc_ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_SHADER_BUFFERS);
+ return build_indexed_load_const(ctx, rsrc_ptr, ind_index);
+}
+
static bool tgsi_is_array_sampler(unsigned target)
{
return target == TGSI_TEXTURE_1D_ARRAY ||
@@ -2924,32 +2943,46 @@ static void image_append_args(
}
/**
+ * Given a 256 bit resource, extract the top half (which stores the buffer
+ * resource in the case of textures and images).
+ */
+static LLVMValueRef extract_rsrc_top_half(
+ struct si_shader_context *ctx,
+ LLVMValueRef rsrc)
+{
+ struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
+ struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
+ LLVMTypeRef v2i128 = LLVMVectorType(ctx->i128, 2);
+
+ rsrc = LLVMBuildBitCast(gallivm->builder, rsrc, v2i128, "");
+ rsrc = LLVMBuildExtractElement(gallivm->builder, rsrc, bld_base->uint_bld.one, "");
+ rsrc = LLVMBuildBitCast(gallivm->builder, rsrc, ctx->v4i32, "");
+
+ return rsrc;
+}
+
+/**
* Append the resource and indexing arguments for buffer intrinsics.
*
- * \param rsrc the 256 bit resource
- * \param index index into the buffer
+ * \param rsrc the v4i32 buffer resource
+ * \param index index into the buffer (stride-based)
+ * \param offset byte offset into the buffer
*/
static void buffer_append_args(
struct si_shader_context *ctx,
struct lp_build_emit_data *emit_data,
LLVMValueRef rsrc,
LLVMValueRef index,
+ LLVMValueRef offset,
bool atomic)
{
- struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
- struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
const struct tgsi_full_instruction *inst = emit_data->inst;
- LLVMTypeRef v2i128 = LLVMVectorType(ctx->i128, 2);
LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
- rsrc = LLVMBuildBitCast(gallivm->builder, rsrc, v2i128, "");
- rsrc = LLVMBuildExtractElement(gallivm->builder, rsrc, bld_base->uint_bld.one, "");
- rsrc = LLVMBuildBitCast(gallivm->builder, rsrc, ctx->v4i32, "");
-
emit_data->args[emit_data->arg_count++] = rsrc;
emit_data->args[emit_data->arg_count++] = index; /* vindex */
- emit_data->args[emit_data->arg_count++] = bld_base->uint_bld.zero; /* voffset */
+ emit_data->args[emit_data->arg_count++] = offset; /* voffset */
if (!atomic) {
emit_data->args[emit_data->arg_count++] =
inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ?
@@ -2966,24 +2999,73 @@ static void load_fetch_args(
struct gallivm_state *gallivm = bld_base->base.gallivm;
const struct tgsi_full_instruction * inst = emit_data->inst;
unsigned target = inst->Memory.Texture;
- LLVMValueRef coords;
LLVMValueRef rsrc;
emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
- image_fetch_rsrc(bld_base, &inst->Src[0], false, &rsrc);
- coords = image_fetch_coords(bld_base, inst, 1);
+ if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef offset;
+ LLVMValueRef tmp;
- if (target == TGSI_TEXTURE_BUFFER) {
- buffer_append_args(ctx, emit_data, rsrc, coords, false);
+ rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0]);
+
+ tmp = lp_build_emit_fetch(bld_base, inst, 1, 0);
+ offset = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, "");
+
+ buffer_append_args(ctx, emit_data, rsrc, bld_base->uint_bld.zero,
+ offset, false);
} else {
- emit_data->args[0] = coords;
- emit_data->args[1] = rsrc;
- emit_data->args[2] = lp_build_const_int32(gallivm, 15); /* dmask */
- emit_data->arg_count = 3;
+ LLVMValueRef coords;
- image_append_args(ctx, emit_data, target, false);
+ image_fetch_rsrc(bld_base, &inst->Src[0], false, &rsrc);
+ coords = image_fetch_coords(bld_base, inst, 1);
+
+ if (target == TGSI_TEXTURE_BUFFER) {
+ rsrc = extract_rsrc_top_half(ctx, rsrc);
+ buffer_append_args(ctx, emit_data, rsrc, coords,
+ bld_base->uint_bld.zero, false);
+ } else {
+ emit_data->args[0] = coords;
+ emit_data->args[1] = rsrc;
+ emit_data->args[2] = lp_build_const_int32(gallivm, 15); /* dmask */
+ emit_data->arg_count = 3;
+
+ image_append_args(ctx, emit_data, target, false);
+ }
+ }
+}
+
+static void load_emit_buffer(struct si_shader_context *ctx,
+ struct lp_build_emit_data *emit_data)
+{
+ const struct tgsi_full_instruction *inst = emit_data->inst;
+ struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ uint writemask = inst->Dst[0].Register.WriteMask;
+ uint count = util_last_bit(writemask);
+ const char *intrinsic_name;
+ LLVMTypeRef dst_type;
+
+ switch (count) {
+ case 1:
+ intrinsic_name = "llvm.amdgcn.buffer.load.f32";
+ dst_type = ctx->f32;
+ break;
+ case 2:
+ intrinsic_name = "llvm.amdgcn.buffer.load.v2f32";
+ dst_type = LLVMVectorType(ctx->f32, 2);
+ break;
+ default: // 3 & 4
+ intrinsic_name = "llvm.amdgcn.buffer.load.v4f32";
+ dst_type = ctx->v4f32;
+ count = 4;
}
+
+ emit_data->output[emit_data->chan] = lp_build_intrinsic(
+ builder, intrinsic_name, dst_type,
+ emit_data->args, emit_data->arg_count,
+ LLVMReadOnlyAttribute | LLVMNoUnwindAttribute);
}
static void load_emit(
@@ -2995,18 +3077,23 @@ static void load_emit(
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
const struct tgsi_full_instruction * inst = emit_data->inst;
- unsigned target = inst->Memory.Texture;
char intrinsic_name[32];
char coords_type[8];
if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
emit_optimization_barrier(ctx);
- if (target == TGSI_TEXTURE_BUFFER) {
- emit_data->output[emit_data->chan] = lp_build_intrinsic(
- builder, "llvm.amdgcn.buffer.load.format.v4f32", emit_data->dst_type,
- emit_data->args, emit_data->arg_count,
- LLVMReadOnlyAttribute | LLVMNoUnwindAttribute);
+ if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
+ load_emit_buffer(ctx, emit_data);
+ return;
+ }
+
+ if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
+ emit_data->output[emit_data->chan] =
+ lp_build_intrinsic(
+ builder, "llvm.amdgcn.buffer.load.format.v4f32", emit_data->dst_type,
+ emit_data->args, emit_data->arg_count,
+ LLVMReadOnlyAttribute | LLVMNoUnwindAttribute);
} else {
build_int_type_name(LLVMTypeOf(emit_data->args[0]),
coords_type, sizeof(coords_type));
@@ -3028,39 +3115,129 @@ static void store_fetch_args(
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
const struct tgsi_full_instruction * inst = emit_data->inst;
- struct tgsi_full_src_register image;
- unsigned target = inst->Memory.Texture;
+ struct tgsi_full_src_register memory;
LLVMValueRef chans[4];
LLVMValueRef data;
- LLVMValueRef coords;
LLVMValueRef rsrc;
unsigned chan;
emit_data->dst_type = LLVMVoidTypeInContext(gallivm->context);
- image = tgsi_full_src_register_from_dst(&inst->Dst[0]);
- coords = image_fetch_coords(bld_base, inst, 0);
-
for (chan = 0; chan < 4; ++chan) {
chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan);
}
data = lp_build_gather_values(gallivm, chans, 4);
- if (target == TGSI_TEXTURE_BUFFER) {
- image_fetch_rsrc(bld_base, &image, false, &rsrc);
- emit_data->args[0] = data;
- emit_data->arg_count = 1;
+ emit_data->args[emit_data->arg_count++] = data;
+
+ memory = tgsi_full_src_register_from_dst(&inst->Dst[0]);
- buffer_append_args(ctx, emit_data, rsrc, coords, false);
+ if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
+ LLVMValueRef offset;
+ LLVMValueRef tmp;
+
+ rsrc = shader_buffer_fetch_rsrc(ctx, &memory);
+
+ tmp = lp_build_emit_fetch(bld_base, inst, 0, 0);
+ offset = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, "");
+
+ buffer_append_args(ctx, emit_data, rsrc, bld_base->uint_bld.zero,
+ offset, false);
} else {
+ unsigned target = inst->Memory.Texture;
+ LLVMValueRef coords;
+
+ coords = image_fetch_coords(bld_base, inst, 0);
+
+ if (target == TGSI_TEXTURE_BUFFER) {
+ image_fetch_rsrc(bld_base, &memory, false, &rsrc);
+
+ rsrc = extract_rsrc_top_half(ctx, rsrc);
+ buffer_append_args(ctx, emit_data, rsrc, coords,
+ bld_base->uint_bld.zero, false);
+ } else {
+ emit_data->args[1] = coords;
+ image_fetch_rsrc(bld_base, &memory, true, &emit_data->args[2]);
+ emit_data->args[3] = lp_build_const_int32(gallivm, 15); /* dmask */
+ emit_data->arg_count = 4;
+
+ image_append_args(ctx, emit_data, target, false);
+ }
+ }
+}
+
+static void store_emit_buffer(
+ struct si_shader_context *ctx,
+ struct lp_build_emit_data *emit_data)
+{
+ const struct tgsi_full_instruction *inst = emit_data->inst;
+ struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ struct lp_build_context *uint_bld = &ctx->radeon_bld.soa.bld_base.uint_bld;
+ LLVMValueRef base_data = emit_data->args[0];
+ LLVMValueRef base_offset = emit_data->args[3];
+ unsigned writemask = inst->Dst[0].Register.WriteMask;
+
+ while (writemask) {
+ int start, count;
+ const char *intrinsic_name;
+ LLVMValueRef data;
+ LLVMValueRef offset;
+ LLVMValueRef tmp;
+
+ u_bit_scan_consecutive_range(&writemask, &start, &count);
+
+ /* Due to an LLVM limitation, split 3-element writes
+ * into a 2-element and a 1-element write. */
+ if (count == 3) {
+ writemask |= 1 << (start + 2);
+ count = 2;
+ }
+
+ if (count == 4) {
+ data = base_data;
+ intrinsic_name = "llvm.amdgcn.buffer.store.v4f32";
+ } else if (count == 2) {
+ LLVMTypeRef v2f32 = LLVMVectorType(ctx->f32, 2);
+
+ tmp = LLVMBuildExtractElement(
+ builder, base_data,
+ lp_build_const_int32(gallivm, start), "");
+ data = LLVMBuildInsertElement(
+ builder, LLVMGetUndef(v2f32), tmp,
+ uint_bld->zero, "");
+
+ tmp = LLVMBuildExtractElement(
+ builder, base_data,
+ lp_build_const_int32(gallivm, start + 1), "");
+ data = LLVMBuildInsertElement(
+ builder, data, tmp, uint_bld->one, "");
+
+ intrinsic_name = "llvm.amdgcn.buffer.store.v2f32";
+ } else {
+ assert(count == 1);
+ data = LLVMBuildExtractElement(
+ builder, base_data,
+ lp_build_const_int32(gallivm, start), "");
+ intrinsic_name = "llvm.amdgcn.buffer.store.f32";
+ }
+
+ offset = base_offset;
+ if (start != 0) {
+ offset = LLVMBuildAdd(
+ builder, offset,
+ lp_build_const_int32(gallivm, start * 4), "");
+ }
+
emit_data->args[0] = data;
- emit_data->args[1] = coords;
- image_fetch_rsrc(bld_base, &image, true, &emit_data->args[2]);
- emit_data->args[3] = lp_build_const_int32(gallivm, 15); /* dmask */
- emit_data->arg_count = 4;
+ emit_data->args[3] = offset;
- image_append_args(ctx, emit_data, target, false);
+ lp_build_intrinsic(
+ builder, intrinsic_name, emit_data->dst_type,
+ emit_data->args, emit_data->arg_count,
+ LLVMNoUnwindAttribute);
}
}
@@ -3076,6 +3253,11 @@ static void store_emit(
char intrinsic_name[32];
char coords_type[8];
+ if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
+ store_emit_buffer(si_shader_context(bld_base), emit_data);
+ return;
+ }
+
if (target == TGSI_TEXTURE_BUFFER) {
emit_data->output[emit_data->chan] = lp_build_intrinsic(
builder, "llvm.amdgcn.buffer.store.format.v4f32",
@@ -3103,18 +3285,12 @@ static void atomic_fetch_args(
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
const struct tgsi_full_instruction * inst = emit_data->inst;
- unsigned target = inst->Memory.Texture;
LLVMValueRef data1, data2;
- LLVMValueRef coords;
LLVMValueRef rsrc;
LLVMValueRef tmp;
emit_data->dst_type = bld_base->base.elem_type;
- image_fetch_rsrc(bld_base, &inst->Src[0], target != TGSI_TEXTURE_BUFFER,
- &rsrc);
- coords = image_fetch_coords(bld_base, inst, 1);
-
tmp = lp_build_emit_fetch(bld_base, inst, 2, 0);
data1 = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, "");
@@ -3130,13 +3306,34 @@ static void atomic_fetch_args(
emit_data->args[emit_data->arg_count++] = data2;
emit_data->args[emit_data->arg_count++] = data1;
- if (target == TGSI_TEXTURE_BUFFER) {
- buffer_append_args(ctx, emit_data, rsrc, coords, true);
+ if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
+ LLVMValueRef offset;
+
+ rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0]);
+
+ tmp = lp_build_emit_fetch(bld_base, inst, 1, 0);
+ offset = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, "");
+
+ buffer_append_args(ctx, emit_data, rsrc, bld_base->uint_bld.zero,
+ offset, true);
} else {
- emit_data->args[emit_data->arg_count++] = coords;
- emit_data->args[emit_data->arg_count++] = rsrc;
+ unsigned target = inst->Memory.Texture;
+ LLVMValueRef coords;
+
+ image_fetch_rsrc(bld_base, &inst->Src[0],
+ target != TGSI_TEXTURE_BUFFER, &rsrc);
+ coords = image_fetch_coords(bld_base, inst, 1);
+
+ if (target == TGSI_TEXTURE_BUFFER) {
+ rsrc = extract_rsrc_top_half(ctx, rsrc);
+ buffer_append_args(ctx, emit_data, rsrc, coords,
+ bld_base->uint_bld.zero, true);
+ } else {
+ emit_data->args[emit_data->arg_count++] = coords;
+ emit_data->args[emit_data->arg_count++] = rsrc;
- image_append_args(ctx, emit_data, target, true);
+ image_append_args(ctx, emit_data, target, true);
+ }
}
}
@@ -3148,11 +3345,11 @@ static void atomic_emit(
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
const struct tgsi_full_instruction * inst = emit_data->inst;
- unsigned target = inst->Memory.Texture;
char intrinsic_name[40];
LLVMValueRef tmp;
- if (target == TGSI_TEXTURE_BUFFER) {
+ if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
+ inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
snprintf(intrinsic_name, sizeof(intrinsic_name),
"llvm.amdgcn.buffer.atomic.%s", action->intr_name);
} else {
@@ -3177,14 +3374,17 @@ static void resq_fetch_args(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
+ struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
const struct tgsi_full_instruction *inst = emit_data->inst;
const struct tgsi_full_src_register *reg = &inst->Src[0];
- unsigned tex_target = inst->Memory.Texture;
emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
- if (tex_target == TGSI_TEXTURE_BUFFER) {
+ if (reg->Register.File == TGSI_FILE_BUFFER) {
+ emit_data->args[0] = shader_buffer_fetch_rsrc(ctx, reg);
+ emit_data->arg_count = 1;
+ } else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
image_fetch_rsrc(bld_base, reg, false, &emit_data->args[0]);
emit_data->arg_count = 1;
} else {
@@ -3193,7 +3393,7 @@ static void resq_fetch_args(
emit_data->args[2] = lp_build_const_int32(gallivm, 15); /* dmask */
emit_data->args[3] = bld_base->uint_bld.zero; /* unorm */
emit_data->args[4] = bld_base->uint_bld.zero; /* r128 */
- emit_data->args[5] = tgsi_is_array_image(tex_target) ?
+ emit_data->args[5] = tgsi_is_array_image(inst->Memory.Texture) ?
bld_base->uint_bld.one : bld_base->uint_bld.zero; /* da */
emit_data->args[6] = bld_base->uint_bld.zero; /* glc */
emit_data->args[7] = bld_base->uint_bld.zero; /* slc */
@@ -3211,10 +3411,12 @@ static void resq_emit(
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
const struct tgsi_full_instruction *inst = emit_data->inst;
- unsigned target = inst->Memory.Texture;
LLVMValueRef out;
- if (target == TGSI_TEXTURE_BUFFER) {
+ if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
+ out = LLVMBuildExtractElement(builder, emit_data->args[0],
+ lp_build_const_int32(gallivm, 2), "");
+ } else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
out = get_buffer_size(bld_base, emit_data->args[0]);
} else {
out = lp_build_intrinsic(
@@ -3223,7 +3425,7 @@ static void resq_emit(
LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
/* Divide the number of layers by 6 to get the number of cubes. */
- if (target == TGSI_TEXTURE_CUBE_ARRAY) {
+ if (inst->Memory.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
LLVMValueRef imm2 = lp_build_const_int32(gallivm, 2);
LLVMValueRef imm6 = lp_build_const_int32(gallivm, 6);
@@ -3339,6 +3541,35 @@ static LLVMValueRef get_sampler_desc(struct si_shader_context *ctx,
return get_sampler_desc_custom(ctx, list, index, type);
}
+/* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
+ *
+ * SI-CI:
+ * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
+ * filtering manually. The driver sets img7 to a mask clearing
+ * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
+ * s_and_b32 samp0, samp0, img7
+ *
+ * VI:
+ * The ANISO_OVERRIDE sampler field enables this fix in TA.
+ */
+static LLVMValueRef sici_fix_sampler_aniso(struct si_shader_context *ctx,
+ LLVMValueRef res, LLVMValueRef samp)
+{
+ LLVMBuilderRef builder = ctx->radeon_bld.gallivm.builder;
+ LLVMValueRef img7, samp0;
+
+ if (ctx->screen->b.chip_class >= VI)
+ return samp;
+
+ img7 = LLVMBuildExtractElement(builder, res,
+ LLVMConstInt(ctx->i32, 7, 0), "");
+ samp0 = LLVMBuildExtractElement(builder, samp,
+ LLVMConstInt(ctx->i32, 0, 0), "");
+ samp0 = LLVMBuildAnd(builder, samp0, img7, "");
+ return LLVMBuildInsertElement(builder, samp, samp0,
+ LLVMConstInt(ctx->i32, 0, 0), "");
+}
+
static void tex_fetch_ptrs(
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data,
@@ -3370,6 +3601,7 @@ static void tex_fetch_ptrs(
*fmask_ptr = get_sampler_desc(ctx, ind_index, DESC_FMASK);
} else {
*samp_ptr = get_sampler_desc(ctx, ind_index, DESC_SAMPLER);
+ *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
*fmask_ptr = NULL;
}
} else {
@@ -4420,7 +4652,8 @@ static void create_function(struct si_shader_context *ctx)
params[SI_PARAM_CONST_BUFFERS] = const_array(ctx->v16i8, SI_NUM_CONST_BUFFERS);
params[SI_PARAM_SAMPLERS] = const_array(ctx->v8i32, SI_NUM_SAMPLERS);
params[SI_PARAM_IMAGES] = const_array(ctx->v8i32, SI_NUM_IMAGES);
- last_array_pointer = SI_PARAM_IMAGES;
+ params[SI_PARAM_SHADER_BUFFERS] = const_array(ctx->v4i32, SI_NUM_SHADER_BUFFERS);
+ last_array_pointer = SI_PARAM_SHADER_BUFFERS;
switch (ctx->type) {
case TGSI_PROCESSOR_VERTEX:
@@ -4679,6 +4912,21 @@ static void preload_constants(struct si_shader_context *ctx)
}
}
+static void preload_shader_buffers(struct si_shader_context *ctx)
+{
+ struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
+ LLVMValueRef ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_SHADER_BUFFERS);
+ int buf, maxbuf;
+
+ maxbuf = MIN2(ctx->shader->selector->info.file_max[TGSI_FILE_BUFFER],
+ SI_NUM_SHADER_BUFFERS - 1);
+ for (buf = 0; buf <= maxbuf; ++buf) {
+ ctx->shader_buffers[buf] =
+ build_indexed_load_const(
+ ctx, ptr, lp_build_const_int32(gallivm, buf));
+ }
+}
+
static void preload_samplers(struct si_shader_context *ctx)
{
struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
@@ -4701,9 +4949,13 @@ static void preload_samplers(struct si_shader_context *ctx)
if (info->is_msaa_sampler[i])
ctx->fmasks[i] =
get_sampler_desc(ctx, offset, DESC_FMASK);
- else
+ else {
ctx->sampler_states[i] =
get_sampler_desc(ctx, offset, DESC_SAMPLER);
+ ctx->sampler_states[i] =
+ sici_fix_sampler_aniso(ctx, ctx->sampler_views[i],
+ ctx->sampler_states[i]);
+ }
}
}
@@ -5540,6 +5792,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
create_meta_data(&ctx);
create_function(&ctx);
preload_constants(&ctx);
+ preload_shader_buffers(&ctx);
preload_samplers(&ctx);
preload_images(&ctx);
preload_streamout_buffers(&ctx);
@@ -6000,6 +6253,7 @@ static bool si_compile_tcs_epilog(struct si_screen *sscreen,
params[SI_PARAM_CONST_BUFFERS] = ctx.i64;
params[SI_PARAM_SAMPLERS] = ctx.i64;
params[SI_PARAM_IMAGES] = ctx.i64;
+ params[SI_PARAM_SHADER_BUFFERS] = ctx.i64;
params[SI_PARAM_TCS_OUT_OFFSETS] = ctx.i32;
params[SI_PARAM_TCS_OUT_LAYOUT] = ctx.i32;
params[SI_PARAM_TCS_IN_LAYOUT] = ctx.i32;
@@ -6250,6 +6504,7 @@ static bool si_compile_ps_epilog(struct si_screen *sscreen,
params[SI_PARAM_CONST_BUFFERS] = ctx.i64;
params[SI_PARAM_SAMPLERS] = ctx.i64;
params[SI_PARAM_IMAGES] = ctx.i64;
+ params[SI_PARAM_SHADER_BUFFERS] = ctx.i64;
params[SI_PARAM_ALPHA_REF] = ctx.f32;
last_array_pointer = -1;
last_sgpr = SI_PARAM_ALPHA_REF;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 8059edf6395..013c8a2c114 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -81,95 +81,97 @@ struct radeon_shader_reloc;
#define SI_SGPR_CONST_BUFFERS 2
#define SI_SGPR_SAMPLERS 4 /* images & sampler states interleaved */
#define SI_SGPR_IMAGES 6
-#define SI_SGPR_VERTEX_BUFFERS 8 /* VS only */
-#define SI_SGPR_BASE_VERTEX 10 /* VS only */
-#define SI_SGPR_START_INSTANCE 11 /* VS only */
-#define SI_SGPR_VS_STATE_BITS 12 /* VS(VS) only */
-#define SI_SGPR_LS_OUT_LAYOUT 12 /* VS(LS) only */
-#define SI_SGPR_TCS_OUT_OFFSETS 8 /* TCS & TES only */
-#define SI_SGPR_TCS_OUT_LAYOUT 9 /* TCS & TES only */
-#define SI_SGPR_TCS_IN_LAYOUT 10 /* TCS only */
-#define SI_SGPR_ALPHA_REF 8 /* PS only */
-
-#define SI_VS_NUM_USER_SGPR 13 /* API VS */
-#define SI_ES_NUM_USER_SGPR 12 /* API VS */
-#define SI_LS_NUM_USER_SGPR 13 /* API VS */
-#define SI_TCS_NUM_USER_SGPR 11
-#define SI_TES_NUM_USER_SGPR 10
-#define SI_GS_NUM_USER_SGPR 8
+#define SI_SGPR_SHADER_BUFFERS 8
+#define SI_SGPR_VERTEX_BUFFERS 10 /* VS only */
+#define SI_SGPR_BASE_VERTEX 12 /* VS only */
+#define SI_SGPR_START_INSTANCE 13 /* VS only */
+#define SI_SGPR_VS_STATE_BITS 14 /* VS(VS) only */
+#define SI_SGPR_LS_OUT_LAYOUT 14 /* VS(LS) only */
+#define SI_SGPR_TCS_OUT_OFFSETS 10 /* TCS & TES only */
+#define SI_SGPR_TCS_OUT_LAYOUT 11 /* TCS & TES only */
+#define SI_SGPR_TCS_IN_LAYOUT 12 /* TCS only */
+#define SI_SGPR_ALPHA_REF 10 /* PS only */
+
+#define SI_VS_NUM_USER_SGPR 15 /* API VS */
+#define SI_ES_NUM_USER_SGPR 14 /* API VS */
+#define SI_LS_NUM_USER_SGPR 15 /* API VS */
+#define SI_TCS_NUM_USER_SGPR 13
+#define SI_TES_NUM_USER_SGPR 12
+#define SI_GS_NUM_USER_SGPR 10
#define SI_GSCOPY_NUM_USER_SGPR 4
-#define SI_PS_NUM_USER_SGPR 9
+#define SI_PS_NUM_USER_SGPR 11
/* LLVM function parameter indices */
#define SI_PARAM_RW_BUFFERS 0
#define SI_PARAM_CONST_BUFFERS 1
#define SI_PARAM_SAMPLERS 2
#define SI_PARAM_IMAGES 3
+#define SI_PARAM_SHADER_BUFFERS 4
/* VS only parameters */
-#define SI_PARAM_VERTEX_BUFFERS 4
-#define SI_PARAM_BASE_VERTEX 5
-#define SI_PARAM_START_INSTANCE 6
+#define SI_PARAM_VERTEX_BUFFERS 5
+#define SI_PARAM_BASE_VERTEX 6
+#define SI_PARAM_START_INSTANCE 7
/* [0] = clamp vertex color */
-#define SI_PARAM_VS_STATE_BITS 7
+#define SI_PARAM_VS_STATE_BITS 8
/* the other VS parameters are assigned dynamically */
/* Offsets where TCS outputs and TCS patch outputs live in LDS:
* [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32
* [16:31] = TCS output patch0 offset for per-patch / 16, max = NUM_PATCHES*32*32* + 32*32
*/
-#define SI_PARAM_TCS_OUT_OFFSETS 4 /* for TCS & TES */
+#define SI_PARAM_TCS_OUT_OFFSETS 5 /* for TCS & TES */
/* Layout of TCS outputs / TES inputs:
* [0:12] = stride between output patches in dwords, num_outputs * num_vertices * 4, max = 32*32*4
* [13:20] = stride between output vertices in dwords = num_inputs * 4, max = 32*4
* [26:31] = gl_PatchVerticesIn, max = 32
*/
-#define SI_PARAM_TCS_OUT_LAYOUT 5 /* for TCS & TES */
+#define SI_PARAM_TCS_OUT_LAYOUT 6 /* for TCS & TES */
/* Layout of LS outputs / TCS inputs
* [0:12] = stride between patches in dwords = num_inputs * num_vertices * 4, max = 32*32*4
* [13:20] = stride between vertices in dwords = num_inputs * 4, max = 32*4
*/
-#define SI_PARAM_TCS_IN_LAYOUT 6 /* TCS only */
-#define SI_PARAM_LS_OUT_LAYOUT 7 /* same value as TCS_IN_LAYOUT, LS only */
+#define SI_PARAM_TCS_IN_LAYOUT 7 /* TCS only */
+#define SI_PARAM_LS_OUT_LAYOUT 8 /* same value as TCS_IN_LAYOUT, LS only */
/* TCS only parameters. */
-#define SI_PARAM_TESS_FACTOR_OFFSET 7
-#define SI_PARAM_PATCH_ID 8
-#define SI_PARAM_REL_IDS 9
+#define SI_PARAM_TESS_FACTOR_OFFSET 8
+#define SI_PARAM_PATCH_ID 9
+#define SI_PARAM_REL_IDS 10
/* GS only parameters */
-#define SI_PARAM_GS2VS_OFFSET 4
-#define SI_PARAM_GS_WAVE_ID 5
-#define SI_PARAM_VTX0_OFFSET 6
-#define SI_PARAM_VTX1_OFFSET 7
-#define SI_PARAM_PRIMITIVE_ID 8
-#define SI_PARAM_VTX2_OFFSET 9
-#define SI_PARAM_VTX3_OFFSET 10
-#define SI_PARAM_VTX4_OFFSET 11
-#define SI_PARAM_VTX5_OFFSET 12
-#define SI_PARAM_GS_INSTANCE_ID 13
+#define SI_PARAM_GS2VS_OFFSET 5
+#define SI_PARAM_GS_WAVE_ID 6
+#define SI_PARAM_VTX0_OFFSET 7
+#define SI_PARAM_VTX1_OFFSET 8
+#define SI_PARAM_PRIMITIVE_ID 9
+#define SI_PARAM_VTX2_OFFSET 10
+#define SI_PARAM_VTX3_OFFSET 11
+#define SI_PARAM_VTX4_OFFSET 12
+#define SI_PARAM_VTX5_OFFSET 13
+#define SI_PARAM_GS_INSTANCE_ID 14
/* PS only parameters */
-#define SI_PARAM_ALPHA_REF 4
-#define SI_PARAM_PRIM_MASK 5
-#define SI_PARAM_PERSP_SAMPLE 6
-#define SI_PARAM_PERSP_CENTER 7
-#define SI_PARAM_PERSP_CENTROID 8
-#define SI_PARAM_PERSP_PULL_MODEL 9
-#define SI_PARAM_LINEAR_SAMPLE 10
-#define SI_PARAM_LINEAR_CENTER 11
-#define SI_PARAM_LINEAR_CENTROID 12
-#define SI_PARAM_LINE_STIPPLE_TEX 13
-#define SI_PARAM_POS_X_FLOAT 14
-#define SI_PARAM_POS_Y_FLOAT 15
-#define SI_PARAM_POS_Z_FLOAT 16
-#define SI_PARAM_POS_W_FLOAT 17
-#define SI_PARAM_FRONT_FACE 18
-#define SI_PARAM_ANCILLARY 19
-#define SI_PARAM_SAMPLE_COVERAGE 20
-#define SI_PARAM_POS_FIXED_PT 21
+#define SI_PARAM_ALPHA_REF 5
+#define SI_PARAM_PRIM_MASK 6
+#define SI_PARAM_PERSP_SAMPLE 7
+#define SI_PARAM_PERSP_CENTER 8
+#define SI_PARAM_PERSP_CENTROID 9
+#define SI_PARAM_PERSP_PULL_MODEL 10
+#define SI_PARAM_LINEAR_SAMPLE 11
+#define SI_PARAM_LINEAR_CENTER 12
+#define SI_PARAM_LINEAR_CENTROID 13
+#define SI_PARAM_LINE_STIPPLE_TEX 14
+#define SI_PARAM_POS_X_FLOAT 15
+#define SI_PARAM_POS_Y_FLOAT 16
+#define SI_PARAM_POS_Z_FLOAT 17
+#define SI_PARAM_POS_W_FLOAT 18
+#define SI_PARAM_FRONT_FACE 19
+#define SI_PARAM_ANCILLARY 20
+#define SI_PARAM_SAMPLE_COVERAGE 21
+#define SI_PARAM_POS_FIXED_PT 22
#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 9) /* +8 for COLOR[0..1] */
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 8087d2331ff..82ae4c43245 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -752,7 +752,7 @@ static void si_emit_blend_color(struct si_context *sctx, struct r600_atom *atom)
}
/*
- * Clipping, scissors and viewport
+ * Clipping
*/
static void si_set_clip_state(struct pipe_context *ctx,
@@ -819,179 +819,6 @@ static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
S_028AB4_REUSE_OFF(info->writes_viewport_index));
}
-static void si_set_scissor_states(struct pipe_context *ctx,
- unsigned start_slot,
- unsigned num_scissors,
- const struct pipe_scissor_state *state)
-{
- struct si_context *sctx = (struct si_context *)ctx;
- int i;
-
- for (i = 0; i < num_scissors; i++)
- sctx->scissors.states[start_slot + i] = state[i];
-
- if (!sctx->queued.named.rasterizer ||
- !sctx->queued.named.rasterizer->scissor_enable)
- return;
-
- sctx->scissors.dirty_mask |= ((1 << num_scissors) - 1) << start_slot;
- si_mark_atom_dirty(sctx, &sctx->scissors.atom);
-}
-
-static void si_get_scissor_from_viewport(struct pipe_viewport_state *vp,
- struct pipe_scissor_state *scissor)
-{
- /* These must be signed, unlike pipe_scissor_state. */
- int minx, miny, maxx, maxy, tmp;
-
- /* Convert (-1, -1) and (1, 1) from clip space into window space. */
- minx = -vp->scale[0] + vp->translate[0];
- miny = -vp->scale[1] + vp->translate[1];
- maxx = vp->scale[0] + vp->translate[0];
- maxy = vp->scale[1] + vp->translate[1];
-
- /* r600_draw_rectangle sets this. Disable the scissor. */
- if (minx == -1 && miny == -1 && maxx == 1 && maxy == 1) {
- minx = miny = 0;
- maxx = maxy = 16384;
- }
-
- /* Handle inverted viewports. */
- if (minx > maxx) {
- tmp = minx;
- minx = maxx;
- maxx = tmp;
- }
- if (miny > maxy) {
- tmp = miny;
- miny = maxy;
- maxy = tmp;
- }
-
- scissor->minx = CLAMP(minx, 0, 16384);
- scissor->miny = CLAMP(miny, 0, 16384);
- scissor->maxx = CLAMP(maxx, 0, 16384);
- scissor->maxy = CLAMP(maxy, 0, 16384);
-}
-
-static void si_clip_scissor(struct pipe_scissor_state *out,
- struct pipe_scissor_state *clip)
-{
- out->minx = MAX2(out->minx, clip->minx);
- out->miny = MAX2(out->miny, clip->miny);
- out->maxx = MIN2(out->maxx, clip->maxx);
- out->maxy = MIN2(out->maxy, clip->maxy);
-}
-
-static void si_emit_one_scissor(struct radeon_winsys_cs *cs,
- struct pipe_viewport_state *vp,
- struct pipe_scissor_state *scissor)
-{
- struct pipe_scissor_state final;
-
- /* Since the guard band disables clipping, we have to clip per-pixel
- * using a scissor.
- */
- si_get_scissor_from_viewport(vp, &final);
-
- if (scissor)
- si_clip_scissor(&final, scissor);
-
- radeon_emit(cs, S_028250_TL_X(final.minx) |
- S_028250_TL_Y(final.miny) |
- S_028250_WINDOW_OFFSET_DISABLE(1));
- radeon_emit(cs, S_028254_BR_X(final.maxx) |
- S_028254_BR_Y(final.maxy));
-}
-
-static void si_emit_scissors(struct si_context *sctx, struct r600_atom *atom)
-{
- struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
- struct pipe_scissor_state *states = sctx->scissors.states;
- unsigned mask = sctx->scissors.dirty_mask;
- bool scissor_enable = sctx->queued.named.rasterizer->scissor_enable;
-
- /* The simple case: Only 1 viewport is active. */
- if (mask & 1 &&
- !si_get_vs_info(sctx)->writes_viewport_index) {
- radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
- si_emit_one_scissor(cs, &sctx->viewports.states[0],
- scissor_enable ? &states[0] : NULL);
- sctx->scissors.dirty_mask &= ~1; /* clear one bit */
- return;
- }
-
- while (mask) {
- int start, count, i;
-
- u_bit_scan_consecutive_range(&mask, &start, &count);
-
- radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL +
- start * 4 * 2, count * 2);
- for (i = start; i < start+count; i++) {
- si_emit_one_scissor(cs, &sctx->viewports.states[i],
- scissor_enable ? &states[i] : NULL);
- }
- }
- sctx->scissors.dirty_mask = 0;
-}
-
-static void si_set_viewport_states(struct pipe_context *ctx,
- unsigned start_slot,
- unsigned num_viewports,
- const struct pipe_viewport_state *state)
-{
- struct si_context *sctx = (struct si_context *)ctx;
- int i;
-
- for (i = 0; i < num_viewports; i++)
- sctx->viewports.states[start_slot + i] = state[i];
-
- sctx->viewports.dirty_mask |= ((1 << num_viewports) - 1) << start_slot;
- sctx->scissors.dirty_mask |= ((1 << num_viewports) - 1) << start_slot;
- si_mark_atom_dirty(sctx, &sctx->viewports.atom);
- si_mark_atom_dirty(sctx, &sctx->scissors.atom);
-}
-
-static void si_emit_viewports(struct si_context *sctx, struct r600_atom *atom)
-{
- struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
- struct pipe_viewport_state *states = sctx->viewports.states;
- unsigned mask = sctx->viewports.dirty_mask;
-
- /* The simple case: Only 1 viewport is active. */
- if (mask & 1 &&
- !si_get_vs_info(sctx)->writes_viewport_index) {
- radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6);
- radeon_emit(cs, fui(states[0].scale[0]));
- radeon_emit(cs, fui(states[0].translate[0]));
- radeon_emit(cs, fui(states[0].scale[1]));
- radeon_emit(cs, fui(states[0].translate[1]));
- radeon_emit(cs, fui(states[0].scale[2]));
- radeon_emit(cs, fui(states[0].translate[2]));
- sctx->viewports.dirty_mask &= ~1; /* clear one bit */
- return;
- }
-
- while (mask) {
- int start, count, i;
-
- u_bit_scan_consecutive_range(&mask, &start, &count);
-
- radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE +
- start * 4 * 6, count * 6);
- for (i = start; i < start+count; i++) {
- radeon_emit(cs, fui(states[i].scale[0]));
- radeon_emit(cs, fui(states[i].translate[0]));
- radeon_emit(cs, fui(states[i].scale[1]));
- radeon_emit(cs, fui(states[i].translate[1]));
- radeon_emit(cs, fui(states[i].scale[2]));
- radeon_emit(cs, fui(states[i].translate[2]));
- }
- }
- sctx->viewports.dirty_mask = 0;
-}
-
/*
* inferred state between framebuffer and rasterizer
*/
@@ -1173,10 +1000,7 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
(!old_rs || old_rs->multisample_enable != rs->multisample_enable))
si_mark_atom_dirty(sctx, &sctx->db_render_state);
- if (!old_rs || old_rs->scissor_enable != rs->scissor_enable) {
- sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
- si_mark_atom_dirty(sctx, &sctx->scissors.atom);
- }
+ r600_set_scissor_enable(&sctx->b, rs->scissor_enable);
si_pm4_bind_state(sctx, rasterizer, rs);
si_update_poly_offset_state(sctx);
@@ -1348,6 +1172,26 @@ static void *si_create_db_flush_dsa(struct si_context *sctx)
/* DB RENDER STATE */
+static void si_set_active_query_state(struct pipe_context *ctx, boolean enable)
+{
+ struct si_context *sctx = (struct si_context*)ctx;
+
+ /* Pipeline stat & streamout queries. */
+ if (enable) {
+ sctx->b.flags &= ~R600_CONTEXT_STOP_PIPELINE_STATS;
+ sctx->b.flags |= R600_CONTEXT_START_PIPELINE_STATS;
+ } else {
+ sctx->b.flags &= ~R600_CONTEXT_START_PIPELINE_STATS;
+ sctx->b.flags |= R600_CONTEXT_STOP_PIPELINE_STATS;
+ }
+
+ /* Occlusion queries. */
+ if (sctx->occlusion_queries_disabled != !enable) {
+ sctx->occlusion_queries_disabled = !enable;
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
+ }
+}
+
static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
{
struct si_context *sctx = (struct si_context*)ctx;
@@ -1382,7 +1226,8 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
}
/* DB_COUNT_CONTROL (occlusion queries) */
- if (sctx->b.num_occlusion_queries > 0) {
+ if (sctx->b.num_occlusion_queries > 0 &&
+ !sctx->occlusion_queries_disabled) {
bool perfect = sctx->b.num_perfect_occlusion_queries > 0;
if (sctx->b.chip_class >= CIK) {
@@ -1838,17 +1683,6 @@ static unsigned si_tex_wrap(unsigned wrap)
}
}
-static unsigned si_tex_filter(unsigned filter)
-{
- switch (filter) {
- default:
- case PIPE_TEX_FILTER_NEAREST:
- return V_008F38_SQ_TEX_XY_FILTER_POINT;
- case PIPE_TEX_FILTER_LINEAR:
- return V_008F38_SQ_TEX_XY_FILTER_BILINEAR;
- }
-}
-
static unsigned si_tex_mipfilter(unsigned filter)
{
switch (filter) {
@@ -3122,6 +2956,16 @@ si_make_texture_descriptor(struct si_screen *screen,
} else {
state[6] = 0;
state[7] = 0;
+
+ /* The last dword is unused by hw. The shader uses it to clear
+ * bits in the first dword of sampler state.
+ */
+ if (screen->b.chip_class <= CIK && res->nr_samples <= 1) {
+ if (first_level == last_level)
+ state[7] = C_008F30_MAX_ANISO_RATIO;
+ else
+ state[7] = 0xffffffff;
+ }
}
/* Initialize the sampler view for FMASK. */
@@ -3318,9 +3162,12 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
const struct pipe_sampler_state *state)
{
struct si_context *sctx = (struct si_context *)ctx;
+ struct r600_common_screen *rscreen = sctx->b.screen;
struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state);
- unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 2 : 0;
unsigned border_color_type, border_color_index = 0;
+ unsigned max_aniso = rscreen->force_aniso >= 0 ? rscreen->force_aniso
+ : state->max_anisotropy;
+ unsigned max_aniso_ratio = r600_tex_aniso_filter(max_aniso);
if (!rstate) {
return NULL;
@@ -3378,16 +3225,21 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) |
- r600_tex_aniso_filter(state->max_anisotropy) << 9 |
+ S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) |
S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) |
- S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map));
+ S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) |
+ S_008F30_COMPAT_MODE(sctx->b.chip_class >= VI));
rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)));
rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
- S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter) | aniso_flag_offset) |
- S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter) | aniso_flag_offset) |
- S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)));
+ S_008F38_XY_MAG_FILTER(eg_tex_filter(state->mag_img_filter, max_aniso)) |
+ S_008F38_XY_MIN_FILTER(eg_tex_filter(state->min_img_filter, max_aniso)) |
+ S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) |
+ S_008F38_MIP_POINT_PRECLAMP(1) |
+ S_008F38_DISABLE_LSB_CEIL(1) |
+ S_008F38_FILTER_PREC_FIX(1) |
+ S_008F38_ANISO_OVERRIDE(sctx->b.chip_class >= VI));
rstate->val[3] = S_008F3C_BORDER_COLOR_PTR(border_color_index) |
S_008F3C_BORDER_COLOR_TYPE(border_color_type);
return rstate;
@@ -3430,7 +3282,7 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element);
int i;
- assert(count < SI_MAX_ATTRIBS);
+ assert(count <= SI_MAX_ATTRIBS);
if (!v)
return NULL;
@@ -3678,6 +3530,8 @@ void si_init_state_functions(struct si_context *sctx)
si_init_external_atom(sctx, &sctx->b.render_cond_atom, &sctx->atoms.s.render_cond);
si_init_external_atom(sctx, &sctx->b.streamout.begin_atom, &sctx->atoms.s.streamout_begin);
si_init_external_atom(sctx, &sctx->b.streamout.enable_atom, &sctx->atoms.s.streamout_enable);
+ si_init_external_atom(sctx, &sctx->b.scissors.atom, &sctx->atoms.s.scissors);
+ si_init_external_atom(sctx, &sctx->b.viewports.atom, &sctx->atoms.s.viewports);
si_init_atom(sctx, &sctx->cache_flush, &sctx->atoms.s.cache_flush, si_emit_cache_flush);
si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state);
@@ -3689,8 +3543,6 @@ void si_init_state_functions(struct si_context *sctx)
si_init_atom(sctx, &sctx->blend_color.atom, &sctx->atoms.s.blend_color, si_emit_blend_color);
si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs);
si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state);
- si_init_atom(sctx, &sctx->scissors.atom, &sctx->atoms.s.scissors, si_emit_scissors);
- si_init_atom(sctx, &sctx->viewports.atom, &sctx->atoms.s.viewports, si_emit_viewports);
si_init_atom(sctx, &sctx->stencil_ref.atom, &sctx->atoms.s.stencil_ref, si_emit_stencil_ref);
sctx->b.b.create_blend_state = si_create_blend_state;
@@ -3713,8 +3565,6 @@ void si_init_state_functions(struct si_context *sctx)
sctx->custom_blend_dcc_decompress = si_create_blend_custom(sctx, V_028808_CB_DCC_DECOMPRESS);
sctx->b.b.set_clip_state = si_set_clip_state;
- sctx->b.b.set_scissor_states = si_set_scissor_states;
- sctx->b.b.set_viewport_states = si_set_viewport_states;
sctx->b.b.set_stencil_ref = si_set_stencil_ref;
sctx->b.b.set_framebuffer_state = si_set_framebuffer_state;
@@ -3740,6 +3590,7 @@ void si_init_state_functions(struct si_context *sctx)
sctx->b.b.set_min_samples = si_set_min_samples;
sctx->b.b.set_tess_state = si_set_tess_state;
+ sctx->b.b.set_active_query_state = si_set_active_query_state;
sctx->b.set_occlusion_query_state = si_set_occlusion_query_state;
sctx->b.need_gfx_cs_space = si_need_gfx_cs_space;
@@ -4098,10 +3949,6 @@ static void si_init_config(struct si_context *sctx)
/* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
- si_pm4_set_reg(pm4, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, fui(1.0));
- si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, fui(1.0));
- si_pm4_set_reg(pm4, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, fui(1.0));
- si_pm4_set_reg(pm4, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, fui(1.0));
si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index f55f19e2918..6748f802c7d 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -161,6 +161,8 @@ struct si_shader_data {
#define SI_NUM_IMAGES 16
+#define SI_NUM_SHADER_BUFFERS 16
+
/* Read-write buffer slots.
*
* Ring buffers: 0..1
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 84b850a2992..40cad504e09 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -722,6 +722,16 @@ void si_emit_cache_flush(struct si_context *si_ctx, struct r600_atom *atom)
}
}
+ if (sctx->flags & R600_CONTEXT_START_PIPELINE_STATS) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_START) |
+ EVENT_INDEX(0));
+ } else if (sctx->flags & R600_CONTEXT_STOP_PIPELINE_STATS) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_STOP) |
+ EVENT_INDEX(0));
+ }
+
sctx->flags = 0;
}
@@ -882,8 +892,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
if ((sctx->b.family == CHIP_HAWAII ||
sctx->b.family == CHIP_TONGA ||
sctx->b.family == CHIP_FIJI) &&
- (sctx->b.streamout.streamout_enabled ||
- sctx->b.streamout.prims_gen_query_enabled)) {
+ r600_get_strmout_en(&sctx->b)) {
sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
}
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 02489583423..b7ebb48e6a9 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1270,25 +1270,6 @@ error:
return NULL;
}
-/**
- * Normally, we only emit 1 viewport and 1 scissor if no shader is using
- * the VIEWPORT_INDEX output, and emitting the other viewports and scissors
- * is delayed. When a shader with VIEWPORT_INDEX appears, this should be
- * called to emit the rest.
- */
-static void si_update_viewports_and_scissors(struct si_context *sctx)
-{
- struct tgsi_shader_info *info = si_get_vs_info(sctx);
-
- if (!info || !info->writes_viewport_index)
- return;
-
- if (sctx->scissors.dirty_mask)
- si_mark_atom_dirty(sctx, &sctx->scissors.atom);
- if (sctx->viewports.dirty_mask)
- si_mark_atom_dirty(sctx, &sctx->viewports.atom);
-}
-
static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
@@ -1300,7 +1281,7 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
sctx->vs_shader.cso = sel;
sctx->vs_shader.current = sel ? sel->first_variant : NULL;
si_mark_atom_dirty(sctx, &sctx->clip_regs);
- si_update_viewports_and_scissors(sctx);
+ r600_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
}
static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
@@ -1319,7 +1300,7 @@ static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
if (enable_changed)
si_shader_change_notify(sctx);
- si_update_viewports_and_scissors(sctx);
+ r600_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
}
static void si_bind_tcs_shader(struct pipe_context *ctx, void *state)
@@ -1356,7 +1337,7 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
si_shader_change_notify(sctx);
sctx->last_tes_sh_base = -1; /* invalidate derived tess state */
}
- si_update_viewports_and_scissors(sctx);
+ r600_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
}
static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
index 892084707d2..f0aa605c2d9 100644
--- a/src/gallium/drivers/radeonsi/sid.h
+++ b/src/gallium/drivers/radeonsi/sid.h
@@ -2307,6 +2307,9 @@
#define V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER 0x05
#define V_008F30_SQ_TEX_CLAMP_BORDER 0x06
#define V_008F30_SQ_TEX_MIRROR_ONCE_BORDER 0x07
+#define S_008F30_MAX_ANISO_RATIO(x) (((x) & 0x07) << 9)
+#define G_008F30_MAX_ANISO_RATIO(x) (((x) >> 9) & 0x07)
+#define C_008F30_MAX_ANISO_RATIO 0xFFFFF1FF
#define S_008F30_DEPTH_COMPARE_FUNC(x) (((x) & 0x07) << 12)
#define G_008F30_DEPTH_COMPARE_FUNC(x) (((x) >> 12) & 0x07)
#define C_008F30_DEPTH_COMPARE_FUNC 0xFFFF8FFF
@@ -2371,6 +2374,8 @@
#define C_008F38_XY_MIN_FILTER 0xFF3FFFFF
#define V_008F38_SQ_TEX_XY_FILTER_POINT 0x00
#define V_008F38_SQ_TEX_XY_FILTER_BILINEAR 0x01
+#define V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT 0x02
+#define V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR 0x03
#define S_008F38_Z_FILTER(x) (((x) & 0x03) << 24)
#define G_008F38_Z_FILTER(x) (((x) >> 24) & 0x03)
#define C_008F38_Z_FILTER 0xFCFFFFFF
@@ -2392,6 +2397,9 @@
#define S_008F38_FILTER_PREC_FIX(x) (((x) & 0x1) << 30)
#define G_008F38_FILTER_PREC_FIX(x) (((x) >> 30) & 0x1)
#define C_008F38_FILTER_PREC_FIX 0xBFFFFFFF
+#define S_008F38_ANISO_OVERRIDE(x) (((x) & 0x1) << 31)
+#define G_008F38_ANISO_OVERRIDE(x) (((x) >> 31) & 0x1)
+#define C_008F38_ANISO_OVERRIDE 0x7FFFFFFF
#define R_008F3C_SQ_IMG_SAMP_WORD3 0x008F3C
#define S_008F3C_BORDER_COLOR_PTR(x) (((x) & 0xFFF) << 0)
#define G_008F3C_BORDER_COLOR_PTR(x) (((x) >> 0) & 0xFFF)
diff --git a/src/gallium/drivers/rbug/rbug_context.c b/src/gallium/drivers/rbug/rbug_context.c
index 9ecddad05ec..1280c45b539 100644
--- a/src/gallium/drivers/rbug/rbug_context.c
+++ b/src/gallium/drivers/rbug/rbug_context.c
@@ -211,6 +211,17 @@ rbug_get_query_result(struct pipe_context *_pipe,
return ret;
}
+static void
+rbug_set_active_query_state(struct pipe_context *_pipe, boolean enable)
+{
+ struct rbug_context *rb_pipe = rbug_context(_pipe);
+ struct pipe_context *pipe = rb_pipe->pipe;
+
+ pipe_mutex_lock(rb_pipe->call_mutex);
+ pipe->set_active_query_state(pipe, enable);
+ pipe_mutex_unlock(rb_pipe->call_mutex);
+}
+
static void *
rbug_create_blend_state(struct pipe_context *_pipe,
const struct pipe_blend_state *blend)
@@ -1184,6 +1195,7 @@ rbug_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
rb_pipe->base.begin_query = rbug_begin_query;
rb_pipe->base.end_query = rbug_end_query;
rb_pipe->base.get_query_result = rbug_get_query_result;
+ rb_pipe->base.set_active_query_state = rbug_set_active_query_state;
rb_pipe->base.create_blend_state = rbug_create_blend_state;
rb_pipe->base.bind_blend_state = rbug_bind_blend_state;
rb_pipe->base.delete_blend_state = rbug_delete_blend_state;
diff --git a/src/gallium/drivers/softpipe/Makefile.sources b/src/gallium/drivers/softpipe/Makefile.sources
index efe88468e3f..1d42351f975 100644
--- a/src/gallium/drivers/softpipe/Makefile.sources
+++ b/src/gallium/drivers/softpipe/Makefile.sources
@@ -1,4 +1,5 @@
C_SOURCES := \
+ sp_buffer.c \
sp_clear.c \
sp_clear.h \
sp_context.c \
@@ -11,6 +12,7 @@ C_SOURCES := \
sp_fs_exec.c \
sp_fs.h \
sp_image.c \
+ sp_image.h \
sp_limits.h \
sp_prim_vbuf.c \
sp_prim_vbuf.h \
diff --git a/src/gallium/drivers/softpipe/sp_buffer.c b/src/gallium/drivers/softpipe/sp_buffer.c
new file mode 100644
index 00000000000..69a6bd18c3b
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_buffer.c
@@ -0,0 +1,371 @@
+/*
+ * Copyright 2016 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sp_context.h"
+#include "sp_buffer.h"
+#include "sp_texture.h"
+
+#include "util/u_format.h"
+
+static bool
+get_dimensions(const struct pipe_shader_buffer *bview,
+ const struct softpipe_resource *spr,
+ unsigned *width)
+{
+ *width = bview->buffer_size;
+ /*
+ * Bounds check the buffer size from the view
+ * and the buffer size from the underlying buffer.
+ */
+ if (*width > spr->base.width0)
+ return false;
+ return true;
+}
+
+/*
+ * Implement the image LOAD operation.
+ */
+static void
+sp_tgsi_load(const struct tgsi_buffer *buffer,
+ const struct tgsi_buffer_params *params,
+ const int s[TGSI_QUAD_SIZE],
+ float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
+{
+ struct sp_tgsi_buffer *sp_buf = (struct sp_tgsi_buffer *)buffer;
+ struct pipe_shader_buffer *bview;
+ struct softpipe_resource *spr;
+ unsigned width;
+ int c, j;
+ unsigned char *data_ptr;
+ const struct util_format_description *format_desc = util_format_description(PIPE_FORMAT_R32_UINT);
+
+ if (params->unit >= PIPE_MAX_SHADER_BUFFERS)
+ goto fail_write_all_zero;
+
+ bview = &sp_buf->sp_bview[params->unit];
+ spr = softpipe_resource(bview->buffer);
+ if (!spr)
+ goto fail_write_all_zero;
+
+ if (!get_dimensions(bview, spr, &width))
+ return;
+
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ int s_coord;
+ bool fill_zero = false;
+ uint32_t sdata[4];
+
+ if (!(params->execmask & (1 << j)))
+ fill_zero = true;
+
+ s_coord = s[j];
+ if (s_coord >= width)
+ fill_zero = true;
+
+ if (fill_zero) {
+ for (c = 0; c < 4; c++)
+ rgba[c][j] = 0;
+ continue;
+ }
+ data_ptr = (unsigned char *)spr->data + bview->buffer_offset + s_coord;
+ for (c = 0; c < 4; c++) {
+ format_desc->fetch_rgba_uint(sdata, data_ptr, 0, 0);
+ ((uint32_t *)rgba[c])[j] = sdata[0];
+ data_ptr += 4;
+ }
+ }
+ return;
+fail_write_all_zero:
+ memset(rgba, 0, TGSI_NUM_CHANNELS * TGSI_QUAD_SIZE * 4);
+ return;
+}
+
+/*
+ * Implement the buffer STORE operation.
+ */
+static void
+sp_tgsi_store(const struct tgsi_buffer *buffer,
+ const struct tgsi_buffer_params *params,
+ const int s[TGSI_QUAD_SIZE],
+ float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
+{
+ struct sp_tgsi_buffer *sp_buf = (struct sp_tgsi_buffer *)buffer;
+ struct pipe_shader_buffer *bview;
+ struct softpipe_resource *spr;
+ unsigned width;
+ unsigned char *data_ptr;
+ int j, c;
+ const struct util_format_description *format_desc = util_format_description(PIPE_FORMAT_R32_UINT);
+
+ if (params->unit >= PIPE_MAX_SHADER_BUFFERS)
+ return;
+
+ bview = &sp_buf->sp_bview[params->unit];
+ spr = softpipe_resource(bview->buffer);
+ if (!spr)
+ return;
+
+ if (!get_dimensions(bview, spr, &width))
+ return;
+
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ int s_coord;
+
+ if (!(params->execmask & (1 << j)))
+ continue;
+
+ s_coord = s[j];
+ if (s_coord >= width)
+ continue;
+
+ data_ptr = (unsigned char *)spr->data + bview->buffer_offset + s_coord;
+
+ for (c = 0; c < 4; c++) {
+ if (params->writemask & (1 << c)) {
+ unsigned temp[4];
+ unsigned char *dptr = data_ptr + (c * 4);
+ temp[0] = ((uint32_t *)rgba[c])[j];
+ format_desc->pack_rgba_uint(dptr, 0, temp, 0, 1, 1);
+ }
+ }
+ }
+}
+
+/*
+ * Implement atomic operations on unsigned integers.
+ */
+static void
+handle_op_uint(const struct pipe_shader_buffer *bview,
+ bool just_read,
+ unsigned char *data_ptr,
+ uint qi,
+ unsigned opcode,
+ unsigned writemask,
+ float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
+ float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
+{
+ uint c;
+ const struct util_format_description *format_desc = util_format_description(PIPE_FORMAT_R32_UINT);
+ unsigned sdata[4];
+
+ for (c = 0; c < 4; c++) {
+ unsigned temp[4];
+ unsigned char *dptr = data_ptr + (c * 4);
+ format_desc->fetch_rgba_uint(temp, dptr, 0, 0);
+ sdata[c] = temp[0];
+ }
+
+ if (just_read) {
+ for (c = 0; c < 4; c++) {
+ ((uint32_t *)rgba[c])[qi] = sdata[c];
+ }
+ return;
+ }
+
+ switch (opcode) {
+ case TGSI_OPCODE_ATOMUADD:
+ for (c = 0; c < 4; c++) {
+ unsigned temp = sdata[c];
+ sdata[c] += ((uint32_t *)rgba[c])[qi];
+ ((uint32_t *)rgba[c])[qi] = temp;
+ }
+ break;
+ case TGSI_OPCODE_ATOMXCHG:
+ for (c = 0; c < 4; c++) {
+ unsigned temp = sdata[c];
+ sdata[c] = ((uint32_t *)rgba[c])[qi];
+ ((uint32_t *)rgba[c])[qi] = temp;
+ }
+ break;
+ case TGSI_OPCODE_ATOMCAS:
+ for (c = 0; c < 4; c++) {
+ unsigned dst_x = sdata[c];
+ unsigned cmp_x = ((uint32_t *)rgba[c])[qi];
+ unsigned src_x = ((uint32_t *)rgba2[c])[qi];
+ unsigned temp = sdata[c];
+ sdata[c] = (dst_x == cmp_x) ? src_x : dst_x;
+ ((uint32_t *)rgba[c])[qi] = temp;
+ }
+ break;
+ case TGSI_OPCODE_ATOMAND:
+ for (c = 0; c < 4; c++) {
+ unsigned temp = sdata[c];
+ sdata[c] &= ((uint32_t *)rgba[c])[qi];
+ ((uint32_t *)rgba[c])[qi] = temp;
+ }
+ break;
+ case TGSI_OPCODE_ATOMOR:
+ for (c = 0; c < 4; c++) {
+ unsigned temp = sdata[c];
+ sdata[c] |= ((uint32_t *)rgba[c])[qi];
+ ((uint32_t *)rgba[c])[qi] = temp;
+ }
+ break;
+ case TGSI_OPCODE_ATOMXOR:
+ for (c = 0; c < 4; c++) {
+ unsigned temp = sdata[c];
+ sdata[c] ^= ((uint32_t *)rgba[c])[qi];
+ ((uint32_t *)rgba[c])[qi] = temp;
+ }
+ break;
+ case TGSI_OPCODE_ATOMUMIN:
+ for (c = 0; c < 4; c++) {
+ unsigned dst_x = sdata[c];
+ unsigned src_x = ((uint32_t *)rgba[c])[qi];
+ sdata[c] = MIN2(dst_x, src_x);
+ ((uint32_t *)rgba[c])[qi] = dst_x;
+ }
+ break;
+ case TGSI_OPCODE_ATOMUMAX:
+ for (c = 0; c < 4; c++) {
+ unsigned dst_x = sdata[c];
+ unsigned src_x = ((uint32_t *)rgba[c])[qi];
+ sdata[c] = MAX2(dst_x, src_x);
+ ((uint32_t *)rgba[c])[qi] = dst_x;
+ }
+ break;
+ case TGSI_OPCODE_ATOMIMIN:
+ for (c = 0; c < 4; c++) {
+ int dst_x = sdata[c];
+ int src_x = ((uint32_t *)rgba[c])[qi];
+ sdata[c] = MIN2(dst_x, src_x);
+ ((uint32_t *)rgba[c])[qi] = dst_x;
+ }
+ break;
+ case TGSI_OPCODE_ATOMIMAX:
+ for (c = 0; c < 4; c++) {
+ int dst_x = sdata[c];
+ int src_x = ((uint32_t *)rgba[c])[qi];
+ sdata[c] = MAX2(dst_x, src_x);
+ ((uint32_t *)rgba[c])[qi] = dst_x;
+ }
+ break;
+ default:
+ assert(!"Unexpected TGSI opcode in sp_tgsi_op");
+ break;
+ }
+
+ for (c = 0; c < 4; c++) {
+ if (writemask & (1 << c)) {
+ unsigned temp[4];
+ unsigned char *dptr = data_ptr + (c * 4);
+ temp[0] = sdata[c];
+ format_desc->pack_rgba_uint(dptr, 0, temp, 0, 1, 1);
+ }
+ }
+}
+
+/*
+ * Implement atomic buffer operations.
+ */
+static void
+sp_tgsi_op(const struct tgsi_buffer *buffer,
+ const struct tgsi_buffer_params *params,
+ unsigned opcode,
+ const int s[TGSI_QUAD_SIZE],
+ float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
+ float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
+{
+ struct sp_tgsi_buffer *sp_buf = (struct sp_tgsi_buffer *)buffer;
+ struct pipe_shader_buffer *bview;
+ struct softpipe_resource *spr;
+ unsigned width;
+ int j, c;
+ unsigned char *data_ptr;
+
+ if (params->unit >= PIPE_MAX_SHADER_BUFFERS)
+ return;
+
+ bview = &sp_buf->sp_bview[params->unit];
+ spr = softpipe_resource(bview->buffer);
+ if (!spr)
+ goto fail_write_all_zero;
+
+ if (!get_dimensions(bview, spr, &width))
+ goto fail_write_all_zero;
+
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ int s_coord;
+ bool just_read = false;
+
+ s_coord = s[j];
+ if (s_coord >= width) {
+ for (c = 0; c < 4; c++) {
+ rgba[c][j] = 0;
+ }
+ continue;
+ }
+
+ /* just readback the value for atomic if execmask isn't set */
+ if (!(params->execmask & (1 << j))) {
+ just_read = true;
+ }
+
+ data_ptr = (unsigned char *)spr->data + bview->buffer_offset + s_coord;
+ /* we should see atomic operations on r32 formats */
+
+ handle_op_uint(bview, just_read, data_ptr, j,
+ opcode, params->writemask, rgba, rgba2);
+ }
+ return;
+fail_write_all_zero:
+ memset(rgba, 0, TGSI_NUM_CHANNELS * TGSI_QUAD_SIZE * 4);
+ return;
+}
+
+/*
+ * return size of the attached buffer for RESQ opcode.
+ */
+static void
+sp_tgsi_get_dims(const struct tgsi_buffer *buffer,
+ const struct tgsi_buffer_params *params,
+ int *dim)
+{
+ struct sp_tgsi_buffer *sp_buf = (struct sp_tgsi_buffer *)buffer;
+ struct pipe_shader_buffer *bview;
+ struct softpipe_resource *spr;
+
+ if (params->unit >= PIPE_MAX_SHADER_BUFFERS)
+ return;
+
+ bview = &sp_buf->sp_bview[params->unit];
+ spr = softpipe_resource(bview->buffer);
+ if (!spr)
+ return;
+
+ *dim = bview->buffer_size;
+}
+
+struct sp_tgsi_buffer *
+sp_create_tgsi_buffer(void)
+{
+ struct sp_tgsi_buffer *buf = CALLOC_STRUCT(sp_tgsi_buffer);
+ if (!buf)
+ return NULL;
+
+ buf->base.load = sp_tgsi_load;
+ buf->base.store = sp_tgsi_store;
+ buf->base.op = sp_tgsi_op;
+ buf->base.get_dims = sp_tgsi_get_dims;
+ return buf;
+};
diff --git a/src/gallium/drivers/softpipe/sp_buffer.h b/src/gallium/drivers/softpipe/sp_buffer.h
new file mode 100644
index 00000000000..1822fe709fe
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_buffer.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2016 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SP_BUFFER_H
+#define SP_BUFFER_H
+#include "tgsi/tgsi_exec.h"
+
+struct sp_tgsi_buffer
+{
+ struct tgsi_buffer base;
+ struct pipe_shader_buffer sp_bview[PIPE_MAX_SHADER_BUFFERS];
+};
+
+struct sp_tgsi_buffer *
+sp_create_tgsi_buffer(void);
+
+#endif
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index 30b0276cfe0..0342fc6f8cd 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -38,6 +38,7 @@
#include "util/u_pstipple.h"
#include "util/u_inlines.h"
#include "tgsi/tgsi_exec.h"
+#include "sp_buffer.h"
#include "sp_clear.h"
#include "sp_context.h"
#include "sp_flush.h"
@@ -116,6 +117,8 @@ softpipe_destroy( struct pipe_context *pipe )
for (i = 0; i < PIPE_SHADER_TYPES; i++) {
FREE(softpipe->tgsi.sampler[i]);
+ FREE(softpipe->tgsi.image[i]);
+ FREE(softpipe->tgsi.buffer[i]);
}
FREE( softpipe );
@@ -203,6 +206,10 @@ softpipe_create_context(struct pipe_screen *screen,
softpipe->tgsi.image[i] = sp_create_tgsi_image();
}
+ for (i = 0; i < PIPE_SHADER_TYPES; i++) {
+ softpipe->tgsi.buffer[i] = sp_create_tgsi_buffer();
+ }
+
softpipe->dump_fs = debug_get_bool_option( "SOFTPIPE_DUMP_FS", FALSE );
softpipe->dump_gs = debug_get_bool_option( "SOFTPIPE_DUMP_GS", FALSE );
@@ -288,6 +295,16 @@ softpipe_create_context(struct pipe_screen *screen,
(struct tgsi_image *)
softpipe->tgsi.image[PIPE_SHADER_GEOMETRY]);
+ draw_buffer(softpipe->draw,
+ PIPE_SHADER_VERTEX,
+ (struct tgsi_buffer *)
+ softpipe->tgsi.buffer[PIPE_SHADER_VERTEX]);
+
+ draw_buffer(softpipe->draw,
+ PIPE_SHADER_GEOMETRY,
+ (struct tgsi_buffer *)
+ softpipe->tgsi.buffer[PIPE_SHADER_GEOMETRY]);
+
if (debug_get_bool_option( "SOFTPIPE_NO_RAST", FALSE ))
softpipe->no_rast = TRUE;
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h
index 20a12353b38..70d00c88b6e 100644
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -84,6 +84,7 @@ struct softpipe_context {
struct pipe_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS];
struct pipe_image_view images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES];
+ struct pipe_shader_buffer buffers[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_BUFFERS];
struct pipe_viewport_state viewports[PIPE_MAX_VIEWPORTS];
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
struct pipe_index_buffer index_buffer;
@@ -174,6 +175,7 @@ struct softpipe_context {
struct {
struct sp_tgsi_sampler *sampler[PIPE_SHADER_TYPES];
struct sp_tgsi_image *image[PIPE_SHADER_TYPES];
+ struct sp_tgsi_buffer *buffer[PIPE_SHADER_TYPES];
} tgsi;
struct tgsi_exec_machine *fs_machine;
diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c
index bfd9a4b7496..155382af825 100644
--- a/src/gallium/drivers/softpipe/sp_fs_exec.c
+++ b/src/gallium/drivers/softpipe/sp_fs_exec.c
@@ -63,14 +63,15 @@ static void
exec_prepare( const struct sp_fragment_shader_variant *var,
struct tgsi_exec_machine *machine,
struct tgsi_sampler *sampler,
- struct tgsi_image *image )
+ struct tgsi_image *image,
+ struct tgsi_buffer *buffer )
{
/*
* Bind tokens/shader to the interpreter's machine state.
*/
tgsi_exec_machine_bind_shader(machine,
var->tokens,
- sampler, image);
+ sampler, image, buffer);
}
@@ -186,7 +187,7 @@ exec_delete(struct sp_fragment_shader_variant *var,
struct tgsi_exec_machine *machine)
{
if (machine->Tokens == var->tokens) {
- tgsi_exec_machine_bind_shader(machine, NULL, NULL, NULL);
+ tgsi_exec_machine_bind_shader(machine, NULL, NULL, NULL, NULL);
}
FREE( (void *) var->tokens );
diff --git a/src/gallium/drivers/softpipe/sp_image.c b/src/gallium/drivers/softpipe/sp_image.c
index 3488fa83185..a7c73280a80 100644
--- a/src/gallium/drivers/softpipe/sp_image.c
+++ b/src/gallium/drivers/softpipe/sp_image.c
@@ -217,7 +217,7 @@ sp_tgsi_load(const struct tgsi_image *image,
char *data_ptr;
unsigned offset = 0;
- if (params->unit > PIPE_MAX_SHADER_IMAGES)
+ if (params->unit >= PIPE_MAX_SHADER_IMAGES)
goto fail_write_all_zero;
iview = &sp_img->sp_iview[params->unit];
spr = (struct softpipe_resource *)iview->resource;
@@ -320,7 +320,7 @@ sp_tgsi_store(const struct tgsi_image *image,
unsigned offset = 0;
unsigned pformat = params->format;
- if (params->unit > PIPE_MAX_SHADER_IMAGES)
+ if (params->unit >= PIPE_MAX_SHADER_IMAGES)
return;
iview = &sp_img->sp_iview[params->unit];
spr = (struct softpipe_resource *)iview->resource;
@@ -630,7 +630,7 @@ sp_tgsi_op(const struct tgsi_image *image,
unsigned offset;
char *data_ptr;
- if (params->unit > PIPE_MAX_SHADER_IMAGES)
+ if (params->unit >= PIPE_MAX_SHADER_IMAGES)
return;
iview = &sp_img->sp_iview[params->unit];
spr = (struct softpipe_resource *)iview->resource;
@@ -704,7 +704,7 @@ sp_tgsi_get_dims(const struct tgsi_image *image,
struct softpipe_resource *spr;
int level;
- if (params->unit > PIPE_MAX_SHADER_IMAGES)
+ if (params->unit >= PIPE_MAX_SHADER_IMAGES)
return;
iview = &sp_img->sp_iview[params->unit];
spr = (struct softpipe_resource *)iview->resource;
diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c
index c28d28d5f5d..81e97107d59 100644
--- a/src/gallium/drivers/softpipe/sp_query.c
+++ b/src/gallium/drivers/softpipe/sp_query.c
@@ -283,6 +283,12 @@ softpipe_check_render_cond(struct softpipe_context *sp)
}
+static void
+softpipe_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{
+}
+
+
void softpipe_init_query_funcs(struct softpipe_context *softpipe )
{
softpipe->pipe.create_query = softpipe_create_query;
@@ -290,6 +296,7 @@ void softpipe_init_query_funcs(struct softpipe_context *softpipe )
softpipe->pipe.begin_query = softpipe_begin_query;
softpipe->pipe.end_query = softpipe_end_query;
softpipe->pipe.get_query_result = softpipe_get_query_result;
+ softpipe->pipe.set_active_query_state = softpipe_set_active_query_state;
}
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 90f29d6e52a..d89d95c884c 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -239,6 +239,8 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
return 1;
+ case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
+ return 1;
case PIPE_CAP_VERTEXID_NOBASE:
return 0;
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
@@ -259,7 +261,6 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
- case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
@@ -270,8 +271,10 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_PCI_BUS:
case PIPE_CAP_PCI_DEVICE:
case PIPE_CAP_PCI_FUNCTION:
- case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
+ case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
return 0;
+ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+ return 4;
}
/* should only get here on unhandled cases */
debug_printf("Unexpected PIPE_CAP %d query\n", param);
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h
index 570bc549cc4..2fc48ab13d8 100644
--- a/src/gallium/drivers/softpipe/sp_state.h
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -57,6 +57,7 @@
struct tgsi_sampler;
struct tgsi_image;
+struct tgsi_buffer;
struct tgsi_exec_machine;
struct vertex_info;
@@ -83,7 +84,8 @@ struct sp_fragment_shader_variant
void (*prepare)(const struct sp_fragment_shader_variant *shader,
struct tgsi_exec_machine *machine,
struct tgsi_sampler *sampler,
- struct tgsi_image *image);
+ struct tgsi_image *image,
+ struct tgsi_buffer *buffer);
unsigned (*run)(const struct sp_fragment_shader_variant *shader,
struct tgsi_exec_machine *machine,
diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c
index 65679e73515..4ce9d95bc6e 100644
--- a/src/gallium/drivers/softpipe/sp_state_derived.c
+++ b/src/gallium/drivers/softpipe/sp_state_derived.c
@@ -344,7 +344,8 @@ update_fragment_shader(struct softpipe_context *softpipe, unsigned prim)
softpipe->fs_machine,
(struct tgsi_sampler *) softpipe->
tgsi.sampler[PIPE_SHADER_FRAGMENT],
- (struct tgsi_image *)softpipe->tgsi.image[PIPE_SHADER_FRAGMENT]);
+ (struct tgsi_image *)softpipe->tgsi.image[PIPE_SHADER_FRAGMENT],
+ (struct tgsi_buffer *)softpipe->tgsi.buffer[PIPE_SHADER_FRAGMENT]);
}
else {
softpipe->fs_variant = NULL;
diff --git a/src/gallium/drivers/softpipe/sp_state_image.c b/src/gallium/drivers/softpipe/sp_state_image.c
index 8909fa26864..5947c934e86 100644
--- a/src/gallium/drivers/softpipe/sp_state_image.c
+++ b/src/gallium/drivers/softpipe/sp_state_image.c
@@ -24,6 +24,7 @@
#include "sp_context.h"
#include "sp_state.h"
#include "sp_image.h"
+#include "sp_buffer.h"
static void softpipe_set_shader_images(struct pipe_context *pipe,
unsigned shader,
@@ -51,7 +52,34 @@ static void softpipe_set_shader_images(struct pipe_context *pipe,
}
}
+static void softpipe_set_shader_buffers(struct pipe_context *pipe,
+ unsigned shader,
+ unsigned start,
+ unsigned num,
+ struct pipe_shader_buffer *buffers)
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+ unsigned i;
+ assert(shader < PIPE_SHADER_TYPES);
+ assert(start + num <= Elements(softpipe->buffers[shader]));
+
+ /* set the new images */
+ for (i = 0; i < num; i++) {
+ int idx = start + i;
+
+ if (buffers) {
+ pipe_resource_reference(&softpipe->tgsi.buffer[shader]->sp_bview[idx].buffer, buffers[i].buffer);
+ softpipe->tgsi.buffer[shader]->sp_bview[idx] = buffers[i];
+ }
+ else {
+ pipe_resource_reference(&softpipe->tgsi.buffer[shader]->sp_bview[idx].buffer, NULL);
+ memset(&softpipe->tgsi.buffer[shader]->sp_bview[idx], 0, sizeof(struct pipe_shader_buffer));
+ }
+ }
+}
+
void softpipe_init_image_funcs(struct pipe_context *pipe)
{
pipe->set_shader_images = softpipe_set_shader_images;
+ pipe->set_shader_buffers = softpipe_set_shader_buffers;
}
diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c
index 9cc8ac12525..c6233261938 100644
--- a/src/gallium/drivers/softpipe/sp_tile_cache.c
+++ b/src/gallium/drivers/softpipe/sp_tile_cache.c
@@ -99,9 +99,9 @@ sp_create_tile_cache( struct pipe_context *pipe )
maxTexSize = 1 << (maxLevels - 1);
assert(MAX_WIDTH >= maxTexSize);
- assert(sizeof(union tile_address) == 4);
+ STATIC_ASSERT(sizeof(union tile_address) == 4);
- assert((TILE_SIZE << TILE_ADDR_BITS) >= MAX_WIDTH);
+ STATIC_ASSERT((TILE_SIZE << TILE_ADDR_BITS) >= MAX_WIDTH);
tc = CALLOC_STRUCT( softpipe_tile_cache );
if (tc) {
diff --git a/src/gallium/drivers/svga/svga_pipe_depthstencil.c b/src/gallium/drivers/svga/svga_pipe_depthstencil.c
index 83fcdc3d80b..c5d83c33f29 100644
--- a/src/gallium/drivers/svga/svga_pipe_depthstencil.c
+++ b/src/gallium/drivers/svga/svga_pipe_depthstencil.c
@@ -86,9 +86,9 @@ define_depth_stencil_state_object(struct svga_context *svga,
ds->id = util_bitmask_add(svga->ds_object_id_bm);
/* spot check that these comparision tokens are the same */
- assert(SVGA3D_COMPARISON_NEVER == SVGA3D_CMP_NEVER);
- assert(SVGA3D_COMPARISON_LESS == SVGA3D_CMP_LESS);
- assert(SVGA3D_COMPARISON_NOT_EQUAL == SVGA3D_CMP_NOTEQUAL);
+ STATIC_ASSERT(SVGA3D_COMPARISON_NEVER == SVGA3D_CMP_NEVER);
+ STATIC_ASSERT(SVGA3D_COMPARISON_LESS == SVGA3D_CMP_LESS);
+ STATIC_ASSERT(SVGA3D_COMPARISON_NOT_EQUAL == SVGA3D_CMP_NOTEQUAL);
/* Loop in case command buffer is full and we need to flush and retry */
for (try = 0; try < 2; try++) {
diff --git a/src/gallium/drivers/svga/svga_pipe_query.c b/src/gallium/drivers/svga/svga_pipe_query.c
index 88f41eadc1d..75bc9ce092b 100644
--- a/src/gallium/drivers/svga/svga_pipe_query.c
+++ b/src/gallium/drivers/svga/svga_pipe_query.c
@@ -1246,6 +1246,12 @@ svga_get_timestamp(struct pipe_context *pipe)
}
+static void
+svga_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{
+}
+
+
void
svga_init_query_functions(struct svga_context *svga)
{
@@ -1254,6 +1260,7 @@ svga_init_query_functions(struct svga_context *svga)
svga->pipe.begin_query = svga_begin_query;
svga->pipe.end_query = svga_end_query;
svga->pipe.get_query_result = svga_get_query_result;
+ svga->pipe.set_active_query_state = svga_set_active_query_state;
svga->pipe.render_condition = svga_render_condition;
svga->pipe.get_timestamp = svga_get_timestamp;
}
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index 536fb6f786f..010d94b1b58 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -364,6 +364,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_PCI_BUS:
case PIPE_CAP_PCI_DEVICE:
case PIPE_CAP_PCI_FUNCTION:
+ case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
return 0;
case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
return 64;
diff --git a/src/gallium/drivers/svga/svga_state_rss.c b/src/gallium/drivers/svga/svga_state_rss.c
index d43894d71b1..317b44eef00 100644
--- a/src/gallium/drivers/svga/svga_state_rss.c
+++ b/src/gallium/drivers/svga/svga_state_rss.c
@@ -47,7 +47,7 @@ struct rs_queue {
#define EMIT_RS(svga, value, token, fail) \
do { \
- assert(SVGA3D_RS_##token < Elements(svga->state.hw_draw.rs)); \
+ STATIC_ASSERT(SVGA3D_RS_##token < Elements(svga->state.hw_draw.rs)); \
if (svga->state.hw_draw.rs[SVGA3D_RS_##token] != value) { \
svga_queue_rs( &queue, SVGA3D_RS_##token, value ); \
svga->state.hw_draw.rs[SVGA3D_RS_##token] = value; \
@@ -57,7 +57,7 @@ do { \
#define EMIT_RS_FLOAT(svga, fvalue, token, fail) \
do { \
unsigned value = fui(fvalue); \
- assert(SVGA3D_RS_##token < Elements(svga->state.hw_draw.rs)); \
+ STATIC_ASSERT(SVGA3D_RS_##token < Elements(svga->state.hw_draw.rs)); \
if (svga->state.hw_draw.rs[SVGA3D_RS_##token] != value) { \
svga_queue_rs( &queue, SVGA3D_RS_##token, value ); \
svga->state.hw_draw.rs[SVGA3D_RS_##token] = value; \
diff --git a/src/gallium/drivers/svga/svga_state_tss.c b/src/gallium/drivers/svga/svga_state_tss.c
index 4debbf1669b..fd6d1ce84b4 100644
--- a/src/gallium/drivers/svga/svga_state_tss.c
+++ b/src/gallium/drivers/svga/svga_state_tss.c
@@ -327,7 +327,7 @@ svga_queue_tss( struct ts_queue *q,
#define EMIT_TS(svga, unit, val, token) \
do { \
assert(unit < Elements(svga->state.hw_draw.ts)); \
- assert(SVGA3D_TS_##token < Elements(svga->state.hw_draw.ts[unit])); \
+ STATIC_ASSERT(SVGA3D_TS_##token < Elements(svga->state.hw_draw.ts[unit])); \
if (svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] != val) { \
svga_queue_tss( queue, unit, SVGA3D_TS_##token, val ); \
svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] = val; \
@@ -338,7 +338,7 @@ do { \
do { \
unsigned val = fui(fvalue); \
assert(unit < Elements(svga->state.hw_draw.ts)); \
- assert(SVGA3D_TS_##token < Elements(svga->state.hw_draw.ts[unit])); \
+ STATIC_ASSERT(SVGA3D_TS_##token < Elements(svga->state.hw_draw.ts[unit])); \
if (svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] != val) { \
svga_queue_tss( queue, unit, SVGA3D_TS_##token, val ); \
svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] = val; \
diff --git a/src/gallium/drivers/swr/rasterizer/common/os.h b/src/gallium/drivers/swr/rasterizer/common/os.h
index 5794f3f625a..180a0560822 100644
--- a/src/gallium/drivers/swr/rasterizer/common/os.h
+++ b/src/gallium/drivers/swr/rasterizer/common/os.h
@@ -30,10 +30,6 @@
#define SWR_API __cdecl
-#ifndef _CRT_SECURE_NO_WARNINGS
-#define _CRT_SECURE_NO_WARNINGS
-#endif
-
#ifndef NOMINMAX
#define NOMINMAX
#endif
@@ -52,7 +48,6 @@
#define PRAGMA_WARNING_POP() __pragma(warning(pop))
-#if defined(_WIN32)
#if defined(_WIN64)
#define BitScanReverseSizeT BitScanReverse64
#define BitScanForwardSizeT BitScanForward64
@@ -62,7 +57,6 @@
#define BitScanForwardSizeT BitScanForward
#define _mm_popcount_sizeT _mm_popcnt_u32
#endif
-#endif
#elif defined(FORCE_LINUX) || defined(__linux__) || defined(__gnu_linux__)
@@ -199,9 +193,7 @@ typedef KILOBYTE MEGABYTE[1024];
typedef MEGABYTE GIGABYTE[1024];
#define OSALIGNLINE(RWORD) OSALIGN(RWORD, 64)
-#if KNOB_SIMD_WIDTH == 8
-#define OSALIGNSIMD(RWORD) OSALIGN(RWORD, 32)
-#endif
+#define OSALIGNSIMD(RWORD) OSALIGN(RWORD, KNOB_SIMD_BYTES)
#include "common/swr_assert.h"
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index f0f7956b590..ca9cfdb629e 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -29,10 +29,12 @@
#include <cfloat>
#include <cmath>
#include <cstdio>
+#include <new>
#include "core/api.h"
#include "core/backend.h"
#include "core/context.h"
+#include "core/depthstencil.h"
#include "core/frontend.h"
#include "core/rasterizer.h"
#include "core/rdtsc_core.h"
@@ -64,11 +66,14 @@ HANDLE SwrCreateContext(
pContext->dcRing.Init(KNOB_MAX_DRAWS_IN_FLIGHT);
pContext->dsRing.Init(KNOB_MAX_DRAWS_IN_FLIGHT);
+ pContext->pMacroTileManagerArray = (MacroTileMgr*)_aligned_malloc(sizeof(MacroTileMgr) * KNOB_MAX_DRAWS_IN_FLIGHT, 64);
+ pContext->pDispatchQueueArray = (DispatchQueue*)_aligned_malloc(sizeof(DispatchQueue) * KNOB_MAX_DRAWS_IN_FLIGHT, 64);
+
for (uint32_t dc = 0; dc < KNOB_MAX_DRAWS_IN_FLIGHT; ++dc)
{
pContext->dcRing[dc].pArena = new CachingArena(pContext->cachingArenaAllocator);
- pContext->dcRing[dc].pTileMgr = new MacroTileMgr(*(pContext->dcRing[dc].pArena));
- pContext->dcRing[dc].pDispatch = new DispatchQueue(); /// @todo Could lazily allocate this if Dispatch seen.
+ new (&pContext->pMacroTileManagerArray[dc]) MacroTileMgr(*pContext->dcRing[dc].pArena);
+ new (&pContext->pDispatchQueueArray[dc]) DispatchQueue();
pContext->dsRing[dc].pArena = new CachingArena(pContext->cachingArenaAllocator);
}
@@ -86,15 +91,26 @@ HANDLE SwrCreateContext(
// Calling createThreadPool() above can set SINGLE_THREADED
if (KNOB_SINGLE_THREADED)
{
+ SET_KNOB(HYPERTHREADED_FE, false);
pContext->NumWorkerThreads = 1;
+ pContext->NumFEThreads = 1;
+ pContext->NumBEThreads = 1;
}
// Allocate scratch space for workers.
///@note We could lazily allocate this but its rather small amount of memory.
for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i)
{
- ///@todo Use numa API for allocations using numa information from thread data (if exists).
- pContext->pScratch[i] = (uint8_t*)_aligned_malloc((32 * 1024), KNOB_SIMD_WIDTH * 4);
+#if defined(_WIN32)
+ uint32_t numaNode = pContext->threadPool.pThreadData ?
+ pContext->threadPool.pThreadData[i].numaId : 0;
+ pContext->pScratch[i] = (uint8_t*)VirtualAllocExNuma(
+ GetCurrentProcess(), nullptr, 32 * sizeof(KILOBYTE),
+ MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE,
+ numaNode);
+#else
+ pContext->pScratch[i] = (uint8_t*)_aligned_malloc(32 * sizeof(KILOBYTE), KNOB_SIMD_WIDTH * 4);
+#endif
}
// State setup AFTER context is fully initialized
@@ -131,14 +147,21 @@ void SwrDestroyContext(HANDLE hContext)
{
delete pContext->dcRing[i].pArena;
delete pContext->dsRing[i].pArena;
- delete(pContext->dcRing[i].pTileMgr);
- delete(pContext->dcRing[i].pDispatch);
+ pContext->pMacroTileManagerArray[i].~MacroTileMgr();
+ pContext->pDispatchQueueArray[i].~DispatchQueue();
}
+ _aligned_free(pContext->pDispatchQueueArray);
+ _aligned_free(pContext->pMacroTileManagerArray);
+
// Free scratch space.
for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i)
{
+#if defined(_WIN32)
+ VirtualFree(pContext->pScratch[i], 0, MEM_RELEASE);
+#else
_aligned_free(pContext->pScratch[i]);
+#endif
}
delete(pContext->pHotTileMgr);
@@ -160,12 +183,20 @@ void WakeAllThreads(SWR_CONTEXT *pContext)
template<bool IsDraw>
void QueueWork(SWR_CONTEXT *pContext)
{
+ DRAW_CONTEXT* pDC = pContext->pCurDrawContext;
+ uint32_t dcIndex = pDC->drawId % KNOB_MAX_DRAWS_IN_FLIGHT;
+
+ if (IsDraw)
+ {
+ pDC->pTileMgr = &pContext->pMacroTileManagerArray[dcIndex];
+ pDC->pTileMgr->initialize();
+ }
+
// Each worker thread looks at a DC for both FE and BE work at different times and so we
// multiply threadDone by 2. When the threadDone counter has reached 0 then all workers
// have moved past this DC. (i.e. Each worker has checked this DC for both FE and BE work and
// then moved on if all work is done.)
- pContext->pCurDrawContext->threadsDone =
- pContext->NumWorkerThreads ? pContext->NumWorkerThreads * 2 : 2;
+ pContext->pCurDrawContext->threadsDone = pContext->NumFEThreads + pContext->NumBEThreads;
_ReadWriteBarrier();
{
@@ -183,7 +214,7 @@ void QueueWork(SWR_CONTEXT *pContext)
{
static TileSet lockedTiles;
uint64_t curDraw[2] = { pContext->pCurDrawContext->drawId, pContext->pCurDrawContext->drawId };
- WorkOnFifoFE(pContext, 0, curDraw[0], 0);
+ WorkOnFifoFE(pContext, 0, curDraw[0]);
WorkOnFifoBE(pContext, 0, curDraw[1], lockedTiles, 0, 0);
}
else
@@ -232,7 +263,20 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
_mm_pause();
}
- uint32_t dcIndex = pContext->dcRing.GetHead() % KNOB_MAX_DRAWS_IN_FLIGHT;
+ uint64_t curDraw = pContext->dcRing.GetHead();
+ uint32_t dcIndex = curDraw % KNOB_MAX_DRAWS_IN_FLIGHT;
+
+ static uint64_t lastDrawChecked;
+ static uint32_t lastFrameChecked;
+ if ((pContext->frameCount - lastFrameChecked) > 2 ||
+ (curDraw - lastDrawChecked) > 0x10000)
+ {
+ // Take this opportunity to clean-up old arena allocations
+ pContext->cachingArenaAllocator.FreeOldBlocks();
+
+ lastFrameChecked = pContext->frameCount;
+ lastDrawChecked = curDraw;
+ }
DRAW_CONTEXT* pCurDrawContext = &pContext->dcRing[dcIndex];
pContext->pCurDrawContext = pCurDrawContext;
@@ -284,8 +328,6 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
pCurDrawContext->FeLock = 0;
pCurDrawContext->threadsDone = 0;
- pCurDrawContext->pTileMgr->initialize();
-
// Assign unique drawId for this DC
pCurDrawContext->drawId = pContext->dcRing.GetHead();
@@ -872,6 +914,25 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
!pState->state.blendState.renderTarget[rt].writeDisableBlue) ? (1 << rt) : 0;
}
}
+
+ // Setup depth quantization function
+ if (pState->state.depthHottileEnable)
+ {
+ switch (pState->state.rastState.depthFormat)
+ {
+ case R32_FLOAT_X8X24_TYPELESS: pState->state.pfnQuantizeDepth = QuantizeDepth < R32_FLOAT_X8X24_TYPELESS > ; break;
+ case R32_FLOAT: pState->state.pfnQuantizeDepth = QuantizeDepth < R32_FLOAT > ; break;
+ case R24_UNORM_X8_TYPELESS: pState->state.pfnQuantizeDepth = QuantizeDepth < R24_UNORM_X8_TYPELESS > ; break;
+ case R16_UNORM: pState->state.pfnQuantizeDepth = QuantizeDepth < R16_UNORM > ; break;
+ default: SWR_ASSERT(false, "Unsupported depth format for depth quantiztion.");
+ pState->state.pfnQuantizeDepth = QuantizeDepth < R32_FLOAT > ;
+ }
+ }
+ else
+ {
+ // set up pass-through quantize if depth isn't enabled
+ pState->state.pfnQuantizeDepth = QuantizeDepth < R32_FLOAT > ;
+ }
}
//////////////////////////////////////////////////////////////////////////
@@ -1029,9 +1090,9 @@ void DrawInstanced(
SWR_CONTEXT *pContext = GetContext(hContext);
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
- int32_t maxVertsPerDraw = MaxVertsPerDraw(pDC, numVertices, topology);
+ uint32_t maxVertsPerDraw = MaxVertsPerDraw(pDC, numVertices, topology);
uint32_t primsPerDraw = GetNumPrims(topology, maxVertsPerDraw);
- int32_t remainingVerts = numVertices;
+ uint32_t remainingVerts = numVertices;
API_STATE *pState = &pDC->pState->state;
pState->topology = topology;
@@ -1149,9 +1210,9 @@ void DrawIndexedInstance(
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
API_STATE* pState = &pDC->pState->state;
- int32_t maxIndicesPerDraw = MaxVertsPerDraw(pDC, numIndices, topology);
+ uint32_t maxIndicesPerDraw = MaxVertsPerDraw(pDC, numIndices, topology);
uint32_t primsPerDraw = GetNumPrims(topology, maxIndicesPerDraw);
- int32_t remainingIndices = numIndices;
+ uint32_t remainingIndices = numIndices;
uint32_t indexSize = 0;
switch (pState->indexBuffer.format)
@@ -1334,9 +1395,6 @@ void SwrDispatch(
pDC->isCompute = true; // This is a compute context.
- // Ensure spill fill pointers are initialized to nullptr.
- memset(pDC->pSpillFill, 0, sizeof(pDC->pSpillFill));
-
COMPUTE_DESC* pTaskData = (COMPUTE_DESC*)pDC->pArena->AllocAligned(sizeof(COMPUTE_DESC), 64);
pTaskData->threadGroupCountX = threadGroupCountX;
@@ -1344,6 +1402,8 @@ void SwrDispatch(
pTaskData->threadGroupCountZ = threadGroupCountZ;
uint32_t totalThreadGroups = threadGroupCountX * threadGroupCountY * threadGroupCountZ;
+ uint32_t dcIndex = pDC->drawId % KNOB_MAX_DRAWS_IN_FLIGHT;
+ pDC->pDispatch = &pContext->pDispatchQueueArray[dcIndex];
pDC->pDispatch->initialize(totalThreadGroups, pTaskData);
QueueDispatch(pContext);
@@ -1497,4 +1557,6 @@ void SWR_API SwrEndFrame(
HANDLE hContext)
{
RDTSC_ENDFRAME();
+ SWR_CONTEXT *pContext = GetContext(hContext);
+ pContext->frameCount++;
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/arena.h b/src/gallium/drivers/swr/rasterizer/core/arena.h
index 67d81a44347..64184e16865 100644
--- a/src/gallium/drivers/swr/rasterizer/core/arena.h
+++ b/src/gallium/drivers/swr/rasterizer/core/arena.h
@@ -65,69 +65,41 @@ static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN,
template<uint32_t NumBucketsT = 4, uint32_t StartBucketBitT = 16>
struct CachingAllocatorT : DefaultAllocator
{
- static uint32_t GetBucketId(size_t blockSize)
- {
- uint32_t bucketId = 0;
-
-#if defined(BitScanReverseSizeT)
- BitScanReverseSizeT((unsigned long*)&bucketId, blockSize >> CACHE_START_BUCKET_BIT);
- bucketId = std::min<uint32_t>(bucketId, CACHE_NUM_BUCKETS - 1);
-#endif
-
- return bucketId;
- }
-
void* AllocateAligned(size_t size, size_t align)
{
SWR_ASSERT(size >= sizeof(ArenaBlock));
SWR_ASSERT(size <= uint32_t(-1));
size_t blockSize = size - ARENA_BLOCK_ALIGN;
+ uint32_t bucket = GetBucketId(blockSize);
{
// search cached blocks
std::lock_guard<std::mutex> l(m_mutex);
- ArenaBlock* pPrevBlock = &m_cachedBlocks[GetBucketId(blockSize)];
- ArenaBlock* pBlock = pPrevBlock->pNext;
- ArenaBlock* pPotentialBlock = nullptr;
- ArenaBlock* pPotentialPrev = nullptr;
+ ArenaBlock* pPrevBlock = &m_cachedBlocks[bucket];
+ ArenaBlock* pBlock = SearchBlocks(pPrevBlock, blockSize, align);
- while (pBlock)
+ if (pBlock)
{
- if (pBlock->blockSize >= blockSize)
- {
- if (pBlock == AlignUp(pBlock, align))
- {
- if (pBlock->blockSize == blockSize)
- {
- // Won't find a better match
- break;
- }
-
- // We could use this as it is larger than we wanted, but
- // continue to search for a better match
- pPotentialBlock = pBlock;
- pPotentialPrev = pPrevBlock;
- }
- }
- else
+ m_cachedSize -= pBlock->blockSize;
+ if (pBlock == m_pLastCachedBlocks[bucket])
{
- // Blocks are sorted by size (biggest first)
- // So, if we get here, there are no blocks
- // large enough, fall through to allocation.
- pBlock = nullptr;
- break;
+ m_pLastCachedBlocks[bucket] = pPrevBlock;
}
-
- pPrevBlock = pBlock;
- pBlock = pBlock->pNext;
}
-
- if (!pBlock)
+ else
{
- // Couldn't find an exact match, use next biggest size
- pBlock = pPotentialBlock;
- pPrevBlock = pPotentialPrev;
+ pPrevBlock = &m_oldCachedBlocks[GetBucketId(blockSize)];
+ pBlock = SearchBlocks(pPrevBlock, blockSize, align);
+
+ if (pBlock)
+ {
+ m_oldCachedSize -= pBlock->blockSize;
+ if (pBlock == m_pOldLastCachedBlocks[bucket])
+ {
+ m_pLastCachedBlocks[bucket] = pPrevBlock;
+ }
+ }
}
if (pBlock)
@@ -154,7 +126,7 @@ struct CachingAllocatorT : DefaultAllocator
return this->DefaultAllocator::AllocateAligned(size, align);
}
- void Free(void* pMem)
+ void Free(void* pMem)
{
if (pMem)
{
@@ -162,24 +134,57 @@ struct CachingAllocatorT : DefaultAllocator
SWR_ASSERT(pNewBlock->blockSize >= 0);
std::unique_lock<std::mutex> l(m_mutex);
- ArenaBlock* pPrevBlock = &m_cachedBlocks[GetBucketId(pNewBlock->blockSize)];
- ArenaBlock* pBlock = pPrevBlock->pNext;
+ InsertCachedBlock(GetBucketId(pNewBlock->blockSize), pNewBlock);
+ }
+ }
- while (pBlock)
+ void FreeOldBlocks()
+ {
+ if (!m_cachedSize) { return; }
+ std::lock_guard<std::mutex> l(m_mutex);
+
+ bool doFree = (m_oldCachedSize > MAX_UNUSED_SIZE);
+
+ for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
+ {
+ if (doFree)
{
- if (pNewBlock->blockSize >= pBlock->blockSize)
+ ArenaBlock* pBlock = m_oldCachedBlocks[i].pNext;
+ while (pBlock)
{
- // Insert here
- break;
+ ArenaBlock* pNext = pBlock->pNext;
+ m_oldCachedSize -= pBlock->blockSize;
+ m_totalAllocated -= (pBlock->blockSize + ARENA_BLOCK_ALIGN);
+ this->DefaultAllocator::Free(pBlock);
+ pBlock = pNext;
}
- pPrevBlock = pBlock;
- pBlock = pBlock->pNext;
+ m_oldCachedBlocks[i].pNext = nullptr;
+ m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
}
- // Insert into list
- SWR_ASSERT(pPrevBlock);
- pPrevBlock->pNext = pNewBlock;
- pNewBlock->pNext = pBlock;
+ if (m_pLastCachedBlocks[i] != &m_cachedBlocks[i])
+ {
+ m_pLastCachedBlocks[i]->pNext = m_oldCachedBlocks[i].pNext;
+ m_oldCachedBlocks[i].pNext = m_cachedBlocks[i].pNext;
+ m_cachedBlocks[i].pNext = nullptr;
+ if (m_pOldLastCachedBlocks[i]->pNext)
+ {
+ m_pOldLastCachedBlocks[i] = m_pLastCachedBlocks[i];
+ }
+ m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
+ }
+ }
+
+ m_oldCachedSize += m_cachedSize;
+ m_cachedSize = 0;
+ }
+
+ CachingAllocatorT()
+ {
+ for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
+ {
+ m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
+ m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
}
}
@@ -195,21 +200,126 @@ struct CachingAllocatorT : DefaultAllocator
this->DefaultAllocator::Free(pBlock);
pBlock = pNext;
}
+ pBlock = m_oldCachedBlocks[i].pNext;
+ while (pBlock)
+ {
+ ArenaBlock* pNext = pBlock->pNext;
+ this->DefaultAllocator::Free(pBlock);
+ pBlock = pNext;
+ }
}
}
+private:
+ static uint32_t GetBucketId(size_t blockSize)
+ {
+ uint32_t bucketId = 0;
+
+#if defined(BitScanReverseSizeT)
+ BitScanReverseSizeT((unsigned long*)&bucketId, blockSize >> CACHE_START_BUCKET_BIT);
+ bucketId = std::min<uint32_t>(bucketId, CACHE_NUM_BUCKETS - 1);
+#endif
+
+ return bucketId;
+ }
+
+ void InsertCachedBlock(uint32_t bucketId, ArenaBlock* pNewBlock)
+ {
+ SWR_ASSERT(bucketId < CACHE_NUM_BUCKETS);
+
+ ArenaBlock* pPrevBlock = &m_cachedBlocks[bucketId];
+ ArenaBlock* pBlock = pPrevBlock->pNext;
+
+ while (pBlock)
+ {
+ if (pNewBlock->blockSize >= pBlock->blockSize)
+ {
+ // Insert here
+ break;
+ }
+ pPrevBlock = pBlock;
+ pBlock = pBlock->pNext;
+ }
+
+ // Insert into list
+ SWR_ASSERT(pPrevBlock);
+ pPrevBlock->pNext = pNewBlock;
+ pNewBlock->pNext = pBlock;
+
+ if (m_pLastCachedBlocks[bucketId] == pPrevBlock)
+ {
+ m_pLastCachedBlocks[bucketId] = pNewBlock;
+ }
+
+ m_cachedSize += pNewBlock->blockSize;
+ }
+
+ static ArenaBlock* SearchBlocks(ArenaBlock*& pPrevBlock, size_t blockSize, size_t align)
+ {
+ ArenaBlock* pBlock = pPrevBlock->pNext;
+ ArenaBlock* pPotentialBlock = nullptr;
+ ArenaBlock* pPotentialPrev = nullptr;
+
+ while (pBlock)
+ {
+ if (pBlock->blockSize >= blockSize)
+ {
+ if (pBlock == AlignUp(pBlock, align))
+ {
+ if (pBlock->blockSize == blockSize)
+ {
+ // Won't find a better match
+ break;
+ }
+
+ // We could use this as it is larger than we wanted, but
+ // continue to search for a better match
+ pPotentialBlock = pBlock;
+ pPotentialPrev = pPrevBlock;
+ }
+ }
+ else
+ {
+ // Blocks are sorted by size (biggest first)
+ // So, if we get here, there are no blocks
+ // large enough, fall through to allocation.
+ pBlock = nullptr;
+ break;
+ }
+
+ pPrevBlock = pBlock;
+ pBlock = pBlock->pNext;
+ }
+
+ if (!pBlock)
+ {
+ // Couldn't find an exact match, use next biggest size
+ pBlock = pPotentialBlock;
+ pPrevBlock = pPotentialPrev;
+ }
+
+ return pBlock;
+ }
+
// buckets, for block sizes < (1 << (start+1)), < (1 << (start+2)), ...
static const uint32_t CACHE_NUM_BUCKETS = NumBucketsT;
static const uint32_t CACHE_START_BUCKET_BIT = StartBucketBitT;
+ static const size_t MAX_UNUSED_SIZE = 20 * sizeof(MEGABYTE);
ArenaBlock m_cachedBlocks[CACHE_NUM_BUCKETS];
+ ArenaBlock* m_pLastCachedBlocks[CACHE_NUM_BUCKETS];
+ ArenaBlock m_oldCachedBlocks[CACHE_NUM_BUCKETS];
+ ArenaBlock* m_pOldLastCachedBlocks[CACHE_NUM_BUCKETS];
std::mutex m_mutex;
size_t m_totalAllocated = 0;
+
+ size_t m_cachedSize = 0;
+ size_t m_oldCachedSize = 0;
};
typedef CachingAllocatorT<> CachingAllocator;
-template<typename T = DefaultAllocator, size_t BlockSizeT = (128 * 1024)>
+template<typename T = DefaultAllocator, size_t BlockSizeT = 128 * sizeof(KILOBYTE)>
class TArena
{
public:
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
index 7fb83edf169..b2d3d9ef4f4 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
@@ -70,7 +70,7 @@ static PFN_CLEAR_TILES sClearTilesTable[NUM_SWR_FORMATS];
/// @param pDC - pointer to draw context (dispatch).
/// @param workerId - The unique worker ID that is assigned to this thread.
/// @param threadGroupId - the linear index for the thread group within the dispatch.
-void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId)
+void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer)
{
RDTSC_START(BEDispatch);
@@ -80,10 +80,10 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup
SWR_ASSERT(pTaskData != nullptr);
// Ensure spill fill memory has been allocated.
- if (pDC->pSpillFill[workerId] == nullptr)
+ if (pSpillFillBuffer == nullptr)
{
///@todo Add state which indicates the spill fill size.
- pDC->pSpillFill[workerId] = (uint8_t*)pDC->pArena->AllocAlignedSync(4096 * 1024, sizeof(float) * 8);
+ pSpillFillBuffer = pDC->pArena->AllocAlignedSync(4 * sizeof(MEGABYTE), sizeof(float) * 8);
}
const API_STATE& state = GetApiState(pDC);
@@ -94,7 +94,7 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup
csContext.dispatchDims[1] = pTaskData->threadGroupCountY;
csContext.dispatchDims[2] = pTaskData->threadGroupCountZ;
csContext.pTGSM = pContext->pScratch[workerId];
- csContext.pSpillFillBuffer = pDC->pSpillFill[workerId];
+ csContext.pSpillFillBuffer = (uint8_t*)pSpillFillBuffer;
state.pfnCsFunc(GetPrivateState(pDC), &csContext);
@@ -772,8 +772,10 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
psContext.vOneOverW.centroid = psContext.vOneOverW.center;
}
- // interpolate z
+ // interpolate and quantize z
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
+ psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
+
RDTSC_STOP(BEBarycentric, 0, 0);
simdmask clipCoverageMask = coverageMask & MASK;
@@ -793,7 +795,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
if(CanEarlyZ(pPSState))
{
RDTSC_START(BEEarlyDepthTest);
- depthPassMask = DepthStencilTest(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing,
+ depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing,
psContext.vZ, pDepthBase, vCoverageMask, pStencilBase, &stencilPassMask);
RDTSC_STOP(BEEarlyDepthTest, 0, 0);
@@ -825,7 +827,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
if(!CanEarlyZ(pPSState))
{
RDTSC_START(BELateDepthTest);
- depthPassMask = DepthStencilTest(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing,
+ depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing,
psContext.vZ, pDepthBase, vCoverageMask, pStencilBase, &stencilPassMask);
RDTSC_STOP(BELateDepthTest, 0, 0);
@@ -977,8 +979,9 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
backendFuncs.pfnCalcSampleBarycentrics(coeffs, psContext);
- // interpolate z
+ // interpolate and quantize z
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
+ psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
RDTSC_STOP(BEBarycentric, 0, 0);
@@ -1000,7 +1003,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
if (CanEarlyZ(pPSState))
{
RDTSC_START(BEEarlyDepthTest);
- depthPassMask = DepthStencilTest(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing,
+ depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing,
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
RDTSC_STOP(BEEarlyDepthTest, 0, 0);
@@ -1033,7 +1036,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
if (!CanEarlyZ(pPSState))
{
RDTSC_START(BELateDepthTest);
- depthPassMask = DepthStencilTest(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing,
+ depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing,
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
RDTSC_STOP(BELateDepthTest, 0, 0);
@@ -1200,8 +1203,9 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
RDTSC_START(BEBarycentric);
backendFuncs.pfnCalcPixelBarycentrics(coeffs, psContext);
- // interpolate z
+ // interpolate and quantize z
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
+ psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
RDTSC_STOP(BEBarycentric, 0, 0);
// execute pixel shader
@@ -1263,10 +1267,11 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
// calc I & J per sample
backendFuncs.pfnCalcSampleBarycentrics(coeffs, psContext);
- // interpolate z
+ // interpolate and quantize z
if (!pPSState->writesODepth)
{
vZ[sample] = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
+ vZ[sample] = state.pfnQuantizeDepth(vZ[sample]);
}
///@todo: perspective correct vs non-perspective correct clipping?
@@ -1292,7 +1297,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
// ZTest for this sample
RDTSC_START(BEEarlyDepthTest);
stencilPassMask[sample] = vCoverageMask[sample];
- depthPassMask[sample] = DepthStencilTest(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing,
+ depthPassMask[sample] = DepthStencilTest(&state, work.triFlags.frontFacing,
vZ[sample], pDepthSample, vCoverageMask[sample], pStencilSample, &stencilPassMask[sample]);
RDTSC_STOP(BEEarlyDepthTest, 0, 0);
@@ -1308,8 +1313,9 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
{
RDTSC_START(BEBarycentric);
backendFuncs.pfnCalcPixelBarycentrics(coeffs, psContext);
- // interpolate z
+ // interpolate and quantize z
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
+ psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
RDTSC_STOP(BEBarycentric, 0, 0);
// execute pixel shader
@@ -1463,8 +1469,9 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
backendFuncs.pfnCalcSampleBarycentrics(coeffs, psContext);
- // interpolate z
+ // interpolate and quantize z
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
+ psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
RDTSC_STOP(BEBarycentric, 0, 0);
@@ -1483,7 +1490,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
uint8_t *pStencilSample = pStencilBase + MultisampleTraits<sampleCount>::RasterTileStencilOffset(sample);
RDTSC_START(BEEarlyDepthTest);
- simdscalar depthPassMask = DepthStencilTest(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing,
+ simdscalar depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing,
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
DepthStencilWrite(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.h b/src/gallium/drivers/swr/rasterizer/core/backend.h
index 2fa18953cad..d0626b997af 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.h
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.h
@@ -32,7 +32,7 @@
#include "core/context.h"
#include "core/multisample.h"
-void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId);
+void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer);
void ProcessSyncBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
void ProcessQueryStatsBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.cpp b/src/gallium/drivers/swr/rasterizer/core/clip.cpp
index 3a2a8b35be8..e624fd8f674 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.cpp
@@ -162,8 +162,8 @@ int ClipTriToPlane( const float *pInPts, int numInPts,
void Clip(const float *pTriangle, const float *pAttribs, int numAttribs, float *pOutTriangles, int *numVerts, float *pOutAttribs)
{
// temp storage to hold at least 6 sets of vertices, the max number that can be created during clipping
- OSALIGN(float, 16) tempPts[6 * 4];
- OSALIGN(float, 16) tempAttribs[6 * KNOB_NUM_ATTRIBUTES * 4];
+ OSALIGNSIMD(float) tempPts[6 * 4];
+ OSALIGNSIMD(float) tempAttribs[6 * KNOB_NUM_ATTRIBUTES * 4];
// we opt to clip to viewport frustum to produce smaller triangles for rasterization precision
int NumOutPts = ClipTriToPlane<FRUSTUM_NEAR>(pTriangle, 3, pAttribs, numAttribs, tempPts, tempAttribs);
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h
index ba5870a92bb..67a4c4f47bb 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.h
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.h
@@ -265,8 +265,8 @@ public:
// clip a single primitive
int ClipScalar(PA_STATE& pa, uint32_t primIndex, float* pOutPos, float* pOutAttribs)
{
- OSALIGN(float, 16) inVerts[3 * 4];
- OSALIGN(float, 16) inAttribs[3 * KNOB_NUM_ATTRIBUTES * 4];
+ OSALIGNSIMD(float) inVerts[3 * 4];
+ OSALIGNSIMD(float) inAttribs[3 * KNOB_NUM_ATTRIBUTES * 4];
// transpose primitive position
__m128 verts[3];
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h
index 39f23372a18..6464aa20af7 100644
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h
@@ -308,6 +308,8 @@ OSALIGNLINE(struct) API_STATE
uint32_t depthHottileEnable: 1;
uint32_t stencilHottileEnable : 1;
};
+
+ PFN_QUANTIZE_DEPTH pfnQuantizeDepth;
};
class MacroTileMgr;
@@ -380,32 +382,29 @@ struct DRAW_STATE
// This draw context maintains all of the state needed for the draw operation.
struct DRAW_CONTEXT
{
- SWR_CONTEXT *pContext;
-
- uint64_t drawId;
-
- bool isCompute; // Is this DC a compute context?
-
- FE_WORK FeWork;
- volatile OSALIGNLINE(uint32_t) FeLock;
- volatile OSALIGNLINE(bool) doneFE; // Is FE work done for this draw?
- volatile OSALIGNLINE(int64_t) threadsDone;
-
- uint64_t dependency;
-
- MacroTileMgr* pTileMgr;
-
- // The following fields are valid if isCompute is true.
- DispatchQueue* pDispatch; // Queue for thread groups. (isCompute)
+ SWR_CONTEXT* pContext;
+ uint64_t drawId;
+ union
+ {
+ MacroTileMgr* pTileMgr;
+ DispatchQueue* pDispatch; // Queue for thread groups. (isCompute)
+ };
+ uint64_t dependency;
+ DRAW_STATE* pState;
+ CachingArena* pArena;
- DRAW_STATE* pState;
- CachingArena* pArena;
+ bool isCompute; // Is this DC a compute context?
+ bool cleanupState; // True if this is the last draw using an entry in the state ring.
+ volatile bool doneFE; // Is FE work done for this draw?
- uint8_t* pSpillFill[KNOB_MAX_NUM_THREADS]; // Scratch space used for spill fills.
+ FE_WORK FeWork;
- bool cleanupState; // True if this is the last draw using an entry in the state ring.
+ volatile OSALIGNLINE(uint32_t) FeLock;
+ volatile int64_t threadsDone;
};
+static_assert((sizeof(DRAW_CONTEXT) & 63) == 0, "Invalid size for DRAW_CONTEXT");
+
INLINE const API_STATE& GetApiState(const DRAW_CONTEXT* pDC)
{
SWR_ASSERT(pDC != nullptr);
@@ -447,6 +446,9 @@ struct SWR_CONTEXT
DRAW_CONTEXT *pCurDrawContext; // This points to DC entry in ring for an unsubmitted draw.
DRAW_CONTEXT *pPrevDrawContext; // This points to DC entry for the previous context submitted that we can copy state from.
+ MacroTileMgr* pMacroTileManagerArray;
+ DispatchQueue* pDispatchQueueArray;
+
// Draw State Ring
// When draw are very large (lots of primitives) then the API thread will break these up.
// These split draws all have identical state. So instead of storing the state directly
@@ -457,6 +459,8 @@ struct SWR_CONTEXT
uint32_t curStateId; // Current index to the next available entry in the DS ring.
uint32_t NumWorkerThreads;
+ uint32_t NumFEThreads;
+ uint32_t NumBEThreads;
THREAD_POOL threadPool; // Thread pool associated with this context
@@ -481,6 +485,7 @@ struct SWR_CONTEXT
uint8_t* pScratch[KNOB_MAX_NUM_THREADS];
CachingAllocator cachingArenaAllocator;
+ uint32_t frameCount;
};
void WaitForDependencies(SWR_CONTEXT *pContext, uint64_t drawId);
diff --git a/src/gallium/drivers/swr/rasterizer/core/depthstencil.h b/src/gallium/drivers/swr/rasterizer/core/depthstencil.h
index 2cc9d4054ac..7b55580bf0a 100644
--- a/src/gallium/drivers/swr/rasterizer/core/depthstencil.h
+++ b/src/gallium/drivers/swr/rasterizer/core/depthstencil.h
@@ -80,14 +80,52 @@ void StencilOp(SWR_STENCILOP op, simdscalar mask, simdscalar stencilRefps, simds
}
+template<SWR_FORMAT depthFormatT>
+simdscalar QuantizeDepth(simdscalar depth)
+{
+ SWR_TYPE depthType = FormatTraits<depthFormatT>::GetType(0);
+ uint32_t depthBpc = FormatTraits<depthFormatT>::GetBPC(0);
+
+ if (depthType == SWR_TYPE_FLOAT)
+ {
+ // assume only 32bit float depth supported
+ SWR_ASSERT(depthBpc == 32);
+
+ // matches shader precision, no quantizing needed
+ return depth;
+ }
+
+ // should be unorm depth if not float
+ SWR_ASSERT(depthType == SWR_TYPE_UNORM);
+
+ float quantize = (float)((1 << depthBpc) - 1);
+ simdscalar result = _simd_mul_ps(depth, _simd_set1_ps(quantize));
+ result = _simd_add_ps(result, _simd_set1_ps(0.5f));
+ result = _simd_round_ps(result, _MM_FROUND_TO_ZERO);
+
+ if (depthBpc > 16)
+ {
+ result = _simd_div_ps(result, _simd_set1_ps(quantize));
+ }
+ else
+ {
+ result = _simd_mul_ps(result, _simd_set1_ps(1.0f / quantize));
+ }
+
+ return result;
+}
+
INLINE
-simdscalar DepthStencilTest(const SWR_VIEWPORT* pViewport, const SWR_DEPTH_STENCIL_STATE* pDSState,
+simdscalar DepthStencilTest(const API_STATE* pState,
bool frontFacing, simdscalar interpZ, uint8_t* pDepthBase, simdscalar coverageMask, uint8_t *pStencilBase,
simdscalar* pStencilMask)
{
static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
static_assert(KNOB_STENCIL_HOT_TILE_FORMAT == R8_UINT, "Unsupported stencil hot tile format");
+ const SWR_DEPTH_STENCIL_STATE* pDSState = &pState->depthStencilState;
+ const SWR_VIEWPORT* pViewport = &pState->vp[0];
+
simdscalar depthResult = _simd_set1_ps(-1.0f);
simdscalar zbuf;
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index 36721e00beb..93869610ff9 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -793,8 +793,14 @@ static void GeometryShaderStage(
uint8_t* pCutBase = pCutBufferBase + instance * cutInstanceStride;
DWORD numAttribs;
- _BitScanReverse(&numAttribs, state.feAttribMask);
- numAttribs++;
+ if (_BitScanReverse(&numAttribs, state.feAttribMask))
+ {
+ numAttribs++;
+ }
+ else
+ {
+ numAttribs = 0;
+ }
for (uint32_t stream = 0; stream < MAX_SO_STREAMS; ++stream)
{
diff --git a/src/gallium/drivers/swr/rasterizer/core/knobs.h b/src/gallium/drivers/swr/rasterizer/core/knobs.h
index d7feb86273d..55a22a67f4c 100644
--- a/src/gallium/drivers/swr/rasterizer/core/knobs.h
+++ b/src/gallium/drivers/swr/rasterizer/core/knobs.h
@@ -45,14 +45,17 @@
#define KNOB_ARCH_ISA AVX
#define KNOB_ARCH_STR "AVX"
#define KNOB_SIMD_WIDTH 8
+#define KNOB_SIMD_BYTES 32
#elif (KNOB_ARCH == KNOB_ARCH_AVX2)
#define KNOB_ARCH_ISA AVX2
#define KNOB_ARCH_STR "AVX2"
#define KNOB_SIMD_WIDTH 8
+#define KNOB_SIMD_BYTES 32
#elif (KNOB_ARCH == KNOB_ARCH_AVX512)
#define KNOB_ARCH_ISA AVX512F
#define KNOB_ARCH_STR "AVX512"
#define KNOB_SIMD_WIDTH 16
+#define KNOB_SIMD_BYTES 64
#error "AVX512 not yet supported"
#else
#error "Unknown architecture"
diff --git a/src/gallium/drivers/swr/rasterizer/core/pa.h b/src/gallium/drivers/swr/rasterizer/core/pa.h
index f8f1a33b7e3..17f488538d6 100644
--- a/src/gallium/drivers/swr/rasterizer/core/pa.h
+++ b/src/gallium/drivers/swr/rasterizer/core/pa.h
@@ -1017,13 +1017,13 @@ struct PA_TESS : PA_STATE
{
SWR_ASSERT(numPrims <= KNOB_SIMD_WIDTH);
#if KNOB_SIMD_WIDTH == 8
- static const OSALIGN(int32_t, 64) maskGen[KNOB_SIMD_WIDTH * 2] =
+ static const OSALIGNLINE(int32_t) maskGen[KNOB_SIMD_WIDTH * 2] =
{
-1, -1, -1, -1, -1, -1, -1, -1,
0, 0, 0, 0, 0, 0, 0, 0
};
#elif KNOB_SIMD_WIDTH == 16
- static const OSALIGN(int32_t, 128) maskGen[KNOB_SIMD_WIDTH * 2] =
+ static const OSALIGNLINE(int32_t) maskGen[KNOB_SIMD_WIDTH * 2] =
{
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
@@ -1167,8 +1167,14 @@ struct PA_FACTORY
{
memset(&indexStore, 0, sizeof(indexStore));
DWORD numAttribs;
- _BitScanReverse(&numAttribs, state.feAttribMask);
- numAttribs++;
+ if (_BitScanReverse(&numAttribs, state.feAttribMask))
+ {
+ numAttribs++;
+ }
+ else
+ {
+ numAttribs = 0;
+ }
new (&this->paCut) PA_STATE_CUT(pDC, (uint8_t*)&this->vertexStore[0], MAX_NUM_VERTS_PER_PRIM * KNOB_SIMD_WIDTH,
&this->indexStore[0], numVerts, numAttribs, state.topology, false);
cutPA = true;
diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp
index 52fb7c88cdd..3144a901c91 100644
--- a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp
@@ -383,7 +383,7 @@ __declspec(thread) volatile uint64_t gToss;
static const uint32_t vertsPerTri = 3, componentsPerAttrib = 4;
// try to avoid _chkstk insertions; make this thread local
-static THREAD OSALIGN(float, 16) perspAttribsTLS[vertsPerTri * KNOB_NUM_ATTRIBUTES * componentsPerAttrib];
+static THREAD OSALIGNLINE(float) perspAttribsTLS[vertsPerTri * KNOB_NUM_ATTRIBUTES * componentsPerAttrib];
INLINE
void ComputeEdgeData(int32_t a, int32_t b, EDGE& edge)
@@ -439,7 +439,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
const SWR_RASTSTATE &rastState = state.rastState;
const BACKEND_FUNCS& backendFuncs = pDC->pState->backendFuncs;
- OSALIGN(SWR_TRIANGLE_DESC, 16) triDesc;
+ OSALIGNSIMD(SWR_TRIANGLE_DESC) triDesc;
triDesc.pUserClipBuffer = workDesc.pUserClipBuffer;
__m128 vX, vY, vZ, vRecipW;
@@ -502,7 +502,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
_MM_EXTRACT_FLOAT(triDesc.J[1], vB, 2);
_MM_EXTRACT_FLOAT(triDesc.J[2], vC, 2);
- OSALIGN(float, 16) oneOverW[4];
+ OSALIGNSIMD(float) oneOverW[4];
_mm_store_ps(oneOverW, vRecipW);
triDesc.OneOverW[0] = oneOverW[0] - oneOverW[2];
triDesc.OneOverW[1] = oneOverW[1] - oneOverW[2];
@@ -537,7 +537,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
// compute bary Z
// zInterp = zVert0 + i(zVert1-zVert0) + j (zVert2 - zVert0)
- OSALIGN(float, 16) a[4];
+ OSALIGNSIMD(float) a[4];
_mm_store_ps(a, vZ);
triDesc.Z[0] = a[0] - a[2];
triDesc.Z[1] = a[1] - a[2];
@@ -575,7 +575,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
}
// Calc bounding box of triangle
- OSALIGN(BBOX, 16) bbox;
+ OSALIGNSIMD(BBOX) bbox;
calcBoundingBoxInt(vXi, vYi, bbox);
// Intersect with scissor/viewport
@@ -594,7 +594,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
int32_t macroBoxTop = macroY * KNOB_MACROTILE_Y_DIM_FIXED;
int32_t macroBoxBottom = macroBoxTop + KNOB_MACROTILE_Y_DIM_FIXED - 1;
- OSALIGN(BBOX, 16) intersect;
+ OSALIGNSIMD(BBOX) intersect;
intersect.left = std::max(bbox.left, macroBoxLeft);
intersect.top = std::max(bbox.top, macroBoxTop);
intersect.right = std::min(bbox.right, macroBoxRight);
@@ -1047,7 +1047,7 @@ void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTi
{ 50, 51, 54, 55, 58, 59, 62, 63 }
};
- OSALIGN(SWR_TRIANGLE_DESC, 16) triDesc;
+ OSALIGNSIMD(SWR_TRIANGLE_DESC) triDesc;
// pull point information from triangle buffer
// @todo use structs for readability
@@ -1286,7 +1286,7 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
// make sure this macrotile intersects the triangle
__m128i vXai = fpToFixedPoint(vXa);
__m128i vYai = fpToFixedPoint(vYa);
- OSALIGN(BBOX, 16) bboxA;
+ OSALIGNSIMD(BBOX) bboxA;
calcBoundingBoxInt(vXai, vYai, bboxA);
if (!(bboxA.left > macroBoxRight ||
diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h
index 5752094ca10..50361068025 100644
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -790,6 +790,7 @@ typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext);
typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*, simdvector&, simdvector&, uint32_t, uint8_t*, simdvector&, simdscalari*, simdscalari*);
+typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar);
//////////////////////////////////////////////////////////////////////////
/// FRONTEND_STATE
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index 07bc94a1a54..4b7a207f366 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -68,7 +68,10 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
#if defined(_WIN32)
- SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX buffer[KNOB_MAX_NUM_THREADS];
+ static std::mutex m;
+ std::lock_guard<std::mutex> l(m);
+
+ static SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX buffer[KNOB_MAX_NUM_THREADS];
DWORD bufSize = sizeof(buffer);
BOOL ret = GetLogicalProcessorInformationEx(RelationProcessorCore, buffer, &bufSize);
@@ -288,7 +291,10 @@ INLINE int64_t CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
{
// Cleanup memory allocations
pDC->pArena->Reset(true);
- pDC->pTileMgr->initialize();
+ if (!pDC->isCompute)
+ {
+ pDC->pTileMgr->initialize();
+ }
if (pDC->cleanupState)
{
pDC->pState->pArena->Reset(true);
@@ -302,10 +308,10 @@ INLINE int64_t CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
return result;
}
-INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint64_t& curDrawBE)
+INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint64_t& curDrawBE, uint64_t& drawEnqueued)
{
// increment our current draw id to the first incomplete draw
- uint64_t drawEnqueued = GetEnqueuedDraw(pContext);
+ drawEnqueued = GetEnqueuedDraw(pContext);
while (curDrawBE < drawEnqueued)
{
DRAW_CONTEXT *pDC = &pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT];
@@ -313,8 +319,9 @@ INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint64_t& curDrawBE)
// If its not compute and FE is not done then break out of loop.
if (!pDC->doneFE && !pDC->isCompute) break;
- bool isWorkComplete = (pDC->isCompute) ?
- pDC->pDispatch->isWorkComplete() : pDC->pTileMgr->isWorkComplete();
+ bool isWorkComplete = pDC->isCompute ?
+ pDC->pDispatch->isWorkComplete() :
+ pDC->pTileMgr->isWorkComplete();
if (isWorkComplete)
{
@@ -355,7 +362,8 @@ void WorkOnFifoBE(
{
// Find the first incomplete draw that has pending work. If no such draw is found then
// return. FindFirstIncompleteDraw is responsible for incrementing the curDrawBE.
- if (FindFirstIncompleteDraw(pContext, curDrawBE) == false)
+ uint64_t drawEnqueued = 0;
+ if (FindFirstIncompleteDraw(pContext, curDrawBE, drawEnqueued) == false)
{
return;
}
@@ -370,7 +378,7 @@ void WorkOnFifoBE(
// 2. If we're trying to work on draws after curDrawBE, we are restricted to
// working on those macrotiles that are known to be complete in the prior draw to
// maintain order. The locked tiles provides the history to ensures this.
- for (uint64_t i = curDrawBE; i < GetEnqueuedDraw(pContext); ++i)
+ for (uint64_t i = curDrawBE; i < drawEnqueued; ++i)
{
DRAW_CONTEXT *pDC = &pContext->dcRing[i % KNOB_MAX_DRAWS_IN_FLIGHT];
@@ -463,7 +471,7 @@ void WorkOnFifoBE(
}
}
-void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawFE, uint32_t numaNode)
+void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawFE)
{
// Try to grab the next DC from the ring
uint64_t drawEnqueued = GetEnqueuedDraw(pContext);
@@ -516,38 +524,44 @@ void WorkOnCompute(
uint32_t workerId,
uint64_t& curDrawBE)
{
- if (FindFirstIncompleteDraw(pContext, curDrawBE) == false)
+ uint64_t drawEnqueued = 0;
+ if (FindFirstIncompleteDraw(pContext, curDrawBE, drawEnqueued) == false)
{
return;
}
uint64_t lastRetiredDraw = pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT].drawId - 1;
- DRAW_CONTEXT *pDC = &pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT];
- if (pDC->isCompute == false) return;
-
- // check dependencies
- if (CheckDependency(pContext, pDC, lastRetiredDraw))
+ for (uint64_t i = curDrawBE; curDrawBE < drawEnqueued; ++i)
{
- return;
- }
+ DRAW_CONTEXT *pDC = &pContext->dcRing[i % KNOB_MAX_DRAWS_IN_FLIGHT];
+ if (pDC->isCompute == false) return;
- SWR_ASSERT(pDC->pDispatch != nullptr);
- DispatchQueue& queue = *pDC->pDispatch;
+ // check dependencies
+ if (CheckDependency(pContext, pDC, lastRetiredDraw))
+ {
+ return;
+ }
- // Is there any work remaining?
- if (queue.getNumQueued() > 0)
- {
- uint32_t threadGroupId = 0;
- while (queue.getWork(threadGroupId))
+ SWR_ASSERT(pDC->pDispatch != nullptr);
+ DispatchQueue& queue = *pDC->pDispatch;
+
+ // Is there any work remaining?
+ if (queue.getNumQueued() > 0)
{
- ProcessComputeBE(pDC, workerId, threadGroupId);
+ void* pSpillFillBuffer = nullptr;
+ uint32_t threadGroupId = 0;
+ while (queue.getWork(threadGroupId))
+ {
+ ProcessComputeBE(pDC, workerId, threadGroupId, pSpillFillBuffer);
- queue.finishedWork();
+ queue.finishedWork();
+ }
}
}
}
+template<bool IsFEThread, bool IsBEThread>
DWORD workerThreadMain(LPVOID pData)
{
THREAD_DATA *pThreadData = (THREAD_DATA*)pData;
@@ -631,25 +645,38 @@ DWORD workerThreadMain(LPVOID pData)
}
}
- RDTSC_START(WorkerWorkOnFifoBE);
- WorkOnFifoBE(pContext, workerId, curDrawBE, lockedTiles, numaNode, numaMask);
- RDTSC_STOP(WorkerWorkOnFifoBE, 0, 0);
+ if (IsBEThread)
+ {
+ RDTSC_START(WorkerWorkOnFifoBE);
+ WorkOnFifoBE(pContext, workerId, curDrawBE, lockedTiles, numaNode, numaMask);
+ RDTSC_STOP(WorkerWorkOnFifoBE, 0, 0);
- WorkOnCompute(pContext, workerId, curDrawBE);
+ WorkOnCompute(pContext, workerId, curDrawBE);
+ }
- WorkOnFifoFE(pContext, workerId, curDrawFE, numaNode);
+ if (IsFEThread)
+ {
+ WorkOnFifoFE(pContext, workerId, curDrawFE);
+
+ if (!IsBEThread)
+ {
+ curDrawBE = curDrawFE;
+ }
+ }
}
return 0;
}
+template<> DWORD workerThreadMain<false, false>(LPVOID) = delete;
+template <bool IsFEThread, bool IsBEThread>
DWORD workerThreadInit(LPVOID pData)
{
#if defined(_WIN32)
__try
#endif // _WIN32
{
- return workerThreadMain(pData);
+ return workerThreadMain<IsFEThread, IsBEThread>(pData);
}
#if defined(_WIN32)
@@ -661,6 +688,7 @@ DWORD workerThreadInit(LPVOID pData)
return 1;
}
+template<> DWORD workerThreadInit<false, false>(LPVOID pData) = delete;
void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
{
@@ -678,6 +706,16 @@ void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
uint32_t numCoresPerNode = numHWCoresPerNode;
uint32_t numHyperThreads = numHWHyperThreads;
+ if (KNOB_MAX_WORKER_THREADS)
+ {
+ SET_KNOB(HYPERTHREADED_FE, false);
+ }
+
+ if (KNOB_HYPERTHREADED_FE)
+ {
+ SET_KNOB(MAX_THREADS_PER_CORE, 0);
+ }
+
if (KNOB_MAX_NUMA_NODES)
{
numNodes = std::min(numNodes, KNOB_MAX_NUMA_NODES);
@@ -693,6 +731,11 @@ void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
numHyperThreads = std::min(numHyperThreads, KNOB_MAX_THREADS_PER_CORE);
}
+ if (numHyperThreads < 2)
+ {
+ SET_KNOB(HYPERTHREADED_FE, false);
+ }
+
// Calculate numThreads
uint32_t numThreads = numNodes * numCoresPerNode * numHyperThreads;
@@ -767,9 +810,14 @@ void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
pPool->pThreadData[workerId].procGroupId = workerId % numProcGroups;
pPool->pThreadData[workerId].threadId = 0;
pPool->pThreadData[workerId].numaId = 0;
+ pPool->pThreadData[workerId].coreId = 0;
+ pPool->pThreadData[workerId].htId = 0;
pPool->pThreadData[workerId].pContext = pContext;
pPool->pThreadData[workerId].forceBindProcGroup = bForceBindProcGroup;
- pPool->threads[workerId] = new std::thread(workerThreadInit, &pPool->pThreadData[workerId]);
+ pPool->threads[workerId] = new std::thread(workerThreadInit<true, true>, &pPool->pThreadData[workerId]);
+
+ pContext->NumBEThreads++;
+ pContext->NumFEThreads++;
}
}
else
@@ -780,6 +828,10 @@ void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
for (uint32_t n = 0; n < numNodes; ++n)
{
auto& node = nodes[n];
+ if (node.cores.size() == 0)
+ {
+ continue;
+ }
uint32_t numCores = numCoresPerNode;
for (uint32_t c = 0; c < numCores; ++c)
@@ -797,8 +849,29 @@ void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
pPool->pThreadData[workerId].procGroupId = core.procGroup;
pPool->pThreadData[workerId].threadId = core.threadIds[t];
pPool->pThreadData[workerId].numaId = n;
+ pPool->pThreadData[workerId].coreId = c;
+ pPool->pThreadData[workerId].htId = t;
pPool->pThreadData[workerId].pContext = pContext;
- pPool->threads[workerId] = new std::thread(workerThreadInit, &pPool->pThreadData[workerId]);
+
+ if (KNOB_HYPERTHREADED_FE)
+ {
+ if (t == 0)
+ {
+ pContext->NumBEThreads++;
+ pPool->threads[workerId] = new std::thread(workerThreadInit<false, true>, &pPool->pThreadData[workerId]);
+ }
+ else
+ {
+ pContext->NumFEThreads++;
+ pPool->threads[workerId] = new std::thread(workerThreadInit<true, false>, &pPool->pThreadData[workerId]);
+ }
+ }
+ else
+ {
+ pPool->threads[workerId] = new std::thread(workerThreadInit<true, true>, &pPool->pThreadData[workerId]);
+ pContext->NumBEThreads++;
+ pContext->NumFEThreads++;
+ }
++workerId;
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.h b/src/gallium/drivers/swr/rasterizer/core/threads.h
index 821d7dcb16e..3aba6323a95 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.h
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.h
@@ -41,6 +41,8 @@ struct THREAD_DATA
uint32_t procGroupId; // Will always be 0 for non-Windows OS
uint32_t threadId; // within the procGroup for Windows
uint32_t numaId; // NUMA node id
+ uint32_t coreId; // Core id
+ uint32_t htId; // Hyperthread id
uint32_t workerId;
SWR_CONTEXT *pContext;
bool forceBindProcGroup; // Only useful when KNOB_MAX_WORKER_THREADS is set.
@@ -62,7 +64,7 @@ void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool);
void DestroyThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool);
// Expose FE and BE worker functions to the API thread if single threaded
-void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawFE, uint32_t numaNode);
+void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawFE);
void WorkOnFifoBE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawBE, TileSet &usedTiles, uint32_t numaNode, uint32_t numaMask);
void WorkOnCompute(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawBE);
int64_t CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC); \ No newline at end of file
diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp
index 794577270cf..87d9f42c032 100644
--- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp
@@ -35,27 +35,6 @@
#define TILE_ID(x,y) ((x << 16 | y))
-// override new/delete for alignment
-void *MacroTileMgr::operator new(size_t size)
-{
- return _aligned_malloc(size, 64);
-}
-
-void MacroTileMgr::operator delete(void *p)
-{
- _aligned_free(p);
-}
-
-void* DispatchQueue::operator new(size_t size)
-{
- return _aligned_malloc(size, 64);
-}
-
-void DispatchQueue::operator delete(void *p)
-{
- _aligned_free(p);
-}
-
MacroTileMgr::MacroTileMgr(CachingArena& arena) : mArena(arena)
{
}
@@ -304,7 +283,6 @@ void HotTileMgr::ClearStencilHotTile(const HOTTILE* pHotTile)
void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID)
{
const API_STATE& state = GetApiState(pDC);
- HotTileMgr *pHotTileMgr = pContext->pHotTileMgr;
uint32_t x, y;
MacroTileMgr::getTileIndices(macroID, x, y);
diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
index aa561badc1c..82a15e16a33 100644
--- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
+++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
@@ -140,9 +140,6 @@ public:
x = (tileID >> 16) & 0xffff;
}
- void *operator new(size_t size);
- void operator delete (void *p);
-
private:
CachingArena& mArena;
std::unordered_map<uint32_t, MacroTileQueue> mTiles;
@@ -229,9 +226,6 @@ public:
return mpTaskData;
}
- void *operator new(size_t size);
- void operator delete (void *p);
-
void* mpTaskData{ nullptr }; // The API thread will set this up and the callback task function will interpet this.
OSALIGNLINE(volatile LONG) mTasksAvailable{ 0 };
@@ -272,7 +266,7 @@ class HotTileMgr
public:
HotTileMgr()
{
- memset(&mHotTiles[0][0], 0, sizeof(mHotTiles));
+ memset(mHotTiles, 0, sizeof(mHotTiles));
// cache hottile size
for (uint32_t i = SWR_ATTACHMENT_COLOR0; i <= SWR_ATTACHMENT_COLOR7; ++i)
diff --git a/src/gallium/drivers/swr/rasterizer/scripts/knob_defs.py b/src/gallium/drivers/swr/rasterizer/scripts/knob_defs.py
index 0f3ded68544..3832b91d93e 100644
--- a/src/gallium/drivers/swr/rasterizer/scripts/knob_defs.py
+++ b/src/gallium/drivers/swr/rasterizer/scripts/knob_defs.py
@@ -30,6 +30,18 @@ KNOBS = [
'category' : 'debug',
}],
+ ['HYPERTHREADED_FE', {
+ 'type' : 'bool',
+ 'default' : 'false',
+ 'desc' : ['EXPERIMENTAL!!',
+ 'If enabled will attempt to use secondary threads per core to perform',
+ 'front-end (VS/GS) work.',
+ '',
+ 'Note: Setting this will cause KNOB_MAX_THREADS_PER_CORE to be ignored.'],
+ 'category' : 'perf',
+ 'advanced' : 'true',
+ }],
+
['DUMP_SHADER_IR', {
'type' : 'bool',
'default' : 'false',
@@ -166,6 +178,7 @@ KNOBS = [
'',
'NOTE: Requires KNOB_ENABLE_TOSS_POINTS to be enabled in core/knobs.h'],
'category' : 'perf',
+ 'advanced' : 'true',
}],
['TOSS_FETCH', {
@@ -175,6 +188,7 @@ KNOBS = [
'',
'NOTE: Requires KNOB_ENABLE_TOSS_POINTS to be enabled in core/knobs.h'],
'category' : 'perf',
+ 'advanced' : 'true',
}],
['TOSS_IA', {
@@ -184,6 +198,7 @@ KNOBS = [
'',
'NOTE: Requires KNOB_ENABLE_TOSS_POINTS to be enabled in core/knobs.h'],
'category' : 'perf',
+ 'advanced' : 'true',
}],
['TOSS_VS', {
@@ -193,6 +208,7 @@ KNOBS = [
'',
'NOTE: Requires KNOB_ENABLE_TOSS_POINTS to be enabled in core/knobs.h'],
'category' : 'perf',
+ 'advanced' : 'true',
}],
['TOSS_SETUP_TRIS', {
@@ -202,6 +218,7 @@ KNOBS = [
'',
'NOTE: Requires KNOB_ENABLE_TOSS_POINTS to be enabled in core/knobs.h'],
'category' : 'perf',
+ 'advanced' : 'true',
}],
['TOSS_BIN_TRIS', {
@@ -211,6 +228,7 @@ KNOBS = [
'',
'NOTE: Requires KNOB_ENABLE_TOSS_POINTS to be enabled in core/knobs.h'],
'category' : 'perf',
+ 'advanced' : 'true',
}],
['TOSS_RS', {
@@ -220,4 +238,5 @@ KNOBS = [
'',
'NOTE: Requires KNOB_ENABLE_TOSS_POINTS to be enabled in core/knobs.h'],
'category' : 'perf',
+ 'advanced' : 'true',
}],]
diff --git a/src/gallium/drivers/swr/swr_query.cpp b/src/gallium/drivers/swr/swr_query.cpp
index 810c50b2f8f..e4b8b683278 100644
--- a/src/gallium/drivers/swr/swr_query.cpp
+++ b/src/gallium/drivers/swr/swr_query.cpp
@@ -319,6 +319,12 @@ swr_check_render_cond(struct pipe_context *pipe)
return TRUE;
}
+
+static void
+swr_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{
+}
+
void
swr_query_init(struct pipe_context *pipe)
{
@@ -329,6 +335,7 @@ swr_query_init(struct pipe_context *pipe)
pipe->begin_query = swr_begin_query;
pipe->end_query = swr_end_query;
pipe->get_query_result = swr_get_query_result;
+ pipe->set_active_query_state = swr_set_active_query_state;
ctx->active_queries = 0;
}
diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp
index f9e52be2367..a0a6324f334 100644
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -337,6 +337,11 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
case PIPE_CAP_QUERY_BUFFER_OBJECT:
case PIPE_CAP_QUERY_MEMORY_INFO:
+ case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
+ case PIPE_CAP_PCI_GROUP:
+ case PIPE_CAP_PCI_BUS:
+ case PIPE_CAP_PCI_DEVICE:
+ case PIPE_CAP_PCI_FUNCTION:
return 0;
}
diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp
index ff16d0f2f11..83e32163ecc 100644
--- a/src/gallium/drivers/swr/swr_shader.cpp
+++ b/src/gallium/drivers/swr/swr_shader.cpp
@@ -40,32 +40,29 @@
#include "swr_state.h"
#include "swr_screen.h"
-bool operator==(const swr_jit_key &lhs, const swr_jit_key &rhs)
+bool operator==(const swr_jit_fs_key &lhs, const swr_jit_fs_key &rhs)
{
return !memcmp(&lhs, &rhs, sizeof(lhs));
}
-void
-swr_generate_fs_key(struct swr_jit_key &key,
- struct swr_context *ctx,
- swr_fragment_shader *swr_fs)
+bool operator==(const swr_jit_vs_key &lhs, const swr_jit_vs_key &rhs)
{
- key.nr_cbufs = ctx->framebuffer.nr_cbufs;
- key.light_twoside = ctx->rasterizer->light_twoside;
- memcpy(&key.vs_output_semantic_name,
- &ctx->vs->info.base.output_semantic_name,
- sizeof(key.vs_output_semantic_name));
- memcpy(&key.vs_output_semantic_idx,
- &ctx->vs->info.base.output_semantic_index,
- sizeof(key.vs_output_semantic_idx));
+ return !memcmp(&lhs, &rhs, sizeof(lhs));
+}
- key.nr_samplers = swr_fs->info.base.file_max[TGSI_FILE_SAMPLER] + 1;
+static void
+swr_generate_sampler_key(const struct lp_tgsi_info &info,
+ struct swr_context *ctx,
+ unsigned shader_type,
+ struct swr_jit_sampler_key &key)
+{
+ key.nr_samplers = info.base.file_max[TGSI_FILE_SAMPLER] + 1;
for (unsigned i = 0; i < key.nr_samplers; i++) {
- if (swr_fs->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
+ if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
lp_sampler_static_sampler_state(
&key.sampler[i].sampler_state,
- ctx->samplers[PIPE_SHADER_FRAGMENT][i]);
+ ctx->samplers[shader_type][i]);
}
}
@@ -74,28 +71,58 @@ swr_generate_fs_key(struct swr_jit_key &key,
* are dx10-style? Can't really have mixed opcodes, at least not
* if we want to skip the holes here (without rescanning tgsi).
*/
- if (swr_fs->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
+ if (info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
key.nr_sampler_views =
- swr_fs->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
+ info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
for (unsigned i = 0; i < key.nr_sampler_views; i++) {
- if (swr_fs->info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1 << i)) {
+ if (info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1 << i)) {
lp_sampler_static_texture_state(
&key.sampler[i].texture_state,
- ctx->sampler_views[PIPE_SHADER_FRAGMENT][i]);
+ ctx->sampler_views[shader_type][i]);
}
}
} else {
key.nr_sampler_views = key.nr_samplers;
for (unsigned i = 0; i < key.nr_sampler_views; i++) {
- if (swr_fs->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
+ if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
lp_sampler_static_texture_state(
&key.sampler[i].texture_state,
- ctx->sampler_views[PIPE_SHADER_FRAGMENT][i]);
+ ctx->sampler_views[shader_type][i]);
}
}
}
}
+void
+swr_generate_fs_key(struct swr_jit_fs_key &key,
+ struct swr_context *ctx,
+ swr_fragment_shader *swr_fs)
+{
+ memset(&key, 0, sizeof(key));
+
+ key.nr_cbufs = ctx->framebuffer.nr_cbufs;
+ key.light_twoside = ctx->rasterizer->light_twoside;
+ key.flatshade = ctx->rasterizer->flatshade;
+ memcpy(&key.vs_output_semantic_name,
+ &ctx->vs->info.base.output_semantic_name,
+ sizeof(key.vs_output_semantic_name));
+ memcpy(&key.vs_output_semantic_idx,
+ &ctx->vs->info.base.output_semantic_index,
+ sizeof(key.vs_output_semantic_idx));
+
+ swr_generate_sampler_key(swr_fs->info, ctx, PIPE_SHADER_FRAGMENT, key);
+}
+
+void
+swr_generate_vs_key(struct swr_jit_vs_key &key,
+ struct swr_context *ctx,
+ swr_vertex_shader *swr_vs)
+{
+ memset(&key, 0, sizeof(key));
+
+ swr_generate_sampler_key(swr_vs->info, ctx, PIPE_SHADER_VERTEX, key);
+}
+
struct BuilderSWR : public Builder {
BuilderSWR(JitManager *pJitMgr)
: Builder(pJitMgr)
@@ -103,14 +130,15 @@ struct BuilderSWR : public Builder {
pJitMgr->SetupNewModule();
}
- PFN_VERTEX_FUNC
- CompileVS(struct pipe_context *ctx, swr_vertex_shader *swr_vs);
- PFN_PIXEL_KERNEL CompileFS(struct swr_context *ctx, swr_jit_key &key);
+ PFN_VERTEX_FUNC CompileVS(struct swr_context *ctx, swr_jit_vs_key &key);
+ PFN_PIXEL_KERNEL CompileFS(struct swr_context *ctx, swr_jit_fs_key &key);
};
PFN_VERTEX_FUNC
-BuilderSWR::CompileVS(struct pipe_context *ctx, swr_vertex_shader *swr_vs)
+BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
{
+ struct swr_vertex_shader *swr_vs = ctx->vs;
+
swr_vs->linkageMask = 0;
for (unsigned i = 0; i < swr_vs->info.base.num_outputs; i++) {
@@ -180,6 +208,9 @@ BuilderSWR::CompileVS(struct pipe_context *ctx, swr_vertex_shader *swr_vs)
}
}
+ struct lp_build_sampler_soa *sampler =
+ swr_sampler_soa_create(key.sampler, PIPE_SHADER_VERTEX);
+
struct lp_bld_tgsi_system_values system_values;
memset(&system_values, 0, sizeof(system_values));
system_values.instance_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_InstanceID}));
@@ -194,9 +225,9 @@ BuilderSWR::CompileVS(struct pipe_context *ctx, swr_vertex_shader *swr_vs)
&system_values,
inputs,
outputs,
- NULL, // wrap(hPrivateData), (sampler context)
+ wrap(hPrivateData), // (sampler context)
NULL, // thread data
- NULL, // sampler
+ sampler, // sampler
&swr_vs->info.base,
NULL); // geometry shader face
@@ -239,11 +270,11 @@ BuilderSWR::CompileVS(struct pipe_context *ctx, swr_vertex_shader *swr_vs)
}
PFN_VERTEX_FUNC
-swr_compile_vs(struct pipe_context *ctx, swr_vertex_shader *swr_vs)
+swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key)
{
BuilderSWR builder(
- reinterpret_cast<JitManager *>(swr_screen(ctx->screen)->hJitMgr));
- return builder.CompileVS(ctx, swr_vs);
+ reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr));
+ return builder.CompileVS(ctx, key);
}
static unsigned
@@ -269,7 +300,7 @@ locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info)
}
PFN_PIXEL_KERNEL
-BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_key &key)
+BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key)
{
struct swr_fragment_shader *swr_fs = ctx->fs;
@@ -461,6 +492,9 @@ BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_key &key)
if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
inputs[attrib][channel] = wrap(va);
+ } else if ((interpMode == TGSI_INTERPOLATE_COLOR) &&
+ (key.flatshade == true)) {
+ inputs[attrib][channel] = wrap(vc);
} else {
Value *vk = FSUB(FSUB(VIMMED1(1.0f), vi), vj);
@@ -478,7 +512,7 @@ BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_key &key)
}
}
- sampler = swr_sampler_soa_create(key.sampler);
+ sampler = swr_sampler_soa_create(key.sampler, PIPE_SHADER_FRAGMENT);
struct lp_bld_tgsi_system_values system_values;
memset(&system_values, 0, sizeof(system_values));
@@ -583,7 +617,7 @@ BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_key &key)
}
PFN_PIXEL_KERNEL
-swr_compile_fs(struct swr_context *ctx, swr_jit_key &key)
+swr_compile_fs(struct swr_context *ctx, swr_jit_fs_key &key)
{
BuilderSWR builder(
reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr));
diff --git a/src/gallium/drivers/swr/swr_shader.h b/src/gallium/drivers/swr/swr_shader.h
index e22a7c48c2a..3f79570bbd9 100644
--- a/src/gallium/drivers/swr/swr_shader.h
+++ b/src/gallium/drivers/swr/swr_shader.h
@@ -25,36 +25,56 @@
class swr_vertex_shader;
class swr_fragment_shader;
-class swr_jit_key;
+class swr_jit_fs_key;
+class swr_jit_vs_key;
PFN_VERTEX_FUNC
-swr_compile_vs(struct pipe_context *ctx, swr_vertex_shader *swr_vs);
+swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key);
PFN_PIXEL_KERNEL
-swr_compile_fs(struct swr_context *ctx, swr_jit_key &key);
+swr_compile_fs(struct swr_context *ctx, swr_jit_fs_key &key);
-void swr_generate_fs_key(struct swr_jit_key &key,
+void swr_generate_fs_key(struct swr_jit_fs_key &key,
struct swr_context *ctx,
swr_fragment_shader *swr_fs);
-struct swr_jit_key {
+void swr_generate_vs_key(struct swr_jit_vs_key &key,
+ struct swr_context *ctx,
+ swr_vertex_shader *swr_vs);
+
+struct swr_jit_sampler_key {
+ unsigned nr_samplers;
+ unsigned nr_sampler_views;
+ struct swr_sampler_static_state sampler[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+};
+
+struct swr_jit_fs_key : swr_jit_sampler_key {
unsigned nr_cbufs;
unsigned light_twoside;
+ unsigned flatshade;
ubyte vs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
ubyte vs_output_semantic_idx[PIPE_MAX_SHADER_OUTPUTS];
- unsigned nr_samplers;
- unsigned nr_sampler_views;
- struct swr_sampler_static_state sampler[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+};
+
+struct swr_jit_vs_key : swr_jit_sampler_key {
};
namespace std
{
-template <> struct hash<swr_jit_key> {
- std::size_t operator()(const swr_jit_key &k) const
+template <> struct hash<swr_jit_fs_key> {
+ std::size_t operator()(const swr_jit_fs_key &k) const
+ {
+ return util_hash_crc32(&k, sizeof(k));
+ }
+};
+
+template <> struct hash<swr_jit_vs_key> {
+ std::size_t operator()(const swr_jit_vs_key &k) const
{
return util_hash_crc32(&k, sizeof(k));
}
};
};
-bool operator==(const swr_jit_key &lhs, const swr_jit_key &rhs);
+bool operator==(const swr_jit_fs_key &lhs, const swr_jit_fs_key &rhs);
+bool operator==(const swr_jit_vs_key &lhs, const swr_jit_vs_key &rhs);
diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp
index e7bf3618a7d..ded51a9b196 100644
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -317,8 +317,7 @@ static void *
swr_create_vs_state(struct pipe_context *pipe,
const struct pipe_shader_state *vs)
{
- struct swr_vertex_shader *swr_vs =
- (swr_vertex_shader *)CALLOC_STRUCT(swr_vertex_shader);
+ struct swr_vertex_shader *swr_vs = new swr_vertex_shader;
if (!swr_vs)
return NULL;
@@ -327,8 +326,6 @@ swr_create_vs_state(struct pipe_context *pipe,
lp_build_tgsi_info(vs->tokens, &swr_vs->info);
- swr_vs->func = swr_compile_vs(pipe, swr_vs);
-
swr_vs->soState = {0};
if (swr_vs->pipe.stream_output.num_outputs) {
@@ -368,7 +365,7 @@ swr_delete_vs_state(struct pipe_context *pipe, void *vs)
{
struct swr_vertex_shader *swr_vs = (swr_vertex_shader *)vs;
FREE((void *)swr_vs->pipe.tokens);
- FREE(vs);
+ delete swr_vs;
}
static void *
@@ -675,6 +672,58 @@ swr_update_resource_status(struct pipe_context *pipe,
}
}
+static void
+swr_update_texture_state(struct swr_context *ctx,
+ unsigned shader_type,
+ unsigned num_sampler_views,
+ swr_jit_texture *textures)
+{
+ for (unsigned i = 0; i < num_sampler_views; i++) {
+ struct pipe_sampler_view *view =
+ ctx->sampler_views[shader_type][i];
+
+ if (view) {
+ struct pipe_resource *res = view->texture;
+ struct swr_resource *swr_res = swr_resource(res);
+ struct swr_jit_texture *jit_tex = &textures[i];
+ memset(jit_tex, 0, sizeof(*jit_tex));
+ jit_tex->width = res->width0;
+ jit_tex->height = res->height0;
+ jit_tex->depth = res->depth0;
+ jit_tex->first_level = view->u.tex.first_level;
+ jit_tex->last_level = view->u.tex.last_level;
+ jit_tex->base_ptr = swr_res->swr.pBaseAddress;
+
+ for (unsigned level = jit_tex->first_level;
+ level <= jit_tex->last_level;
+ level++) {
+ jit_tex->row_stride[level] = swr_res->row_stride[level];
+ jit_tex->img_stride[level] = swr_res->img_stride[level];
+ jit_tex->mip_offsets[level] = swr_res->mip_offsets[level];
+ }
+ }
+ }
+}
+
+static void
+swr_update_sampler_state(struct swr_context *ctx,
+ unsigned shader_type,
+ unsigned num_samplers,
+ swr_jit_sampler *samplers)
+{
+ for (unsigned i = 0; i < num_samplers; i++) {
+ const struct pipe_sampler_state *sampler =
+ ctx->samplers[shader_type][i];
+
+ if (sampler) {
+ samplers[i].min_lod = sampler->min_lod;
+ samplers[i].max_lod = sampler->max_lod;
+ samplers[i].lod_bias = sampler->lod_bias;
+ COPY_4V(samplers[i].border_color, sampler->border_color.f);
+ }
+ }
+}
+
void
swr_update_derived(struct pipe_context *pipe,
const struct pipe_draw_info *p_draw_info)
@@ -974,14 +1023,43 @@ swr_update_derived(struct pipe_context *pipe,
}
/* VertexShader */
- if (ctx->dirty & (SWR_NEW_VS | SWR_NEW_FRAMEBUFFER)) {
- SwrSetVertexFunc(ctx->swrContext, ctx->vs->func);
+ if (ctx->dirty & (SWR_NEW_VS |
+ SWR_NEW_SAMPLER |
+ SWR_NEW_SAMPLER_VIEW |
+ SWR_NEW_FRAMEBUFFER)) {
+ swr_jit_vs_key key;
+ swr_generate_vs_key(key, ctx, ctx->vs);
+ auto search = ctx->vs->map.find(key);
+ PFN_VERTEX_FUNC func;
+ if (search != ctx->vs->map.end()) {
+ func = search->second;
+ } else {
+ func = swr_compile_vs(ctx, key);
+ ctx->vs->map.insert(std::make_pair(key, func));
+ }
+ SwrSetVertexFunc(ctx->swrContext, func);
+
+ /* JIT sampler state */
+ if (ctx->dirty & SWR_NEW_SAMPLER) {
+ swr_update_sampler_state(ctx,
+ PIPE_SHADER_VERTEX,
+ key.nr_samplers,
+ ctx->swrDC.samplersVS);
+ }
+
+ /* JIT sampler view state */
+ if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) {
+ swr_update_texture_state(ctx,
+ PIPE_SHADER_VERTEX,
+ key.nr_sampler_views,
+ ctx->swrDC.texturesVS);
+ }
}
- swr_jit_key key;
+ /* FragmentShader */
if (ctx->dirty & (SWR_NEW_FS | SWR_NEW_SAMPLER | SWR_NEW_SAMPLER_VIEW
| SWR_NEW_RASTERIZER | SWR_NEW_FRAMEBUFFER)) {
- memset(&key, 0, sizeof(key));
+ swr_jit_fs_key key;
swr_generate_fs_key(key, ctx, ctx->fs);
auto search = ctx->fs->map.find(key);
PFN_PIXEL_KERNEL func;
@@ -1031,56 +1109,25 @@ swr_update_derived(struct pipe_context *pipe,
psState.usesUAV = false; // XXX
psState.forceEarlyZ = false;
SwrSetPixelShaderState(ctx->swrContext, &psState);
- }
-
- /* JIT sampler state */
- if (ctx->dirty & SWR_NEW_SAMPLER) {
- swr_draw_context *pDC = &ctx->swrDC;
- for (unsigned i = 0; i < key.nr_samplers; i++) {
- const struct pipe_sampler_state *sampler =
- ctx->samplers[PIPE_SHADER_FRAGMENT][i];
-
- if (sampler) {
- pDC->samplersFS[i].min_lod = sampler->min_lod;
- pDC->samplersFS[i].max_lod = sampler->max_lod;
- pDC->samplersFS[i].lod_bias = sampler->lod_bias;
- COPY_4V(pDC->samplersFS[i].border_color, sampler->border_color.f);
- }
+ /* JIT sampler state */
+ if (ctx->dirty & SWR_NEW_SAMPLER) {
+ swr_update_sampler_state(ctx,
+ PIPE_SHADER_FRAGMENT,
+ key.nr_samplers,
+ ctx->swrDC.samplersFS);
}
- }
-
- /* JIT sampler view state */
- if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) {
- swr_draw_context *pDC = &ctx->swrDC;
- for (unsigned i = 0; i < key.nr_sampler_views; i++) {
- struct pipe_sampler_view *view =
- ctx->sampler_views[PIPE_SHADER_FRAGMENT][i];
-
- if (view) {
- struct pipe_resource *res = view->texture;
- struct swr_resource *swr_res = swr_resource(res);
- struct swr_jit_texture *jit_tex = &pDC->texturesFS[i];
- memset(jit_tex, 0, sizeof(*jit_tex));
- jit_tex->width = res->width0;
- jit_tex->height = res->height0;
- jit_tex->depth = res->depth0;
- jit_tex->first_level = view->u.tex.first_level;
- jit_tex->last_level = view->u.tex.last_level;
- jit_tex->base_ptr = swr_res->swr.pBaseAddress;
-
- for (unsigned level = jit_tex->first_level;
- level <= jit_tex->last_level;
- level++) {
- jit_tex->row_stride[level] = swr_res->row_stride[level];
- jit_tex->img_stride[level] = swr_res->img_stride[level];
- jit_tex->mip_offsets[level] = swr_res->mip_offsets[level];
- }
- }
+ /* JIT sampler view state */
+ if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) {
+ swr_update_texture_state(ctx,
+ PIPE_SHADER_FRAGMENT,
+ key.nr_sampler_views,
+ ctx->swrDC.texturesFS);
}
}
+
/* VertexShader Constants */
if (ctx->dirty & SWR_NEW_VSCONSTANTS) {
swr_draw_context *pDC = &ctx->swrDC;
diff --git a/src/gallium/drivers/swr/swr_state.h b/src/gallium/drivers/swr/swr_state.h
index f0a7ff3b185..32a5441295b 100644
--- a/src/gallium/drivers/swr/swr_state.h
+++ b/src/gallium/drivers/swr/swr_state.h
@@ -40,9 +40,9 @@ struct swr_vertex_shader {
struct pipe_shader_state pipe;
struct lp_tgsi_info info;
unsigned linkageMask;
- PFN_VERTEX_FUNC func;
+ std::unordered_map<swr_jit_vs_key, PFN_VERTEX_FUNC> map;
SWR_STREAMOUT_STATE soState;
- PFN_SO_FUNC soFunc[PIPE_PRIM_MAX];
+ PFN_SO_FUNC soFunc[PIPE_PRIM_MAX] {0};
};
struct swr_fragment_shader {
@@ -50,7 +50,7 @@ struct swr_fragment_shader {
struct lp_tgsi_info info;
uint32_t constantMask;
uint32_t pointSpriteMask;
- std::unordered_map<swr_jit_key, PFN_PIXEL_KERNEL> map;
+ std::unordered_map<swr_jit_fs_key, PFN_PIXEL_KERNEL> map;
};
/* Vertex element state */
diff --git a/src/gallium/drivers/swr/swr_tex_sample.cpp b/src/gallium/drivers/swr/swr_tex_sample.cpp
index 8e01e32e280..8172c820f22 100644
--- a/src/gallium/drivers/swr/swr_tex_sample.cpp
+++ b/src/gallium/drivers/swr/swr_tex_sample.cpp
@@ -72,6 +72,8 @@ struct swr_sampler_dynamic_state {
struct lp_sampler_dynamic_state base;
const struct swr_sampler_static_state *static_state;
+
+ unsigned shader_type;
};
@@ -112,7 +114,18 @@ swr_texture_member(const struct lp_sampler_dynamic_state *base,
/* context[0] */
indices[0] = lp_build_const_int32(gallivm, 0);
/* context[0].textures */
- indices[1] = lp_build_const_int32(gallivm, swr_draw_context_texturesFS);
+ auto dynamic = (const struct swr_sampler_dynamic_state *)base;
+ switch (dynamic->shader_type) {
+ case PIPE_SHADER_FRAGMENT:
+ indices[1] = lp_build_const_int32(gallivm, swr_draw_context_texturesFS);
+ break;
+ case PIPE_SHADER_VERTEX:
+ indices[1] = lp_build_const_int32(gallivm, swr_draw_context_texturesVS);
+ break;
+ default:
+ assert(0 && "unsupported shader type");
+ break;
+ }
/* context[0].textures[unit] */
indices[2] = lp_build_const_int32(gallivm, texture_unit);
/* context[0].textures[unit].member */
@@ -195,7 +208,18 @@ swr_sampler_member(const struct lp_sampler_dynamic_state *base,
/* context[0] */
indices[0] = lp_build_const_int32(gallivm, 0);
/* context[0].samplers */
- indices[1] = lp_build_const_int32(gallivm, swr_draw_context_samplersFS);
+ auto dynamic = (const struct swr_sampler_dynamic_state *)base;
+ switch (dynamic->shader_type) {
+ case PIPE_SHADER_FRAGMENT:
+ indices[1] = lp_build_const_int32(gallivm, swr_draw_context_samplersFS);
+ break;
+ case PIPE_SHADER_VERTEX:
+ indices[1] = lp_build_const_int32(gallivm, swr_draw_context_samplersVS);
+ break;
+ default:
+ assert(0 && "unsupported shader type");
+ break;
+ }
/* context[0].samplers[unit] */
indices[2] = lp_build_const_int32(gallivm, sampler_unit);
/* context[0].samplers[unit].member */
@@ -307,7 +331,8 @@ swr_sampler_soa_emit_size_query(const struct lp_build_sampler_soa *base,
struct lp_build_sampler_soa *
-swr_sampler_soa_create(const struct swr_sampler_static_state *static_state)
+swr_sampler_soa_create(const struct swr_sampler_static_state *static_state,
+ unsigned shader_type)
{
struct swr_sampler_soa *sampler;
@@ -334,5 +359,7 @@ swr_sampler_soa_create(const struct swr_sampler_static_state *static_state)
sampler->dynamic_state.static_state = static_state;
+ sampler->dynamic_state.shader_type = shader_type;
+
return &sampler->base;
}
diff --git a/src/gallium/drivers/swr/swr_tex_sample.h b/src/gallium/drivers/swr/swr_tex_sample.h
index f5c368c108d..cb7e83d1c39 100644
--- a/src/gallium/drivers/swr/swr_tex_sample.h
+++ b/src/gallium/drivers/swr/swr_tex_sample.h
@@ -44,4 +44,4 @@ struct swr_sampler_static_state {
*
*/
struct lp_build_sampler_soa *
-swr_sampler_soa_create(const struct swr_sampler_static_state *key);
+swr_sampler_soa_create(const struct swr_sampler_static_state *key, unsigned shader_type);
diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index 08b1d32afb0..b575f2cdb34 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -273,6 +273,24 @@ trace_context_get_query_result(struct pipe_context *_pipe,
}
+static void
+trace_context_set_active_query_state(struct pipe_context *_pipe,
+ boolean enable)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "set_active_query_state");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(bool, enable);
+
+ pipe->set_active_query_state(pipe, enable);
+
+ trace_dump_call_end();
+}
+
+
static void *
trace_context_create_blend_state(struct pipe_context *_pipe,
const struct pipe_blend_state *state)
@@ -1781,6 +1799,7 @@ trace_context_create(struct trace_screen *tr_scr,
TR_CTX_INIT(begin_query);
TR_CTX_INIT(end_query);
TR_CTX_INIT(get_query_result);
+ TR_CTX_INIT(set_active_query_state);
TR_CTX_INIT(create_blend_state);
TR_CTX_INIT(bind_blend_state);
TR_CTX_INIT(delete_blend_state);
diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c
index 9b0b540d3fc..68b85737628 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -32,12 +32,19 @@
#include "vc4_resource.h"
static void
-vc4_get_draw_cl_space(struct vc4_context *vc4)
+vc4_get_draw_cl_space(struct vc4_context *vc4, int vert_count)
{
+ /* The SW-5891 workaround may cause us to emit multiple shader recs
+ * and draw packets.
+ */
+ int num_draws = DIV_ROUND_UP(vert_count, 65535) + 1;
+
/* Binner gets our packet state -- vc4_emit.c contents,
* and the primitive itself.
*/
- cl_ensure_space(&vc4->bcl, 256);
+ cl_ensure_space(&vc4->bcl,
+ 256 + (VC4_PACKET_GL_ARRAY_PRIMITIVE_SIZE +
+ VC4_PACKET_GL_SHADER_STATE_SIZE) * num_draws);
/* Nothing for rcl -- that's covered by vc4_context.c */
@@ -45,7 +52,8 @@ vc4_get_draw_cl_space(struct vc4_context *vc4)
* sized shader_rec (104 bytes base for 8 vattrs plus 32 bytes of
* vattr stride).
*/
- cl_ensure_space(&vc4->shader_rec, 12 * sizeof(uint32_t) + 104 + 8 * 32);
+ cl_ensure_space(&vc4->shader_rec,
+ (12 * sizeof(uint32_t) + 104 + 8 * 32) * num_draws);
/* Uniforms are covered by vc4_write_uniforms(). */
@@ -61,12 +69,12 @@ vc4_get_draw_cl_space(struct vc4_context *vc4)
* Does the initial bining command list setup for drawing to a given FBO.
*/
static void
-vc4_start_draw(struct vc4_context *vc4)
+vc4_start_draw(struct vc4_context *vc4, int vert_count)
{
if (vc4->needs_flush)
return;
- vc4_get_draw_cl_space(vc4);
+ vc4_get_draw_cl_space(vc4, 0);
struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
// Tile state data is 48 bytes per tile, I think it can be thrown away
@@ -119,7 +127,8 @@ vc4_update_shadow_textures(struct pipe_context *pctx,
}
static void
-vc4_emit_gl_shader_state(struct vc4_context *vc4, const struct pipe_draw_info *info)
+vc4_emit_gl_shader_state(struct vc4_context *vc4, const struct pipe_draw_info *info,
+ uint32_t extra_index_bias)
{
/* VC4_DIRTY_VTXSTATE */
struct vc4_vertex_stateobj *vtx = vc4->vtx;
@@ -170,7 +179,8 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4, const struct pipe_draw_info *i
/* not vc4->dirty tracked: vc4->last_index_bias */
uint32_t offset = (vb->buffer_offset +
elem->src_offset +
- vb->stride * info->index_bias);
+ vb->stride * (info->index_bias +
+ extra_index_bias));
uint32_t vb_size = rsc->bo->size - offset;
uint32_t elem_size =
util_format_get_blocksize(elem->src_format);
@@ -219,8 +229,9 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4, const struct pipe_draw_info *i
&vc4->constbuf[PIPE_SHADER_VERTEX],
&vc4->verttex);
- vc4->last_index_bias = info->index_bias;
+ vc4->last_index_bias = info->index_bias + extra_index_bias;
vc4->max_index = max_index;
+ vc4->shader_rec_count++;
}
/**
@@ -275,14 +286,14 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
vc4_hw_2116_workaround(pctx);
- vc4_get_draw_cl_space(vc4);
+ vc4_get_draw_cl_space(vc4, info->count);
if (vc4->prim_mode != info->mode) {
vc4->prim_mode = info->mode;
vc4->dirty |= VC4_DIRTY_PRIM_MODE;
}
- vc4_start_draw(vc4);
+ vc4_start_draw(vc4, info->count);
vc4_update_compiled_shaders(vc4, info->mode);
vc4_emit_state(pctx);
@@ -298,7 +309,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
vc4->prog.vs->uniform_dirty_bits |
vc4->prog.fs->uniform_dirty_bits)) ||
vc4->last_index_bias != info->index_bias) {
- vc4_emit_gl_shader_state(vc4, info);
+ vc4_emit_gl_shader_state(vc4, info, 0);
}
vc4->dirty = 0;
@@ -342,10 +353,75 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
if (vc4->indexbuf.index_size == 4 || vc4->indexbuf.user_buffer)
pipe_resource_reference(&prsc, NULL);
} else {
- cl_u8(&bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
- cl_u8(&bcl, info->mode);
- cl_u32(&bcl, info->count);
- cl_u32(&bcl, info->start);
+ uint32_t count = info->count;
+ uint32_t start = info->start;
+ uint32_t extra_index_bias = 0;
+
+ while (count) {
+ uint32_t this_count = count;
+ uint32_t step = count;
+ static const uint32_t max_verts = 65535;
+
+ /* GFXH-515 / SW-5891: The binner emits 16 bit indices
+ * for drawarrays, which means that if start + count >
+ * 64k it would truncate the top bits. Work around
+ * this by emitting a limited number of primitives at
+ * a time and reemitting the shader state pointing
+ * farther down the vertex attribute arrays.
+ *
+ * To do this properly for line loops or trifans, we'd
+ * need to make a new VB containing the first vertex
+ * plus whatever remainder.
+ */
+ if (extra_index_bias) {
+ cl_end(&vc4->bcl, bcl);
+ vc4_emit_gl_shader_state(vc4, info,
+ extra_index_bias);
+ bcl = cl_start(&vc4->bcl);
+ }
+
+ if (start + count > max_verts) {
+ switch (info->mode) {
+ case PIPE_PRIM_POINTS:
+ this_count = step = max_verts;
+ break;
+ case PIPE_PRIM_LINES:
+ this_count = step = max_verts - (max_verts % 2);
+ break;
+ case PIPE_PRIM_LINE_STRIP:
+ this_count = max_verts;
+ step = max_verts - 1;
+ break;
+ case PIPE_PRIM_LINE_LOOP:
+ this_count = max_verts;
+ step = max_verts - 1;
+ debug_warn_once("unhandled line loop "
+ "looping behavior with "
+ ">65535 verts\n");
+ break;
+ case PIPE_PRIM_TRIANGLES:
+ this_count = step = max_verts - (max_verts % 3);
+ break;
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ this_count = max_verts;
+ step = max_verts - 2;
+ break;
+ default:
+ debug_warn_once("unhandled primitive "
+ "max vert count, truncating\n");
+ this_count = step = max_verts;
+ }
+ }
+
+ cl_u8(&bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
+ cl_u8(&bcl, info->mode);
+ cl_u32(&bcl, this_count);
+ cl_u32(&bcl, start);
+
+ count -= step;
+ extra_index_bias += start + step;
+ start = 0;
+ }
}
cl_end(&vc4->bcl, bcl);
@@ -356,8 +432,6 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
vc4->resolve |= PIPE_CLEAR_STENCIL;
vc4->resolve |= PIPE_CLEAR_COLOR0;
- vc4->shader_rec_count++;
-
if (vc4_debug & VC4_DEBUG_ALWAYS_FLUSH)
vc4_flush(pctx);
}
@@ -410,7 +484,7 @@ vc4_clear(struct pipe_context *pctx, unsigned buffers,
vc4->cleared |= buffers;
vc4->resolve |= buffers;
- vc4_start_draw(vc4);
+ vc4_start_draw(vc4, 0);
}
static void
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
index 49a314cdb25..cf6d2896f7d 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
@@ -710,9 +710,9 @@ vc4_nir_lower_blend_block(nir_block *block, void *state)
}
void
-vc4_nir_lower_blend(struct vc4_compile *c)
+vc4_nir_lower_blend(nir_shader *s, struct vc4_compile *c)
{
- nir_foreach_function(c->s, function) {
+ nir_foreach_function(s, function) {
if (function->impl) {
nir_foreach_block(function->impl,
vc4_nir_lower_blend_block, c);
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
index d08ad588e5b..22c602adb54 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
@@ -380,24 +380,14 @@ vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b,
intr_comp->num_components = 1;
nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, 32, NULL);
- /* Convert the uniform (not user_clip_plane) offset to bytes.
- * If it happens to be a constant, constant-folding will clean
- * up the shift for us.
+ /* Convert the uniform offset to bytes. If it happens to be a
+ * constant, constant-folding will clean up the shift for us.
*/
- if (intr->intrinsic == nir_intrinsic_load_uniform) {
- /* Convert the base offset to bytes and add the
- * component
- */
- intr_comp->const_index[0] = (intr->const_index[0] * 16 + i * 4);
-
- intr_comp->src[0] =
- nir_src_for_ssa(nir_ishl(b, intr->src[0].ssa,
- nir_imm_int(b, 4)));
- } else {
- assert(intr->intrinsic ==
- nir_intrinsic_load_user_clip_plane);
- intr_comp->const_index[0] = intr->const_index[0] * 4 + i;
- }
+ intr_comp->const_index[0] = (intr->const_index[0] * 16 + i * 4);
+
+ intr_comp->src[0] =
+ nir_src_for_ssa(nir_ishl(b, intr->src[0].ssa,
+ nir_imm_int(b, 4)));
dests[i] = &intr_comp->dest.ssa;
@@ -428,10 +418,10 @@ vc4_nir_lower_io_instr(struct vc4_compile *c, nir_builder *b,
break;
case nir_intrinsic_load_uniform:
- case nir_intrinsic_load_user_clip_plane:
vc4_nir_lower_uniform(c, b, intr);
break;
+ case nir_intrinsic_load_user_clip_plane:
default:
break;
}
@@ -465,9 +455,9 @@ vc4_nir_lower_io_impl(struct vc4_compile *c, nir_function_impl *impl)
}
void
-vc4_nir_lower_io(struct vc4_compile *c)
+vc4_nir_lower_io(nir_shader *s, struct vc4_compile *c)
{
- nir_foreach_function(c->s, function) {
+ nir_foreach_function(s, function) {
if (function->impl)
vc4_nir_lower_io_impl(c, function->impl);
}
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c b/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c
index 8b65cac5084..6b8830743eb 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c
@@ -162,9 +162,9 @@ vc4_nir_lower_txf_ms_impl(struct vc4_compile *c, nir_function_impl *impl)
}
void
-vc4_nir_lower_txf_ms(struct vc4_compile *c)
+vc4_nir_lower_txf_ms(nir_shader *s, struct vc4_compile *c)
{
- nir_foreach_function(c->s, function) {
+ nir_foreach_function(s, function) {
if (function->impl)
vc4_nir_lower_txf_ms_impl(c, function->impl);
}
diff --git a/src/gallium/drivers/vc4/vc4_opt_vpm.c b/src/gallium/drivers/vc4/vc4_opt_vpm.c
index d15b0c1a39f..d31b673bd63 100644
--- a/src/gallium/drivers/vc4/vc4_opt_vpm.c
+++ b/src/gallium/drivers/vc4/vc4_opt_vpm.c
@@ -65,7 +65,7 @@ qir_opt_vpm(struct vc4_compile *c)
* result, try to move the instruction up in place of the VPM read.
*/
list_for_each_entry(struct qinst, inst, &c->instructions, link) {
- if (!inst || qir_is_multi_instruction(inst))
+ if (!inst)
continue;
if (qir_depends_on_flags(inst) || inst->sf)
@@ -132,7 +132,7 @@ qir_opt_vpm(struct vc4_compile *c)
continue;
struct qinst *inst = c->defs[temp];
- if (!inst || qir_is_multi_instruction(inst))
+ if (!inst)
continue;
if (qir_depends_on_flags(inst) || inst->sf)
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 71a1ebbb313..eccc7ab413f 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -30,7 +30,6 @@
#include "util/ralloc.h"
#include "util/hash_table.h"
#include "tgsi/tgsi_dump.h"
-#include "tgsi/tgsi_lowering.h"
#include "tgsi/tgsi_parse.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
@@ -638,8 +637,8 @@ emit_vertex_input(struct vc4_compile *c, int attr)
c->vattr_sizes[attr] = align(attr_size, 4);
for (int i = 0; i < align(attr_size, 4) / 4; i++) {
- struct qreg vpm = { QFILE_VPM, attr * 4 + i };
- c->inputs[attr * 4 + i] = qir_MOV(c, vpm);
+ c->inputs[attr * 4 + i] =
+ qir_MOV(c, qir_reg(QFILE_VPM, attr * 4 + i));
c->num_inputs++;
}
}
@@ -647,8 +646,8 @@ emit_vertex_input(struct vc4_compile *c, int attr)
static void
emit_fragcoord_input(struct vc4_compile *c, int attr)
{
- c->inputs[attr * 4 + 0] = qir_FRAG_X(c);
- c->inputs[attr * 4 + 1] = qir_FRAG_Y(c);
+ c->inputs[attr * 4 + 0] = qir_ITOF(c, qir_reg(QFILE_FRAG_X, 0));
+ c->inputs[attr * 4 + 1] = qir_ITOF(c, qir_reg(QFILE_FRAG_Y, 0));
c->inputs[attr * 4 + 2] =
qir_FMUL(c,
qir_ITOF(c, qir_FRAG_Z(c)),
@@ -1193,12 +1192,15 @@ emit_frag_end(struct vc4_compile *c)
}
if (c->fs_key->stencil_enabled) {
- qir_TLB_STENCIL_SETUP(c, qir_uniform(c, QUNIFORM_STENCIL, 0));
+ qir_MOV_dest(c, qir_reg(QFILE_TLB_STENCIL_SETUP, 0),
+ qir_uniform(c, QUNIFORM_STENCIL, 0));
if (c->fs_key->stencil_twoside) {
- qir_TLB_STENCIL_SETUP(c, qir_uniform(c, QUNIFORM_STENCIL, 1));
+ qir_MOV_dest(c, qir_reg(QFILE_TLB_STENCIL_SETUP, 0),
+ qir_uniform(c, QUNIFORM_STENCIL, 1));
}
if (c->fs_key->stencil_full_writemasks) {
- qir_TLB_STENCIL_SETUP(c, qir_uniform(c, QUNIFORM_STENCIL, 2));
+ qir_MOV_dest(c, qir_reg(QFILE_TLB_STENCIL_SETUP, 0),
+ qir_uniform(c, QUNIFORM_STENCIL, 2));
}
}
@@ -1207,24 +1209,24 @@ emit_frag_end(struct vc4_compile *c)
}
if (c->fs_key->depth_enabled) {
- struct qreg z;
if (c->output_position_index != -1) {
- z = qir_FTOI(c, qir_FMUL(c, c->outputs[c->output_position_index + 2],
- qir_uniform_f(c, 0xffffff)));
+ qir_FTOI_dest(c, qir_reg(QFILE_TLB_Z_WRITE, 0),
+ qir_FMUL(c,
+ c->outputs[c->output_position_index + 2],
+ qir_uniform_f(c, 0xffffff)))->cond = discard_cond;
} else {
- z = qir_FRAG_Z(c);
+ qir_MOV_dest(c, qir_reg(QFILE_TLB_Z_WRITE, 0),
+ qir_FRAG_Z(c))->cond = discard_cond;
}
- struct qinst *inst = qir_TLB_Z_WRITE(c, z);
- inst->cond = discard_cond;
}
if (!c->msaa_per_sample_output) {
- struct qinst *inst = qir_TLB_COLOR_WRITE(c, color);
- inst->cond = discard_cond;
+ qir_MOV_dest(c, qir_reg(QFILE_TLB_COLOR_WRITE, 0),
+ color)->cond = discard_cond;
} else {
for (int i = 0; i < VC4_MAX_SAMPLES; i++) {
- struct qinst *inst = qir_TLB_COLOR_WRITE_MS(c, c->sample_colors[i]);
- inst->cond = discard_cond;
+ qir_MOV_dest(c, qir_reg(QFILE_TLB_COLOR_WRITE_MS, 0),
+ c->sample_colors[i])->cond = discard_cond;
}
}
}
@@ -1304,8 +1306,7 @@ emit_stub_vpm_read(struct vc4_compile *c)
return;
c->vattr_sizes[0] = 4;
- struct qreg vpm = { QFILE_VPM, 0 };
- (void)qir_MOV(c, vpm);
+ (void)qir_MOV(c, qir_reg(QFILE_VPM, 0));
c->num_inputs++;
}
@@ -1371,16 +1372,16 @@ vc4_optimize_nir(struct nir_shader *s)
do {
progress = false;
- nir_lower_vars_to_ssa(s);
- nir_lower_alu_to_scalar(s);
+ NIR_PASS_V(s, nir_lower_vars_to_ssa);
+ NIR_PASS_V(s, nir_lower_alu_to_scalar);
- progress = nir_copy_prop(s) || progress;
- progress = nir_opt_dce(s) || progress;
- progress = nir_opt_cse(s) || progress;
- progress = nir_opt_peephole_select(s) || progress;
- progress = nir_opt_algebraic(s) || progress;
- progress = nir_opt_constant_folding(s) || progress;
- progress = nir_opt_undef(s) || progress;
+ NIR_PASS(progress, s, nir_copy_prop);
+ NIR_PASS(progress, s, nir_opt_dce);
+ NIR_PASS(progress, s, nir_opt_cse);
+ NIR_PASS(progress, s, nir_opt_peephole_select);
+ NIR_PASS(progress, s, nir_opt_algebraic);
+ NIR_PASS(progress, s, nir_opt_constant_folding);
+ NIR_PASS(progress, s, nir_opt_undef);
} while (progress);
}
@@ -1427,7 +1428,9 @@ ntq_setup_inputs(struct vc4_compile *c)
if (var->data.location == VARYING_SLOT_POS) {
emit_fragcoord_input(c, loc);
} else if (var->data.location == VARYING_SLOT_FACE) {
- c->inputs[loc * 4 + 0] = qir_FRAG_REV_FLAG(c);
+ c->inputs[loc * 4 + 0] =
+ qir_ITOF(c, qir_reg(QFILE_FRAG_REV_FLAG,
+ 0));
} else if (var->data.location >= VARYING_SLOT_VAR0 &&
(c->fs_key->point_sprite_mask &
(1 << (var->data.location -
@@ -1573,8 +1576,10 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
break;
case nir_intrinsic_load_user_clip_plane:
- *dest = qir_uniform(c, QUNIFORM_USER_CLIP_PLANE,
- instr->const_index[0]);
+ for (int i = 0; i < instr->num_components; i++) {
+ dest[i] = qir_uniform(c, QUNIFORM_USER_CLIP_PLANE,
+ instr->const_index[0] * 4 + i);
+ }
break;
case nir_intrinsic_load_sample_mask_in:
@@ -1694,12 +1699,27 @@ ntq_emit_block(struct vc4_compile *c, nir_block *block)
}
}
+static void ntq_emit_cf_list(struct vc4_compile *c, struct exec_list *list);
+
+static void
+ntq_emit_loop(struct vc4_compile *c, nir_loop *nloop)
+{
+ fprintf(stderr, "LOOPS not fully handled. Rendering errors likely.\n");
+ ntq_emit_cf_list(c, &nloop->body);
+}
+
+static void
+ntq_emit_function(struct vc4_compile *c, nir_function_impl *func)
+{
+ fprintf(stderr, "FUNCTIONS not handled.\n");
+ abort();
+}
+
static void
ntq_emit_cf_list(struct vc4_compile *c, struct exec_list *list)
{
foreach_list_typed(nir_cf_node, node, node, list) {
switch (node->type) {
- /* case nir_cf_node_loop: */
case nir_cf_node_block:
ntq_emit_block(c, nir_cf_node_as_block(node));
break;
@@ -1708,8 +1728,17 @@ ntq_emit_cf_list(struct vc4_compile *c, struct exec_list *list)
ntq_emit_if(c, nir_cf_node_as_if(node));
break;
+ case nir_cf_node_loop:
+ ntq_emit_loop(c, nir_cf_node_as_loop(node));
+ break;
+
+ case nir_cf_node_function:
+ ntq_emit_function(c, nir_cf_node_as_function(node));
+ break;
+
default:
- assert(0);
+ fprintf(stderr, "Unknown NIR node type\n");
+ abort();
}
}
}
@@ -1810,11 +1839,11 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
}
c->s = tgsi_to_nir(tokens, &nir_options);
- nir_opt_global_to_local(c->s);
- nir_convert_to_ssa(c->s);
+ NIR_PASS_V(c->s, nir_opt_global_to_local);
+ NIR_PASS_V(c->s, nir_convert_to_ssa);
if (stage == QSTAGE_FRAG)
- vc4_nir_lower_blend(c);
+ NIR_PASS_V(c->s, vc4_nir_lower_blend, c);
struct nir_lower_tex_options tex_options = {
/* We would need to implement txs, but we don't want the
@@ -1864,26 +1893,25 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
}
}
- nir_lower_tex(c->s, &tex_options);
+ NIR_PASS_V(c->s, nir_lower_tex, &tex_options);
if (c->fs_key && c->fs_key->light_twoside)
- nir_lower_two_sided_color(c->s);
+ NIR_PASS_V(c->s, nir_lower_two_sided_color);
if (stage == QSTAGE_FRAG)
- nir_lower_clip_fs(c->s, c->key->ucp_enables);
+ NIR_PASS_V(c->s, nir_lower_clip_fs, c->key->ucp_enables);
else
- nir_lower_clip_vs(c->s, c->key->ucp_enables);
+ NIR_PASS_V(c->s, nir_lower_clip_vs, c->key->ucp_enables);
- vc4_nir_lower_io(c);
- vc4_nir_lower_txf_ms(c);
- nir_lower_idiv(c->s);
- nir_lower_load_const_to_scalar(c->s);
+ NIR_PASS_V(c->s, vc4_nir_lower_io, c);
+ NIR_PASS_V(c->s, vc4_nir_lower_txf_ms, c);
+ NIR_PASS_V(c->s, nir_lower_idiv);
+ NIR_PASS_V(c->s, nir_lower_load_const_to_scalar);
vc4_optimize_nir(c->s);
- nir_remove_dead_variables(c->s);
-
- nir_convert_from_ssa(c->s, true);
+ NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_local);
+ NIR_PASS_V(c->s, nir_convert_from_ssa, true);
if (vc4_debug & VC4_DEBUG_SHADERDB) {
fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d NIR instructions\n",
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index e73e3899410..293eb01adab 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -31,7 +31,6 @@ struct qir_op_info {
const char *name;
uint8_t ndst, nsrc;
bool has_side_effects;
- bool multi_instruction;
};
static const struct qir_op_info qir_op_info[] = {
@@ -65,23 +64,16 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_XOR] = { "xor", 1, 2 },
[QOP_NOT] = { "not", 1, 1 },
- [QOP_RCP] = { "rcp", 1, 1, false, true },
- [QOP_RSQ] = { "rsq", 1, 1, false, true },
- [QOP_EXP2] = { "exp2", 1, 2, false, true },
- [QOP_LOG2] = { "log2", 1, 2, false, true },
- [QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true },
- [QOP_TLB_Z_WRITE] = { "tlb_z", 0, 1, true },
- [QOP_TLB_COLOR_WRITE] = { "tlb_color", 0, 1, true },
- [QOP_TLB_COLOR_WRITE_MS] = { "tlb_color_ms", 0, 1, true },
+ [QOP_RCP] = { "rcp", 1, 1 },
+ [QOP_RSQ] = { "rsq", 1, 1 },
+ [QOP_EXP2] = { "exp2", 1, 2 },
+ [QOP_LOG2] = { "log2", 1, 2 },
[QOP_TLB_COLOR_READ] = { "tlb_color_read", 1, 0 },
[QOP_MS_MASK] = { "ms_mask", 0, 1, true },
[QOP_VARY_ADD_C] = { "vary_add_c", 1, 1 },
- [QOP_FRAG_X] = { "frag_x", 1, 0 },
- [QOP_FRAG_Y] = { "frag_y", 1, 0 },
[QOP_FRAG_Z] = { "frag_z", 1, 0 },
[QOP_FRAG_W] = { "frag_w", 1, 0 },
- [QOP_FRAG_REV_FLAG] = { "frag_rev_flag", 1, 0 },
[QOP_TEX_S] = { "tex_s", 0, 2 },
[QOP_TEX_T] = { "tex_t", 0, 2 },
@@ -116,6 +108,16 @@ qir_get_op_nsrc(enum qop qop)
bool
qir_has_side_effects(struct vc4_compile *c, struct qinst *inst)
{
+ switch (inst->dst.file) {
+ case QFILE_TLB_Z_WRITE:
+ case QFILE_TLB_COLOR_WRITE:
+ case QFILE_TLB_COLOR_WRITE_MS:
+ case QFILE_TLB_STENCIL_SETUP:
+ return true;
+ default:
+ break;
+ }
+
return qir_op_info[inst->op].has_side_effects;
}
@@ -144,12 +146,6 @@ qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst)
}
bool
-qir_is_multi_instruction(struct qinst *inst)
-{
- return qir_op_info[inst->op].multi_instruction;
-}
-
-bool
qir_is_mul(struct qinst *inst)
{
switch (inst->op) {
@@ -233,24 +229,47 @@ qir_print_reg(struct vc4_compile *c, struct qreg reg, bool write)
[QFILE_TEMP] = "t",
[QFILE_VARY] = "v",
[QFILE_UNIF] = "u",
+ [QFILE_TLB_COLOR_WRITE] = "tlb_c",
+ [QFILE_TLB_COLOR_WRITE_MS] = "tlb_c_ms",
+ [QFILE_TLB_Z_WRITE] = "tlb_z",
+ [QFILE_TLB_STENCIL_SETUP] = "tlb_stencil",
+ [QFILE_FRAG_X] = "frag_x",
+ [QFILE_FRAG_Y] = "frag_y",
+ [QFILE_FRAG_REV_FLAG] = "frag_rev_flag",
};
- if (reg.file == QFILE_NULL) {
+ switch (reg.file) {
+
+ case QFILE_NULL:
fprintf(stderr, "null");
- } else if (reg.file == QFILE_SMALL_IMM) {
+ break;
+
+ case QFILE_SMALL_IMM:
if ((int)reg.index >= -16 && (int)reg.index <= 15)
fprintf(stderr, "%d", reg.index);
else
fprintf(stderr, "%f", uif(reg.index));
- } else if (reg.file == QFILE_VPM) {
+ break;
+
+ case QFILE_VPM:
if (write) {
fprintf(stderr, "vpm");
} else {
fprintf(stderr, "vpm%d.%d",
reg.index / 4, reg.index % 4);
}
- } else {
+ break;
+
+ case QFILE_TLB_COLOR_WRITE:
+ case QFILE_TLB_COLOR_WRITE_MS:
+ case QFILE_TLB_Z_WRITE:
+ case QFILE_TLB_STENCIL_SETUP:
+ fprintf(stderr, "%s", files[reg.file]);
+ break;
+
+ default:
fprintf(stderr, "%s%d", files[reg.file], reg.index);
+ break;
}
if (reg.file == QFILE_UNIF &&
@@ -455,12 +474,11 @@ qir_uniform(struct vc4_compile *c,
for (int i = 0; i < c->num_uniforms; i++) {
if (c->uniform_contents[i] == contents &&
c->uniform_data[i] == data) {
- return (struct qreg) { QFILE_UNIF, i };
+ return qir_reg(QFILE_UNIF, i);
}
}
uint32_t uniform = c->num_uniforms++;
- struct qreg u = { QFILE_UNIF, uniform };
if (uniform >= c->uniform_array_size) {
c->uniform_array_size = MAX2(MAX2(16, uniform + 1),
@@ -477,7 +495,7 @@ qir_uniform(struct vc4_compile *c,
c->uniform_contents[uniform] = contents;
c->uniform_data[uniform] = data;
- return u;
+ return qir_reg(QFILE_UNIF, uniform);
}
void
@@ -492,10 +510,8 @@ qir_SF(struct vc4_compile *c, struct qreg src)
if (src.file != QFILE_TEMP ||
!c->defs[src.index] ||
- last_inst != c->defs[src.index] ||
- qir_is_multi_instruction(last_inst)) {
- struct qreg null = { QFILE_NULL, 0 };
- last_inst = qir_MOV_dest(c, null, src);
+ last_inst != c->defs[src.index]) {
+ last_inst = qir_MOV_dest(c, qir_reg(QFILE_NULL, 0), src);
last_inst = (struct qinst *)c->instructions.prev;
}
last_inst->sf = true;
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 3fbf5d749e7..e8ba74b9a4d 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -49,6 +49,17 @@ enum qfile {
QFILE_VARY,
QFILE_UNIF,
QFILE_VPM,
+ QFILE_TLB_COLOR_WRITE,
+ QFILE_TLB_COLOR_WRITE_MS,
+ QFILE_TLB_Z_WRITE,
+ QFILE_TLB_STENCIL_SETUP,
+
+ /* Payload registers that aren't in the physical register file, so we
+ * can just use the corresponding qpu_reg at qpu_emit time.
+ */
+ QFILE_FRAG_X,
+ QFILE_FRAG_Y,
+ QFILE_FRAG_REV_FLAG,
/**
* Stores an immediate value in the index field that can be turned
@@ -63,6 +74,11 @@ struct qreg {
int pack;
};
+static inline struct qreg qir_reg(enum qfile file, uint32_t index)
+{
+ return (struct qreg){file, index};
+}
+
enum qop {
QOP_UNDEF,
QOP_MOV,
@@ -101,19 +117,12 @@ enum qop {
QOP_LOG2,
QOP_VW_SETUP,
QOP_VR_SETUP,
- QOP_TLB_STENCIL_SETUP,
- QOP_TLB_Z_WRITE,
- QOP_TLB_COLOR_WRITE,
- QOP_TLB_COLOR_WRITE_MS,
QOP_TLB_COLOR_READ,
QOP_MS_MASK,
QOP_VARY_ADD_C,
- QOP_FRAG_X,
- QOP_FRAG_Y,
QOP_FRAG_Z,
QOP_FRAG_W,
- QOP_FRAG_REV_FLAG,
/** Texture x coordinate parameter write */
QOP_TEX_S,
@@ -463,7 +472,6 @@ int qir_get_op_nsrc(enum qop qop);
bool qir_reg_equals(struct qreg a, struct qreg b);
bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst);
bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst);
-bool qir_is_multi_instruction(struct qinst *inst);
bool qir_is_mul(struct qinst *inst);
bool qir_is_raw_mov(struct qinst *inst);
bool qir_is_tex(struct qinst *inst);
@@ -484,13 +492,13 @@ bool qir_opt_cse(struct vc4_compile *c);
bool qir_opt_dead_code(struct vc4_compile *c);
bool qir_opt_small_immediates(struct vc4_compile *c);
bool qir_opt_vpm(struct vc4_compile *c);
-void vc4_nir_lower_blend(struct vc4_compile *c);
-void vc4_nir_lower_io(struct vc4_compile *c);
+void vc4_nir_lower_blend(nir_shader *s, struct vc4_compile *c);
+void vc4_nir_lower_io(nir_shader *s, struct vc4_compile *c);
nir_ssa_def *vc4_nir_get_state_uniform(struct nir_builder *b,
enum quniform_contents contents);
nir_ssa_def *vc4_nir_get_swizzled_channel(struct nir_builder *b,
nir_ssa_def **srcs, int swiz);
-void vc4_nir_lower_txf_ms(struct vc4_compile *c);
+void vc4_nir_lower_txf_ms(nir_shader *s, struct vc4_compile *c);
void qir_lower_uniforms(struct vc4_compile *c);
uint32_t qpu_schedule_instructions(struct vc4_compile *c);
@@ -618,17 +626,10 @@ QIR_NODST_2(TEX_T)
QIR_NODST_2(TEX_R)
QIR_NODST_2(TEX_B)
QIR_NODST_2(TEX_DIRECT)
-QIR_ALU0(FRAG_X)
-QIR_ALU0(FRAG_Y)
QIR_ALU0(FRAG_Z)
QIR_ALU0(FRAG_W)
-QIR_ALU0(FRAG_REV_FLAG)
QIR_ALU0(TEX_RESULT)
QIR_ALU0(TLB_COLOR_READ)
-QIR_NODST_1(TLB_COLOR_WRITE)
-QIR_NODST_1(TLB_COLOR_WRITE_MS)
-QIR_NODST_1(TLB_Z_WRITE)
-QIR_NODST_1(TLB_STENCIL_SETUP)
QIR_NODST_1(MS_MASK)
static inline struct qreg
@@ -703,8 +704,7 @@ qir_POW(struct vc4_compile *c, struct qreg x, struct qreg y)
static inline void
qir_VPM_WRITE(struct vc4_compile *c, struct qreg val)
{
- static const struct qreg vpm = { QFILE_VPM, 0 };
- qir_emit(c, qir_inst(QOP_MOV, vpm, val, c->undef));
+ qir_MOV_dest(c, qir_reg(QFILE_VPM, 0), val);
}
#endif /* VC4_QIR_H */
diff --git a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
index a57e100593c..927268d71ef 100644
--- a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
+++ b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
@@ -150,7 +150,7 @@ qir_lower_uniforms(struct vc4_compile *c)
* reference a temp instead.
*/
struct qreg temp = qir_get_temp(c);
- struct qreg unif = { QFILE_UNIF, max_index };
+ struct qreg unif = qir_reg(QFILE_UNIF, max_index);
struct qinst *mov = qir_inst(QOP_MOV, temp, unif, c->undef);
list_add(&mov->link, &c->instructions);
c->defs[temp.index] = mov;
diff --git a/src/gallium/drivers/vc4/vc4_qir_schedule.c b/src/gallium/drivers/vc4/vc4_qir_schedule.c
index 186e81be750..8b843a3a158 100644
--- a/src/gallium/drivers/vc4/vc4_qir_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qir_schedule.c
@@ -228,10 +228,7 @@ calculate_deps(struct schedule_setup_state *state, struct schedule_node *n)
add_write_dep(dir, &state->last_tex_result, n);
break;
- case QOP_TLB_COLOR_WRITE:
case QOP_TLB_COLOR_READ:
- case QOP_TLB_Z_WRITE:
- case QOP_TLB_STENCIL_SETUP:
case QOP_MS_MASK:
add_write_dep(dir, &state->last_tlb, n);
break;
@@ -240,10 +237,25 @@ calculate_deps(struct schedule_setup_state *state, struct schedule_node *n)
break;
}
- if (inst->dst.file == QFILE_VPM)
+ switch (inst->dst.file) {
+ case QFILE_VPM:
add_write_dep(dir, &state->last_vpm_write, n);
- else if (inst->dst.file == QFILE_TEMP)
+ break;
+
+ case QFILE_TEMP:
add_write_dep(dir, &state->last_temp_write[inst->dst.index], n);
+ break;
+
+ case QFILE_TLB_COLOR_WRITE:
+ case QFILE_TLB_COLOR_WRITE_MS:
+ case QFILE_TLB_Z_WRITE:
+ case QFILE_TLB_STENCIL_SETUP:
+ add_write_dep(dir, &state->last_tlb, n);
+ break;
+
+ default:
+ break;
+ }
if (qir_depends_on_flags(inst))
add_dep(dir, state->last_sf, n);
@@ -357,11 +369,13 @@ get_register_pressure_cost(struct schedule_state *state, struct qinst *inst)
static bool
locks_scoreboard(struct qinst *inst)
{
- switch (inst->op) {
- case QOP_TLB_Z_WRITE:
- case QOP_TLB_COLOR_WRITE:
- case QOP_TLB_COLOR_WRITE_MS:
- case QOP_TLB_COLOR_READ:
+ if (inst->op == QOP_TLB_COLOR_READ)
+ return true;
+
+ switch (inst->dst.file) {
+ case QFILE_TLB_Z_WRITE:
+ case QFILE_TLB_COLOR_WRITE:
+ case QFILE_TLB_COLOR_WRITE_MS:
return true;
default:
return false;
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index b507e370683..ae3590854b2 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -167,6 +167,16 @@ set_last_dst_pack(struct vc4_compile *c, struct qinst *inst)
}
}
+static void
+handle_r4_qpu_write(struct vc4_compile *c, struct qinst *qinst,
+ struct qpu_reg dst)
+{
+ if (dst.mux != QPU_MUX_R4)
+ queue(c, qpu_a_MOV(dst, qpu_r4()));
+ else if (qinst->sf)
+ queue(c, qpu_a_MOV(qpu_ra(QPU_W_NOP), qpu_r4()));
+}
+
void
vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
{
@@ -290,6 +300,22 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
last_vpm_read_index = qinst->src[i].index;
src[i] = qpu_ra(QPU_R_VPM);
break;
+
+ case QFILE_FRAG_X:
+ src[i] = qpu_ra(QPU_R_XY_PIXEL_COORD);
+ break;
+ case QFILE_FRAG_Y:
+ src[i] = qpu_rb(QPU_R_XY_PIXEL_COORD);
+ break;
+ case QFILE_FRAG_REV_FLAG:
+ src[i] = qpu_rb(QPU_R_MS_REV_FLAGS);
+ break;
+
+ case QFILE_TLB_COLOR_WRITE:
+ case QFILE_TLB_COLOR_WRITE_MS:
+ case QFILE_TLB_Z_WRITE:
+ case QFILE_TLB_STENCIL_SETUP:
+ unreachable("bad qir src file");
}
}
@@ -304,9 +330,29 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
case QFILE_VPM:
dst = qpu_ra(QPU_W_VPM);
break;
+
+ case QFILE_TLB_COLOR_WRITE:
+ dst = qpu_tlbc();
+ break;
+
+ case QFILE_TLB_COLOR_WRITE_MS:
+ dst = qpu_tlbc_ms();
+ break;
+
+ case QFILE_TLB_Z_WRITE:
+ dst = qpu_ra(QPU_W_TLB_Z);
+ break;
+
+ case QFILE_TLB_STENCIL_SETUP:
+ dst = qpu_ra(QPU_W_TLB_STENCIL_SETUP);
+ break;
+
case QFILE_VARY:
case QFILE_UNIF:
case QFILE_SMALL_IMM:
+ case QFILE_FRAG_X:
+ case QFILE_FRAG_Y:
+ case QFILE_FRAG_REV_FLAG:
assert(!"not reached");
break;
}
@@ -339,24 +385,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
abort();
}
- if (dst.mux != QPU_MUX_R4)
- queue(c, qpu_a_MOV(dst, qpu_r4()));
-
- break;
+ handle_r4_qpu_write(c, qinst, dst);
- case QOP_FRAG_X:
- queue(c, qpu_a_ITOF(dst,
- qpu_ra(QPU_R_XY_PIXEL_COORD)));
- break;
-
- case QOP_FRAG_Y:
- queue(c, qpu_a_ITOF(dst,
- qpu_rb(QPU_R_XY_PIXEL_COORD)));
- break;
-
- case QOP_FRAG_REV_FLAG:
- queue(c, qpu_a_ITOF(dst,
- qpu_rb(QPU_R_MS_REV_FLAGS)));
break;
case QOP_MS_MASK:
@@ -374,38 +404,11 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
*/
break;
- case QOP_TLB_STENCIL_SETUP:
- assert(!unpack);
- queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_STENCIL_SETUP),
- src[0]) | unpack);
- break;
-
- case QOP_TLB_Z_WRITE:
- queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_Z),
- src[0]) | unpack);
- set_last_cond_add(c, qinst->cond);
- handled_qinst_cond = true;
- break;
-
case QOP_TLB_COLOR_READ:
queue(c, qpu_NOP());
*last_inst(c) = qpu_set_sig(*last_inst(c),
QPU_SIG_COLOR_LOAD);
-
- if (dst.mux != QPU_MUX_R4)
- queue(c, qpu_a_MOV(dst, qpu_r4()));
- break;
-
- case QOP_TLB_COLOR_WRITE:
- queue(c, qpu_a_MOV(qpu_tlbc(), src[0]) | unpack);
- set_last_cond_add(c, qinst->cond);
- handled_qinst_cond = true;
- break;
-
- case QOP_TLB_COLOR_WRITE_MS:
- queue(c, qpu_a_MOV(qpu_tlbc_ms(), src[0]));
- set_last_cond_add(c, qinst->cond);
- handled_qinst_cond = true;
+ handle_r4_qpu_write(c, qinst, dst);
break;
case QOP_VARY_ADD_C:
@@ -432,8 +435,7 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
queue(c, qpu_NOP());
*last_inst(c) = qpu_set_sig(*last_inst(c),
QPU_SIG_LOAD_TMU0);
- if (dst.mux != QPU_MUX_R4)
- queue(c, qpu_a_MOV(dst, qpu_r4()));
+ handle_r4_qpu_write(c, qinst, dst);
break;
default:
@@ -476,10 +478,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
assert(qinst->cond == QPU_COND_ALWAYS ||
handled_qinst_cond);
- if (qinst->sf) {
- assert(!qir_is_multi_instruction(qinst));
+ if (qinst->sf)
*last_inst(c) |= QPU_SF;
- }
}
uint32_t cycles = qpu_schedule_instructions(c);
diff --git a/src/gallium/drivers/vc4/vc4_query.c b/src/gallium/drivers/vc4/vc4_query.c
index 270832eae3a..17400a37ca3 100644
--- a/src/gallium/drivers/vc4/vc4_query.c
+++ b/src/gallium/drivers/vc4/vc4_query.c
@@ -72,6 +72,11 @@ vc4_get_query_result(struct pipe_context *ctx, struct pipe_query *query,
return true;
}
+static void
+vc4_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{
+}
+
void
vc4_query_init(struct pipe_context *pctx)
{
@@ -80,5 +85,6 @@ vc4_query_init(struct pipe_context *pctx)
pctx->begin_query = vc4_begin_query;
pctx->end_query = vc4_end_query;
pctx->get_query_result = vc4_get_query_result;
+ pctx->set_active_query_state = vc4_set_active_query_state;
}
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
index 167a2f5bd8e..1da4db2ebb7 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -208,6 +208,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_PCI_DEVICE:
case PIPE_CAP_PCI_FUNCTION:
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
+ case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
return 0;
/* Stream output. */
diff --git a/src/gallium/drivers/virgl/virgl_query.c b/src/gallium/drivers/virgl/virgl_query.c
index b0200556342..5173bd39a45 100644
--- a/src/gallium/drivers/virgl/virgl_query.c
+++ b/src/gallium/drivers/virgl/virgl_query.c
@@ -164,6 +164,11 @@ static boolean virgl_get_query_result(struct pipe_context *ctx,
return TRUE;
}
+static void
+virgl_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{
+}
+
void virgl_init_query_functions(struct virgl_context *vctx)
{
vctx->base.render_condition = virgl_render_condition;
@@ -172,4 +177,5 @@ void virgl_init_query_functions(struct virgl_context *vctx)
vctx->base.begin_query = virgl_begin_query;
vctx->base.end_query = virgl_end_query;
vctx->base.get_query_result = virgl_get_query_result;
+ vctx->base.set_active_query_state = virgl_set_active_query_state;
}
diff --git a/src/gallium/drivers/virgl/virgl_screen.c b/src/gallium/drivers/virgl/virgl_screen.c
index 5a5afc1712f..14c91105a04 100644
--- a/src/gallium/drivers/virgl/virgl_screen.c
+++ b/src/gallium/drivers/virgl/virgl_screen.c
@@ -240,6 +240,7 @@ virgl_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_PCI_DEVICE:
case PIPE_CAP_PCI_FUNCTION:
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
+ case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
return 0;
case PIPE_CAP_VENDOR_ID:
return 0x1af4;
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index 1c97e82ece5..82efaf5d8a9 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -173,6 +173,12 @@ struct pipe_context {
struct pipe_resource *resource,
unsigned offset);
+ /**
+ * Set whether all current non-driver queries except TIME_ELAPSED are
+ * active or paused.
+ */
+ void (*set_active_query_state)(struct pipe_context *pipe, boolean enable);
+
/*@}*/
/**
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index 5e204a3e5ea..1aef21d6292 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -691,6 +691,7 @@ enum pipe_cap
PIPE_CAP_PCI_DEVICE,
PIPE_CAP_PCI_FUNCTION,
PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT,
+ PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR,
};
#define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0)
diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp
index 4d11c2477c7..fb757886381 100644
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -322,6 +322,15 @@ namespace {
// list of kernel functions to the internalizer. The internalizer will
// treat the functions in the list as "main" functions and internalize
// all of the other functions.
+#if HAVE_LLVM >= 0x0309
+ auto preserve_kernels = [=](const llvm::GlobalValue &GV) {
+ for (const auto &kernel : kernels) {
+ if (GV.getName() == kernel->getName())
+ return true;
+ }
+ return false;
+ };
+#else
std::vector<const char*> export_list;
for (std::vector<llvm::Function *>::const_iterator I = kernels.begin(),
E = kernels.end();
@@ -329,12 +338,17 @@ namespace {
llvm::Function *kernel = *I;
export_list.push_back(kernel->getName().data());
}
+#endif
#if HAVE_LLVM < 0x0306
PM.add(new llvm::DataLayoutPass(mod));
#elif HAVE_LLVM < 0x0307
PM.add(new llvm::DataLayoutPass());
#endif
+#if HAVE_LLVM >= 0x0309
+ PM.add(llvm::createInternalizePass(preserve_kernels));
+#else
PM.add(llvm::createInternalizePass(export_list));
+#endif
llvm::PassManagerBuilder PMB;
PMB.OptLevel = optimization_level;
diff --git a/src/gallium/state_trackers/va/context.c b/src/gallium/state_trackers/va/context.c
index b25c381d968..25d587af46f 100644
--- a/src/gallium/state_trackers/va/context.c
+++ b/src/gallium/state_trackers/va/context.c
@@ -283,6 +283,10 @@ vlVaDestroyContext(VADriverContextP ctx, VAContextID context_id)
drv = VL_VA_DRIVER(ctx);
pipe_mutex_lock(drv->mutex);
context = handle_table_get(drv->htab, context_id);
+ if (!context) {
+ pipe_mutex_unlock(drv->mutex);
+ return VA_STATUS_ERROR_INVALID_CONTEXT;
+ }
if (context->decoder) {
if (u_reduce_video_profile(context->decoder->profile) ==
diff --git a/src/gallium/state_trackers/va/image.c b/src/gallium/state_trackers/va/image.c
index 2c42a985823..92d014c3d44 100644
--- a/src/gallium/state_trackers/va/image.c
+++ b/src/gallium/state_trackers/va/image.c
@@ -280,6 +280,7 @@ vlVaDestroyImage(VADriverContextP ctx, VAImageID image)
{
vlVaDriver *drv;
VAImage *vaimage;
+ VAStatus status;
if (!ctx)
return VA_STATUS_ERROR_INVALID_CONTEXT;
@@ -294,8 +295,9 @@ vlVaDestroyImage(VADriverContextP ctx, VAImageID image)
handle_table_remove(VL_VA_DRIVER(ctx)->htab, image);
pipe_mutex_unlock(drv->mutex);
+ status = vlVaDestroyBuffer(ctx, vaimage->buf);
FREE(vaimage);
- return vlVaDestroyBuffer(ctx, vaimage->buf);
+ return status;
}
VAStatus
diff --git a/src/gallium/targets/libgl-xlib/SConscript b/src/gallium/targets/libgl-xlib/SConscript
index e1c78dd06a0..1c816ff7762 100644
--- a/src/gallium/targets/libgl-xlib/SConscript
+++ b/src/gallium/targets/libgl-xlib/SConscript
@@ -48,11 +48,15 @@ if env['llvm']:
env.Prepend(LIBS = [llvmpipe])
if env['platform'] != 'darwin':
+ # Disallow undefined symbols, except with Address Sanitizer, since libasan
+ # is not linked on shared libs, as it should be LD_PRELOAD'ed instead
+ if not env['asan']:
+ env.Append(SHLINKFLAGS = [
+ '-Wl,-z,defs',
+ ])
env.Append(SHLINKFLAGS = [
- # Disallow undefined symbols
- '-Wl,-z,defs',
- # Restrict exported symbols
- '-Wl,--version-script=%s' % File("libgl-xlib.sym").srcnode().path,
+ # Restrict exported symbols
+ '-Wl,--version-script=%s' % File("libgl-xlib.sym").srcnode().path,
])
# libGL.so.1.5
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index c79bed45753..1b2793a5d6b 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -36,6 +36,7 @@
#include <amdgpu_drm.h>
#include <xf86drm.h>
#include <stdio.h>
+#include <inttypes.h>
static inline struct amdgpu_winsys_bo *amdgpu_winsys_bo(struct pb_buffer *bo)
{
@@ -141,9 +142,9 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf)
amdgpu_fence_reference(&bo->fence[i], NULL);
if (bo->initial_domain & RADEON_DOMAIN_VRAM)
- bo->ws->allocated_vram -= align(bo->base.size, bo->ws->gart_page_size);
+ bo->ws->allocated_vram -= align64(bo->base.size, bo->ws->gart_page_size);
else if (bo->initial_domain & RADEON_DOMAIN_GTT)
- bo->ws->allocated_gtt -= align(bo->base.size, bo->ws->gart_page_size);
+ bo->ws->allocated_gtt -= align64(bo->base.size, bo->ws->gart_page_size);
FREE(bo);
}
@@ -265,7 +266,7 @@ static void amdgpu_add_buffer_to_global_list(struct amdgpu_winsys_bo *bo)
}
static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
- unsigned size,
+ uint64_t size,
unsigned alignment,
unsigned usage,
enum radeon_bo_domain initial_domain,
@@ -303,9 +304,9 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
if (r) {
fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
- fprintf(stderr, "amdgpu: size : %d bytes\n", size);
- fprintf(stderr, "amdgpu: alignment : %d bytes\n", alignment);
- fprintf(stderr, "amdgpu: domains : %d\n", initial_domain);
+ fprintf(stderr, "amdgpu: size : %"PRIu64" bytes\n", size);
+ fprintf(stderr, "amdgpu: alignment : %u bytes\n", alignment);
+ fprintf(stderr, "amdgpu: domains : %u\n", initial_domain);
goto error_bo_alloc;
}
@@ -331,9 +332,9 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
if (initial_domain & RADEON_DOMAIN_VRAM)
- ws->allocated_vram += align(size, ws->gart_page_size);
+ ws->allocated_vram += align64(size, ws->gart_page_size);
else if (initial_domain & RADEON_DOMAIN_GTT)
- ws->allocated_gtt += align(size, ws->gart_page_size);
+ ws->allocated_gtt += align64(size, ws->gart_page_size);
amdgpu_add_buffer_to_global_list(bo);
@@ -458,7 +459,7 @@ static void amdgpu_buffer_set_metadata(struct pb_buffer *_buf,
static struct pb_buffer *
amdgpu_bo_create(struct radeon_winsys *rws,
- unsigned size,
+ uint64_t size,
unsigned alignment,
boolean use_reusable_pool,
enum radeon_bo_domain domain,
@@ -468,21 +469,11 @@ amdgpu_bo_create(struct radeon_winsys *rws,
struct amdgpu_winsys_bo *bo;
unsigned usage = 0;
- /* Don't use VRAM if the GPU doesn't have much. This is only the initial
- * domain. The kernel is free to move the buffer if it wants to.
- *
- * 64MB means no VRAM by todays standards.
- */
- if (domain & RADEON_DOMAIN_VRAM && ws->info.vram_size <= 64*1024*1024) {
- domain = RADEON_DOMAIN_GTT;
- flags = RADEON_FLAG_GTT_WC;
- }
-
/* Align size to page size. This is the minimum alignment for normal
* BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
* like constant/uniform buffers, can benefit from better and more reuse.
*/
- size = align(size, ws->gart_page_size);
+ size = align64(size, ws->gart_page_size);
/* Only set one usage bit each for domains and flags, or the cache manager
* might consider different sets of domains / flags compatible
@@ -592,9 +583,9 @@ static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
*offset = whandle->offset;
if (bo->initial_domain & RADEON_DOMAIN_VRAM)
- ws->allocated_vram += align(bo->base.size, ws->gart_page_size);
+ ws->allocated_vram += align64(bo->base.size, ws->gart_page_size);
else if (bo->initial_domain & RADEON_DOMAIN_GTT)
- ws->allocated_gtt += align(bo->base.size, ws->gart_page_size);
+ ws->allocated_gtt += align64(bo->base.size, ws->gart_page_size);
amdgpu_add_buffer_to_global_list(bo);
@@ -648,7 +639,7 @@ static boolean amdgpu_bo_get_handle(struct pb_buffer *buffer,
}
static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,
- void *pointer, unsigned size)
+ void *pointer, uint64_t size)
{
struct amdgpu_winsys *ws = amdgpu_winsys(rws);
amdgpu_bo_handle buf_handle;
@@ -684,7 +675,7 @@ static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,
bo->initial_domain = RADEON_DOMAIN_GTT;
bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
- ws->allocated_gtt += align(bo->base.size, ws->gart_page_size);
+ ws->allocated_gtt += align64(bo->base.size, ws->gart_page_size);
amdgpu_add_buffer_to_global_list(bo);
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
index 4c837a8e20f..1164a3058c5 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
@@ -212,7 +212,7 @@ static int compute_level(struct amdgpu_winsys *ws,
}
surf_level = is_stencil ? &surf->stencil_level[level] : &surf->level[level];
- surf_level->offset = align(surf->bo_size, AddrSurfInfoOut->baseAlign);
+ surf_level->offset = align64(surf->bo_size, AddrSurfInfoOut->baseAlign);
surf_level->slice_size = AddrSurfInfoOut->sliceSize;
surf_level->pitch_bytes = AddrSurfInfoOut->pitch * (is_stencil ? 1 : surf->bpe);
surf_level->npix_x = u_minify(surf->npix_x, level);
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index 87d9a6aebec..1177d3e3c3a 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -256,6 +256,10 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws, int fd)
goto fail;
}
+ /* Set which chips have dedicated VRAM. */
+ ws->info.has_dedicated_vram =
+ !(ws->amdinfo.ids_flags & AMDGPU_IDS_FLAGS_FUSION);
+
/* Set hardware information. */
ws->info.gart_size = gtt.heap_size;
ws->info.vram_size = vram.heap_size;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 08856dff430..dd6555c9502 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -40,6 +40,7 @@
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
+#include <inttypes.h>
static inline struct radeon_bo *radeon_bo(struct pb_buffer *bo)
{
@@ -297,8 +298,8 @@ void radeon_bo_destroy(struct pb_buffer *_buf)
sizeof(va)) != 0 &&
va.operation == RADEON_VA_RESULT_ERROR) {
fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n");
- fprintf(stderr, "radeon: size : %d bytes\n", bo->base.size);
- fprintf(stderr, "radeon: va : 0x%016llx\n", (unsigned long long)bo->va);
+ fprintf(stderr, "radeon: size : %"PRIu64" bytes\n", bo->base.size);
+ fprintf(stderr, "radeon: va : 0x%"PRIx64"\n", bo->va);
}
}
@@ -529,10 +530,10 @@ static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE,
&args, sizeof(args))) {
fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
- fprintf(stderr, "radeon: size : %d bytes\n", size);
- fprintf(stderr, "radeon: alignment : %d bytes\n", alignment);
- fprintf(stderr, "radeon: domains : %d\n", args.initial_domain);
- fprintf(stderr, "radeon: flags : %d\n", args.flags);
+ fprintf(stderr, "radeon: size : %u bytes\n", size);
+ fprintf(stderr, "radeon: alignment : %u bytes\n", alignment);
+ fprintf(stderr, "radeon: domains : %u\n", args.initial_domain);
+ fprintf(stderr, "radeon: flags : %u\n", args.flags);
return NULL;
}
@@ -717,7 +718,7 @@ static void radeon_bo_set_metadata(struct pb_buffer *_buf,
static struct pb_buffer *
radeon_winsys_bo_create(struct radeon_winsys *rws,
- unsigned size,
+ uint64_t size,
unsigned alignment,
boolean use_reusable_pool,
enum radeon_bo_domain domain,
@@ -727,6 +728,10 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
struct radeon_bo *bo;
unsigned usage = 0;
+ /* Only 32-bit sizes are supported. */
+ if (size > UINT_MAX)
+ return NULL;
+
/* Align size to page size. This is the minimum alignment for normal
* BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
* like constant/uniform buffers, can benefit from better and more reuse.
@@ -768,7 +773,7 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
}
static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
- void *pointer, unsigned size)
+ void *pointer, uint64_t size)
{
struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
struct drm_radeon_gem_userptr args;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 1accc6a1863..2d9ec8cee09 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -297,6 +297,30 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
break;
}
+ /* Set which chips don't have dedicated VRAM. */
+ switch (ws->info.family) {
+ case CHIP_RS400:
+ case CHIP_RC410:
+ case CHIP_RS480:
+ case CHIP_RS600:
+ case CHIP_RS690:
+ case CHIP_RS740:
+ case CHIP_RS780:
+ case CHIP_RS880:
+ case CHIP_PALM:
+ case CHIP_SUMO:
+ case CHIP_SUMO2:
+ case CHIP_ARUBA:
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ ws->info.has_dedicated_vram = false;
+ break;
+
+ default:
+ ws->info.has_dedicated_vram = true;
+ }
+
/* Check for dma */
ws->info.has_sdma = FALSE;
/* DMA is disabled on R700. There is IB corruption and hangs. */
diff --git a/src/gallium/winsys/svga/drm/pb_buffer_simple_fenced.c b/src/gallium/winsys/svga/drm/pb_buffer_simple_fenced.c
index c1b9eb95c52..d049d1dbc46 100644
--- a/src/gallium/winsys/svga/drm/pb_buffer_simple_fenced.c
+++ b/src/gallium/winsys/svga/drm/pb_buffer_simple_fenced.c
@@ -40,6 +40,7 @@
#include <unistd.h>
#include <sched.h>
#endif
+#include <inttypes.h>
#include "pipe/p_compiler.h"
#include "pipe/p_defines.h"
@@ -172,7 +173,7 @@ fenced_manager_dump_locked(struct fenced_manager *fenced_mgr)
while(curr != &fenced_mgr->unfenced) {
fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
assert(!fenced_buf->fence);
- debug_printf("%10p %7u %8u %7s\n",
+ debug_printf("%10p %"PRIu64" %8u %7s\n",
(void *) fenced_buf,
fenced_buf->base.size,
p_atomic_read(&fenced_buf->base.reference.count),
@@ -188,7 +189,7 @@ fenced_manager_dump_locked(struct fenced_manager *fenced_mgr)
fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
assert(fenced_buf->buffer);
signaled = ops->fence_signalled(ops, fenced_buf->fence, 0);
- debug_printf("%10p %7u %8u %7s %10p %s\n",
+ debug_printf("%10p %"PRIu64" %8u %7s %10p %s\n",
(void *) fenced_buf,
fenced_buf->base.size,
p_atomic_read(&fenced_buf->base.reference.count),
diff --git a/src/gallium/winsys/svga/drm/vmw_buffer.c b/src/gallium/winsys/svga/drm/vmw_buffer.c
index c082dcc34e9..3ac80c7caf5 100644
--- a/src/gallium/winsys/svga/drm/vmw_buffer.c
+++ b/src/gallium/winsys/svga/drm/vmw_buffer.c
@@ -154,7 +154,7 @@ vmw_gmr_buffer_unmap(struct pb_buffer *_buf)
static void
vmw_gmr_buffer_get_base_buffer(struct pb_buffer *buf,
struct pb_buffer **base_buf,
- unsigned *offset)
+ pb_size *offset)
{
*base_buf = buf;
*offset = 0;
@@ -266,7 +266,7 @@ vmw_gmr_bufmgr_region_ptr(struct pb_buffer *buf,
struct SVGAGuestPtr *ptr)
{
struct pb_buffer *base_buf;
- unsigned offset = 0;
+ pb_size offset = 0;
struct vmw_gmr_buffer *gmr_buf;
pb_get_base_buffer( buf, &base_buf, &offset );