summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/freedreno/Makefile.am5
-rw-r--r--src/gallium/drivers/freedreno/Makefile.sources5
-rw-r--r--src/gallium/drivers/freedreno/a2xx/fd2_compiler.c22
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_context.h3
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_draw.c11
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_program.c9
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_screen.c5
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_texture.c35
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_context.h3
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_draw.c8
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_screen.c5
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_texture.c34
-rw-r--r--src/gallium/drivers/freedreno/freedreno_context.h2
-rw-r--r--src/gallium/drivers/freedreno/freedreno_fence.c4
-rw-r--r--src/gallium/drivers/freedreno/freedreno_gmem.c9
-rw-r--r--src/gallium/drivers/freedreno/freedreno_screen.c6
-rw-r--r--src/gallium/drivers/freedreno/freedreno_screen.h4
-rw-r--r--src/gallium/drivers/freedreno/freedreno_util.h7
-rw-r--r--src/gallium/drivers/freedreno/ir3/disasm-a3xx.c8
-rw-r--r--src/gallium/drivers/freedreno/ir3/instr-a3xx.h19
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3.c132
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3.h192
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_cmdline.c67
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_compiler.c3709
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_compiler.h15
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c655
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_cp.c38
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_depth.c91
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_dump.c456
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_flatten.c152
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_group.c114
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_legalize.c253
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c7
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_print.c237
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_ra.c1164
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_sched.c590
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_shader.c38
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_shader.h9
-rw-r--r--src/gallium/drivers/i915/i915_fpc_optimize.c4
-rw-r--r--src/gallium/drivers/i915/i915_fpc_translate.c2
-rw-r--r--src/gallium/drivers/i915/i915_screen.c2
-rw-r--r--src/gallium/drivers/ilo/Makefile.sources34
-rw-r--r--src/gallium/drivers/ilo/core/ilo_buffer.h34
-rw-r--r--src/gallium/drivers/ilo/core/ilo_builder.c2
-rw-r--r--src/gallium/drivers/ilo/core/ilo_builder_3d.h58
-rw-r--r--src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h1301
-rw-r--r--src/gallium/drivers/ilo/core/ilo_builder_3d_top.h1259
-rw-r--r--src/gallium/drivers/ilo/core/ilo_builder_decode.c8
-rw-r--r--src/gallium/drivers/ilo/core/ilo_builder_media.h106
-rw-r--r--src/gallium/drivers/ilo/core/ilo_core.h3
-rw-r--r--src/gallium/drivers/ilo/core/ilo_debug.h17
-rw-r--r--src/gallium/drivers/ilo/core/ilo_dev.c11
-rw-r--r--src/gallium/drivers/ilo/core/ilo_dev.h3
-rw-r--r--src/gallium/drivers/ilo/core/ilo_fence.h73
-rw-r--r--src/gallium/drivers/ilo/core/ilo_format.c755
-rw-r--r--src/gallium/drivers/ilo/core/ilo_image.c33
-rw-r--r--src/gallium/drivers/ilo/core/ilo_image.h34
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_3d.h427
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c2222
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_3d_top.c1716
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_cc.c890
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_cc.h199
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_compute.c435
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_compute.h92
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_raster.c1252
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_raster.h301
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_sampler.c742
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_sampler.h103
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_sbe.c350
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_sbe.h103
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_shader.c737
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_shader.h256
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_shader_ps.c771
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_sol.c464
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_sol.h166
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_surface.c1179
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_surface.h121
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_surface_format.c351
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_urb.c769
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_urb.h103
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_vf.c984
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_vf.h228
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_viewport.c378
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_viewport.h132
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_zs.c727
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_zs.h93
-rw-r--r--src/gallium/drivers/ilo/genhw/gen_mi.xml.h3
-rw-r--r--src/gallium/drivers/ilo/genhw/gen_regs.xml.h2
-rw-r--r--src/gallium/drivers/ilo/genhw/gen_render_3d.xml.h316
-rw-r--r--src/gallium/drivers/ilo/genhw/gen_render_dynamic.xml.h80
-rw-r--r--src/gallium/drivers/ilo/genhw/gen_render_surface.xml.h7
-rw-r--r--src/gallium/drivers/ilo/genhw/genhw.h7
-rw-r--r--src/gallium/drivers/ilo/ilo_blitter.h38
-rw-r--r--src/gallium/drivers/ilo/ilo_blitter_pipe.c2
-rw-r--r--src/gallium/drivers/ilo/ilo_blitter_rectlist.c157
-rw-r--r--src/gallium/drivers/ilo/ilo_draw.c20
-rw-r--r--src/gallium/drivers/ilo/ilo_format.c356
-rw-r--r--src/gallium/drivers/ilo/ilo_format.h (renamed from src/gallium/drivers/ilo/core/ilo_format.h)4
-rw-r--r--src/gallium/drivers/ilo/ilo_render.c169
-rw-r--r--src/gallium/drivers/ilo/ilo_render.h3
-rw-r--r--src/gallium/drivers/ilo/ilo_render_dynamic.c179
-rw-r--r--src/gallium/drivers/ilo/ilo_render_gen.h29
-rw-r--r--src/gallium/drivers/ilo/ilo_render_gen6.c282
-rw-r--r--src/gallium/drivers/ilo/ilo_render_gen7.c334
-rw-r--r--src/gallium/drivers/ilo/ilo_render_gen8.c155
-rw-r--r--src/gallium/drivers/ilo/ilo_render_media.c3
-rw-r--r--src/gallium/drivers/ilo/ilo_render_surface.c118
-rw-r--r--src/gallium/drivers/ilo/ilo_resource.c54
-rw-r--r--src/gallium/drivers/ilo/ilo_screen.c27
-rw-r--r--src/gallium/drivers/ilo/ilo_shader.c479
-rw-r--r--src/gallium/drivers/ilo/ilo_shader.h42
-rw-r--r--src/gallium/drivers/ilo/ilo_state.c1449
-rw-r--r--src/gallium/drivers/ilo/ilo_state.h217
-rw-r--r--src/gallium/drivers/ilo/shader/ilo_shader_internal.h28
-rw-r--r--src/gallium/drivers/ilo/shader/toy_tgsi.c3
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_depth.c4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_public.h8
-rw-r--r--src/gallium/drivers/llvmpipe/lp_query.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_screen.c6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.c15
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c25
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_sampler.c40
-rw-r--r--src/gallium/drivers/llvmpipe/lp_surface.c66
-rw-r--r--src/gallium/drivers/nouveau/Android.mk4
-rw-r--r--src/gallium/drivers/nouveau/Makefile.am2
-rw-r--r--src/gallium/drivers/nouveau/codegen/lib/gk110.asm18
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp14
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp8
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp11
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp25
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp1
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp2
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp22
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp130
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp7
-rw-r--r--src/gallium/drivers/nouveau/nouveau_buffer.c6
-rw-r--r--src/gallium/drivers/nouveau/nouveau_heap.h20
-rw-r--r--src/gallium/drivers/nouveau/nouveau_screen.c10
-rw-r--r--src/gallium/drivers/nouveau/nouveau_screen.h4
-rw-r--r--src/gallium/drivers/nouveau/nv30/nv30_clear.c2
-rw-r--r--src/gallium/drivers/nouveau/nv30/nv30_context.h2
-rw-r--r--src/gallium/drivers/nouveau/nv30/nv30_draw.c74
-rw-r--r--src/gallium/drivers/nouveau/nv30/nv30_fragprog.c5
-rw-r--r--src/gallium/drivers/nouveau/nv30/nv30_screen.c5
-rw-r--r--src/gallium/drivers/nouveau/nv30/nv30_state_validate.c26
-rw-r--r--src/gallium/drivers/nouveau/nv30/nv30_vbo.c2
-rw-r--r--src/gallium/drivers/nouveau/nv30/nv30_vertprog.c5
-rw-r--r--src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c19
-rw-r--r--src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c9
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_context.c11
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_context.h29
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_miptree.c4
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query.c51
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_screen.c2
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_screen.h24
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_state.c4
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_state_validate.c2
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_vbo.c1
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_compute.c2
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_context.c11
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_context.h24
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c2
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_program.c21
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query.c90
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_screen.c19
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_screen.h25
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c9
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_state.c22
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c4
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_surface.c6
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_tex.c2
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c1
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nve4_compute.c2
-rw-r--r--src/gallium/drivers/r300/r300_screen.c3
-rw-r--r--src/gallium/drivers/r300/r300_tgsi_to_rc.c8
-rw-r--r--src/gallium/drivers/r600/Android.mk4
-rw-r--r--src/gallium/drivers/r600/r600_pipe.c2
-rw-r--r--src/gallium/drivers/r600/r600_shader.c177
-rw-r--r--src/gallium/drivers/r600/r600_state_common.c31
-rw-r--r--src/gallium/drivers/radeon/Android.mk4
-rw-r--r--src/gallium/drivers/radeon/Makefile.sources1
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.c5
-rw-r--r--src/gallium/drivers/radeon/radeon_llvm.h4
-rw-r--r--src/gallium/drivers/radeon/radeon_llvm_emit.c8
-rw-r--r--src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c56
-rw-r--r--src/gallium/drivers/radeon/radeon_vce.c60
-rw-r--r--src/gallium/drivers/radeon/radeon_vce.h10
-rw-r--r--src/gallium/drivers/radeon/radeon_vce_40_2_2.c32
-rw-r--r--src/gallium/drivers/radeon/radeon_vce_50.c228
-rw-r--r--src/gallium/drivers/radeonsi/Makefile.sources1
-rw-r--r--src/gallium/drivers/radeonsi/cik_sdma.c364
-rw-r--r--src/gallium/drivers/radeonsi/si_dma.c20
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.c4
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.h9
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c18
-rw-r--r--src/gallium/drivers/radeonsi/si_state.c42
-rw-r--r--src/gallium/drivers/radeonsi/si_state.h1
-rw-r--r--src/gallium/drivers/radeonsi/si_state_shaders.c65
-rw-r--r--src/gallium/drivers/radeonsi/sid.h31
-rw-r--r--src/gallium/drivers/rbug/rbug_public.h8
-rw-r--r--src/gallium/drivers/softpipe/sp_public.h8
-rw-r--r--src/gallium/drivers/softpipe/sp_query.c2
-rw-r--r--src/gallium/drivers/softpipe/sp_screen.c9
-rw-r--r--src/gallium/drivers/softpipe/sp_state_sampler.c12
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_sample.c1228
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_sample.h28
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_tile_cache.c11
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_tile_cache.h4
-rw-r--r--src/gallium/drivers/svga/svga_screen.c3
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_insn.c2
-rw-r--r--src/gallium/drivers/trace/tr_context.c26
-rw-r--r--src/gallium/drivers/trace/tr_dump_state.c2
-rw-r--r--src/gallium/drivers/trace/tr_public.h2
-rw-r--r--src/gallium/drivers/vc4/Android.mk (renamed from src/gallium/drivers/vc4/kernel/Makefile.am)39
-rw-r--r--src/gallium/drivers/vc4/Makefile.am4
-rw-r--r--src/gallium/drivers/vc4/Makefile.sources7
-rw-r--r--src/gallium/drivers/vc4/kernel/Makefile.sources6
-rw-r--r--src/gallium/drivers/vc4/kernel/vc4_drv.h37
-rw-r--r--src/gallium/drivers/vc4/kernel/vc4_gem.c73
-rw-r--r--src/gallium/drivers/vc4/kernel/vc4_packet.h (renamed from src/gallium/drivers/vc4/vc4_packet.h)107
-rw-r--r--src/gallium/drivers/vc4/kernel/vc4_render_cl.c447
-rw-r--r--src/gallium/drivers/vc4/kernel/vc4_validate.c468
-rw-r--r--src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c116
-rw-r--r--src/gallium/drivers/vc4/vc4_blit.c106
-rw-r--r--src/gallium/drivers/vc4/vc4_bufmgr.c177
-rw-r--r--src/gallium/drivers/vc4/vc4_bufmgr.h4
-rw-r--r--src/gallium/drivers/vc4/vc4_cl.h2
-rw-r--r--src/gallium/drivers/vc4/vc4_cl_dump.c33
-rw-r--r--src/gallium/drivers/vc4/vc4_context.c297
-rw-r--r--src/gallium/drivers/vc4/vc4_context.h20
-rw-r--r--src/gallium/drivers/vc4/vc4_draw.c54
-rw-r--r--src/gallium/drivers/vc4/vc4_drm.h40
-rw-r--r--src/gallium/drivers/vc4/vc4_job.c97
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_algebraic.c5
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_constant_folding.c4
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_copy_propagation.c5
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_cse.c5
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_dead_code.c2
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_small_immediates.c5
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_vpm_writes.c8
-rw-r--r--src/gallium/drivers/vc4/vc4_program.c17
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.c18
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.h10
-rw-r--r--src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c9
-rw-r--r--src/gallium/drivers/vc4/vc4_qpu_emit.c19
-rw-r--r--src/gallium/drivers/vc4/vc4_qpu_schedule.c64
-rw-r--r--src/gallium/drivers/vc4/vc4_query.c3
-rw-r--r--src/gallium/drivers/vc4/vc4_register_allocate.c9
-rw-r--r--src/gallium/drivers/vc4/vc4_reorder_uniforms.c4
-rw-r--r--src/gallium/drivers/vc4/vc4_resource.c39
-rw-r--r--src/gallium/drivers/vc4/vc4_resource.h9
-rw-r--r--src/gallium/drivers/vc4/vc4_screen.c4
-rw-r--r--src/gallium/drivers/vc4/vc4_screen.h12
-rw-r--r--src/gallium/drivers/vc4/vc4_simulator.c17
-rw-r--r--src/gallium/drivers/vc4/vc4_simulator_validate.h22
-rw-r--r--src/gallium/drivers/vc4/vc4_state.c23
256 files changed, 22026 insertions, 16637 deletions
diff --git a/src/gallium/drivers/freedreno/Makefile.am b/src/gallium/drivers/freedreno/Makefile.am
index 4b2629f77bd..cbf62c6daae 100644
--- a/src/gallium/drivers/freedreno/Makefile.am
+++ b/src/gallium/drivers/freedreno/Makefile.am
@@ -21,15 +21,16 @@ libfreedreno_la_SOURCES = \
noinst_PROGRAMS = ir3_compiler
+# XXX: Required due to the C++ sources in libnir/libglsl_util
+nodist_EXTRA_ir3_compiler_SOURCES = dummy.cpp
ir3_compiler_SOURCES = \
ir3/ir3_cmdline.c
ir3_compiler_LDADD = \
libfreedreno.la \
- ../../auxiliary/libgallium.la \
+ $(top_builddir)/src/gallium/auxiliary/libgallium.la \
$(top_builddir)/src/glsl/libnir.la \
$(top_builddir)/src/libglsl_util.la \
- -lstdc++ \
$(top_builddir)/src/util/libmesautil.la \
$(GALLIUM_COMMON_LIB_DEPS) \
$(FREEDRENO_LIBS)
diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources
index a565a9c4e4d..baae9144005 100644
--- a/src/gallium/drivers/freedreno/Makefile.sources
+++ b/src/gallium/drivers/freedreno/Makefile.sources
@@ -120,18 +120,17 @@ ir3_SOURCES := \
ir3/disasm-a3xx.c \
ir3/instr-a3xx.h \
ir3/ir3.c \
- ir3/ir3_compiler.c \
ir3/ir3_compiler_nir.c \
+ ir3/ir3_compiler.c \
ir3/ir3_compiler.h \
ir3/ir3_cp.c \
ir3/ir3_depth.c \
- ir3/ir3_dump.c \
- ir3/ir3_flatten.c \
ir3/ir3_group.c \
ir3/ir3.h \
ir3/ir3_legalize.c \
ir3/ir3_nir.h \
ir3/ir3_nir_lower_if_else.c \
+ ir3/ir3_print.c \
ir3/ir3_ra.c \
ir3/ir3_sched.c \
ir3/ir3_shader.c \
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c b/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c
index e4acc7e95b4..b48fb4659cd 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c
@@ -414,32 +414,16 @@ add_src_reg(struct fd2_compile_context *ctx, struct ir2_instruction *alu,
static void
add_vector_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
{
- switch (inst->Instruction.Saturate) {
- case TGSI_SAT_NONE:
- break;
- case TGSI_SAT_ZERO_ONE:
+ if (inst->Instruction.Saturate) {
alu->alu.vector_clamp = true;
- break;
- case TGSI_SAT_MINUS_PLUS_ONE:
- DBG("unsupported saturate");
- assert(0);
- break;
}
}
static void
add_scalar_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
{
- switch (inst->Instruction.Saturate) {
- case TGSI_SAT_NONE:
- break;
- case TGSI_SAT_ZERO_ONE:
+ if (inst->Instruction.Saturate) {
alu->alu.scalar_clamp = true;
- break;
- case TGSI_SAT_MINUS_PLUS_ONE:
- DBG("unsupported saturate");
- assert(0);
- break;
}
}
@@ -758,7 +742,7 @@ translate_tex(struct fd2_compile_context *ctx,
struct tgsi_src_register tmp_src;
const struct tgsi_src_register *coord;
bool using_temp = (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) ||
- (inst->Instruction.Saturate != TGSI_SAT_NONE);
+ inst->Instruction.Saturate;
int idx;
if (using_temp || (opc == TGSI_OPCODE_TXP))
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.h b/src/gallium/drivers/freedreno/a3xx/fd3_context.h
index 4e3f521716e..77e4605e550 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_context.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.h
@@ -105,9 +105,6 @@ struct fd3_context {
*/
unsigned fsaturate_s, fsaturate_t, fsaturate_r;
- /* bitmask of integer texture samplers */
- uint16_t vinteger_s, finteger_s;
-
/* some state changes require a different shader variant. Keep
* track of this so we know when we need to re-emit shader state
* due to variant change. See fixup_shader_state()
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
index b522cf86695..b5838b58eb2 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
@@ -104,14 +104,12 @@ fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key)
if (last_key->has_per_samp || key->has_per_samp) {
if ((last_key->vsaturate_s != key->vsaturate_s) ||
(last_key->vsaturate_t != key->vsaturate_t) ||
- (last_key->vsaturate_r != key->vsaturate_r) ||
- (last_key->vinteger_s != key->vinteger_s))
+ (last_key->vsaturate_r != key->vsaturate_r))
ctx->prog.dirty |= FD_SHADER_DIRTY_VP;
if ((last_key->fsaturate_s != key->fsaturate_s) ||
(last_key->fsaturate_t != key->fsaturate_t) ||
- (last_key->fsaturate_r != key->fsaturate_r) ||
- (last_key->finteger_s != key->finteger_s))
+ (last_key->fsaturate_r != key->fsaturate_r))
ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
}
@@ -140,16 +138,13 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
// TODO set .half_precision based on render target format,
// ie. float16 and smaller use half, float32 use full..
.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
- .has_per_samp = (fd3_ctx->fsaturate || fd3_ctx->vsaturate ||
- fd3_ctx->vinteger_s || fd3_ctx->finteger_s),
+ .has_per_samp = (fd3_ctx->fsaturate || fd3_ctx->vsaturate),
.vsaturate_s = fd3_ctx->vsaturate_s,
.vsaturate_t = fd3_ctx->vsaturate_t,
.vsaturate_r = fd3_ctx->vsaturate_r,
.fsaturate_s = fd3_ctx->fsaturate_s,
.fsaturate_t = fd3_ctx->fsaturate_t,
.fsaturate_r = fd3_ctx->fsaturate_r,
- .vinteger_s = fd3_ctx->vinteger_s,
- .finteger_s = fd3_ctx->finteger_s,
},
.rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
.sprite_coord_enable = ctx->rasterizer ? ctx->rasterizer->sprite_coord_enable : 0,
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index a6824ef92e7..57fcaa9020e 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -413,12 +413,15 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
}
}
- /* TODO: Figure out if there's a way to make it spit out 0's and
- * 1's for the .z and .w components.
+ /* Replace the .xy coordinates with S/T from the point sprite. Set
+ * interpolation bits for .zw such that they become .01
*/
- if (emit->sprite_coord_enable & (1 << sem2idx(fp->inputs[j].semantic)))
+ if (emit->sprite_coord_enable & (1 << sem2idx(fp->inputs[j].semantic))) {
vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09)
<< ((inloc % 16) * 2);
+ vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2);
+ vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2);
+ }
}
OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_screen.c b/src/gallium/drivers/freedreno/a3xx/fd3_screen.c
index 3497921257c..094dcf376e5 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_screen.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_screen.c
@@ -32,6 +32,7 @@
#include "fd3_screen.h"
#include "fd3_context.h"
#include "fd3_format.h"
+#include "ir3_compiler.h"
static boolean
fd3_screen_is_format_supported(struct pipe_screen *pscreen,
@@ -103,7 +104,9 @@ fd3_screen_is_format_supported(struct pipe_screen *pscreen,
void
fd3_screen_init(struct pipe_screen *pscreen)
{
- fd_screen(pscreen)->max_rts = 4;
+ struct fd_screen *screen = fd_screen(pscreen);
+ screen->max_rts = 4;
+ screen->compiler = ir3_compiler_create(screen->gpu_id);
pscreen->context_create = fd3_context_create;
pscreen->is_format_supported = fd3_screen_is_format_supported;
}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
index 6f44ee3c08e..a278bf5c603 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
@@ -263,44 +263,11 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
return &so->base;
}
-static void
-fd3_set_sampler_views(struct pipe_context *pctx, unsigned shader,
- unsigned start, unsigned nr,
- struct pipe_sampler_view **views)
-{
- struct fd_context *ctx = fd_context(pctx);
- struct fd3_context *fd3_ctx = fd3_context(ctx);
- struct fd_texture_stateobj *tex;
- uint16_t integer_s = 0, *ptr;
- int i;
-
- fd_set_sampler_views(pctx, shader, start, nr, views);
-
- switch (shader) {
- case PIPE_SHADER_FRAGMENT:
- tex = &ctx->fragtex;
- ptr = &fd3_ctx->finteger_s;
- break;
- case PIPE_SHADER_VERTEX:
- tex = &ctx->verttex;
- ptr = &fd3_ctx->vinteger_s;
- break;
- default:
- return;
- }
-
- for (i = 0; i < tex->num_textures; i++)
- if (util_format_is_pure_integer(tex->textures[i]->format))
- integer_s |= 1 << i;
- *ptr = integer_s;
-}
-
-
void
fd3_texture_init(struct pipe_context *pctx)
{
pctx->create_sampler_state = fd3_sampler_state_create;
pctx->bind_sampler_states = fd3_sampler_states_bind;
pctx->create_sampler_view = fd3_sampler_view_create;
- pctx->set_sampler_views = fd3_set_sampler_views;
+ pctx->set_sampler_views = fd_set_sampler_views;
}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.h b/src/gallium/drivers/freedreno/a4xx/fd4_context.h
index 384602a2e4f..53e1bf6a2e6 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_context.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.h
@@ -83,9 +83,6 @@ struct fd4_context {
*/
uint16_t fsaturate_s, fsaturate_t, fsaturate_r;
- /* bitmask of integer texture samplers */
- uint16_t vinteger_s, finteger_s;
-
/* some state changes require a different shader variant. Keep
* track of this so we know when we need to re-emit shader state
* due to variant change. See fixup_shader_state()
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
index ae407f753fe..de5a306af60 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
@@ -82,8 +82,7 @@ fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key)
if (last_key->has_per_samp || key->has_per_samp) {
if ((last_key->vsaturate_s != key->vsaturate_s) ||
(last_key->vsaturate_t != key->vsaturate_t) ||
- (last_key->vsaturate_r != key->vsaturate_r) ||
- (last_key->vinteger_s != key->vinteger_s))
+ (last_key->vsaturate_r != key->vsaturate_r))
ctx->prog.dirty |= FD_SHADER_DIRTY_VP;
if ((last_key->fsaturate_s != key->fsaturate_s) ||
@@ -122,16 +121,13 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
// TODO set .half_precision based on render target format,
// ie. float16 and smaller use half, float32 use full..
.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
- .has_per_samp = (fd4_ctx->fsaturate || fd4_ctx->vsaturate ||
- fd4_ctx->vinteger_s || fd4_ctx->finteger_s),
+ .has_per_samp = (fd4_ctx->fsaturate || fd4_ctx->vsaturate),
.vsaturate_s = fd4_ctx->vsaturate_s,
.vsaturate_t = fd4_ctx->vsaturate_t,
.vsaturate_r = fd4_ctx->vsaturate_r,
.fsaturate_s = fd4_ctx->fsaturate_s,
.fsaturate_t = fd4_ctx->fsaturate_t,
.fsaturate_r = fd4_ctx->fsaturate_r,
- .vinteger_s = fd4_ctx->vinteger_s,
- .finteger_s = fd4_ctx->finteger_s,
},
.format = fd4_emit_format(pfb->cbufs[0]),
.pformat = pipe_surface_format(pfb->cbufs[0]),
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_screen.c b/src/gallium/drivers/freedreno/a4xx/fd4_screen.c
index f5b46685bdf..e8cbb2d201a 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_screen.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_screen.c
@@ -32,6 +32,7 @@
#include "fd4_screen.h"
#include "fd4_context.h"
#include "fd4_format.h"
+#include "ir3_compiler.h"
static boolean
fd4_screen_is_format_supported(struct pipe_screen *pscreen,
@@ -100,7 +101,9 @@ fd4_screen_is_format_supported(struct pipe_screen *pscreen,
void
fd4_screen_init(struct pipe_screen *pscreen)
{
- fd_screen(pscreen)->max_rts = 1;
+ struct fd_screen *screen = fd_screen(pscreen);
+ screen->max_rts = 1;
+ screen->compiler = ir3_compiler_create(screen->gpu_id);
pscreen->context_create = fd4_context_create;
pscreen->is_format_supported = fd4_screen_is_format_supported;
}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
index ff1ff8f0d34..6ba25d0816d 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
@@ -205,43 +205,11 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
return &so->base;
}
-static void
-fd4_set_sampler_views(struct pipe_context *pctx, unsigned shader,
- unsigned start, unsigned nr, struct pipe_sampler_view **views)
-{
- struct fd_context *ctx = fd_context(pctx);
- struct fd4_context *fd4_ctx = fd4_context(ctx);
- struct fd_texture_stateobj *tex;
- uint16_t integer_s = 0, *ptr;
- int i;
-
- fd_set_sampler_views(pctx, shader, start, nr, views);
-
- switch (shader) {
- case PIPE_SHADER_FRAGMENT:
- tex = &ctx->fragtex;
- ptr = &fd4_ctx->finteger_s;
- break;
- case PIPE_SHADER_VERTEX:
- tex = &ctx->verttex;
- ptr = &fd4_ctx->vinteger_s;
- break;
- default:
- return;
- }
-
- for (i = 0; i < tex->num_textures; i++)
- if (util_format_is_pure_integer(tex->textures[i]->format))
- integer_s |= 1 << i;
-
- *ptr = integer_s;
-}
-
void
fd4_texture_init(struct pipe_context *pctx)
{
pctx->create_sampler_state = fd4_sampler_state_create;
pctx->bind_sampler_states = fd_sampler_states_bind;
pctx->create_sampler_view = fd4_sampler_view_create;
- pctx->set_sampler_views = fd4_set_sampler_views;
+ pctx->set_sampler_views = fd_set_sampler_views;
}
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index 2c816b4b1f6..e420f1e5bd9 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -297,7 +297,7 @@ struct fd_context {
*/
struct fd_gmem_stateobj gmem;
struct fd_vsc_pipe pipe[8];
- struct fd_tile tile[64];
+ struct fd_tile tile[256];
/* which state objects need to be re-emit'd: */
enum {
diff --git a/src/gallium/drivers/freedreno/freedreno_fence.c b/src/gallium/drivers/freedreno/freedreno_fence.c
index 46b057d9062..375e58f7022 100644
--- a/src/gallium/drivers/freedreno/freedreno_fence.c
+++ b/src/gallium/drivers/freedreno/freedreno_fence.c
@@ -35,6 +35,7 @@
struct pipe_fence_handle {
struct pipe_reference reference;
struct fd_context *ctx;
+ struct fd_screen *screen;
uint32_t timestamp;
};
@@ -68,7 +69,7 @@ boolean fd_screen_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *fence,
uint64_t timeout)
{
- if (fd_pipe_wait(fence->ctx->screen->pipe, fence->timestamp))
+ if (fd_pipe_wait(fence->screen->pipe, fence->timestamp))
return false;
return true;
@@ -86,6 +87,7 @@ struct pipe_fence_handle * fd_fence_create(struct pipe_context *pctx)
pipe_reference_init(&fence->reference, 1);
fence->ctx = ctx;
+ fence->screen = ctx->screen;
fence->timestamp = fd_ringbuffer_timestamp(ctx->ring);
return fence;
diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c
index 11a1b62b26b..c105378ec4e 100644
--- a/src/gallium/drivers/freedreno/freedreno_gmem.c
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.c
@@ -117,6 +117,7 @@ calculate_tiles(struct fd_context *ctx)
uint32_t i, j, t, xoff, yoff;
uint32_t tpp_x, tpp_y;
bool has_zs = !!(ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL));
+ int tile_n[ARRAY_SIZE(ctx->pipe)];
if (has_zs) {
struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
@@ -247,6 +248,7 @@ calculate_tiles(struct fd_context *ctx)
/* configure tiles: */
t = 0;
yoff = miny;
+ memset(tile_n, 0, sizeof(tile_n));
for (i = 0; i < nbins_y; i++) {
uint32_t bw, bh;
@@ -257,20 +259,17 @@ calculate_tiles(struct fd_context *ctx)
for (j = 0; j < nbins_x; j++) {
struct fd_tile *tile = &ctx->tile[t];
- uint32_t n, p;
+ uint32_t p;
assert(t < ARRAY_SIZE(ctx->tile));
/* pipe number: */
p = ((i / tpp_y) * div_round_up(nbins_x, tpp_x)) + (j / tpp_x);
- /* slot number: */
- n = ((i % tpp_y) * tpp_x) + (j % tpp_x);
-
/* clip bin width: */
bw = MIN2(bin_w, minx + width - xoff);
- tile->n = n;
+ tile->n = tile_n[p]++;
tile->p = p;
tile->bin_w = bw;
tile->bin_h = bh;
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 556c8ab18d4..b3b5462b437 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -68,10 +68,7 @@ static const struct debug_named_value debug_options[] = {
{"fraghalf", FD_DBG_FRAGHALF, "Use half-precision in fragment shader"},
{"nobin", FD_DBG_NOBIN, "Disable hw binning"},
{"optmsgs", FD_DBG_OPTMSGS,"Enable optimizer debug messages"},
- {"optdump", FD_DBG_OPTDUMP,"Dump shader DAG to .dot files"},
{"glsl120", FD_DBG_GLSL120,"Temporary flag to force GLSL 120 (rather than 130) on a3xx+"},
- {"nocp", FD_DBG_NOCP, "Disable copy-propagation"},
- {"nir", FD_DBG_NIR, "Enable experimental NIR compiler"},
DEBUG_NAMED_VALUE_END
};
@@ -220,6 +217,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
+ case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
return 0;
case PIPE_CAP_MAX_VIEWPORTS:
@@ -374,6 +372,7 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0;
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
return 1;
@@ -519,6 +518,7 @@ fd_screen_create(struct fd_device *dev)
case 220:
fd2_screen_init(pscreen);
break;
+ case 307:
case 320:
case 330:
fd3_screen_init(pscreen);
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h
index 3b470d1d8a6..dbc2808262a 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.h
+++ b/src/gallium/drivers/freedreno/freedreno_screen.h
@@ -46,7 +46,9 @@ struct fd_screen {
uint32_t device_id;
uint32_t gpu_id; /* 220, 305, etc */
uint32_t chip_id; /* coreid:8 majorrev:8 minorrev:8 patch:8 */
- uint32_t max_rts;
+ uint32_t max_rts; /* max # of render targets */
+
+ void *compiler; /* currently unused for a2xx */
struct fd_device *dev;
struct fd_pipe *pipe;
diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
index 2735ae41315..deb0e602ce2 100644
--- a/src/gallium/drivers/freedreno/freedreno_util.h
+++ b/src/gallium/drivers/freedreno/freedreno_util.h
@@ -62,11 +62,8 @@ enum adreno_stencil_op fd_stencil_op(unsigned op);
#define FD_DBG_NOBYPASS 0x0040
#define FD_DBG_FRAGHALF 0x0080
#define FD_DBG_NOBIN 0x0100
-#define FD_DBG_OPTMSGS 0x0400
-#define FD_DBG_OPTDUMP 0x0800
-#define FD_DBG_GLSL120 0x1000
-#define FD_DBG_NOCP 0x2000
-#define FD_DBG_NIR 0x4000
+#define FD_DBG_OPTMSGS 0x0200
+#define FD_DBG_GLSL120 0x0400
extern int fd_mesa_debug;
extern bool fd_binning_enabled;
diff --git a/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c b/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c
index a5136c6bd3d..48ae7c71b9f 100644
--- a/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c
+++ b/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c
@@ -133,16 +133,16 @@ static void print_instr_cat0(instr_t *instr)
break;
case OPC_BR:
printf(" %sp0.%c, #%d", cat0->inv ? "!" : "",
- component[cat0->comp], cat0->immed);
+ component[cat0->comp], cat0->a3xx.immed);
break;
case OPC_JUMP:
case OPC_CALL:
- printf(" #%d", cat0->immed);
+ printf(" #%d", cat0->a3xx.immed);
break;
}
- if ((debug & PRINT_VERBOSE) && (cat0->dummy1|cat0->dummy2|cat0->dummy3|cat0->dummy4))
- printf("\t{0: %x,%x,%x,%x}", cat0->dummy1, cat0->dummy2, cat0->dummy3, cat0->dummy4);
+ if ((debug & PRINT_VERBOSE) && (cat0->a3xx.dummy1|cat0->dummy2|cat0->dummy3|cat0->dummy4))
+ printf("\t{0: %x,%x,%x,%x}", cat0->a3xx.dummy1, cat0->dummy2, cat0->dummy3, cat0->dummy4);
}
static void print_instr_cat1(instr_t *instr)
diff --git a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h
index cffa62b6f34..efb07ea479e 100644
--- a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h
+++ b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h
@@ -191,9 +191,9 @@ typedef enum {
OPC_LDLV = 31,
/* meta instructions (category -1): */
- /* placeholder instr to mark inputs/outputs: */
+ /* placeholder instr to mark shader inputs: */
OPC_META_INPUT = 0,
- OPC_META_OUTPUT = 1,
+ OPC_META_PHI = 1,
/* The "fan-in" and "fan-out" instructions are used for keeping
* track of instructions that write to multiple dst registers
* (fan-out) like texture sample instructions, or read multiple
@@ -201,9 +201,6 @@ typedef enum {
*/
OPC_META_FO = 2,
OPC_META_FI = 3,
- /* branches/flow control */
- OPC_META_FLOW = 4,
- OPC_META_PHI = 5,
} opc_t;
@@ -281,8 +278,16 @@ static inline int reg_special(reg_t reg)
typedef struct PACKED {
/* dword0: */
- int16_t immed : 16;
- uint32_t dummy1 : 16;
+ union PACKED {
+ struct PACKED {
+ int16_t immed : 16;
+ uint32_t dummy1 : 16;
+ } a3xx;
+ struct PACKED {
+ int32_t immed : 20;
+ uint32_t dummy1 : 12;
+ } a4xx;
+ };
/* dword1: */
uint32_t dummy2 : 8;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c
index e015de91c33..a166b67d7cf 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3.c
@@ -66,11 +66,22 @@ void * ir3_alloc(struct ir3 *shader, int sz)
return ptr;
}
-struct ir3 * ir3_create(void)
+struct ir3 * ir3_create(struct ir3_compiler *compiler,
+ unsigned nin, unsigned nout)
{
- struct ir3 *shader =
- calloc(1, sizeof(struct ir3));
+ struct ir3 *shader = calloc(1, sizeof(struct ir3));
+
grow_heap(shader);
+
+ shader->compiler = compiler;
+ shader->ninputs = nin;
+ shader->inputs = ir3_alloc(shader, sizeof(shader->inputs[0]) * nin);
+
+ shader->noutputs = nout;
+ shader->outputs = ir3_alloc(shader, sizeof(shader->outputs[0]) * nout);
+
+ list_inithead(&shader->block_list);
+
return shader;
}
@@ -81,7 +92,8 @@ void ir3_destroy(struct ir3 *shader)
shader->chunk = chunk->next;
free(chunk);
}
- free(shader->instrs);
+ free(shader->indirects);
+ free(shader->predicates);
free(shader->baryfs);
free(shader);
}
@@ -142,7 +154,11 @@ static int emit_cat0(struct ir3_instruction *instr, void *ptr,
{
instr_cat0_t *cat0 = ptr;
- cat0->immed = instr->cat0.immed;
+ if (info->gpu_id >= 400) {
+ cat0->a4xx.immed = instr->cat0.immed;
+ } else {
+ cat0->a3xx.immed = instr->cat0.immed;
+ }
cat0->repeat = instr->repeat;
cat0->ss = !!(instr->flags & IR3_INSTR_SS);
cat0->inv = instr->cat0.inv;
@@ -535,32 +551,40 @@ void * ir3_assemble(struct ir3 *shader, struct ir3_info *info,
uint32_t gpu_id)
{
uint32_t *ptr, *dwords;
- uint32_t i;
+ info->gpu_id = gpu_id;
info->max_reg = -1;
info->max_half_reg = -1;
info->max_const = -1;
info->instrs_count = 0;
+ info->sizedwords = 0;
+
+ list_for_each_entry (struct ir3_block, block, &shader->block_list, node) {
+ list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+ info->sizedwords += 2;
+ }
+ }
/* need a integer number of instruction "groups" (sets of 16
* instructions on a4xx or sets of 4 instructions on a3xx),
* so pad out w/ NOPs if needed: (NOTE each instruction is 64bits)
*/
if (gpu_id >= 400) {
- info->sizedwords = 2 * align(shader->instrs_count, 16);
+ info->sizedwords = align(info->sizedwords, 16 * 2);
} else {
- info->sizedwords = 2 * align(shader->instrs_count, 4);
+ info->sizedwords = align(info->sizedwords, 4 * 2);
}
ptr = dwords = calloc(4, info->sizedwords);
- for (i = 0; i < shader->instrs_count; i++) {
- struct ir3_instruction *instr = shader->instrs[i];
- int ret = emit[instr->category](instr, dwords, info);
- if (ret)
- goto fail;
- info->instrs_count += 1 + instr->repeat;
- dwords += 2;
+ list_for_each_entry (struct ir3_block, block, &shader->block_list, node) {
+ list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+ int ret = emit[instr->category](instr, dwords, info);
+ if (ret)
+ goto fail;
+ info->instrs_count += 1 + instr->repeat;
+ dwords += 2;
+ }
}
return ptr;
@@ -581,50 +605,30 @@ static struct ir3_register * reg_create(struct ir3 *shader,
return reg;
}
-static void insert_instr(struct ir3 *shader,
+static void insert_instr(struct ir3_block *block,
struct ir3_instruction *instr)
{
+ struct ir3 *shader = block->shader;
#ifdef DEBUG
static uint32_t serialno = 0;
instr->serialno = ++serialno;
#endif
- array_insert(shader->instrs, instr);
+ list_addtail(&instr->node, &block->instr_list);
if (is_input(instr))
array_insert(shader->baryfs, instr);
}
-struct ir3_block * ir3_block_create(struct ir3 *shader,
- unsigned ntmp, unsigned nin, unsigned nout)
+struct ir3_block * ir3_block_create(struct ir3 *shader)
{
- struct ir3_block *block;
- unsigned size;
- char *ptr;
-
- size = sizeof(*block);
- size += sizeof(block->temporaries[0]) * ntmp;
- size += sizeof(block->inputs[0]) * nin;
- size += sizeof(block->outputs[0]) * nout;
-
- ptr = ir3_alloc(shader, size);
-
- block = (void *)ptr;
- ptr += sizeof(*block);
-
- block->temporaries = (void *)ptr;
- block->ntemporaries = ntmp;
- ptr += sizeof(block->temporaries[0]) * ntmp;
-
- block->inputs = (void *)ptr;
- block->ninputs = nin;
- ptr += sizeof(block->inputs[0]) * nin;
-
- block->outputs = (void *)ptr;
- block->noutputs = nout;
- ptr += sizeof(block->outputs[0]) * nout;
-
+ struct ir3_block *block = ir3_alloc(shader, sizeof(*block));
+#ifdef DEBUG
+ static uint32_t serialno = 0;
+ block->serialno = ++serialno;
+#endif
block->shader = shader;
-
+ list_inithead(&block->node);
+ list_inithead(&block->instr_list);
return block;
}
@@ -652,7 +656,7 @@ struct ir3_instruction * ir3_instr_create2(struct ir3_block *block,
instr->block = block;
instr->category = category;
instr->opc = opc;
- insert_instr(block->shader, instr);
+ insert_instr(block, instr);
return instr;
}
@@ -677,7 +681,7 @@ struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr)
*new_instr = *instr;
new_instr->regs = regs;
- insert_instr(instr->block->shader, new_instr);
+ insert_instr(instr->block, new_instr);
/* clone registers: */
new_instr->regs_count = 0;
@@ -694,10 +698,40 @@ struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr)
struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
int num, int flags)
{
- struct ir3_register *reg = reg_create(instr->block->shader, num, flags);
+ struct ir3 *shader = instr->block->shader;
+ struct ir3_register *reg = reg_create(shader, num, flags);
#ifdef DEBUG
debug_assert(instr->regs_count < instr->regs_max);
#endif
instr->regs[instr->regs_count++] = reg;
return reg;
}
+
+void
+ir3_block_clear_mark(struct ir3_block *block)
+{
+ list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node)
+ instr->flags &= ~IR3_INSTR_MARK;
+}
+
+void
+ir3_clear_mark(struct ir3 *ir)
+{
+ list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+ ir3_block_clear_mark(block);
+ }
+}
+
+/* note: this will destroy instr->depth, don't do it until after sched! */
+void
+ir3_count_instructions(struct ir3 *ir)
+{
+ unsigned ip = 0;
+ list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+ list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+ instr->ip = ip++;
+ }
+ block->start_ip = list_first_entry(&block->instr_list, struct ir3_instruction, node)->ip;
+ block->end_ip = list_last_entry(&block->instr_list, struct ir3_instruction, node)->ip;
+ }
+}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index c0a14a07d48..9c35a763d58 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -28,17 +28,20 @@
#include <stdbool.h>
#include "util/u_debug.h"
+#include "util/list.h"
#include "instr-a3xx.h"
#include "disasm.h" /* TODO move 'enum shader_t' somewhere else.. */
/* low level intermediate representation of an adreno shader program */
+struct ir3_compiler;
struct ir3;
struct ir3_instruction;
struct ir3_block;
struct ir3_info {
+ uint32_t gpu_id;
uint16_t sizedwords;
uint16_t instrs_count; /* expanded to account for rpt's */
/* NOTE: max_reg, etc, does not include registers not touched
@@ -80,8 +83,8 @@ struct ir3_register {
* before register assignment is done:
*/
IR3_REG_SSA = 0x2000, /* 'instr' is ptr to assigning instr */
- IR3_REG_IA = 0x4000, /* meta-input dst is "assigned" */
- IR3_REG_ADDR = 0x8000, /* register is a0.x */
+ IR3_REG_PHI_SRC= 0x4000, /* phi src, regs[0]->instr points to phi */
+
} flags;
union {
/* normal registers:
@@ -185,6 +188,7 @@ struct ir3_instruction {
char inv;
char comp;
int immed;
+ struct ir3_block *target;
} cat0;
struct {
type_t src_type, dst_type;
@@ -218,14 +222,14 @@ struct ir3_instruction {
int aid;
} fi;
struct {
- struct ir3_block *if_block, *else_block;
- } flow;
+ /* used to temporarily hold reference to nir_phi_instr
+ * until we resolve the phi srcs
+ */
+ void *nphi;
+ } phi;
struct {
struct ir3_block *block;
} inout;
-
- /* XXX keep this as big as all other union members! */
- uint32_t info[3];
};
/* transient values used during various algorithms: */
@@ -243,6 +247,13 @@ struct ir3_instruction {
*/
#define DEPTH_UNUSED ~0
unsigned depth;
+ /* When we get to the RA stage, we no longer need depth, but
+ * we do need instruction's position/name:
+ */
+ struct {
+ uint16_t ip;
+ uint16_t name;
+ };
};
/* Used during CP and RA stages. For fanin and shader inputs/
@@ -290,7 +301,9 @@ struct ir3_instruction {
*/
struct ir3_instruction *fanin;
- struct ir3_instruction *next;
+ /* Entry in ir3_block's instruction list: */
+ struct list_head node;
+
#ifdef DEBUG
uint32_t serialno;
#endif
@@ -321,8 +334,11 @@ static inline int ir3_neighbor_count(struct ir3_instruction *instr)
struct ir3_heap_chunk;
struct ir3 {
- unsigned instrs_count, instrs_sz;
- struct ir3_instruction **instrs;
+ struct ir3_compiler *compiler;
+
+ unsigned ninputs, noutputs;
+ struct ir3_instruction **inputs;
+ struct ir3_instruction **outputs;
/* Track bary.f (and ldlv) instructions.. this is needed in
* scheduling to ensure that all varying fetches happen before
@@ -345,33 +361,54 @@ struct ir3 {
*/
unsigned indirects_count, indirects_sz;
struct ir3_instruction **indirects;
+ /* and same for instructions that consume predicate register: */
+ unsigned predicates_count, predicates_sz;
+ struct ir3_instruction **predicates;
+
+ /* List of blocks: */
+ struct list_head block_list;
- struct ir3_block *block;
unsigned heap_idx;
struct ir3_heap_chunk *chunk;
};
+typedef struct nir_block nir_block;
+
struct ir3_block {
+ struct list_head node;
struct ir3 *shader;
- unsigned ntemporaries, ninputs, noutputs;
- /* maps TGSI_FILE_TEMPORARY index back to the assigning instruction: */
- struct ir3_instruction **temporaries;
- struct ir3_instruction **inputs;
- struct ir3_instruction **outputs;
- /* only a single address register: */
- struct ir3_instruction *address;
- struct ir3_block *parent;
- struct ir3_instruction *head;
+
+ nir_block *nblock;
+
+ struct list_head instr_list; /* list of ir3_instruction */
+
+ /* each block has either one or two successors.. in case of
+ * two successors, 'condition' decides which one to follow.
+ * A block preceding an if/else has two successors.
+ */
+ struct ir3_instruction *condition;
+ struct ir3_block *successors[2];
+
+ uint16_t start_ip, end_ip;
+
+ /* used for per-pass extra block data. Mainly used right
+ * now in RA step to track livein/liveout.
+ */
+ void *bd;
+
+#ifdef DEBUG
+ uint32_t serialno;
+#endif
};
-struct ir3 * ir3_create(void);
+struct ir3 * ir3_create(struct ir3_compiler *compiler,
+ unsigned nin, unsigned nout);
void ir3_destroy(struct ir3 *shader);
void * ir3_assemble(struct ir3 *shader,
struct ir3_info *info, uint32_t gpu_id);
void * ir3_alloc(struct ir3 *shader, int sz);
-struct ir3_block * ir3_block_create(struct ir3 *shader,
- unsigned ntmp, unsigned nin, unsigned nout);
+struct ir3_block * ir3_block_create(struct ir3 *shader);
struct ir3_instruction * ir3_instr_create(struct ir3_block *block,
int category, opc_t opc);
@@ -383,7 +420,6 @@ const char *ir3_instr_name(struct ir3_instruction *instr);
struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
int num, int flags);
-
static inline bool ir3_instr_check_mark(struct ir3_instruction *instr)
{
if (instr->flags & IR3_INSTR_MARK)
@@ -392,22 +428,10 @@ static inline bool ir3_instr_check_mark(struct ir3_instruction *instr)
return false;
}
-static inline void ir3_clear_mark(struct ir3 *shader)
-{
- /* TODO would be nice to drop the instruction array.. for
- * new compiler, _clear_mark() is all we use it for, and
- * we could probably manage a linked list instead..
- *
- * Also, we'll probably want to mark instructions within
- * a block, so tracking the list of instrs globally is
- * unlikely to be what we want.
- */
- unsigned i;
- for (i = 0; i < shader->instrs_count; i++) {
- struct ir3_instruction *instr = shader->instrs[i];
- instr->flags &= ~IR3_INSTR_MARK;
- }
-}
+void ir3_block_clear_mark(struct ir3_block *block);
+void ir3_clear_mark(struct ir3 *shader);
+
+void ir3_count_instructions(struct ir3 *ir);
static inline int ir3_instr_regno(struct ir3_instruction *instr,
struct ir3_register *reg)
@@ -501,6 +525,28 @@ static inline bool is_mem(struct ir3_instruction *instr)
return (instr->category == 6);
}
+static inline bool
+is_store(struct ir3_instruction *instr)
+{
+ if (is_mem(instr)) {
+ /* these instructions, the "destination" register is
+ * actually a source, the address to store to.
+ */
+ switch (instr->opc) {
+ case OPC_STG:
+ case OPC_STP:
+ case OPC_STL:
+ case OPC_STLW:
+ case OPC_L2G:
+ case OPC_G2L:
+ return true;
+ default:
+ break;
+ }
+ }
+ return false;
+}
+
static inline bool is_input(struct ir3_instruction *instr)
{
/* in some cases, ldlv is used to fetch varying without
@@ -525,7 +571,7 @@ static inline bool writes_addr(struct ir3_instruction *instr)
{
if (instr->regs_count > 0) {
struct ir3_register *dst = instr->regs[0];
- return !!(dst->flags & IR3_REG_ADDR);
+ return reg_num(dst) == REG_A0;
}
return false;
}
@@ -556,13 +602,29 @@ static inline bool conflicts(struct ir3_instruction *a,
static inline bool reg_gpr(struct ir3_register *r)
{
- if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_ADDR))
+ if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED))
return false;
if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0))
return false;
return true;
}
+static inline type_t half_type(type_t type)
+{
+ switch (type) {
+ case TYPE_F32: return TYPE_F16;
+ case TYPE_U32: return TYPE_U16;
+ case TYPE_S32: return TYPE_S16;
+ case TYPE_F16:
+ case TYPE_U16:
+ case TYPE_S16:
+ return type;
+ default:
+ assert(0);
+ return ~0;
+ }
+}
+
/* some cat2 instructions (ie. those which are not float) can embed an
* immediate:
*/
@@ -747,37 +809,31 @@ static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr
/* dump: */
-#include <stdio.h>
-void ir3_dump(struct ir3 *shader, const char *name,
- struct ir3_block *block /* XXX maybe 'block' ptr should move to ir3? */,
- FILE *f);
-void ir3_dump_instr_single(struct ir3_instruction *instr);
-void ir3_dump_instr_list(struct ir3_instruction *instr);
-
-/* flatten if/else: */
-int ir3_block_flatten(struct ir3_block *block);
+void ir3_print(struct ir3 *ir);
+void ir3_print_instr(struct ir3_instruction *instr);
/* depth calculation: */
int ir3_delayslots(struct ir3_instruction *assigner,
struct ir3_instruction *consumer, unsigned n);
-void ir3_block_depth(struct ir3_block *block);
+void ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list);
+void ir3_depth(struct ir3 *ir);
/* copy-propagate: */
-void ir3_block_cp(struct ir3_block *block);
+void ir3_cp(struct ir3 *ir);
-/* group neightbors and insert mov's to resolve conflicts: */
-void ir3_block_group(struct ir3_block *block);
+/* group neighbors and insert mov's to resolve conflicts: */
+void ir3_group(struct ir3 *ir);
/* scheduling: */
-int ir3_block_sched(struct ir3_block *block);
+int ir3_sched(struct ir3 *ir);
/* register assignment: */
-int ir3_block_ra(struct ir3_block *block, enum shader_t type,
+struct ir3_ra_reg_set * ir3_ra_alloc_reg_set(void *memctx);
+int ir3_ra(struct ir3 *ir3, enum shader_t type,
bool frag_coord, bool frag_face);
/* legalize: */
-void ir3_block_legalize(struct ir3_block *block,
- bool *has_samp, int *max_bary);
+void ir3_legalize(struct ir3 *ir, bool *has_samp, int *max_bary);
/* ************************************************************************* */
/* instruction helpers */
@@ -807,6 +863,21 @@ ir3_COV(struct ir3_block *block, struct ir3_instruction *src,
return instr;
}
+static inline struct ir3_instruction *
+ir3_NOP(struct ir3_block *block)
+{
+ return ir3_instr_create(block, 0, OPC_NOP);
+}
+
+#define INSTR0(CAT, name) \
+static inline struct ir3_instruction * \
+ir3_##name(struct ir3_block *block) \
+{ \
+ struct ir3_instruction *instr = \
+ ir3_instr_create(block, CAT, OPC_##name); \
+ return instr; \
+}
+
#define INSTR1(CAT, name) \
static inline struct ir3_instruction * \
ir3_##name(struct ir3_block *block, \
@@ -850,7 +921,10 @@ ir3_##name(struct ir3_block *block, \
}
/* cat0 instructions: */
+INSTR0(0, BR);
+INSTR0(0, JUMP);
INSTR1(0, KILL);
+INSTR0(0, END);
/* cat2 instructions, most 2 src but some 1 src: */
INSTR2(2, ADD_F)
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
index d0517aab8ce..ad9d2719d59 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
@@ -30,6 +30,7 @@
#include <fcntl.h>
#include <stdint.h>
#include <stdlib.h>
+#include <stdio.h>
#include <err.h>
#include "tgsi/tgsi_parse.h"
@@ -65,34 +66,34 @@ static void dump_info(struct ir3_shader_variant *so, const char *str)
// TODO make gpu_id configurable on cmdline
bin = ir3_shader_assemble(so, 320);
if (fd_mesa_debug & FD_DBG_DISASM) {
- struct ir3_block *block = so->ir->block;
+ struct ir3 *ir = so->ir;
struct ir3_register *reg;
uint8_t regid;
unsigned i;
debug_printf("; %s: %s\n", type, str);
- for (i = 0; i < block->ninputs; i++) {
- if (!block->inputs[i]) {
+ for (i = 0; i < ir->ninputs; i++) {
+ if (!ir->inputs[i]) {
debug_printf("; in%d unused\n", i);
continue;
}
- reg = block->inputs[i]->regs[0];
+ reg = ir->inputs[i]->regs[0];
regid = reg->num;
debug_printf("@in(%sr%d.%c)\tin%d\n",
(reg->flags & IR3_REG_HALF) ? "h" : "",
(regid >> 2), "xyzw"[regid & 0x3], i);
}
- for (i = 0; i < block->noutputs; i++) {
- if (!block->outputs[i]) {
+ for (i = 0; i < ir->noutputs; i++) {
+ if (!ir->outputs[i]) {
debug_printf("; out%d unused\n", i);
continue;
}
/* kill shows up as a virtual output.. skip it! */
- if (is_kill(block->outputs[i]))
+ if (is_kill(ir->outputs[i]))
continue;
- reg = block->outputs[i]->regs[0];
+ reg = ir->outputs[i]->regs[0];
regid = reg->num;
debug_printf("@out(%sr%d.%c)\tout%d\n",
(reg->flags & IR3_REG_HALF) ? "h" : "",
@@ -194,16 +195,6 @@ read_file(const char *filename, void **ptr, size_t *size)
return 0;
}
-static void reset_variant(struct ir3_shader_variant *v, const char *msg)
-{
- printf("; %s\n", msg);
- v->inputs_count = 0;
- v->outputs_count = 0;
- v->total_in = 0;
- v->has_samp = false;
- v->immediates_count = 0;
-}
-
static void print_usage(void)
{
printf("Usage: ir3_compiler [OPTIONS]... FILE\n");
@@ -225,12 +216,12 @@ int main(int argc, char **argv)
const char *filename;
struct tgsi_token toks[65536];
struct tgsi_parse_context parse;
+ struct ir3_compiler *compiler;
struct ir3_shader_variant v;
struct ir3_shader_key key = {};
const char *info;
void *ptr;
size_t size;
- int use_nir = 0;
fd_mesa_debug |= FD_DBG_DISASM;
@@ -243,7 +234,7 @@ int main(int argc, char **argv)
while (n < argc) {
if (!strcmp(argv[n], "--verbose")) {
- fd_mesa_debug |= FD_DBG_OPTDUMP | FD_DBG_MSGS | FD_DBG_OPTMSGS;
+ fd_mesa_debug |= FD_DBG_MSGS | FD_DBG_OPTMSGS;
n++;
continue;
}
@@ -290,17 +281,6 @@ int main(int argc, char **argv)
continue;
}
- if (!strcmp(argv[n], "--nocp")) {
- fd_mesa_debug |= FD_DBG_NOCP;
- n++;
- continue;
- }
- if (!strcmp(argv[n], "--nir")) {
- use_nir = true;
- n++;
- continue;
- }
-
if (!strcmp(argv[n], "--help")) {
print_usage();
return 0;
@@ -340,31 +320,14 @@ int main(int argc, char **argv)
break;
}
- if (use_nir) {
- info = "NIR compiler";
- ret = ir3_compile_shader_nir(&v, toks, key);
- } else {
- info = "TGSI compiler";
- ret = ir3_compile_shader(&v, toks, key, true);
- }
-
- if (ret) {
- reset_variant(&v, "compiler failed, trying without copy propagation!");
- info = "compiler (no copy propagation)";
- ret = ir3_compile_shader(&v, toks, key, false);
- }
+ /* TODO cmdline option to target different gpus: */
+ compiler = ir3_compiler_create(320);
+ info = "NIR compiler";
+ ret = ir3_compile_shader_nir(compiler, &v, toks, key);
if (ret) {
fprintf(stderr, "compiler failed!\n");
return ret;
}
dump_info(&v, info);
}
-
-void _mesa_error_no_memory(const char *caller);
-
-void
-_mesa_error_no_memory(const char *caller)
-{
- fprintf(stderr, "Mesa error: out of memory in %s", caller);
-}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
index 43f4c955ac0..7c8eccb54e1 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
@@ -1,7 +1,7 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
/*
- * Copyright (C) 2013 Rob Clark <[email protected]>
+ * Copyright (C) 2015 Rob Clark <[email protected]>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -26,3710 +26,19 @@
* Rob Clark <[email protected]>
*/
-#include <stdarg.h>
-
-#include "pipe/p_state.h"
-#include "util/u_string.h"
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
-#include "tgsi/tgsi_lowering.h"
-#include "tgsi/tgsi_parse.h"
-#include "tgsi/tgsi_ureg.h"
-#include "tgsi/tgsi_info.h"
-#include "tgsi/tgsi_strings.h"
-#include "tgsi/tgsi_dump.h"
-#include "tgsi/tgsi_scan.h"
-
-#include "freedreno_util.h"
+#include "util/ralloc.h"
#include "ir3_compiler.h"
-#include "ir3_shader.h"
-
-#include "instr-a3xx.h"
-#include "ir3.h"
-
-struct ir3_compile_context {
- const struct tgsi_token *tokens;
- bool free_tokens;
- struct ir3 *ir;
- struct ir3_shader_variant *so;
- uint16_t integer_s;
-
- struct ir3_block *block;
- struct ir3_instruction *current_instr;
-
- /* we need to defer updates to block->outputs[] until the end
- * of an instruction (so we don't see new value until *after*
- * the src registers are processed)
- */
- struct {
- struct ir3_instruction *instr, **instrp;
- } output_updates[64];
- unsigned num_output_updates;
-
- /* are we in a sequence of "atomic" instructions?
- */
- bool atomic;
-
- /* For fragment shaders, from the hw perspective the only
- * actual input is r0.xy position register passed to bary.f.
- * But TGSI doesn't know that, it still declares things as
- * IN[] registers. So we do all the input tracking normally
- * and fix things up after compile_instructions()
- *
- * NOTE that frag_pos is the hardware position (possibly it
- * is actually an index or tag or some such.. it is *not*
- * values that can be directly used for gl_FragCoord..)
- */
- struct ir3_instruction *frag_pos, *frag_face, *frag_coord[4];
-
- /* For vertex shaders, keep track of the system values sources */
- struct ir3_instruction *vertex_id, *basevertex, *instance_id;
-
- struct tgsi_parse_context parser;
- unsigned type;
-
- struct tgsi_shader_info info;
-
- /* hmm, would be nice if tgsi_scan_shader figured this out
- * for us:
- */
- struct {
- unsigned first, last;
- struct ir3_instruction *fanin;
- } array[MAX_ARRAYS];
- uint32_t array_dirty;
- /* offset into array[], per file, of first array info */
- uint8_t array_offsets[TGSI_FILE_COUNT];
-
- /* for calculating input/output positions/linkages: */
- unsigned next_inloc;
-
- /* a4xx (at least patchlevel 0) cannot seem to flat-interpolate
- * so we need to use ldlv.u32 to load the varying directly:
- */
- bool flat_bypass;
-
- unsigned num_internal_temps;
- struct tgsi_src_register internal_temps[8];
-
- /* for looking up which system value is which */
- unsigned sysval_semantics[8];
-
- /* idx/slot for last compiler generated immediate */
- unsigned immediate_idx;
-
- /* stack of branch instructions that mark (potentially nested)
- * branch if/else/loop/etc
- */
- struct {
- struct ir3_instruction *instr, *cond;
- bool inv; /* true iff in else leg of branch */
- } branch[16];
- unsigned int branch_count;
-
- /* list of kill instructions: */
- struct ir3_instruction *kill[16];
- unsigned int kill_count;
-
- /* used when dst is same as one of the src, to avoid overwriting a
- * src element before the remaining scalar instructions that make
- * up the vector operation
- */
- struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register *tmp_src;
-
- /* just for catching incorrect use of get_dst()/put_dst():
- */
- bool using_tmp_dst;
-};
-
-
-static void vectorize(struct ir3_compile_context *ctx,
- struct ir3_instruction *instr, struct tgsi_dst_register *dst,
- int nsrcs, ...);
-static void create_mov(struct ir3_compile_context *ctx,
- struct tgsi_dst_register *dst, struct tgsi_src_register *src);
-static type_t get_ftype(struct ir3_compile_context *ctx);
-static type_t get_utype(struct ir3_compile_context *ctx);
-
-static unsigned setup_arrays(struct ir3_compile_context *ctx, unsigned file, unsigned i)
-{
- /* ArrayID 0 for a given file is the legacy array spanning the entire file: */
- ctx->array[i].first = 0;
- ctx->array[i].last = ctx->info.file_max[file];
- ctx->array_offsets[file] = i;
- i += ctx->info.array_max[file] + 1;
- return i;
-}
-
-static unsigned
-compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
- const struct tgsi_token *tokens)
-{
- unsigned ret, i;
- struct tgsi_shader_info *info = &ctx->info;
- struct tgsi_lowering_config lconfig = {
- .color_two_side = so->key.color_two_side,
- .lower_DST = true,
- .lower_XPD = true,
- .lower_SCS = true,
- .lower_LRP = true,
- .lower_FRC = true,
- .lower_POW = true,
- .lower_LIT = true,
- .lower_EXP = true,
- .lower_LOG = true,
- .lower_DP4 = true,
- .lower_DP3 = true,
- .lower_DPH = true,
- .lower_DP2 = true,
- .lower_DP2A = true,
- };
-
- switch (so->type) {
- case SHADER_FRAGMENT:
- case SHADER_COMPUTE:
- lconfig.saturate_s = so->key.fsaturate_s;
- lconfig.saturate_t = so->key.fsaturate_t;
- lconfig.saturate_r = so->key.fsaturate_r;
- ctx->integer_s = so->key.finteger_s;
- break;
- case SHADER_VERTEX:
- lconfig.saturate_s = so->key.vsaturate_s;
- lconfig.saturate_t = so->key.vsaturate_t;
- lconfig.saturate_r = so->key.vsaturate_r;
- ctx->integer_s = so->key.vinteger_s;
- break;
- }
-
- if (!so->shader) {
- /* hack for standalone compiler which does not have
- * screen/context:
- */
- } else if (ir3_shader_gpuid(so->shader) >= 400) {
- /* a4xx seems to have *no* sam.p */
- lconfig.lower_TXP = ~0; /* lower all txp */
- /* need special handling for "flat" */
- ctx->flat_bypass = true;
- } else {
- /* a3xx just needs to avoid sam.p for 3d tex */
- lconfig.lower_TXP = (1 << TGSI_TEXTURE_3D);
- /* no special handling for "flat" */
- ctx->flat_bypass = false;
- }
-
- ctx->tokens = tgsi_transform_lowering(&lconfig, tokens, &ctx->info);
- ctx->free_tokens = !!ctx->tokens;
- if (!ctx->tokens) {
- /* no lowering */
- ctx->tokens = tokens;
- }
- ctx->ir = so->ir;
- ctx->so = so;
- ctx->array_dirty = 0;
- ctx->next_inloc = 8;
- ctx->num_internal_temps = 0;
- ctx->branch_count = 0;
- ctx->kill_count = 0;
- ctx->block = NULL;
- ctx->current_instr = NULL;
- ctx->num_output_updates = 0;
- ctx->atomic = false;
- ctx->frag_pos = NULL;
- ctx->frag_face = NULL;
- ctx->vertex_id = NULL;
- ctx->instance_id = NULL;
- ctx->tmp_src = NULL;
- ctx->using_tmp_dst = false;
-
- memset(ctx->frag_coord, 0, sizeof(ctx->frag_coord));
- memset(ctx->array, 0, sizeof(ctx->array));
- memset(ctx->array_offsets, 0, sizeof(ctx->array_offsets));
-
-#define FM(x) (1 << TGSI_FILE_##x)
- /* NOTE: if relative addressing is used, we set constlen in
- * the compiler (to worst-case value) since we don't know in
- * the assembler what the max addr reg value can be:
- */
- if (info->indirect_files & FM(CONSTANT))
- so->constlen = MIN2(255, ctx->info.const_file_max[0] + 1);
-
- i = 0;
- i += setup_arrays(ctx, TGSI_FILE_INPUT, i);
- i += setup_arrays(ctx, TGSI_FILE_TEMPORARY, i);
- i += setup_arrays(ctx, TGSI_FILE_OUTPUT, i);
- /* any others? we don't track arrays for const..*/
-
- /* Immediates go after constants: */
- so->first_immediate = so->first_driver_param =
- info->const_file_max[0] + 1;
- /* 1 unit for the vertex id base */
- if (so->type == SHADER_VERTEX)
- so->first_immediate++;
- /* 4 (vec4) units for ubo base addresses */
- so->first_immediate += 4;
- ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);
-
- ret = tgsi_parse_init(&ctx->parser, ctx->tokens);
- if (ret != TGSI_PARSE_OK)
- return ret;
-
- ctx->type = ctx->parser.FullHeader.Processor.Processor;
-
- return ret;
-}
-
-static void
-compile_error(struct ir3_compile_context *ctx, const char *format, ...)
-{
- va_list ap;
- va_start(ap, format);
- _debug_vprintf(format, ap);
- va_end(ap);
- tgsi_dump(ctx->tokens, 0);
- debug_assert(0);
-}
-
-#define compile_assert(ctx, cond) do { \
- if (!(cond)) compile_error((ctx), "failed assert: "#cond"\n"); \
- } while (0)
-
-static void
-compile_free(struct ir3_compile_context *ctx)
-{
- if (ctx->free_tokens)
- free((void *)ctx->tokens);
- tgsi_parse_free(&ctx->parser);
-}
-
-struct instr_translater {
- void (*fxn)(const struct instr_translater *t,
- struct ir3_compile_context *ctx,
- struct tgsi_full_instruction *inst);
- unsigned tgsi_opc;
- opc_t opc;
- opc_t hopc; /* opc to use for half_precision mode, if different */
- unsigned arg;
-};
-
-static void
-instr_finish(struct ir3_compile_context *ctx)
-{
- unsigned i;
-
- if (ctx->atomic)
- return;
-
- for (i = 0; i < ctx->num_output_updates; i++)
- *(ctx->output_updates[i].instrp) = ctx->output_updates[i].instr;
-
- ctx->num_output_updates = 0;
-
- while (ctx->array_dirty) {
- unsigned aid = ffs(ctx->array_dirty) - 1;
- ctx->array[aid].fanin = NULL;
- ctx->array_dirty &= ~(1 << aid);
- }
-}
-
-/* For "atomic" groups of instructions, for example the four scalar
- * instructions to perform a vec4 operation. Basically this just
- * blocks out handling of output_updates so the next scalar instruction
- * still sees the result from before the start of the atomic group.
- *
- * NOTE: when used properly, this could probably replace get/put_dst()
- * stuff.
- */
-static void
-instr_atomic_start(struct ir3_compile_context *ctx)
-{
- ctx->atomic = true;
-}
-
-static void
-instr_atomic_end(struct ir3_compile_context *ctx)
-{
- ctx->atomic = false;
- instr_finish(ctx);
-}
-
-static struct ir3_instruction *
-instr_create(struct ir3_compile_context *ctx, int category, opc_t opc)
-{
- instr_finish(ctx);
- return (ctx->current_instr = ir3_instr_create(ctx->block, category, opc));
-}
-
-static struct ir3_block *
-push_block(struct ir3_compile_context *ctx)
-{
- struct ir3_block *block;
- unsigned ntmp, nin, nout;
-
-#define SCALAR_REGS(file) (4 * (ctx->info.file_max[TGSI_FILE_ ## file] + 1))
-
- /* hmm, give ourselves room to create 8 extra temporaries (vec4):
- */
- ntmp = SCALAR_REGS(TEMPORARY);
- ntmp += 8 * 4;
-
- nout = SCALAR_REGS(OUTPUT);
- nin = SCALAR_REGS(INPUT) + SCALAR_REGS(SYSTEM_VALUE);
-
- /* for outermost block, 'inputs' are the actual shader INPUT
- * register file. Reads from INPUT registers always go back to
- * top block. For nested blocks, 'inputs' is used to track any
- * TEMPORARY file register from one of the enclosing blocks that
- * is ready in this block.
- */
- if (!ctx->block) {
- /* NOTE: fragment shaders actually have two inputs (r0.xy, the
- * position)
- */
- if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
- int n = 2;
- if (ctx->info.reads_position)
- n += 4;
- if (ctx->info.uses_frontface)
- n += 4;
- nin = MAX2(n, nin);
- nout += ARRAY_SIZE(ctx->kill);
- }
- } else {
- nin = ntmp;
- }
-
- block = ir3_block_create(ctx->ir, ntmp, nin, nout);
-
- if ((ctx->type == TGSI_PROCESSOR_FRAGMENT) && !ctx->block)
- block->noutputs -= ARRAY_SIZE(ctx->kill);
-
- block->parent = ctx->block;
- ctx->block = block;
-
- return block;
-}
-
-static void
-pop_block(struct ir3_compile_context *ctx)
-{
- ctx->block = ctx->block->parent;
- compile_assert(ctx, ctx->block);
-}
-
-static struct ir3_instruction *
-create_output(struct ir3_block *block, struct ir3_instruction *instr,
- unsigned n)
-{
- struct ir3_instruction *out;
-
- out = ir3_instr_create(block, -1, OPC_META_OUTPUT);
- out->inout.block = block;
- ir3_reg_create(out, n, 0);
- if (instr)
- ir3_reg_create(out, 0, IR3_REG_SSA)->instr = instr;
-
- return out;
-}
-
-static struct ir3_instruction *
-create_input(struct ir3_block *block, struct ir3_instruction *instr,
- unsigned n)
-{
- struct ir3_instruction *in;
-
- in = ir3_instr_create(block, -1, OPC_META_INPUT);
- in->inout.block = block;
- ir3_reg_create(in, n, 0);
- if (instr)
- ir3_reg_create(in, 0, IR3_REG_SSA)->instr = instr;
-
- return in;
-}
-
-static struct ir3_instruction *
-block_input(struct ir3_block *block, unsigned n)
-{
- /* references to INPUT register file always go back up to
- * top level:
- */
- if (block->parent)
- return block_input(block->parent, n);
- return block->inputs[n];
-}
-
-/* return temporary in scope, creating if needed meta-input node
- * to track block inputs
- */
-static struct ir3_instruction *
-block_temporary(struct ir3_block *block, unsigned n)
-{
- /* references to TEMPORARY register file, find the nearest
- * enclosing block which has already assigned this temporary,
- * creating meta-input instructions along the way to keep
- * track of block inputs
- */
- if (block->parent && !block->temporaries[n]) {
- /* if already have input for this block, reuse: */
- if (!block->inputs[n])
- block->inputs[n] = block_temporary(block->parent, n);
-
- /* and create new input to return: */
- return create_input(block, block->inputs[n], n);
- }
- return block->temporaries[n];
-}
-
-static struct ir3_instruction *
-create_immed(struct ir3_compile_context *ctx, float val)
-{
- /* NOTE: *don't* use instr_create() here!
- */
- struct ir3_instruction *instr;
- instr = ir3_instr_create(ctx->block, 1, 0);
- instr->cat1.src_type = get_ftype(ctx);
- instr->cat1.dst_type = get_ftype(ctx);
- ir3_reg_create(instr, 0, 0);
- ir3_reg_create(instr, 0, IR3_REG_IMMED)->fim_val = val;
- return instr;
-}
-
-static void
-ssa_instr_set(struct ir3_compile_context *ctx, unsigned file, unsigned n,
- struct ir3_instruction *instr)
-{
- struct ir3_block *block = ctx->block;
- unsigned idx = ctx->num_output_updates;
-
- compile_assert(ctx, idx < ARRAY_SIZE(ctx->output_updates));
-
- /* NOTE: defer update of temporaries[idx] or output[idx]
- * until instr_finish(), so that if the current instruction
- * reads the same TEMP/OUT[] it gets the old value:
- *
- * bleh.. this might be a bit easier to just figure out
- * in instr_finish(). But at that point we've already
- * lost information about OUTPUT vs TEMPORARY register
- * file..
- */
-
- switch (file) {
- case TGSI_FILE_OUTPUT:
- compile_assert(ctx, n < block->noutputs);
- ctx->output_updates[idx].instrp = &block->outputs[n];
- ctx->output_updates[idx].instr = instr;
- ctx->num_output_updates++;
- break;
- case TGSI_FILE_TEMPORARY:
- compile_assert(ctx, n < block->ntemporaries);
- ctx->output_updates[idx].instrp = &block->temporaries[n];
- ctx->output_updates[idx].instr = instr;
- ctx->num_output_updates++;
- break;
- case TGSI_FILE_ADDRESS:
- compile_assert(ctx, n < 1);
- ctx->output_updates[idx].instrp = &block->address;
- ctx->output_updates[idx].instr = instr;
- ctx->num_output_updates++;
- break;
- }
-}
-
-static struct ir3_instruction *
-ssa_instr_get(struct ir3_compile_context *ctx, unsigned file, unsigned n)
-{
- struct ir3_block *block = ctx->block;
- struct ir3_instruction *instr = NULL;
-
- switch (file) {
- case TGSI_FILE_INPUT:
- instr = block_input(ctx->block, n);
- break;
- case TGSI_FILE_OUTPUT:
- /* really this should just happen in case of 'MOV_SAT OUT[n], ..',
- * for the following clamp instructions:
- */
- instr = block->outputs[n];
- /* we don't have to worry about read from an OUTPUT that was
- * assigned outside of the current block, because the _SAT
- * clamp instructions will always be in the same block as
- * the original instruction which wrote the OUTPUT
- */
- compile_assert(ctx, instr);
- break;
- case TGSI_FILE_TEMPORARY:
- instr = block_temporary(ctx->block, n);
- if (!instr) {
- /* this can happen when registers (or components of a TGSI
- * register) are used as src before they have been assigned
- * (undefined contents). To avoid confusing the rest of the
- * compiler, and to generally keep things peachy, substitute
- * an instruction that sets the src to 0.0. Or to keep
- * things undefined, I could plug in a random number? :-P
- *
- * NOTE: *don't* use instr_create() here!
- */
- instr = create_immed(ctx, 0.0);
- /* no need to recreate the immed for every access: */
- block->temporaries[n] = instr;
- }
- break;
- case TGSI_FILE_SYSTEM_VALUE:
- switch (ctx->sysval_semantics[n >> 2]) {
- case TGSI_SEMANTIC_VERTEXID_NOBASE:
- instr = ctx->vertex_id;
- break;
- case TGSI_SEMANTIC_BASEVERTEX:
- instr = ctx->basevertex;
- break;
- case TGSI_SEMANTIC_INSTANCEID:
- instr = ctx->instance_id;
- break;
- }
- break;
- }
-
- return instr;
-}
-
-static int dst_array_id(struct ir3_compile_context *ctx,
- const struct tgsi_dst_register *dst)
-{
- // XXX complete hack to recover tgsi_full_dst_register...
- // nothing that isn't wrapped in a tgsi_full_dst_register
- // should be indirect
- const struct tgsi_full_dst_register *fdst = (const void *)dst;
- return fdst->Indirect.ArrayID + ctx->array_offsets[dst->File];
-}
-
-static int src_array_id(struct ir3_compile_context *ctx,
- const struct tgsi_src_register *src)
-{
- // XXX complete hack to recover tgsi_full_src_register...
- // nothing that isn't wrapped in a tgsi_full_src_register
- // should be indirect
- const struct tgsi_full_src_register *fsrc = (const void *)src;
- debug_assert(src->File != TGSI_FILE_CONSTANT);
- return fsrc->Indirect.ArrayID + ctx->array_offsets[src->File];
-}
-
-static struct ir3_instruction *
-array_fanin(struct ir3_compile_context *ctx, unsigned aid, unsigned file)
-{
- struct ir3_instruction *instr;
-
- if (ctx->array[aid].fanin) {
- instr = ctx->array[aid].fanin;
- } else {
- unsigned first = ctx->array[aid].first;
- unsigned last = ctx->array[aid].last;
- unsigned i, j;
-
- instr = ir3_instr_create2(ctx->block, -1, OPC_META_FI,
- 1 + (4 * (last + 1 - first)));
- ir3_reg_create(instr, 0, 0);
- for (i = first; i <= last; i++) {
- for (j = 0; j < 4; j++) {
- unsigned n = regid(i, j);
- ir3_reg_create(instr, 0, IR3_REG_SSA)->instr =
- ssa_instr_get(ctx, file, n);
- }
- }
- ctx->array[aid].fanin = instr;
- ctx->array_dirty |= (1 << aid);
- }
-
- return instr;
-}
-
-static void
-ssa_dst(struct ir3_compile_context *ctx, struct ir3_instruction *instr,
- const struct tgsi_dst_register *dst, unsigned chan)
-{
- if (dst->Indirect) {
- struct ir3_register *reg = instr->regs[0];
- unsigned i, aid = dst_array_id(ctx, dst);
- unsigned first = ctx->array[aid].first;
- unsigned last = ctx->array[aid].last;
- unsigned off = dst->Index - first; /* vec4 offset */
-
- reg->size = 4 * (1 + last - first);
- reg->offset = regid(off, chan);
-
- instr->fanin = array_fanin(ctx, aid, dst->File);
-
- /* annotate with the array-id, to help out the register-
- * assignment stage. At least for the case of indirect
- * writes, we should capture enough dependencies to
- * preserve the order of reads/writes of the array, so
- * the multiple "names" for the array should end up all
- * assigned to the same registers.
- */
- instr->fanin->fi.aid = aid;
-
- /* Since we are scalarizing vec4 tgsi instructions/regs, we
- * run into a slight complication here. To do the naive thing
- * and setup a fanout for each scalar array element would end
- * up with the result that the instructions generated for each
- * component of the vec4 would end up clobbering each other.
- * So we take advantage here of knowing that the array index
- * (after the shl.b) will be a multiple of four, and only set
- * every fourth scalar component in the array. See also
- * fixup_ssa_dst_array()
- */
- for (i = first; i <= last; i++) {
- struct ir3_instruction *split;
- unsigned n = regid(i, chan);
- int off = (4 * (i - first)) + chan;
-
- if (is_meta(instr) && (instr->opc == OPC_META_FO))
- off -= instr->fo.off;
-
- split = ir3_instr_create(ctx->block, -1, OPC_META_FO);
- split->fo.off = off;
- ir3_reg_create(split, 0, 0);
- ir3_reg_create(split, 0, IR3_REG_SSA)->instr = instr;
-
- ssa_instr_set(ctx, dst->File, n, split);
- }
- } else {
- /* normal case (not relative addressed GPR) */
- ssa_instr_set(ctx, dst->File, regid(dst->Index, chan), instr);
- }
-}
-
-static void
-ssa_src(struct ir3_compile_context *ctx, struct ir3_register *reg,
- const struct tgsi_src_register *src, unsigned chan)
-{
- struct ir3_instruction *instr;
-
- if (src->Indirect && (src->File != TGSI_FILE_CONSTANT)) {
- /* for relative addressing of gpr's (due to register assignment)
- * we must generate a fanin instruction to collect all possible
- * array elements that the instruction could address together:
- */
- unsigned aid = src_array_id(ctx, src);
- unsigned first = ctx->array[aid].first;
- unsigned last = ctx->array[aid].last;
- unsigned off = src->Index - first; /* vec4 offset */
-
- reg->size = 4 * (1 + last - first);
- reg->offset = regid(off, chan);
-
- instr = array_fanin(ctx, aid, src->File);
- } else if (src->File == TGSI_FILE_CONSTANT && src->Dimension) {
- const struct tgsi_full_src_register *fsrc = (const void *)src;
- struct ir3_instruction *temp = NULL;
- int ubo_regid = regid(ctx->so->first_driver_param, 0) +
- fsrc->Dimension.Index - 1;
- int offset = 0;
-
- /* We don't handle indirect UBO array accesses... yet. */
- compile_assert(ctx, !fsrc->Dimension.Indirect);
- /* UBOs start at index 1. */
- compile_assert(ctx, fsrc->Dimension.Index > 0);
-
- if (src->Indirect) {
- /* In case of an indirect index, it will have been loaded into an
- * address register. There will be a sequence of
- *
- * shl.b x, val, 2
- * mova a0, x
- *
- * We rely on this sequence to get the original val out and shift
- * it by 4, since we're dealing in vec4 units.
- */
- compile_assert(ctx, ctx->block->address);
- compile_assert(ctx, ctx->block->address->regs[1]->instr->opc ==
- OPC_SHL_B);
-
- temp = instr = instr_create(ctx, 2, OPC_SHL_B);
- ir3_reg_create(instr, 0, 0);
- ir3_reg_create(instr, 0, IR3_REG_HALF | IR3_REG_SSA)->instr =
- ctx->block->address->regs[1]->instr->regs[1]->instr;
- ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 4;
- } else if (src->Index >= 64) {
- /* Otherwise it's a plain index (in vec4 units). Move it into a
- * register.
- */
- temp = instr = instr_create(ctx, 1, 0);
- instr->cat1.src_type = get_utype(ctx);
- instr->cat1.dst_type = get_utype(ctx);
- ir3_reg_create(instr, 0, 0);
- ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = src->Index * 16;
- } else {
- /* The offset is small enough to fit into the ldg instruction
- * directly.
- */
- offset = src->Index * 16;
- }
-
- if (temp) {
- /* If there was an offset (most common), add it to the buffer
- * address.
- */
- instr = instr_create(ctx, 2, OPC_ADD_S);
- ir3_reg_create(instr, 0, 0);
- ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = temp;
- ir3_reg_create(instr, ubo_regid, IR3_REG_CONST);
- } else {
- /* Otherwise just load the buffer address directly */
- instr = instr_create(ctx, 1, 0);
- instr->cat1.src_type = get_utype(ctx);
- instr->cat1.dst_type = get_utype(ctx);
- ir3_reg_create(instr, 0, 0);
- ir3_reg_create(instr, ubo_regid, IR3_REG_CONST);
- }
-
- temp = instr;
-
- instr = instr_create(ctx, 6, OPC_LDG);
- instr->cat6.type = TYPE_U32;
- instr->cat6.offset = offset + chan * 4;
- ir3_reg_create(instr, 0, 0);
- ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = temp;
- ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1;
-
- reg->flags &= ~(IR3_REG_RELATIV | IR3_REG_CONST);
- } else {
- /* normal case (not relative addressed GPR) */
- instr = ssa_instr_get(ctx, src->File, regid(src->Index, chan));
- }
-
- if (instr) {
- reg->flags |= IR3_REG_SSA;
- reg->instr = instr;
- } else if (reg->flags & IR3_REG_SSA) {
- /* special hack for trans_samp() which calls ssa_src() directly
- * to build up the collect (fanin) for const src.. (so SSA flag
- * set but no src instr... it basically gets lucky because we
- * default to 0.0 for "undefined" src instructions, which is
- * what it wants. We probably need to give it a better way to
- * do this, but for now this hack:
- */
- reg->instr = create_immed(ctx, 0.0);
- }
-}
-
-static struct ir3_register *
-add_dst_reg_wrmask(struct ir3_compile_context *ctx,
- struct ir3_instruction *instr, const struct tgsi_dst_register *dst,
- unsigned chan, unsigned wrmask)
-{
- unsigned flags = 0, num = 0;
- struct ir3_register *reg;
-
- switch (dst->File) {
- case TGSI_FILE_OUTPUT:
- case TGSI_FILE_TEMPORARY:
- /* uses SSA */
- break;
- case TGSI_FILE_ADDRESS:
- flags |= IR3_REG_ADDR;
- /* uses SSA */
- break;
- default:
- compile_error(ctx, "unsupported dst register file: %s\n",
- tgsi_file_name(dst->File));
- break;
- }
-
- if (dst->Indirect) {
- flags |= IR3_REG_RELATIV;
-
- /* shouldn't happen, and we can't cope with it below: */
- compile_assert(ctx, wrmask == 0x1);
-
- compile_assert(ctx, ctx->block->address);
- if (instr->address)
- compile_assert(ctx, ctx->block->address == instr->address);
-
- instr->address = ctx->block->address;
- array_insert(ctx->ir->indirects, instr);
- }
-
- reg = ir3_reg_create(instr, regid(num, chan), flags);
- reg->wrmask = wrmask;
-
- if (wrmask == 0x1) {
- /* normal case */
- ssa_dst(ctx, instr, dst, chan);
- } else if ((dst->File == TGSI_FILE_TEMPORARY) ||
- (dst->File == TGSI_FILE_OUTPUT) ||
- (dst->File == TGSI_FILE_ADDRESS)) {
- struct ir3_instruction *prev = NULL;
- unsigned i;
-
- compile_assert(ctx, !dst->Indirect);
-
- /* if instruction writes multiple, we need to create
- * some place-holder collect the registers:
- */
- for (i = 0; i < 4; i++) {
- /* NOTE: slightly ugly that we setup neighbor ptrs
- * for FO here, but handle FI in CP pass.. we should
- * probably just always setup neighbor ptrs in the
- * frontend?
- */
- struct ir3_instruction *split =
- ir3_instr_create(ctx->block, -1, OPC_META_FO);
- split->fo.off = i;
- /* unused dst reg: */
- /* NOTE: set SSA flag on dst here, because unused FO's
- * which don't get scheduled will end up not in the
- * instruction list when RA sets SSA flag on each dst.
- * Slight hack. We really should set SSA flag on
- * every dst register in the frontend.
- */
- ir3_reg_create(split, 0, IR3_REG_SSA);
- /* and src reg used to hold original instr */
- ir3_reg_create(split, 0, IR3_REG_SSA)->instr = instr;
- if (prev) {
- split->cp.left = prev;
- split->cp.left_cnt++;
- prev->cp.right = split;
- prev->cp.right_cnt++;
- }
- if ((wrmask & (1 << i)) && !ctx->atomic)
- ssa_dst(ctx, split, dst, chan+i);
- prev = split;
- }
- }
-
- return reg;
-}
-
-static struct ir3_register *
-add_dst_reg(struct ir3_compile_context *ctx, struct ir3_instruction *instr,
- const struct tgsi_dst_register *dst, unsigned chan)
-{
- return add_dst_reg_wrmask(ctx, instr, dst, chan, 0x1);
-}
-
-static struct ir3_register *
-add_src_reg_wrmask(struct ir3_compile_context *ctx,
- struct ir3_instruction *instr, const struct tgsi_src_register *src,
- unsigned chan, unsigned wrmask)
-{
- unsigned flags = 0, num = 0;
- struct ir3_register *reg;
-
- switch (src->File) {
- case TGSI_FILE_IMMEDIATE:
- /* TODO if possible, use actual immediate instead of const.. but
- * TGSI has vec4 immediates, we can only embed scalar (of limited
- * size, depending on instruction..)
- */
- flags |= IR3_REG_CONST;
- num = src->Index + ctx->so->first_immediate;
- break;
- case TGSI_FILE_CONSTANT:
- flags |= IR3_REG_CONST;
- num = src->Index;
- break;
- case TGSI_FILE_OUTPUT:
- /* NOTE: we should only end up w/ OUTPUT file for things like
- * clamp()'ing saturated dst instructions
- */
- case TGSI_FILE_INPUT:
- case TGSI_FILE_TEMPORARY:
- case TGSI_FILE_SYSTEM_VALUE:
- /* uses SSA */
- break;
- default:
- compile_error(ctx, "unsupported src register file: %s\n",
- tgsi_file_name(src->File));
- break;
- }
-
- /* We seem to have 8 bits (6.2) for dst register always, so I think
- * it is safe to assume GPR cannot be >=64
- *
- * cat3 instructions only have 8 bits for src2, but cannot take a
- * const for src2
- *
- * cat5 and cat6 in some cases only has 8 bits, but cannot take a
- * const for any src.
- *
- * Other than that we seem to have 12 bits to encode const src,
- * except for cat1 which may only have 11 bits (but that seems like
- * a bug)
- */
- if (flags & IR3_REG_CONST)
- compile_assert(ctx, src->Index < (1 << 9));
- else
- compile_assert(ctx, src->Index < (1 << 6));
-
- /* NOTE: abs/neg modifiers in tgsi only apply to float */
- if (src->Absolute)
- flags |= IR3_REG_FABS;
- if (src->Negate)
- flags |= IR3_REG_FNEG;
-
- if (src->Indirect) {
- flags |= IR3_REG_RELATIV;
-
- /* shouldn't happen, and we can't cope with it below: */
- compile_assert(ctx, wrmask == 0x1);
-
- compile_assert(ctx, ctx->block->address);
- if (instr->address)
- compile_assert(ctx, ctx->block->address == instr->address);
-
- instr->address = ctx->block->address;
- array_insert(ctx->ir->indirects, instr);
- }
-
- reg = ir3_reg_create(instr, regid(num, chan), flags);
- reg->wrmask = wrmask;
-
- if (wrmask == 0x1) {
- /* normal case */
- ssa_src(ctx, reg, src, chan);
- } else if ((src->File == TGSI_FILE_TEMPORARY) ||
- (src->File == TGSI_FILE_OUTPUT) ||
- (src->File == TGSI_FILE_INPUT)) {
- struct ir3_instruction *collect;
- unsigned i;
-
- compile_assert(ctx, !src->Indirect);
-
- /* if instruction reads multiple, we need to create
- * some place-holder collect the registers:
- */
- collect = ir3_instr_create(ctx->block, -1, OPC_META_FI);
- ir3_reg_create(collect, 0, 0); /* unused dst reg */
-
- for (i = 0; i < 4; i++) {
- if (wrmask & (1 << i)) {
- /* and src reg used point to the original instr */
- ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA),
- src, chan + i);
- } else if (wrmask & ~((i << i) - 1)) {
- /* if any remaining components, then dummy
- * placeholder src reg to fill in the blanks:
- */
- ir3_reg_create(collect, 0, 0);
- }
- }
-
- reg->flags |= IR3_REG_SSA;
- reg->instr = collect;
- }
-
- return reg;
-}
-
-static struct ir3_register *
-add_src_reg(struct ir3_compile_context *ctx, struct ir3_instruction *instr,
- const struct tgsi_src_register *src, unsigned chan)
-{
- return add_src_reg_wrmask(ctx, instr, src, chan, 0x1);
-}
-
-static void
-src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst)
-{
- src->File = dst->File;
- src->Indirect = dst->Indirect;
- src->Dimension = dst->Dimension;
- src->Index = dst->Index;
- src->Absolute = 0;
- src->Negate = 0;
- src->SwizzleX = TGSI_SWIZZLE_X;
- src->SwizzleY = TGSI_SWIZZLE_Y;
- src->SwizzleZ = TGSI_SWIZZLE_Z;
- src->SwizzleW = TGSI_SWIZZLE_W;
-}
-
-/* Get internal-temp src/dst to use for a sequence of instructions
- * generated by a single TGSI op.
- */
-static struct tgsi_src_register *
-get_internal_temp(struct ir3_compile_context *ctx,
- struct tgsi_dst_register *tmp_dst)
-{
- struct tgsi_src_register *tmp_src;
- int n;
-
- tmp_dst->File = TGSI_FILE_TEMPORARY;
- tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW;
- tmp_dst->Indirect = 0;
- tmp_dst->Dimension = 0;
-
- /* assign next temporary: */
- n = ctx->num_internal_temps++;
- compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps));
- tmp_src = &ctx->internal_temps[n];
-
- tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1;
-
- src_from_dst(tmp_src, tmp_dst);
-
- return tmp_src;
-}
-
-static inline bool
-is_const(struct tgsi_src_register *src)
-{
- return (src->File == TGSI_FILE_CONSTANT) ||
- (src->File == TGSI_FILE_IMMEDIATE);
-}
-
-static inline bool
-is_relative(struct tgsi_src_register *src)
-{
- return src->Indirect;
-}
-
-static inline bool
-is_rel_or_const(struct tgsi_src_register *src)
-{
- return is_relative(src) || is_const(src);
-}
-
-static type_t
-get_ftype(struct ir3_compile_context *ctx)
-{
- return TYPE_F32;
-}
-
-static type_t
-get_utype(struct ir3_compile_context *ctx)
-{
- return TYPE_U32;
-}
-
-static type_t
-get_stype(struct ir3_compile_context *ctx)
-{
- return TYPE_S32;
-}
-
-static unsigned
-src_swiz(struct tgsi_src_register *src, int chan)
-{
- switch (chan) {
- case 0: return src->SwizzleX;
- case 1: return src->SwizzleY;
- case 2: return src->SwizzleZ;
- case 3: return src->SwizzleW;
- }
- assert(0);
- return 0;
-}
-
-/* for instructions that cannot take a const register as src, if needed
- * generate a move to temporary gpr:
- */
-static struct tgsi_src_register *
-get_unconst(struct ir3_compile_context *ctx, struct tgsi_src_register *src)
-{
- struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register *tmp_src;
-
- compile_assert(ctx, is_rel_or_const(src));
-
- tmp_src = get_internal_temp(ctx, &tmp_dst);
-
- create_mov(ctx, &tmp_dst, src);
-
- return tmp_src;
-}
-
-static void
-get_immediate(struct ir3_compile_context *ctx,
- struct tgsi_src_register *reg, uint32_t val)
-{
- unsigned neg, swiz, idx, i;
- /* actually maps 1:1 currently.. not sure if that is safe to rely on: */
- static const unsigned swiz2tgsi[] = {
- TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
- };
-
- for (i = 0; i < ctx->immediate_idx; i++) {
- swiz = i % 4;
- idx = i / 4;
-
- if (ctx->so->immediates[idx].val[swiz] == val) {
- neg = 0;
- break;
- }
-
- if (ctx->so->immediates[idx].val[swiz] == -val) {
- neg = 1;
- break;
- }
- }
-
- if (i == ctx->immediate_idx) {
- /* need to generate a new immediate: */
- swiz = i % 4;
- idx = i / 4;
- neg = 0;
- ctx->so->immediates[idx].val[swiz] = val;
- ctx->so->immediates_count = idx + 1;
- ctx->immediate_idx++;
- }
-
- reg->File = TGSI_FILE_IMMEDIATE;
- reg->Indirect = 0;
- reg->Dimension = 0;
- reg->Index = idx;
- reg->Absolute = 0;
- reg->Negate = neg;
- reg->SwizzleX = swiz2tgsi[swiz];
- reg->SwizzleY = swiz2tgsi[swiz];
- reg->SwizzleZ = swiz2tgsi[swiz];
- reg->SwizzleW = swiz2tgsi[swiz];
-}
-
-static void
-create_mov(struct ir3_compile_context *ctx, struct tgsi_dst_register *dst,
- struct tgsi_src_register *src)
-{
- type_t type_mov = get_ftype(ctx);
- unsigned i;
-
- for (i = 0; i < 4; i++) {
- /* move to destination: */
- if (dst->WriteMask & (1 << i)) {
- struct ir3_instruction *instr;
-
- if (src->Absolute || src->Negate) {
- /* can't have abs or neg on a mov instr, so use
- * absneg.f instead to handle these cases:
- */
- instr = instr_create(ctx, 2, OPC_ABSNEG_F);
- } else {
- instr = instr_create(ctx, 1, 0);
- instr->cat1.src_type = type_mov;
- instr->cat1.dst_type = type_mov;
- }
-
- add_dst_reg(ctx, instr, dst, i);
- add_src_reg(ctx, instr, src, src_swiz(src, i));
- }
- }
-}
-
-static void
-create_clamp(struct ir3_compile_context *ctx,
- struct tgsi_dst_register *dst, struct tgsi_src_register *val,
- struct tgsi_src_register *minval, struct tgsi_src_register *maxval)
-{
- struct ir3_instruction *instr;
-
- instr = instr_create(ctx, 2, OPC_MAX_F);
- vectorize(ctx, instr, dst, 2, val, 0, minval, 0);
-
- instr = instr_create(ctx, 2, OPC_MIN_F);
- vectorize(ctx, instr, dst, 2, val, 0, maxval, 0);
-}
-
-static void
-create_clamp_imm(struct ir3_compile_context *ctx,
- struct tgsi_dst_register *dst,
- uint32_t minval, uint32_t maxval)
-{
- struct tgsi_src_register minconst, maxconst;
- struct tgsi_src_register src;
-
- src_from_dst(&src, dst);
-
- get_immediate(ctx, &minconst, minval);
- get_immediate(ctx, &maxconst, maxval);
-
- create_clamp(ctx, dst, &src, &minconst, &maxconst);
-}
-
-static struct tgsi_dst_register *
-get_dst(struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst)
-{
- struct tgsi_dst_register *dst = &inst->Dst[0].Register;
- unsigned i;
-
- compile_assert(ctx, !ctx->using_tmp_dst);
- ctx->using_tmp_dst = true;
-
- for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
- struct tgsi_src_register *src = &inst->Src[i].Register;
- if ((src->File == dst->File) && (src->Index == dst->Index)) {
- if ((dst->WriteMask == TGSI_WRITEMASK_XYZW) &&
- (src->SwizzleX == TGSI_SWIZZLE_X) &&
- (src->SwizzleY == TGSI_SWIZZLE_Y) &&
- (src->SwizzleZ == TGSI_SWIZZLE_Z) &&
- (src->SwizzleW == TGSI_SWIZZLE_W))
- continue;
- ctx->tmp_src = get_internal_temp(ctx, &ctx->tmp_dst);
- ctx->tmp_dst.WriteMask = dst->WriteMask;
- dst = &ctx->tmp_dst;
- break;
- }
- }
- return dst;
-}
-
-static void
-put_dst(struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst,
- struct tgsi_dst_register *dst)
-{
- compile_assert(ctx, ctx->using_tmp_dst);
- ctx->using_tmp_dst = false;
-
- /* if necessary, add mov back into original dst: */
- if (dst != &inst->Dst[0].Register) {
- create_mov(ctx, &inst->Dst[0].Register, ctx->tmp_src);
- }
-}
-
-/* helper to generate the necessary repeat and/or additional instructions
- * to turn a scalar instruction into a vector operation:
- */
-static void
-vectorize(struct ir3_compile_context *ctx, struct ir3_instruction *instr,
- struct tgsi_dst_register *dst, int nsrcs, ...)
-{
- va_list ap;
- int i, j, n = 0;
-
- instr_atomic_start(ctx);
-
- for (i = 0; i < 4; i++) {
- if (dst->WriteMask & (1 << i)) {
- struct ir3_instruction *cur;
-
- if (n++ == 0) {
- cur = instr;
- } else {
- cur = instr_create(ctx, instr->category, instr->opc);
- memcpy(cur->info, instr->info, sizeof(cur->info));
- }
-
- add_dst_reg(ctx, cur, dst, i);
-
- va_start(ap, nsrcs);
- for (j = 0; j < nsrcs; j++) {
- struct tgsi_src_register *src =
- va_arg(ap, struct tgsi_src_register *);
- unsigned flags = va_arg(ap, unsigned);
- struct ir3_register *reg;
- if (flags & IR3_REG_IMMED) {
- reg = ir3_reg_create(cur, 0, IR3_REG_IMMED);
- /* this is an ugly cast.. should have put flags first! */
- reg->iim_val = *(int *)&src;
- } else {
- reg = add_src_reg(ctx, cur, src, src_swiz(src, i));
- }
- reg->flags |= flags & ~(IR3_REG_FNEG | IR3_REG_SNEG);
- if (flags & IR3_REG_FNEG)
- reg->flags ^= IR3_REG_FNEG;
- if (flags & IR3_REG_SNEG)
- reg->flags ^= IR3_REG_SNEG;
- }
- va_end(ap);
- }
- }
-
- instr_atomic_end(ctx);
-}
-
-/*
- * Handlers for TGSI instructions which do not have a 1:1 mapping to
- * native instructions:
- */
-
-static void
-trans_clamp(const struct instr_translater *t,
- struct ir3_compile_context *ctx,
- struct tgsi_full_instruction *inst)
-{
- struct tgsi_dst_register *dst = get_dst(ctx, inst);
- struct tgsi_src_register *src0 = &inst->Src[0].Register;
- struct tgsi_src_register *src1 = &inst->Src[1].Register;
- struct tgsi_src_register *src2 = &inst->Src[2].Register;
-
- create_clamp(ctx, dst, src0, src1, src2);
-
- put_dst(ctx, inst, dst);
-}
-
-/* ARL(x) = x, but mova from hrN.x to a0.. */
-static void
-trans_arl(const struct instr_translater *t,
- struct ir3_compile_context *ctx,
- struct tgsi_full_instruction *inst)
-{
- struct ir3_instruction *instr;
- struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register *tmp_src;
- struct tgsi_dst_register *dst = &inst->Dst[0].Register;
- struct tgsi_src_register *src = &inst->Src[0].Register;
- unsigned chan = src->SwizzleX;
-
- compile_assert(ctx, dst->File == TGSI_FILE_ADDRESS);
-
- /* NOTE: we allocate a temporary from a flat register
- * namespace (ignoring half vs full). It turns out
- * not to really matter since registers get reassigned
- * later in ir3_ra which (hopefully!) can deal a bit
- * better with mixed half and full precision.
- */
- tmp_src = get_internal_temp(ctx, &tmp_dst);
-
- /* cov.{u,f}{32,16}s16 Rtmp, Rsrc */
- instr = instr_create(ctx, 1, 0);
- instr->cat1.src_type = (t->tgsi_opc == TGSI_OPCODE_ARL) ?
- get_ftype(ctx) : get_utype(ctx);
- instr->cat1.dst_type = TYPE_S16;
- add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF;
- add_src_reg(ctx, instr, src, chan);
-
- /* shl.b Rtmp, Rtmp, 2 */
- instr = instr_create(ctx, 2, OPC_SHL_B);
- add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF;
- add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF;
- ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2;
-
- /* mova a0, Rtmp */
- instr = instr_create(ctx, 1, 0);
- instr->cat1.src_type = TYPE_S16;
- instr->cat1.dst_type = TYPE_S16;
- add_dst_reg(ctx, instr, dst, 0)->flags |= IR3_REG_HALF;
- add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF;
-}
-
-/*
- * texture fetch/sample instructions:
- */
-
-struct tex_info {
- int8_t order[4];
- int8_t args;
- unsigned src_wrmask, flags;
-};
-
-struct target_info {
- uint8_t dims;
- uint8_t cube;
- uint8_t array;
- uint8_t shadow;
-};
-
-static const struct target_info tex_targets[] = {
- [TGSI_TEXTURE_1D] = { 1, 0, 0, 0 },
- [TGSI_TEXTURE_2D] = { 2, 0, 0, 0 },
- [TGSI_TEXTURE_3D] = { 3, 0, 0, 0 },
- [TGSI_TEXTURE_CUBE] = { 3, 1, 0, 0 },
- [TGSI_TEXTURE_RECT] = { 2, 0, 0, 0 },
- [TGSI_TEXTURE_SHADOW1D] = { 1, 0, 0, 1 },
- [TGSI_TEXTURE_SHADOW2D] = { 2, 0, 0, 1 },
- [TGSI_TEXTURE_SHADOWRECT] = { 2, 0, 0, 1 },
- [TGSI_TEXTURE_1D_ARRAY] = { 1, 0, 1, 0 },
- [TGSI_TEXTURE_2D_ARRAY] = { 2, 0, 1, 0 },
- [TGSI_TEXTURE_SHADOW1D_ARRAY] = { 1, 0, 1, 1 },
- [TGSI_TEXTURE_SHADOW2D_ARRAY] = { 2, 0, 1, 1 },
- [TGSI_TEXTURE_SHADOWCUBE] = { 3, 1, 0, 1 },
- [TGSI_TEXTURE_2D_MSAA] = { 2, 0, 0, 0 },
- [TGSI_TEXTURE_2D_ARRAY_MSAA] = { 2, 0, 1, 0 },
- [TGSI_TEXTURE_CUBE_ARRAY] = { 3, 1, 1, 0 },
- [TGSI_TEXTURE_SHADOWCUBE_ARRAY] = { 3, 1, 1, 1 },
-};
-
-static void
-fill_tex_info(struct ir3_compile_context *ctx,
- struct tgsi_full_instruction *inst,
- struct tex_info *info)
-{
- const struct target_info *tgt = &tex_targets[inst->Texture.Texture];
-
- if (tgt->dims == 3)
- info->flags |= IR3_INSTR_3D;
- if (tgt->array)
- info->flags |= IR3_INSTR_A;
- if (tgt->shadow)
- info->flags |= IR3_INSTR_S;
-
- switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_TXB:
- case TGSI_OPCODE_TXB2:
- case TGSI_OPCODE_TXL:
- case TGSI_OPCODE_TXF:
- info->args = 2;
- break;
- case TGSI_OPCODE_TXP:
- info->flags |= IR3_INSTR_P;
- /* fallthrough */
- case TGSI_OPCODE_TEX:
- case TGSI_OPCODE_TXD:
- info->args = 1;
- break;
- }
-
- /*
- * lay out the first argument in the proper order:
- * - actual coordinates first
- * - shadow reference
- * - array index
- * - projection w
- *
- * bias/lod go into the second arg
- */
- int arg, pos = 0;
- for (arg = 0; arg < tgt->dims; arg++)
- info->order[arg] = pos++;
- if (tgt->dims == 1)
- info->order[pos++] = -1;
- if (tgt->shadow)
- info->order[pos++] = MAX2(arg + tgt->array, 2);
- if (tgt->array)
- info->order[pos++] = arg++;
- if (info->flags & IR3_INSTR_P)
- info->order[pos++] = 3;
-
- info->src_wrmask = (1 << pos) - 1;
-
- for (; pos < 4; pos++)
- info->order[pos] = -1;
-
- assert(pos <= 4);
-}
-
-static bool check_swiz(struct tgsi_src_register *src, const int8_t order[4])
-{
- unsigned i;
- for (i = 1; (i < 4) && order[i] >= 0; i++)
- if (src_swiz(src, i) != (src_swiz(src, 0) + order[i]))
- return false;
- return true;
-}
-
-static bool is_1d(unsigned tex)
-{
- return tex_targets[tex].dims == 1;
-}
-
-static struct tgsi_src_register *
-get_tex_coord(struct ir3_compile_context *ctx,
- struct tgsi_full_instruction *inst,
- const struct tex_info *tinf)
-{
- struct tgsi_src_register *coord = &inst->Src[0].Register;
- struct ir3_instruction *instr;
- unsigned tex = inst->Texture.Texture;
- struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register *tmp_src;
- type_t type_mov = get_ftype(ctx);
- unsigned j;
-
- /* need to move things around: */
- tmp_src = get_internal_temp(ctx, &tmp_dst);
-
- for (j = 0; j < 4; j++) {
- if (tinf->order[j] < 0)
- continue;
- instr = instr_create(ctx, 1, 0); /* mov */
- instr->cat1.src_type = type_mov;
- instr->cat1.dst_type = type_mov;
- add_dst_reg(ctx, instr, &tmp_dst, j);
- add_src_reg(ctx, instr, coord,
- src_swiz(coord, tinf->order[j]));
- }
-
- /* fix up .y coord: */
- if (is_1d(tex)) {
- struct ir3_register *imm;
- instr = instr_create(ctx, 1, 0); /* mov */
- instr->cat1.src_type = type_mov;
- instr->cat1.dst_type = type_mov;
- add_dst_reg(ctx, instr, &tmp_dst, 1); /* .y */
- imm = ir3_reg_create(instr, 0, IR3_REG_IMMED);
- if (inst->Instruction.Opcode == TGSI_OPCODE_TXF)
- imm->iim_val = 0;
- else
- imm->fim_val = 0.5;
- }
-
- return tmp_src;
-}
-
-static void
-trans_samp(const struct instr_translater *t,
- struct ir3_compile_context *ctx,
- struct tgsi_full_instruction *inst)
-{
- struct ir3_instruction *instr, *collect;
- struct ir3_register *reg;
- struct tgsi_dst_register *dst = &inst->Dst[0].Register;
- struct tgsi_src_register *orig, *coord, *samp, *offset, *dpdx, *dpdy;
- struct tgsi_src_register zero;
- const struct target_info *tgt = &tex_targets[inst->Texture.Texture];
- struct tex_info tinf;
- int i;
-
- memset(&tinf, 0, sizeof(tinf));
- fill_tex_info(ctx, inst, &tinf);
- coord = get_tex_coord(ctx, inst, &tinf);
- get_immediate(ctx, &zero, 0);
-
- switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_TXB2:
- orig = &inst->Src[1].Register;
- samp = &inst->Src[2].Register;
- break;
- case TGSI_OPCODE_TXD:
- orig = &inst->Src[0].Register;
- dpdx = &inst->Src[1].Register;
- dpdy = &inst->Src[2].Register;
- samp = &inst->Src[3].Register;
- if (is_rel_or_const(dpdx))
- dpdx = get_unconst(ctx, dpdx);
- if (is_rel_or_const(dpdy))
- dpdy = get_unconst(ctx, dpdy);
- break;
- default:
- orig = &inst->Src[0].Register;
- samp = &inst->Src[1].Register;
- break;
- }
- if (tinf.args > 1 && is_rel_or_const(orig))
- orig = get_unconst(ctx, orig);
-
- /* scale up integer coords for TXF based on the LOD */
- if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
- struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register *tmp_src;
- type_t type_mov = get_utype(ctx);
-
- tmp_src = get_internal_temp(ctx, &tmp_dst);
- for (i = 0; i < tgt->dims; i++) {
- instr = instr_create(ctx, 2, OPC_SHL_B);
- add_dst_reg(ctx, instr, &tmp_dst, i);
- add_src_reg(ctx, instr, coord, src_swiz(coord, i));
- add_src_reg(ctx, instr, orig, orig->SwizzleW);
- }
- if (tgt->dims < 2) {
- instr = instr_create(ctx, 1, 0);
- instr->cat1.src_type = type_mov;
- instr->cat1.dst_type = type_mov;
- add_dst_reg(ctx, instr, &tmp_dst, i);
- add_src_reg(ctx, instr, &zero, 0);
- i++;
- }
- if (tgt->array) {
- instr = instr_create(ctx, 1, 0);
- instr->cat1.src_type = type_mov;
- instr->cat1.dst_type = type_mov;
- add_dst_reg(ctx, instr, &tmp_dst, i);
- add_src_reg(ctx, instr, coord, src_swiz(coord, i));
- }
- coord = tmp_src;
- }
-
- if (inst->Texture.NumOffsets) {
- struct tgsi_texture_offset *tex_offset = &inst->TexOffsets[0];
- struct tgsi_src_register offset_src = {0};
-
- offset_src.File = tex_offset->File;
- offset_src.Index = tex_offset->Index;
- offset_src.SwizzleX = tex_offset->SwizzleX;
- offset_src.SwizzleY = tex_offset->SwizzleY;
- offset_src.SwizzleZ = tex_offset->SwizzleZ;
- offset = get_unconst(ctx, &offset_src);
- tinf.flags |= IR3_INSTR_O;
- }
-
- instr = instr_create(ctx, 5, t->opc);
- if (ctx->integer_s & (1 << samp->Index))
- instr->cat5.type = get_utype(ctx);
- else
- instr->cat5.type = get_ftype(ctx);
- instr->cat5.samp = samp->Index;
- instr->cat5.tex = samp->Index;
- instr->flags |= tinf.flags;
-
- add_dst_reg_wrmask(ctx, instr, dst, 0, dst->WriteMask);
-
- reg = ir3_reg_create(instr, 0, IR3_REG_SSA);
-
- collect = ir3_instr_create2(ctx->block, -1, OPC_META_FI, 12);
- ir3_reg_create(collect, 0, 0);
- for (i = 0; i < 4; i++) {
- if (tinf.src_wrmask & (1 << i))
- ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA),
- coord, src_swiz(coord, i));
- else if (tinf.src_wrmask & ~((1 << i) - 1))
- ir3_reg_create(collect, 0, 0);
- }
-
- /* Attach derivatives onto the end of the fan-in. Derivatives start after
- * the 4th argument, so make sure that fi is padded up to 4 first.
- */
- if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
- while (collect->regs_count < 5)
- ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), &zero, 0);
- for (i = 0; i < tgt->dims; i++)
- ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), dpdx, i);
- if (tgt->dims < 2)
- ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), &zero, 0);
- for (i = 0; i < tgt->dims; i++)
- ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), dpdy, i);
- if (tgt->dims < 2)
- ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), &zero, 0);
- tinf.src_wrmask |= ((1 << (2 * MAX2(tgt->dims, 2))) - 1) << 4;
- }
-
- reg->instr = collect;
- reg->wrmask = tinf.src_wrmask;
-
- /* The second argument contains the offsets, followed by the lod/bias
- * argument. This is constructed more manually due to the dynamic nature.
- */
- if (inst->Texture.NumOffsets == 0 && tinf.args == 1)
- return;
-
- reg = ir3_reg_create(instr, 0, IR3_REG_SSA);
-
- collect = ir3_instr_create2(ctx->block, -1, OPC_META_FI, 5);
- ir3_reg_create(collect, 0, 0);
-
- if (inst->Texture.NumOffsets) {
- for (i = 0; i < tgt->dims; i++)
- ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA),
- offset, i);
- if (tgt->dims < 2)
- ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), &zero, 0);
- }
- if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2)
- ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA),
- orig, orig->SwizzleX);
- else if (tinf.args > 1)
- ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA),
- orig, orig->SwizzleW);
-
- reg->instr = collect;
- reg->wrmask = (1 << (collect->regs_count - 1)) - 1;
-}
-
-static void
-trans_txq(const struct instr_translater *t,
- struct ir3_compile_context *ctx,
- struct tgsi_full_instruction *inst)
-{
- struct ir3_instruction *instr;
- struct tgsi_dst_register *dst = &inst->Dst[0].Register;
- struct tgsi_src_register *level = &inst->Src[0].Register;
- struct tgsi_src_register *samp = &inst->Src[1].Register;
- const struct target_info *tgt = &tex_targets[inst->Texture.Texture];
- struct tex_info tinf;
-
- memset(&tinf, 0, sizeof(tinf));
- fill_tex_info(ctx, inst, &tinf);
- if (is_rel_or_const(level))
- level = get_unconst(ctx, level);
-
- instr = instr_create(ctx, 5, OPC_GETSIZE);
- instr->cat5.type = get_utype(ctx);
- instr->cat5.samp = samp->Index;
- instr->cat5.tex = samp->Index;
- instr->flags |= tinf.flags;
-
- if (tgt->array && (dst->WriteMask & (1 << tgt->dims))) {
- /* Array size actually ends up in .w rather than .z. This doesn't
- * matter for miplevel 0, but for higher mips the value in z is
- * minified whereas w stays. Also, the value in TEX_CONST_3_DEPTH is
- * returned, which means that we have to add 1 to it for arrays.
- */
- struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register *tmp_src;
- type_t type_mov = get_utype(ctx);
-
- tmp_src = get_internal_temp(ctx, &tmp_dst);
- add_dst_reg_wrmask(ctx, instr, &tmp_dst, 0,
- dst->WriteMask | TGSI_WRITEMASK_W);
- add_src_reg_wrmask(ctx, instr, level, level->SwizzleX, 0x1);
-
- if (dst->WriteMask & TGSI_WRITEMASK_X) {
- instr = instr_create(ctx, 1, 0);
- instr->cat1.src_type = type_mov;
- instr->cat1.dst_type = type_mov;
- add_dst_reg(ctx, instr, dst, 0);
- add_src_reg(ctx, instr, tmp_src, src_swiz(tmp_src, 0));
- }
-
- if (tgt->dims == 2) {
- if (dst->WriteMask & TGSI_WRITEMASK_Y) {
- instr = instr_create(ctx, 1, 0);
- instr->cat1.src_type = type_mov;
- instr->cat1.dst_type = type_mov;
- add_dst_reg(ctx, instr, dst, 1);
- add_src_reg(ctx, instr, tmp_src, src_swiz(tmp_src, 1));
- }
- }
-
- instr = instr_create(ctx, 2, OPC_ADD_U);
- add_dst_reg(ctx, instr, dst, tgt->dims);
- add_src_reg(ctx, instr, tmp_src, src_swiz(tmp_src, 3));
- ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1;
- } else {
- add_dst_reg_wrmask(ctx, instr, dst, 0, dst->WriteMask);
- add_src_reg_wrmask(ctx, instr, level, level->SwizzleX, 0x1);
- }
-
- if (dst->WriteMask & TGSI_WRITEMASK_W) {
- /* The # of levels comes from getinfo.z. We need to add 1 to it, since
- * the value in TEX_CONST_0 is zero-based.
- */
- struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register *tmp_src;
-
- tmp_src = get_internal_temp(ctx, &tmp_dst);
- instr = instr_create(ctx, 5, OPC_GETINFO);
- instr->cat5.type = get_utype(ctx);
- instr->cat5.samp = samp->Index;
- instr->cat5.tex = samp->Index;
- add_dst_reg_wrmask(ctx, instr, &tmp_dst, 0, TGSI_WRITEMASK_Z);
-
- instr = instr_create(ctx, 2, OPC_ADD_U);
- add_dst_reg(ctx, instr, dst, 3);
- add_src_reg(ctx, instr, tmp_src, src_swiz(tmp_src, 2));
- ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1;
- }
-}
-
-/* DDX/DDY */
-static void
-trans_deriv(const struct instr_translater *t,
- struct ir3_compile_context *ctx,
- struct tgsi_full_instruction *inst)
-{
- struct ir3_instruction *instr;
- struct tgsi_dst_register *dst = &inst->Dst[0].Register;
- struct tgsi_src_register *src = &inst->Src[0].Register;
- static const int8_t order[4] = {0, 1, 2, 3};
-
- if (!check_swiz(src, order)) {
- struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register *tmp_src;
-
- tmp_src = get_internal_temp(ctx, &tmp_dst);
- create_mov(ctx, &tmp_dst, src);
-
- src = tmp_src;
- }
-
- /* This might be a workaround for hw bug? Blob compiler always
- * seems to work two components at a time for dsy/dsx. It does
- * actually seem to work in some cases (or at least some piglit
- * tests) for four components at a time. But seems more reliable
- * to split this into two instructions like the blob compiler
- * does:
- */
-
- instr = instr_create(ctx, 5, t->opc);
- instr->cat5.type = get_ftype(ctx);
- add_dst_reg_wrmask(ctx, instr, dst, 0, dst->WriteMask & 0x3);
- add_src_reg_wrmask(ctx, instr, src, 0, dst->WriteMask & 0x3);
-
- instr = instr_create(ctx, 5, t->opc);
- instr->cat5.type = get_ftype(ctx);
- add_dst_reg_wrmask(ctx, instr, dst, 2, (dst->WriteMask >> 2) & 0x3);
- add_src_reg_wrmask(ctx, instr, src, 2, (dst->WriteMask >> 2) & 0x3);
-}
-
-/*
- * SEQ(a,b) = (a == b) ? 1.0 : 0.0
- * cmps.f.eq tmp0, a, b
- * cov.u16f16 dst, tmp0
- *
- * SNE(a,b) = (a != b) ? 1.0 : 0.0
- * cmps.f.ne tmp0, a, b
- * cov.u16f16 dst, tmp0
- *
- * SGE(a,b) = (a >= b) ? 1.0 : 0.0
- * cmps.f.ge tmp0, a, b
- * cov.u16f16 dst, tmp0
- *
- * SLE(a,b) = (a <= b) ? 1.0 : 0.0
- * cmps.f.le tmp0, a, b
- * cov.u16f16 dst, tmp0
- *
- * SGT(a,b) = (a > b) ? 1.0 : 0.0
- * cmps.f.gt tmp0, a, b
- * cov.u16f16 dst, tmp0
- *
- * SLT(a,b) = (a < b) ? 1.0 : 0.0
- * cmps.f.lt tmp0, a, b
- * cov.u16f16 dst, tmp0
- *
- * CMP(a,b,c) = (a < 0.0) ? b : c
- * cmps.f.lt tmp0, a, {0.0}
- * sel.b16 dst, b, tmp0, c
- */
-static void
-trans_cmp(const struct instr_translater *t,
- struct ir3_compile_context *ctx,
- struct tgsi_full_instruction *inst)
-{
- struct ir3_instruction *instr;
- struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register *tmp_src;
- struct tgsi_src_register constval0;
- /* final instruction for CMP() uses orig src1 and src2: */
- struct tgsi_dst_register *dst = get_dst(ctx, inst);
- struct tgsi_src_register *a0, *a1, *a2;
- unsigned condition;
-
- tmp_src = get_internal_temp(ctx, &tmp_dst);
-
- a0 = &inst->Src[0].Register; /* a */
- a1 = &inst->Src[1].Register; /* b */
-
- switch (t->tgsi_opc) {
- case TGSI_OPCODE_SEQ:
- case TGSI_OPCODE_FSEQ:
- condition = IR3_COND_EQ;
- break;
- case TGSI_OPCODE_SNE:
- case TGSI_OPCODE_FSNE:
- condition = IR3_COND_NE;
- break;
- case TGSI_OPCODE_SGE:
- case TGSI_OPCODE_FSGE:
- condition = IR3_COND_GE;
- break;
- case TGSI_OPCODE_SLT:
- case TGSI_OPCODE_FSLT:
- condition = IR3_COND_LT;
- break;
- case TGSI_OPCODE_SLE:
- condition = IR3_COND_LE;
- break;
- case TGSI_OPCODE_SGT:
- condition = IR3_COND_GT;
- break;
- case TGSI_OPCODE_CMP:
- get_immediate(ctx, &constval0, fui(0.0));
- a0 = &inst->Src[0].Register; /* a */
- a1 = &constval0; /* {0.0} */
- condition = IR3_COND_LT;
- break;
- default:
- compile_assert(ctx, 0);
- return;
- }
-
- if (is_const(a0) && is_const(a1))
- a0 = get_unconst(ctx, a0);
-
- /* cmps.f.<cond> tmp, a0, a1 */
- instr = instr_create(ctx, 2, OPC_CMPS_F);
- instr->cat2.condition = condition;
- vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);
-
- switch (t->tgsi_opc) {
- case TGSI_OPCODE_SEQ:
- case TGSI_OPCODE_SGE:
- case TGSI_OPCODE_SLE:
- case TGSI_OPCODE_SNE:
- case TGSI_OPCODE_SGT:
- case TGSI_OPCODE_SLT:
- /* cov.u16f16 dst, tmp0 */
- instr = instr_create(ctx, 1, 0);
- instr->cat1.src_type = get_utype(ctx);
- instr->cat1.dst_type = get_ftype(ctx);
- vectorize(ctx, instr, dst, 1, tmp_src, 0);
- break;
- case TGSI_OPCODE_FSEQ:
- case TGSI_OPCODE_FSGE:
- case TGSI_OPCODE_FSNE:
- case TGSI_OPCODE_FSLT:
- /* absneg.s dst, (neg)tmp0 */
- instr = instr_create(ctx, 2, OPC_ABSNEG_S);
- vectorize(ctx, instr, dst, 1, tmp_src, IR3_REG_SNEG);
- break;
- case TGSI_OPCODE_CMP:
- a1 = &inst->Src[1].Register;
- a2 = &inst->Src[2].Register;
- /* sel.{b32,b16} dst, src2, tmp, src1 */
- instr = instr_create(ctx, 3, OPC_SEL_B32);
- vectorize(ctx, instr, dst, 3, a1, 0, tmp_src, 0, a2, 0);
- break;
- }
-
- put_dst(ctx, inst, dst);
-}
-
-/*
- * USNE(a,b) = (a != b) ? ~0 : 0
- * cmps.u32.ne dst, a, b
- *
- * USEQ(a,b) = (a == b) ? ~0 : 0
- * cmps.u32.eq dst, a, b
- *
- * ISGE(a,b) = (a > b) ? ~0 : 0
- * cmps.s32.ge dst, a, b
- *
- * USGE(a,b) = (a > b) ? ~0 : 0
- * cmps.u32.ge dst, a, b
- *
- * ISLT(a,b) = (a < b) ? ~0 : 0
- * cmps.s32.lt dst, a, b
- *
- * USLT(a,b) = (a < b) ? ~0 : 0
- * cmps.u32.lt dst, a, b
- *
- */
-static void
-trans_icmp(const struct instr_translater *t,
- struct ir3_compile_context *ctx,
- struct tgsi_full_instruction *inst)
-{
- struct ir3_instruction *instr;
- struct tgsi_dst_register *dst = get_dst(ctx, inst);
- struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register *tmp_src;
- struct tgsi_src_register *a0, *a1;
- unsigned condition;
-
- a0 = &inst->Src[0].Register; /* a */
- a1 = &inst->Src[1].Register; /* b */
-
- switch (t->tgsi_opc) {
- case TGSI_OPCODE_USNE:
- condition = IR3_COND_NE;
- break;
- case TGSI_OPCODE_USEQ:
- condition = IR3_COND_EQ;
- break;
- case TGSI_OPCODE_ISGE:
- case TGSI_OPCODE_USGE:
- condition = IR3_COND_GE;
- break;
- case TGSI_OPCODE_ISLT:
- case TGSI_OPCODE_USLT:
- condition = IR3_COND_LT;
- break;
-
- default:
- compile_assert(ctx, 0);
- return;
- }
-
- if (is_const(a0) && is_const(a1))
- a0 = get_unconst(ctx, a0);
-
- tmp_src = get_internal_temp(ctx, &tmp_dst);
- /* cmps.{u32,s32}.<cond> tmp, a0, a1 */
- instr = instr_create(ctx, 2, t->opc);
- instr->cat2.condition = condition;
- vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);
-
- /* absneg.s dst, (neg)tmp */
- instr = instr_create(ctx, 2, OPC_ABSNEG_S);
- vectorize(ctx, instr, dst, 1, tmp_src, IR3_REG_SNEG);
-
- put_dst(ctx, inst, dst);
-}
-
-/*
- * UCMP(a,b,c) = a ? b : c
- * sel.b16 dst, b, a, c
- */
-static void
-trans_ucmp(const struct instr_translater *t,
- struct ir3_compile_context *ctx,
- struct tgsi_full_instruction *inst)
-{
- struct ir3_instruction *instr;
- struct tgsi_dst_register *dst = get_dst(ctx, inst);
- struct tgsi_src_register *a0, *a1, *a2;
-
- a0 = &inst->Src[0].Register; /* a */
- a1 = &inst->Src[1].Register; /* b */
- a2 = &inst->Src[2].Register; /* c */
-
- if (is_rel_or_const(a0))
- a0 = get_unconst(ctx, a0);
-
- /* sel.{b32,b16} dst, b, a, c */
- instr = instr_create(ctx, 3, OPC_SEL_B32);
- vectorize(ctx, instr, dst, 3, a1, 0, a0, 0, a2, 0);
- put_dst(ctx, inst, dst);
-}
-
-/*
- * ISSG(a) = a < 0 ? -1 : a > 0 ? 1 : 0
- * cmps.s.lt tmp_neg, a, 0 # 1 if a is negative
- * cmps.s.gt tmp_pos, a, 0 # 1 if a is positive
- * sub.u dst, tmp_pos, tmp_neg
- */
-static void
-trans_issg(const struct instr_translater *t,
- struct ir3_compile_context *ctx,
- struct tgsi_full_instruction *inst)
-{
- struct ir3_instruction *instr;
- struct tgsi_dst_register *dst = get_dst(ctx, inst);
- struct tgsi_src_register *a = &inst->Src[0].Register;
- struct tgsi_dst_register neg_dst, pos_dst;
- struct tgsi_src_register *neg_src, *pos_src;
-
- neg_src = get_internal_temp(ctx, &neg_dst);
- pos_src = get_internal_temp(ctx, &pos_dst);
-
- /* cmps.s.lt neg, a, 0 */
- instr = instr_create(ctx, 2, OPC_CMPS_S);
- instr->cat2.condition = IR3_COND_LT;
- vectorize(ctx, instr, &neg_dst, 2, a, 0, 0, IR3_REG_IMMED);
-
- /* cmps.s.gt pos, a, 0 */
- instr = instr_create(ctx, 2, OPC_CMPS_S);
- instr->cat2.condition = IR3_COND_GT;
- vectorize(ctx, instr, &pos_dst, 2, a, 0, 0, IR3_REG_IMMED);
-
- /* sub.u dst, pos, neg */
- instr = instr_create(ctx, 2, OPC_SUB_U);
- vectorize(ctx, instr, dst, 2, pos_src, 0, neg_src, 0);
-
- put_dst(ctx, inst, dst);
-}
-
-
-
-/*
- * Conditional / Flow control
- */
-
-static void
-push_branch(struct ir3_compile_context *ctx, bool inv,
- struct ir3_instruction *instr, struct ir3_instruction *cond)
-{
- unsigned int idx = ctx->branch_count++;
- compile_assert(ctx, idx < ARRAY_SIZE(ctx->branch));
- ctx->branch[idx].instr = instr;
- ctx->branch[idx].inv = inv;
- /* else side of branch has same condition: */
- if (!inv)
- ctx->branch[idx].cond = cond;
-}
-
-static struct ir3_instruction *
-pop_branch(struct ir3_compile_context *ctx)
-{
- unsigned int idx = --ctx->branch_count;
- return ctx->branch[idx].instr;
-}
-
-static void
-trans_if(const struct instr_translater *t,
- struct ir3_compile_context *ctx,
- struct tgsi_full_instruction *inst)
-{
- struct ir3_instruction *instr, *cond;
- struct tgsi_src_register *src = &inst->Src[0].Register;
- struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register *tmp_src;
- struct tgsi_src_register constval;
-
- get_immediate(ctx, &constval, fui(0.0));
- tmp_src = get_internal_temp(ctx, &tmp_dst);
-
- if (is_const(src))
- src = get_unconst(ctx, src);
-
- /* cmps.{f,u}.ne tmp0, b, {0.0} */
- instr = instr_create(ctx, 2, t->opc);
- add_dst_reg(ctx, instr, &tmp_dst, 0);
- add_src_reg(ctx, instr, src, src->SwizzleX);
- add_src_reg(ctx, instr, &constval, constval.SwizzleX);
- instr->cat2.condition = IR3_COND_NE;
-
- compile_assert(ctx, instr->regs[1]->flags & IR3_REG_SSA); /* because get_unconst() */
- cond = instr->regs[1]->instr;
-
- /* meta:flow tmp0 */
- instr = instr_create(ctx, -1, OPC_META_FLOW);
- ir3_reg_create(instr, 0, 0); /* dummy dst */
- add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_X);
-
- push_branch(ctx, false, instr, cond);
- instr->flow.if_block = push_block(ctx);
-}
-
-static void
-trans_else(const struct instr_translater *t,
- struct ir3_compile_context *ctx,
- struct tgsi_full_instruction *inst)
-{
- struct ir3_instruction *instr;
-
- pop_block(ctx);
-
- instr = pop_branch(ctx);
-
- compile_assert(ctx, (instr->category == -1) &&
- (instr->opc == OPC_META_FLOW));
-
- push_branch(ctx, true, instr, NULL);
- instr->flow.else_block = push_block(ctx);
-}
-
-static struct ir3_instruction *
-find_temporary(struct ir3_block *block, unsigned n)
-{
- if (block->parent && !block->temporaries[n])
- return find_temporary(block->parent, n);
- return block->temporaries[n];
-}
-
-static struct ir3_instruction *
-find_output(struct ir3_block *block, unsigned n)
-{
- if (block->parent && !block->outputs[n])
- return find_output(block->parent, n);
- return block->outputs[n];
-}
-
-static struct ir3_instruction *
-create_phi(struct ir3_compile_context *ctx, struct ir3_instruction *cond,
- struct ir3_instruction *a, struct ir3_instruction *b)
-{
- struct ir3_instruction *phi;
-
- compile_assert(ctx, cond);
-
- /* Either side of the condition could be null.. which
- * indicates a variable written on only one side of the
- * branch. Normally this should only be variables not
- * used outside of that side of the branch. So we could
- * just 'return a ? a : b;' in that case. But for better
- * defined undefined behavior we just stick in imm{0.0}.
- * In the common case of a value only used within the
- * one side of the branch, the PHI instruction will not
- * get scheduled
- */
- if (!a)
- a = create_immed(ctx, 0.0);
- if (!b)
- b = create_immed(ctx, 0.0);
-
- phi = instr_create(ctx, -1, OPC_META_PHI);
- ir3_reg_create(phi, 0, 0); /* dummy dst */
- ir3_reg_create(phi, 0, IR3_REG_SSA)->instr = cond;
- ir3_reg_create(phi, 0, IR3_REG_SSA)->instr = a;
- ir3_reg_create(phi, 0, IR3_REG_SSA)->instr = b;
-
- return phi;
-}
-
-static void
-trans_endif(const struct instr_translater *t,
- struct ir3_compile_context *ctx,
- struct tgsi_full_instruction *inst)
-{
- struct ir3_instruction *instr;
- struct ir3_block *ifb, *elseb;
- struct ir3_instruction **ifout, **elseout;
- unsigned i, ifnout = 0, elsenout = 0;
-
- pop_block(ctx);
-
- instr = pop_branch(ctx);
-
- compile_assert(ctx, (instr->category == -1) &&
- (instr->opc == OPC_META_FLOW));
-
- ifb = instr->flow.if_block;
- elseb = instr->flow.else_block;
- /* if there is no else block, the parent block is used for the
- * branch-not-taken src of the PHI instructions:
- */
- if (!elseb)
- elseb = ifb->parent;
-
- /* worst case sizes: */
- ifnout = ifb->ntemporaries + ifb->noutputs;
- elsenout = elseb->ntemporaries + elseb->noutputs;
-
- ifout = ir3_alloc(ctx->ir, sizeof(ifb->outputs[0]) * ifnout);
- if (elseb != ifb->parent)
- elseout = ir3_alloc(ctx->ir, sizeof(ifb->outputs[0]) * elsenout);
-
- ifnout = 0;
- elsenout = 0;
-
- /* generate PHI instructions for any temporaries written: */
- for (i = 0; i < ifb->ntemporaries; i++) {
- struct ir3_instruction *a = ifb->temporaries[i];
- struct ir3_instruction *b = elseb->temporaries[i];
-
- /* if temporary written in if-block, or if else block
- * is present and temporary written in else-block:
- */
- if (a || ((elseb != ifb->parent) && b)) {
- struct ir3_instruction *phi;
-
- /* if only written on one side, find the closest
- * enclosing update on other side:
- */
- if (!a)
- a = find_temporary(ifb, i);
- if (!b)
- b = find_temporary(elseb, i);
-
- ifout[ifnout] = a;
- a = create_output(ifb, a, ifnout++);
-
- if (elseb != ifb->parent) {
- elseout[elsenout] = b;
- b = create_output(elseb, b, elsenout++);
- }
-
- phi = create_phi(ctx, instr, a, b);
- ctx->block->temporaries[i] = phi;
- }
- }
-
- compile_assert(ctx, ifb->noutputs == elseb->noutputs);
-
- /* .. and any outputs written: */
- for (i = 0; i < ifb->noutputs; i++) {
- struct ir3_instruction *a = ifb->outputs[i];
- struct ir3_instruction *b = elseb->outputs[i];
-
- /* if output written in if-block, or if else block
- * is present and output written in else-block:
- */
- if (a || ((elseb != ifb->parent) && b)) {
- struct ir3_instruction *phi;
-
- /* if only written on one side, find the closest
- * enclosing update on other side:
- */
- if (!a)
- a = find_output(ifb, i);
- if (!b)
- b = find_output(elseb, i);
-
- ifout[ifnout] = a;
- a = create_output(ifb, a, ifnout++);
-
- if (elseb != ifb->parent) {
- elseout[elsenout] = b;
- b = create_output(elseb, b, elsenout++);
- }
-
- phi = create_phi(ctx, instr, a, b);
- ctx->block->outputs[i] = phi;
- }
- }
-
- ifb->noutputs = ifnout;
- ifb->outputs = ifout;
-
- if (elseb != ifb->parent) {
- elseb->noutputs = elsenout;
- elseb->outputs = elseout;
- }
-
- // TODO maybe we want to compact block->inputs?
-}
-
-/*
- * Kill
- */
-
-static void
-trans_kill(const struct instr_translater *t,
- struct ir3_compile_context *ctx,
- struct tgsi_full_instruction *inst)
-{
- struct ir3_instruction *instr, *immed, *cond = NULL;
- bool inv = false;
-
- /* unconditional kill, use enclosing if condition: */
- if (ctx->branch_count > 0) {
- unsigned int idx = ctx->branch_count - 1;
- cond = ctx->branch[idx].cond;
- inv = ctx->branch[idx].inv;
- } else {
- cond = create_immed(ctx, 1.0);
- }
-
- compile_assert(ctx, cond);
-
- immed = create_immed(ctx, 0.0);
-
- /* cmps.f.ne p0.x, cond, {0.0} */
- instr = instr_create(ctx, 2, OPC_CMPS_F);
- instr->cat2.condition = IR3_COND_NE;
- ir3_reg_create(instr, regid(REG_P0, 0), 0);
- ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = cond;
- ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = immed;
- cond = instr;
-
- /* kill p0.x */
- instr = instr_create(ctx, 0, OPC_KILL);
- instr->cat0.inv = inv;
- ir3_reg_create(instr, 0, 0); /* dummy dst */
- ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = cond;
-
- ctx->kill[ctx->kill_count++] = instr;
-
- ctx->so->has_kill = true;
-}
-
-/*
- * Kill-If
- */
-
-static void
-trans_killif(const struct instr_translater *t,
- struct ir3_compile_context *ctx,
- struct tgsi_full_instruction *inst)
-{
- struct tgsi_src_register *src = &inst->Src[0].Register;
- struct ir3_instruction *instr, *immed, *cond = NULL;
- bool inv = false;
-
- immed = create_immed(ctx, 0.0);
-
- /* cmps.f.ne p0.x, cond, {0.0} */
- instr = instr_create(ctx, 2, OPC_CMPS_F);
- instr->cat2.condition = IR3_COND_NE;
- ir3_reg_create(instr, regid(REG_P0, 0), 0);
- ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = immed;
- add_src_reg(ctx, instr, src, src->SwizzleX);
-
- cond = instr;
-
- /* kill p0.x */
- instr = instr_create(ctx, 0, OPC_KILL);
- instr->cat0.inv = inv;
- ir3_reg_create(instr, 0, 0); /* dummy dst */
- ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = cond;
-
- ctx->kill[ctx->kill_count++] = instr;
-
- ctx->so->has_kill = true;
-
-}
-/*
- * I2F / U2F / F2I / F2U
- */
-
-static void
-trans_cov(const struct instr_translater *t,
- struct ir3_compile_context *ctx,
- struct tgsi_full_instruction *inst)
-{
- struct ir3_instruction *instr;
- struct tgsi_dst_register *dst = get_dst(ctx, inst);
- struct tgsi_src_register *src = &inst->Src[0].Register;
-
- // cov.f32s32 dst, tmp0 /
- instr = instr_create(ctx, 1, 0);
- switch (t->tgsi_opc) {
- case TGSI_OPCODE_U2F:
- instr->cat1.src_type = TYPE_U32;
- instr->cat1.dst_type = TYPE_F32;
- break;
- case TGSI_OPCODE_I2F:
- instr->cat1.src_type = TYPE_S32;
- instr->cat1.dst_type = TYPE_F32;
- break;
- case TGSI_OPCODE_F2U:
- instr->cat1.src_type = TYPE_F32;
- instr->cat1.dst_type = TYPE_U32;
- break;
- case TGSI_OPCODE_F2I:
- instr->cat1.src_type = TYPE_F32;
- instr->cat1.dst_type = TYPE_S32;
- break;
-
- }
- vectorize(ctx, instr, dst, 1, src, 0);
- put_dst(ctx, inst, dst);
-}
-
-/*
- * UMUL / UMAD
- *
- * There is no 32-bit multiply instruction, so splitting a and b into high and
- * low components, we get that
- *
- * dst = al * bl + ah * bl << 16 + al * bh << 16
- *
- * mull.u tmp0, a, b (mul low, i.e. al * bl)
- * madsh.m16 tmp1, a, b, tmp0 (mul-add shift high mix, i.e. ah * bl << 16)
- * madsh.m16 dst, b, a, tmp1 (i.e. al * bh << 16)
- *
- * For UMAD, add in the extra argument after mull.u.
- */
-static void
-trans_umul(const struct instr_translater *t,
- struct ir3_compile_context *ctx,
- struct tgsi_full_instruction *inst)
-{
- struct ir3_instruction *instr;
- struct tgsi_dst_register *dst = get_dst(ctx, inst);
- struct tgsi_src_register *a = &inst->Src[0].Register;
- struct tgsi_src_register *b = &inst->Src[1].Register;
-
- struct tgsi_dst_register tmp0_dst, tmp1_dst;
- struct tgsi_src_register *tmp0_src, *tmp1_src;
-
- tmp0_src = get_internal_temp(ctx, &tmp0_dst);
- tmp1_src = get_internal_temp(ctx, &tmp1_dst);
-
- if (is_rel_or_const(a))
- a = get_unconst(ctx, a);
- if (is_rel_or_const(b))
- b = get_unconst(ctx, b);
-
- /* mull.u tmp0, a, b */
- instr = instr_create(ctx, 2, OPC_MULL_U);
- vectorize(ctx, instr, &tmp0_dst, 2, a, 0, b, 0);
-
- if (t->tgsi_opc == TGSI_OPCODE_UMAD) {
- struct tgsi_src_register *c = &inst->Src[2].Register;
-
- /* add.u tmp0, tmp0, c */
- instr = instr_create(ctx, 2, OPC_ADD_U);
- vectorize(ctx, instr, &tmp0_dst, 2, tmp0_src, 0, c, 0);
- }
-
- /* madsh.m16 tmp1, a, b, tmp0 */
- instr = instr_create(ctx, 3, OPC_MADSH_M16);
- vectorize(ctx, instr, &tmp1_dst, 3, a, 0, b, 0, tmp0_src, 0);
-
- /* madsh.m16 dst, b, a, tmp1 */
- instr = instr_create(ctx, 3, OPC_MADSH_M16);
- vectorize(ctx, instr, dst, 3, b, 0, a, 0, tmp1_src, 0);
- put_dst(ctx, inst, dst);
-}
-
-/*
- * IDIV / UDIV / MOD / UMOD
- *
- * See NV50LegalizeSSA::handleDIV for the origin of this implementation. For
- * MOD/UMOD, it becomes a - [IU]DIV(a, modulus) * modulus.
- */
-static void
-trans_idiv(const struct instr_translater *t,
- struct ir3_compile_context *ctx,
- struct tgsi_full_instruction *inst)
-{
- struct ir3_instruction *instr;
- struct tgsi_dst_register *dst = get_dst(ctx, inst), *premod_dst = dst;
- struct tgsi_src_register *a = &inst->Src[0].Register;
- struct tgsi_src_register *b = &inst->Src[1].Register;
-
- struct tgsi_dst_register af_dst, bf_dst, q_dst, r_dst, a_dst, b_dst;
- struct tgsi_src_register *af_src, *bf_src, *q_src, *r_src, *a_src, *b_src;
-
- struct tgsi_src_register negative_2, thirty_one;
- type_t src_type;
-
- if (t->tgsi_opc == TGSI_OPCODE_IDIV || t->tgsi_opc == TGSI_OPCODE_MOD)
- src_type = get_stype(ctx);
- else
- src_type = get_utype(ctx);
-
- af_src = get_internal_temp(ctx, &af_dst);
- bf_src = get_internal_temp(ctx, &bf_dst);
- q_src = get_internal_temp(ctx, &q_dst);
- r_src = get_internal_temp(ctx, &r_dst);
- a_src = get_internal_temp(ctx, &a_dst);
- b_src = get_internal_temp(ctx, &b_dst);
-
- get_immediate(ctx, &negative_2, -2);
- get_immediate(ctx, &thirty_one, 31);
-
- if (t->tgsi_opc == TGSI_OPCODE_MOD || t->tgsi_opc == TGSI_OPCODE_UMOD)
- premod_dst = &q_dst;
-
- /* cov.[us]32f32 af, numerator */
- instr = instr_create(ctx, 1, 0);
- instr->cat1.src_type = src_type;
- instr->cat1.dst_type = get_ftype(ctx);
- vectorize(ctx, instr, &af_dst, 1, a, 0);
-
- /* cov.[us]32f32 bf, denominator */
- instr = instr_create(ctx, 1, 0);
- instr->cat1.src_type = src_type;
- instr->cat1.dst_type = get_ftype(ctx);
- vectorize(ctx, instr, &bf_dst, 1, b, 0);
-
- /* Get the absolute values for IDIV */
- if (type_sint(src_type)) {
- /* absneg.f af, (abs)af */
- instr = instr_create(ctx, 2, OPC_ABSNEG_F);
- vectorize(ctx, instr, &af_dst, 1, af_src, IR3_REG_FABS);
-
- /* absneg.f bf, (abs)bf */
- instr = instr_create(ctx, 2, OPC_ABSNEG_F);
- vectorize(ctx, instr, &bf_dst, 1, bf_src, IR3_REG_FABS);
-
- /* absneg.s a, (abs)numerator */
- instr = instr_create(ctx, 2, OPC_ABSNEG_S);
- vectorize(ctx, instr, &a_dst, 1, a, IR3_REG_SABS);
-
- /* absneg.s b, (abs)denominator */
- instr = instr_create(ctx, 2, OPC_ABSNEG_S);
- vectorize(ctx, instr, &b_dst, 1, b, IR3_REG_SABS);
- } else {
- /* mov.u32u32 a, numerator */
- instr = instr_create(ctx, 1, 0);
- instr->cat1.src_type = src_type;
- instr->cat1.dst_type = src_type;
- vectorize(ctx, instr, &a_dst, 1, a, 0);
-
- /* mov.u32u32 b, denominator */
- instr = instr_create(ctx, 1, 0);
- instr->cat1.src_type = src_type;
- instr->cat1.dst_type = src_type;
- vectorize(ctx, instr, &b_dst, 1, b, 0);
- }
-
- /* rcp.f bf, bf */
- instr = instr_create(ctx, 4, OPC_RCP);
- vectorize(ctx, instr, &bf_dst, 1, bf_src, 0);
-
- /* That's right, subtract 2 as an integer from the float */
- /* add.u bf, bf, -2 */
- instr = instr_create(ctx, 2, OPC_ADD_U);
- vectorize(ctx, instr, &bf_dst, 2, bf_src, 0, &negative_2, 0);
-
- /* mul.f q, af, bf */
- instr = instr_create(ctx, 2, OPC_MUL_F);
- vectorize(ctx, instr, &q_dst, 2, af_src, 0, bf_src, 0);
-
- /* cov.f32[us]32 q, q */
- instr = instr_create(ctx, 1, 0);
- instr->cat1.src_type = get_ftype(ctx);
- instr->cat1.dst_type = src_type;
- vectorize(ctx, instr, &q_dst, 1, q_src, 0);
-
- /* integer multiply q by b */
- /* mull.u r, q, b */
- instr = instr_create(ctx, 2, OPC_MULL_U);
- vectorize(ctx, instr, &r_dst, 2, q_src, 0, b_src, 0);
-
- /* madsh.m16 r, q, b, r */
- instr = instr_create(ctx, 3, OPC_MADSH_M16);
- vectorize(ctx, instr, &r_dst, 3, q_src, 0, b_src, 0, r_src, 0);
-
- /* madsh.m16, r, b, q, r */
- instr = instr_create(ctx, 3, OPC_MADSH_M16);
- vectorize(ctx, instr, &r_dst, 3, b_src, 0, q_src, 0, r_src, 0);
-
- /* sub.u r, a, r */
- instr = instr_create(ctx, 2, OPC_SUB_U);
- vectorize(ctx, instr, &r_dst, 2, a_src, 0, r_src, 0);
-
- /* cov.u32f32, r, r */
- instr = instr_create(ctx, 1, 0);
- instr->cat1.src_type = get_utype(ctx);
- instr->cat1.dst_type = get_ftype(ctx);
- vectorize(ctx, instr, &r_dst, 1, r_src, 0);
-
- /* mul.f r, r, bf */
- instr = instr_create(ctx, 2, OPC_MUL_F);
- vectorize(ctx, instr, &r_dst, 2, r_src, 0, bf_src, 0);
-
- /* cov.f32u32 r, r */
- instr = instr_create(ctx, 1, 0);
- instr->cat1.src_type = get_ftype(ctx);
- instr->cat1.dst_type = get_utype(ctx);
- vectorize(ctx, instr, &r_dst, 1, r_src, 0);
-
- /* add.u q, q, r */
- instr = instr_create(ctx, 2, OPC_ADD_U);
- vectorize(ctx, instr, &q_dst, 2, q_src, 0, r_src, 0);
-
- /* mull.u r, q, b */
- instr = instr_create(ctx, 2, OPC_MULL_U);
- vectorize(ctx, instr, &r_dst, 2, q_src, 0, b_src, 0);
-
- /* madsh.m16 r, q, b, r */
- instr = instr_create(ctx, 3, OPC_MADSH_M16);
- vectorize(ctx, instr, &r_dst, 3, q_src, 0, b_src, 0, r_src, 0);
-
- /* madsh.m16 r, b, q, r */
- instr = instr_create(ctx, 3, OPC_MADSH_M16);
- vectorize(ctx, instr, &r_dst, 3, b_src, 0, q_src, 0, r_src, 0);
-
- /* sub.u r, a, r */
- instr = instr_create(ctx, 2, OPC_SUB_U);
- vectorize(ctx, instr, &r_dst, 2, a_src, 0, r_src, 0);
-
- /* cmps.u.ge r, r, b */
- instr = instr_create(ctx, 2, OPC_CMPS_U);
- instr->cat2.condition = IR3_COND_GE;
- vectorize(ctx, instr, &r_dst, 2, r_src, 0, b_src, 0);
-
- if (type_uint(src_type)) {
- /* add.u dst, q, r */
- instr = instr_create(ctx, 2, OPC_ADD_U);
- vectorize(ctx, instr, premod_dst, 2, q_src, 0, r_src, 0);
- } else {
- /* add.u q, q, r */
- instr = instr_create(ctx, 2, OPC_ADD_U);
- vectorize(ctx, instr, &q_dst, 2, q_src, 0, r_src, 0);
-
- /* negate result based on the original arguments */
- if (is_const(a) && is_const(b))
- a = get_unconst(ctx, a);
-
- /* xor.b r, numerator, denominator */
- instr = instr_create(ctx, 2, OPC_XOR_B);
- vectorize(ctx, instr, &r_dst, 2, a, 0, b, 0);
-
- /* shr.b r, r, 31 */
- instr = instr_create(ctx, 2, OPC_SHR_B);
- vectorize(ctx, instr, &r_dst, 2, r_src, 0, &thirty_one, 0);
-
- /* absneg.s b, (neg)q */
- instr = instr_create(ctx, 2, OPC_ABSNEG_S);
- vectorize(ctx, instr, &b_dst, 1, q_src, IR3_REG_SNEG);
-
- /* sel.b dst, b, r, q */
- instr = instr_create(ctx, 3, OPC_SEL_B32);
- vectorize(ctx, instr, premod_dst, 3, b_src, 0, r_src, 0, q_src, 0);
- }
-
- if (t->tgsi_opc == TGSI_OPCODE_MOD || t->tgsi_opc == TGSI_OPCODE_UMOD) {
- /* The division result will have ended up in q. */
-
- if (is_rel_or_const(b))
- b = get_unconst(ctx, b);
-
- /* mull.u r, q, b */
- instr = instr_create(ctx, 2, OPC_MULL_U);
- vectorize(ctx, instr, &r_dst, 2, q_src, 0, b, 0);
-
- /* madsh.m16 r, q, b, r */
- instr = instr_create(ctx, 3, OPC_MADSH_M16);
- vectorize(ctx, instr, &r_dst, 3, q_src, 0, b, 0, r_src, 0);
-
- /* madsh.m16 r, b, q, r */
- instr = instr_create(ctx, 3, OPC_MADSH_M16);
- vectorize(ctx, instr, &r_dst, 3, b, 0, q_src, 0, r_src, 0);
-
- /* sub.u dst, a, r */
- instr = instr_create(ctx, 2, OPC_SUB_U);
- vectorize(ctx, instr, dst, 2, a, 0, r_src, 0);
- }
-
- put_dst(ctx, inst, dst);
-}
-
-/*
- * Handlers for TGSI instructions which do have 1:1 mapping to native
- * instructions:
- */
-
-static void
-instr_cat0(const struct instr_translater *t,
- struct ir3_compile_context *ctx,
- struct tgsi_full_instruction *inst)
+struct ir3_compiler * ir3_compiler_create(uint32_t gpu_id)
{
- instr_create(ctx, 0, t->opc);
+ struct ir3_compiler *compiler = rzalloc(NULL, struct ir3_compiler);
+ compiler->gpu_id = gpu_id;
+ compiler->set = ir3_ra_alloc_reg_set(compiler);
+ return compiler;
}
-static void
-instr_cat1(const struct instr_translater *t,
- struct ir3_compile_context *ctx,
- struct tgsi_full_instruction *inst)
+void ir3_compiler_destroy(struct ir3_compiler *compiler)
{
- struct tgsi_dst_register *dst = &inst->Dst[0].Register;
- struct tgsi_src_register *src = &inst->Src[0].Register;
-
- /* NOTE: atomic start/end, rather than in create_mov() since
- * create_mov() is used already w/in atomic sequences (and
- * we aren't clever enough to deal with the nesting)
- */
- instr_atomic_start(ctx);
- create_mov(ctx, dst, src);
- instr_atomic_end(ctx);
-}
-
-static void
-instr_cat2(const struct instr_translater *t,
- struct ir3_compile_context *ctx,
- struct tgsi_full_instruction *inst)
-{
- struct tgsi_dst_register *dst = get_dst(ctx, inst);
- struct tgsi_src_register *src0 = &inst->Src[0].Register;
- struct tgsi_src_register *src1 = &inst->Src[1].Register;
- struct ir3_instruction *instr;
- unsigned src0_flags = 0, src1_flags = 0;
-
- switch (t->tgsi_opc) {
- case TGSI_OPCODE_ABS:
- src0_flags = IR3_REG_FABS;
- break;
- case TGSI_OPCODE_IABS:
- src0_flags = IR3_REG_SABS;
- break;
- case TGSI_OPCODE_INEG:
- src0_flags = IR3_REG_SNEG;
- break;
- case TGSI_OPCODE_SUB:
- src1_flags = IR3_REG_FNEG;
- break;
- }
-
- switch (t->opc) {
- case OPC_ABSNEG_F:
- case OPC_ABSNEG_S:
- case OPC_CLZ_B:
- case OPC_CLZ_S:
- case OPC_SIGN_F:
- case OPC_FLOOR_F:
- case OPC_CEIL_F:
- case OPC_RNDNE_F:
- case OPC_RNDAZ_F:
- case OPC_TRUNC_F:
- case OPC_NOT_B:
- case OPC_BFREV_B:
- case OPC_SETRM:
- case OPC_CBITS_B:
- /* these only have one src reg */
- instr = instr_create(ctx, 2, t->opc);
- vectorize(ctx, instr, dst, 1, src0, src0_flags);
- break;
- default:
- if (is_const(src0) && is_const(src1))
- src0 = get_unconst(ctx, src0);
-
- instr = instr_create(ctx, 2, t->opc);
- vectorize(ctx, instr, dst, 2, src0, src0_flags,
- src1, src1_flags);
- break;
- }
-
- put_dst(ctx, inst, dst);
-}
-
-static void
-instr_cat3(const struct instr_translater *t,
- struct ir3_compile_context *ctx,
- struct tgsi_full_instruction *inst)
-{
- struct tgsi_dst_register *dst = get_dst(ctx, inst);
- struct tgsi_src_register *src0 = &inst->Src[0].Register;
- struct tgsi_src_register *src1 = &inst->Src[1].Register;
- struct ir3_instruction *instr;
-
- /* in particular, can't handle const for src1 for cat3..
- * for mad, we can swap first two src's if needed:
- */
- if (is_rel_or_const(src1)) {
- if (is_mad(t->opc) && !is_rel_or_const(src0)) {
- struct tgsi_src_register *tmp;
- tmp = src0;
- src0 = src1;
- src1 = tmp;
- } else {
- src1 = get_unconst(ctx, src1);
- }
- }
-
- instr = instr_create(ctx, 3, t->opc);
- vectorize(ctx, instr, dst, 3, src0, 0, src1, 0,
- &inst->Src[2].Register, 0);
- put_dst(ctx, inst, dst);
-}
-
-static void
-instr_cat4(const struct instr_translater *t,
- struct ir3_compile_context *ctx,
- struct tgsi_full_instruction *inst)
-{
- struct tgsi_dst_register *dst = get_dst(ctx, inst);
- struct tgsi_src_register *src = &inst->Src[0].Register;
- struct ir3_instruction *instr;
- unsigned i;
-
- /* seems like blob compiler avoids const as src.. */
- if (is_const(src))
- src = get_unconst(ctx, src);
-
- /* we need to replicate into each component: */
- for (i = 0; i < 4; i++) {
- if (dst->WriteMask & (1 << i)) {
- instr = instr_create(ctx, 4, t->opc);
- add_dst_reg(ctx, instr, dst, i);
- add_src_reg(ctx, instr, src, src->SwizzleX);
- }
- }
-
- put_dst(ctx, inst, dst);
-}
-
-static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
-#define INSTR(n, f, ...) \
- [TGSI_OPCODE_ ## n] = { .fxn = (f), .tgsi_opc = TGSI_OPCODE_ ## n, ##__VA_ARGS__ }
-
- INSTR(MOV, instr_cat1),
- INSTR(RCP, instr_cat4, .opc = OPC_RCP),
- INSTR(RSQ, instr_cat4, .opc = OPC_RSQ),
- INSTR(SQRT, instr_cat4, .opc = OPC_SQRT),
- INSTR(MUL, instr_cat2, .opc = OPC_MUL_F),
- INSTR(ADD, instr_cat2, .opc = OPC_ADD_F),
- INSTR(SUB, instr_cat2, .opc = OPC_ADD_F),
- INSTR(MIN, instr_cat2, .opc = OPC_MIN_F),
- INSTR(MAX, instr_cat2, .opc = OPC_MAX_F),
- INSTR(UADD, instr_cat2, .opc = OPC_ADD_U),
- INSTR(IMIN, instr_cat2, .opc = OPC_MIN_S),
- INSTR(UMIN, instr_cat2, .opc = OPC_MIN_U),
- INSTR(IMAX, instr_cat2, .opc = OPC_MAX_S),
- INSTR(UMAX, instr_cat2, .opc = OPC_MAX_U),
- INSTR(AND, instr_cat2, .opc = OPC_AND_B),
- INSTR(OR, instr_cat2, .opc = OPC_OR_B),
- INSTR(NOT, instr_cat2, .opc = OPC_NOT_B),
- INSTR(XOR, instr_cat2, .opc = OPC_XOR_B),
- INSTR(UMUL, trans_umul),
- INSTR(UMAD, trans_umul),
- INSTR(UDIV, trans_idiv),
- INSTR(IDIV, trans_idiv),
- INSTR(MOD, trans_idiv),
- INSTR(UMOD, trans_idiv),
- INSTR(SHL, instr_cat2, .opc = OPC_SHL_B),
- INSTR(USHR, instr_cat2, .opc = OPC_SHR_B),
- INSTR(ISHR, instr_cat2, .opc = OPC_ASHR_B),
- INSTR(IABS, instr_cat2, .opc = OPC_ABSNEG_S),
- INSTR(INEG, instr_cat2, .opc = OPC_ABSNEG_S),
- INSTR(AND, instr_cat2, .opc = OPC_AND_B),
- INSTR(MAD, instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16),
- INSTR(TRUNC, instr_cat2, .opc = OPC_TRUNC_F),
- INSTR(CLAMP, trans_clamp),
- INSTR(FLR, instr_cat2, .opc = OPC_FLOOR_F),
- INSTR(ROUND, instr_cat2, .opc = OPC_RNDNE_F),
- INSTR(SSG, instr_cat2, .opc = OPC_SIGN_F),
- INSTR(CEIL, instr_cat2, .opc = OPC_CEIL_F),
- INSTR(ARL, trans_arl),
- INSTR(UARL, trans_arl),
- INSTR(EX2, instr_cat4, .opc = OPC_EXP2),
- INSTR(LG2, instr_cat4, .opc = OPC_LOG2),
- INSTR(ABS, instr_cat2, .opc = OPC_ABSNEG_F),
- INSTR(COS, instr_cat4, .opc = OPC_COS),
- INSTR(SIN, instr_cat4, .opc = OPC_SIN),
- INSTR(TEX, trans_samp, .opc = OPC_SAM),
- INSTR(TXP, trans_samp, .opc = OPC_SAM),
- INSTR(TXB, trans_samp, .opc = OPC_SAMB),
- INSTR(TXB2, trans_samp, .opc = OPC_SAMB),
- INSTR(TXL, trans_samp, .opc = OPC_SAML),
- INSTR(TXD, trans_samp, .opc = OPC_SAMGQ),
- INSTR(TXF, trans_samp, .opc = OPC_ISAML),
- INSTR(TXQ, trans_txq),
- INSTR(DDX, trans_deriv, .opc = OPC_DSX),
- INSTR(DDY, trans_deriv, .opc = OPC_DSY),
- INSTR(SGT, trans_cmp),
- INSTR(SLT, trans_cmp),
- INSTR(FSLT, trans_cmp),
- INSTR(SGE, trans_cmp),
- INSTR(FSGE, trans_cmp),
- INSTR(SLE, trans_cmp),
- INSTR(SNE, trans_cmp),
- INSTR(FSNE, trans_cmp),
- INSTR(SEQ, trans_cmp),
- INSTR(FSEQ, trans_cmp),
- INSTR(CMP, trans_cmp),
- INSTR(USNE, trans_icmp, .opc = OPC_CMPS_U),
- INSTR(USEQ, trans_icmp, .opc = OPC_CMPS_U),
- INSTR(ISGE, trans_icmp, .opc = OPC_CMPS_S),
- INSTR(USGE, trans_icmp, .opc = OPC_CMPS_U),
- INSTR(ISLT, trans_icmp, .opc = OPC_CMPS_S),
- INSTR(USLT, trans_icmp, .opc = OPC_CMPS_U),
- INSTR(UCMP, trans_ucmp),
- INSTR(ISSG, trans_issg),
- INSTR(IF, trans_if, .opc = OPC_CMPS_F),
- INSTR(UIF, trans_if, .opc = OPC_CMPS_U),
- INSTR(ELSE, trans_else),
- INSTR(ENDIF, trans_endif),
- INSTR(END, instr_cat0, .opc = OPC_END),
- INSTR(KILL, trans_kill, .opc = OPC_KILL),
- INSTR(KILL_IF, trans_killif, .opc = OPC_KILL),
- INSTR(I2F, trans_cov),
- INSTR(U2F, trans_cov),
- INSTR(F2I, trans_cov),
- INSTR(F2U, trans_cov),
-};
-
-static ir3_semantic
-decl_semantic(const struct tgsi_declaration_semantic *sem)
-{
- return ir3_semantic_name(sem->Name, sem->Index);
-}
-
-static struct ir3_instruction *
-decl_in_frag_bary(struct ir3_compile_context *ctx, unsigned regid,
- unsigned j, unsigned inloc, bool use_ldlv)
-{
- struct ir3_instruction *instr;
- struct ir3_register *src;
-
- if (use_ldlv) {
- /* ldlv.u32 dst, l[#inloc], 1 */
- instr = instr_create(ctx, 6, OPC_LDLV);
- instr->cat6.type = TYPE_U32;
- instr->cat6.iim_val = 1;
- ir3_reg_create(instr, regid, 0); /* dummy dst */
- ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = inloc;
- ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1;
-
- return instr;
- }
-
- /* bary.f dst, #inloc, r0.x */
- instr = instr_create(ctx, 2, OPC_BARY_F);
- ir3_reg_create(instr, regid, 0); /* dummy dst */
- ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = inloc;
- src = ir3_reg_create(instr, 0, IR3_REG_SSA);
- src->wrmask = 0x3;
- src->instr = ctx->frag_pos;
-
- return instr;
-}
-
-/* TGSI_SEMANTIC_POSITION
- * """"""""""""""""""""""
- *
- * For fragment shaders, TGSI_SEMANTIC_POSITION is used to indicate that
- * fragment shader input contains the fragment's window position. The X
- * component starts at zero and always increases from left to right.
- * The Y component starts at zero and always increases but Y=0 may either
- * indicate the top of the window or the bottom depending on the fragment
- * coordinate origin convention (see TGSI_PROPERTY_FS_COORD_ORIGIN).
- * The Z coordinate ranges from 0 to 1 to represent depth from the front
- * to the back of the Z buffer. The W component contains the reciprocol
- * of the interpolated vertex position W component.
- */
-static struct ir3_instruction *
-decl_in_frag_coord(struct ir3_compile_context *ctx, unsigned regid,
- unsigned j)
-{
- struct ir3_instruction *instr, *src;
-
- compile_assert(ctx, !ctx->frag_coord[j]);
-
- ctx->frag_coord[j] = create_input(ctx->block, NULL, 0);
-
-
- switch (j) {
- case 0: /* .x */
- case 1: /* .y */
- /* for frag_coord, we get unsigned values.. we need
- * to subtract (integer) 8 and divide by 16 (right-
- * shift by 4) then convert to float:
- */
-
- /* add.s tmp, src, -8 */
- instr = instr_create(ctx, 2, OPC_ADD_S);
- ir3_reg_create(instr, regid, 0); /* dummy dst */
- ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = ctx->frag_coord[j];
- ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -8;
- src = instr;
-
- /* shr.b tmp, tmp, 4 */
- instr = instr_create(ctx, 2, OPC_SHR_B);
- ir3_reg_create(instr, regid, 0); /* dummy dst */
- ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
- ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 4;
- src = instr;
-
- /* mov.u32f32 dst, tmp */
- instr = instr_create(ctx, 1, 0);
- instr->cat1.src_type = TYPE_U32;
- instr->cat1.dst_type = TYPE_F32;
- ir3_reg_create(instr, regid, 0); /* dummy dst */
- ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
-
- break;
- case 2: /* .z */
- case 3: /* .w */
- /* seems that we can use these as-is: */
- instr = ctx->frag_coord[j];
- break;
- default:
- compile_error(ctx, "invalid channel\n");
- instr = create_immed(ctx, 0.0);
- break;
- }
-
- return instr;
-}
-
-/* TGSI_SEMANTIC_FACE
- * """"""""""""""""""
- *
- * This label applies to fragment shader inputs only and indicates that
- * the register contains front/back-face information of the form (F, 0,
- * 0, 1). The first component will be positive when the fragment belongs
- * to a front-facing polygon, and negative when the fragment belongs to a
- * back-facing polygon.
- */
-static struct ir3_instruction *
-decl_in_frag_face(struct ir3_compile_context *ctx, unsigned regid,
- unsigned j)
-{
- struct ir3_instruction *instr, *src;
-
- switch (j) {
- case 0: /* .x */
- compile_assert(ctx, !ctx->frag_face);
-
- ctx->frag_face = create_input(ctx->block, NULL, 0);
-
- /* for faceness, we always get -1 or 0 (int).. but TGSI expects
- * positive vs negative float.. and piglit further seems to
- * expect -1.0 or 1.0:
- *
- * mul.s tmp, hr0.x, 2
- * add.s tmp, tmp, 1
- * mov.s16f32, dst, tmp
- *
- */
-
- instr = instr_create(ctx, 2, OPC_MUL_S);
- ir3_reg_create(instr, regid, 0); /* dummy dst */
- ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = ctx->frag_face;
- ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2;
- src = instr;
-
- instr = instr_create(ctx, 2, OPC_ADD_S);
- ir3_reg_create(instr, regid, 0); /* dummy dst */
- ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
- ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1;
- src = instr;
-
- instr = instr_create(ctx, 1, 0); /* mov */
- instr->cat1.src_type = TYPE_S32;
- instr->cat1.dst_type = TYPE_F32;
- ir3_reg_create(instr, regid, 0); /* dummy dst */
- ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
-
- break;
- case 1: /* .y */
- case 2: /* .z */
- instr = create_immed(ctx, 0.0);
- break;
- case 3: /* .w */
- instr = create_immed(ctx, 1.0);
- break;
- default:
- compile_error(ctx, "invalid channel\n");
- instr = create_immed(ctx, 0.0);
- break;
- }
-
- return instr;
-}
-
-static void
-decl_in(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl)
-{
- struct ir3_shader_variant *so = ctx->so;
- unsigned name = decl->Semantic.Name;
- unsigned i;
-
- /* I don't think we should get frag shader input without
- * semantic info? Otherwise how do inputs get linked to
- * vert outputs?
- */
- compile_assert(ctx, (ctx->type == TGSI_PROCESSOR_VERTEX) ||
- decl->Declaration.Semantic);
-
- for (i = decl->Range.First; i <= decl->Range.Last; i++) {
- unsigned n = so->inputs_count++;
- unsigned r = regid(i, 0);
- unsigned ncomp, j;
-
- /* we'll figure out the actual components used after scheduling */
- ncomp = 4;
-
- DBG("decl in -> r%d", i);
-
- compile_assert(ctx, n < ARRAY_SIZE(so->inputs));
-
- so->inputs[n].semantic = decl_semantic(&decl->Semantic);
- so->inputs[n].compmask = (1 << ncomp) - 1;
- so->inputs[n].regid = r;
- so->inputs[n].inloc = ctx->next_inloc;
- so->inputs[n].interpolate = decl->Interp.Interpolate;
-
- for (j = 0; j < ncomp; j++) {
- struct ir3_instruction *instr = NULL;
-
- if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
- /* for fragment shaders, POSITION and FACE are handled
- * specially, not using normal varying / bary.f
- */
- if (name == TGSI_SEMANTIC_POSITION) {
- so->inputs[n].bary = false;
- so->frag_coord = true;
- instr = decl_in_frag_coord(ctx, r + j, j);
- } else if (name == TGSI_SEMANTIC_FACE) {
- so->inputs[n].bary = false;
- so->frag_face = true;
- instr = decl_in_frag_face(ctx, r + j, j);
- } else {
- bool use_ldlv = false;
-
- /* if no interpolation given, pick based on
- * semantic:
- */
- if (!decl->Declaration.Interpolate) {
- switch (decl->Semantic.Name) {
- case TGSI_SEMANTIC_COLOR:
- so->inputs[n].interpolate =
- TGSI_INTERPOLATE_COLOR;
- break;
- default:
- so->inputs[n].interpolate =
- TGSI_INTERPOLATE_LINEAR;
- }
- }
-
- if (ctx->flat_bypass) {
- switch (so->inputs[n].interpolate) {
- case TGSI_INTERPOLATE_COLOR:
- if (!ctx->so->key.rasterflat)
- break;
- /* fallthrough */
- case TGSI_INTERPOLATE_CONSTANT:
- use_ldlv = true;
- break;
- }
- }
-
- so->inputs[n].bary = true;
-
- instr = decl_in_frag_bary(ctx, r + j, j,
- so->inputs[n].inloc + j - 8, use_ldlv);
- }
- } else {
- instr = create_input(ctx->block, NULL, (i * 4) + j);
- }
-
- ctx->block->inputs[(i * 4) + j] = instr;
- }
-
- if (so->inputs[n].bary || (ctx->type == TGSI_PROCESSOR_VERTEX)) {
- ctx->next_inloc += ncomp;
- so->total_in += ncomp;
- }
- }
-}
-
-static void
-decl_sv(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl)
-{
- struct ir3_shader_variant *so = ctx->so;
- unsigned r = regid(so->inputs_count, 0);
- unsigned n = so->inputs_count++;
-
- DBG("decl sv -> r%d", n);
-
- compile_assert(ctx, n < ARRAY_SIZE(so->inputs));
- compile_assert(ctx, decl->Range.First < ARRAY_SIZE(ctx->sysval_semantics));
-
- ctx->sysval_semantics[decl->Range.First] = decl->Semantic.Name;
- so->inputs[n].semantic = decl_semantic(&decl->Semantic);
- so->inputs[n].compmask = 1;
- so->inputs[n].regid = r;
- so->inputs[n].inloc = ctx->next_inloc;
- so->inputs[n].interpolate = TGSI_INTERPOLATE_CONSTANT;
-
- struct ir3_instruction *instr = NULL;
-
- switch (decl->Semantic.Name) {
- case TGSI_SEMANTIC_VERTEXID_NOBASE:
- ctx->vertex_id = instr = create_input(ctx->block, NULL, r);
- break;
- case TGSI_SEMANTIC_BASEVERTEX:
- ctx->basevertex = instr = instr_create(ctx, 1, 0);
- instr->cat1.src_type = get_stype(ctx);
- instr->cat1.dst_type = get_stype(ctx);
- ir3_reg_create(instr, 0, 0);
- ir3_reg_create(instr, regid(so->first_driver_param + 4, 0),
- IR3_REG_CONST);
- break;
- case TGSI_SEMANTIC_INSTANCEID:
- ctx->instance_id = instr = create_input(ctx->block, NULL, r);
- break;
- default:
- compile_error(ctx, "Unknown semantic: %s\n",
- tgsi_semantic_names[decl->Semantic.Name]);
- }
-
- ctx->block->inputs[r] = instr;
- ctx->next_inloc++;
- so->total_in++;
-}
-
-static void
-decl_out(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl)
-{
- struct ir3_shader_variant *so = ctx->so;
- unsigned comp = 0;
- unsigned name = decl->Semantic.Name;
- unsigned i;
-
- compile_assert(ctx, decl->Declaration.Semantic);
-
- DBG("decl out[%d] -> r%d", name, decl->Range.First);
-
- if (ctx->type == TGSI_PROCESSOR_VERTEX) {
- switch (name) {
- case TGSI_SEMANTIC_POSITION:
- so->writes_pos = true;
- break;
- case TGSI_SEMANTIC_PSIZE:
- so->writes_psize = true;
- break;
- case TGSI_SEMANTIC_COLOR:
- case TGSI_SEMANTIC_BCOLOR:
- case TGSI_SEMANTIC_GENERIC:
- case TGSI_SEMANTIC_FOG:
- case TGSI_SEMANTIC_TEXCOORD:
- break;
- default:
- compile_error(ctx, "unknown VS semantic name: %s\n",
- tgsi_semantic_names[name]);
- }
- } else {
- switch (name) {
- case TGSI_SEMANTIC_POSITION:
- comp = 2; /* tgsi will write to .z component */
- so->writes_pos = true;
- break;
- case TGSI_SEMANTIC_COLOR:
- break;
- default:
- compile_error(ctx, "unknown FS semantic name: %s\n",
- tgsi_semantic_names[name]);
- }
- }
-
- for (i = decl->Range.First; i <= decl->Range.Last; i++) {
- unsigned n = so->outputs_count++;
- unsigned ncomp, j;
-
- ncomp = 4;
-
- compile_assert(ctx, n < ARRAY_SIZE(so->outputs));
-
- so->outputs[n].semantic = decl_semantic(&decl->Semantic);
- so->outputs[n].regid = regid(i, comp);
-
- /* avoid undefined outputs, stick a dummy mov from imm{0.0},
- * which if the output is actually assigned will be over-
- * written
- */
- for (j = 0; j < ncomp; j++)
- ctx->block->outputs[(i * 4) + j] = create_immed(ctx, 0.0);
- }
-}
-
-/* from TGSI perspective, we actually have inputs. But most of the "inputs"
- * for a fragment shader are just bary.f instructions. The *actual* inputs
- * from the hw perspective are the frag_pos and optionally frag_coord and
- * frag_face.
- */
-static void
-fixup_frag_inputs(struct ir3_compile_context *ctx)
-{
- struct ir3_shader_variant *so = ctx->so;
- struct ir3_block *block = ctx->block;
- struct ir3_instruction **inputs;
- struct ir3_instruction *instr;
- int n, regid = 0;
-
- block->ninputs = 0;
-
- n = 4; /* always have frag_pos */
- n += COND(so->frag_face, 4);
- n += COND(so->frag_coord, 4);
-
- inputs = ir3_alloc(ctx->ir, n * (sizeof(struct ir3_instruction *)));
-
- if (so->frag_face) {
- /* this ultimately gets assigned to hr0.x so doesn't conflict
- * with frag_coord/frag_pos..
- */
- inputs[block->ninputs++] = ctx->frag_face;
- ctx->frag_face->regs[0]->num = 0;
-
- /* remaining channels not used, but let's avoid confusing
- * other parts that expect inputs to come in groups of vec4
- */
- inputs[block->ninputs++] = NULL;
- inputs[block->ninputs++] = NULL;
- inputs[block->ninputs++] = NULL;
- }
-
- /* since we don't know where to set the regid for frag_coord,
- * we have to use r0.x for it. But we don't want to *always*
- * use r1.x for frag_pos as that could increase the register
- * footprint on simple shaders:
- */
- if (so->frag_coord) {
- ctx->frag_coord[0]->regs[0]->num = regid++;
- ctx->frag_coord[1]->regs[0]->num = regid++;
- ctx->frag_coord[2]->regs[0]->num = regid++;
- ctx->frag_coord[3]->regs[0]->num = regid++;
-
- inputs[block->ninputs++] = ctx->frag_coord[0];
- inputs[block->ninputs++] = ctx->frag_coord[1];
- inputs[block->ninputs++] = ctx->frag_coord[2];
- inputs[block->ninputs++] = ctx->frag_coord[3];
- }
-
- /* we always have frag_pos: */
- so->pos_regid = regid;
-
- /* r0.x */
- instr = create_input(block, NULL, block->ninputs);
- instr->regs[0]->num = regid++;
- inputs[block->ninputs++] = instr;
- ctx->frag_pos->regs[1]->instr = instr;
-
- /* r0.y */
- instr = create_input(block, NULL, block->ninputs);
- instr->regs[0]->num = regid++;
- inputs[block->ninputs++] = instr;
- ctx->frag_pos->regs[2]->instr = instr;
-
- block->inputs = inputs;
-}
-
-static void
-compile_instructions(struct ir3_compile_context *ctx)
-{
- push_block(ctx);
-
- /* for fragment shader, we have a single input register (usually
- * r0.xy) which is used as the base for bary.f varying fetch instrs:
- */
- if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
- struct ir3_instruction *instr;
- instr = ir3_instr_create(ctx->block, -1, OPC_META_FI);
- ir3_reg_create(instr, 0, 0);
- ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.x */
- ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.y */
- ctx->frag_pos = instr;
- }
-
- while (!tgsi_parse_end_of_tokens(&ctx->parser)) {
- tgsi_parse_token(&ctx->parser);
-
- switch (ctx->parser.FullToken.Token.Type) {
- case TGSI_TOKEN_TYPE_DECLARATION: {
- struct tgsi_full_declaration *decl =
- &ctx->parser.FullToken.FullDeclaration;
- unsigned file = decl->Declaration.File;
- if (file == TGSI_FILE_OUTPUT) {
- decl_out(ctx, decl);
- } else if (file == TGSI_FILE_INPUT) {
- decl_in(ctx, decl);
- } else if (decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
- decl_sv(ctx, decl);
- }
-
- if ((file != TGSI_FILE_CONSTANT) && decl->Declaration.Array) {
- int aid = decl->Array.ArrayID + ctx->array_offsets[file];
-
- compile_assert(ctx, aid < ARRAY_SIZE(ctx->array));
-
- /* legacy ArrayID==0 stuff probably isn't going to work
- * well (and is at least untested).. let's just scream:
- */
- compile_assert(ctx, aid != 0);
-
- ctx->array[aid].first = decl->Range.First;
- ctx->array[aid].last = decl->Range.Last;
- }
- break;
- }
- case TGSI_TOKEN_TYPE_IMMEDIATE: {
- /* TODO: if we know the immediate is small enough, and only
- * used with instructions that can embed an immediate, we
- * can skip this:
- */
- struct tgsi_full_immediate *imm =
- &ctx->parser.FullToken.FullImmediate;
- unsigned n = ctx->so->immediates_count++;
- compile_assert(ctx, n < ARRAY_SIZE(ctx->so->immediates));
- memcpy(ctx->so->immediates[n].val, imm->u, 16);
- break;
- }
- case TGSI_TOKEN_TYPE_INSTRUCTION: {
- struct tgsi_full_instruction *inst =
- &ctx->parser.FullToken.FullInstruction;
- unsigned opc = inst->Instruction.Opcode;
- const struct instr_translater *t = &translaters[opc];
-
- if (t->fxn) {
- t->fxn(t, ctx, inst);
- ctx->num_internal_temps = 0;
-
- compile_assert(ctx, !ctx->using_tmp_dst);
- } else {
- compile_error(ctx, "unknown TGSI opc: %s\n",
- tgsi_get_opcode_name(opc));
- }
-
- switch (inst->Instruction.Saturate) {
- case TGSI_SAT_ZERO_ONE:
- create_clamp_imm(ctx, &inst->Dst[0].Register,
- fui(0.0), fui(1.0));
- break;
- case TGSI_SAT_MINUS_PLUS_ONE:
- create_clamp_imm(ctx, &inst->Dst[0].Register,
- fui(-1.0), fui(1.0));
- break;
- }
-
- instr_finish(ctx);
-
- break;
- }
- case TGSI_TOKEN_TYPE_PROPERTY: {
- struct tgsi_full_property *prop =
- &ctx->parser.FullToken.FullProperty;
- switch (prop->Property.PropertyName) {
- case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
- ctx->so->color0_mrt = !!prop->u[0].Data;
- break;
- }
- }
- default:
- break;
- }
- }
-}
-
-static void
-compile_dump(struct ir3_compile_context *ctx)
-{
- const char *name = (ctx->so->type == SHADER_VERTEX) ? "vert" : "frag";
- static unsigned n = 0;
- char fname[16];
- FILE *f;
- snprintf(fname, sizeof(fname), "%s-%04u.dot", name, n++);
- f = fopen(fname, "w");
- if (!f)
- return;
- ir3_block_depth(ctx->block);
- ir3_dump(ctx->ir, name, ctx->block, f);
- fclose(f);
-}
-
-int
-ir3_compile_shader(struct ir3_shader_variant *so,
- const struct tgsi_token *tokens, struct ir3_shader_key key,
- bool cp)
-{
- struct ir3_compile_context ctx;
- struct ir3_block *block;
- struct ir3_instruction **inputs;
- unsigned i, j, actual_in;
- int ret = 0, max_bary;
-
- assert(!so->ir);
-
- so->ir = ir3_create();
-
- assert(so->ir);
-
- if (compile_init(&ctx, so, tokens) != TGSI_PARSE_OK) {
- DBG("INIT failed!");
- ret = -1;
- goto out;
- }
-
- /* for now, until the edge cases are worked out: */
- if (ctx.info.indirect_files_written & (FM(TEMPORARY) | FM(INPUT) | FM(OUTPUT)))
- cp = false;
-
- compile_instructions(&ctx);
-
- block = ctx.block;
- so->ir->block = block;
-
- /* keep track of the inputs from TGSI perspective.. */
- inputs = block->inputs;
-
- /* but fixup actual inputs for frag shader: */
- if (ctx.type == TGSI_PROCESSOR_FRAGMENT)
- fixup_frag_inputs(&ctx);
-
- /* at this point, for binning pass, throw away unneeded outputs: */
- if (key.binning_pass) {
- for (i = 0, j = 0; i < so->outputs_count; i++) {
- unsigned name = sem2name(so->outputs[i].semantic);
- unsigned idx = sem2idx(so->outputs[i].semantic);
-
- /* throw away everything but first position/psize */
- if ((idx == 0) && ((name == TGSI_SEMANTIC_POSITION) ||
- (name == TGSI_SEMANTIC_PSIZE))) {
- if (i != j) {
- so->outputs[j] = so->outputs[i];
- block->outputs[(j*4)+0] = block->outputs[(i*4)+0];
- block->outputs[(j*4)+1] = block->outputs[(i*4)+1];
- block->outputs[(j*4)+2] = block->outputs[(i*4)+2];
- block->outputs[(j*4)+3] = block->outputs[(i*4)+3];
- }
- j++;
- }
- }
- so->outputs_count = j;
- block->noutputs = j * 4;
- }
-
- /* if we want half-precision outputs, mark the output registers
- * as half:
- */
- if (key.half_precision) {
- for (i = 0; i < block->noutputs; i++) {
- if (!block->outputs[i])
- continue;
- block->outputs[i]->regs[0]->flags |= IR3_REG_HALF;
- }
- }
-
- /* at this point, we want the kill's in the outputs array too,
- * so that they get scheduled (since they have no dst).. we've
- * already ensured that the array is big enough in push_block():
- */
- if (ctx.type == TGSI_PROCESSOR_FRAGMENT) {
- for (i = 0; i < ctx.kill_count; i++)
- block->outputs[block->noutputs++] = ctx.kill[i];
- }
-
- if (fd_mesa_debug & FD_DBG_OPTDUMP)
- compile_dump(&ctx);
-
- ret = ir3_block_flatten(block);
- if (ret < 0) {
- DBG("FLATTEN failed!");
- goto out;
- }
- if ((ret > 0) && (fd_mesa_debug & FD_DBG_OPTDUMP))
- compile_dump(&ctx);
-
- if (fd_mesa_debug & FD_DBG_OPTMSGS) {
- printf("BEFORE CP:\n");
- ir3_dump_instr_list(block->head);
- }
-
- ir3_block_depth(block);
-
- /* First remove all the extra mov's (which we could skip if the
- * front-end was clever enough not to insert them in the first
- * place). Then figure out left/right neighbors, re-inserting
- * extra mov's when needed to avoid conflicts.
- */
- if (cp && !(fd_mesa_debug & FD_DBG_NOCP))
- ir3_block_cp(block);
-
- if (fd_mesa_debug & FD_DBG_OPTMSGS) {
- printf("BEFORE GROUPING:\n");
- ir3_dump_instr_list(block->head);
- }
-
- /* Group left/right neighbors, inserting mov's where needed to
- * solve conflicts:
- */
- ir3_block_group(block);
-
- if (fd_mesa_debug & FD_DBG_OPTDUMP)
- compile_dump(&ctx);
-
- ir3_block_depth(block);
-
- if (fd_mesa_debug & FD_DBG_OPTMSGS) {
- printf("AFTER DEPTH:\n");
- ir3_dump_instr_list(block->head);
- }
-
- ret = ir3_block_sched(block);
- if (ret) {
- DBG("SCHED failed!");
- goto out;
- }
-
- if (fd_mesa_debug & FD_DBG_OPTMSGS) {
- printf("AFTER SCHED:\n");
- ir3_dump_instr_list(block->head);
- }
-
- ret = ir3_block_ra(block, so->type, so->frag_coord, so->frag_face);
- if (ret) {
- DBG("RA failed!");
- goto out;
- }
-
- if (fd_mesa_debug & FD_DBG_OPTMSGS) {
- printf("AFTER RA:\n");
- ir3_dump_instr_list(block->head);
- }
-
- ir3_block_legalize(block, &so->has_samp, &max_bary);
-
- /* fixup input/outputs: */
- for (i = 0; i < so->outputs_count; i++) {
- so->outputs[i].regid = block->outputs[i*4]->regs[0]->num;
- /* preserve hack for depth output.. tgsi writes depth to .z,
- * but what we give the hw is the scalar register:
- */
- if ((ctx.type == TGSI_PROCESSOR_FRAGMENT) &&
- (sem2name(so->outputs[i].semantic) == TGSI_SEMANTIC_POSITION))
- so->outputs[i].regid += 2;
- }
- /* Note that some or all channels of an input may be unused: */
- actual_in = 0;
- for (i = 0; i < so->inputs_count; i++) {
- unsigned j, regid = ~0, compmask = 0;
- so->inputs[i].ncomp = 0;
- for (j = 0; j < 4; j++) {
- struct ir3_instruction *in = inputs[(i*4) + j];
- if (in) {
- compmask |= (1 << j);
- regid = in->regs[0]->num - j;
- actual_in++;
- so->inputs[i].ncomp++;
- }
- }
- so->inputs[i].regid = regid;
- so->inputs[i].compmask = compmask;
- }
-
- /* fragment shader always gets full vec4's even if it doesn't
- * fetch all components, but vertex shader we need to update
- * with the actual number of components fetch, otherwise thing
- * will hang due to mismaptch between VFD_DECODE's and
- * TOTALATTRTOVS
- */
- if (so->type == SHADER_VERTEX)
- so->total_in = actual_in;
- else
- so->total_in = align(max_bary + 1, 4);
-
-out:
- if (ret) {
- ir3_destroy(so->ir);
- so->ir = NULL;
- }
- compile_free(&ctx);
-
- return ret;
+ ralloc_free(compiler);
}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.h b/src/gallium/drivers/freedreno/ir3/ir3_compiler.h
index 9213386e00c..86b1161d9cb 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.h
@@ -31,12 +31,19 @@
#include "ir3_shader.h"
+struct ir3_ra_reg_set;
-int ir3_compile_shader_nir(struct ir3_shader_variant *so,
- const struct tgsi_token *tokens, struct ir3_shader_key key);
+struct ir3_compiler {
+ uint32_t gpu_id;
+ struct ir3_ra_reg_set *set;
+};
-int ir3_compile_shader(struct ir3_shader_variant *so,
+struct ir3_compiler * ir3_compiler_create(uint32_t gpu_id);
+void ir3_compiler_destroy(struct ir3_compiler *compiler);
+
+int ir3_compile_shader_nir(struct ir3_compiler *compiler,
+ struct ir3_shader_variant *so,
const struct tgsi_token *tokens,
- struct ir3_shader_key key, bool cp);
+ struct ir3_shader_key key);
#endif /* IR3_COMPILER_H_ */
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 05e7049ad55..48b1d8f3606 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -48,19 +48,19 @@
#include "ir3.h"
-static struct ir3_instruction * create_immed(struct ir3_block *block, uint32_t val);
-
struct ir3_compile {
+ struct ir3_compiler *compiler;
+
const struct tgsi_token *tokens;
struct nir_shader *s;
struct ir3 *ir;
struct ir3_shader_variant *so;
- /* bitmask of which samplers are integer: */
- uint16_t integer_s;
+ struct ir3_block *block; /* the current block */
+ struct ir3_block *in_block; /* block created for shader inputs */
- struct ir3_block *block;
+ nir_function_impl *impl;
/* For fragment shaders, from the hw perspective the only
* actual input is r0.xy position register passed to bary.f.
@@ -92,6 +92,11 @@ struct ir3_compile {
*/
struct hash_table *addr_ht;
+ /* maps nir_block to ir3_block, mostly for the purposes of
+ * figuring out the blocks successors
+ */
+ struct hash_table *block_ht;
+
/* for calculating input/output positions/linkages: */
unsigned next_inloc;
@@ -104,6 +109,11 @@ struct ir3_compile {
*/
bool levels_add_one;
+ /* on a3xx, we need to scale up integer coords for isaml based
+ * on LoD:
+ */
+ bool unminify_coords;
+
/* for looking up which system value is which */
unsigned sysval_semantics[8];
@@ -118,6 +128,9 @@ struct ir3_compile {
};
+static struct ir3_instruction * create_immed(struct ir3_block *block, uint32_t val);
+static struct ir3_block * get_block(struct ir3_compile *ctx, nir_block *nblock);
+
static struct nir_shader *to_nir(const struct tgsi_token *tokens)
{
struct nir_shader_compiler_options options = {
@@ -146,6 +159,7 @@ static struct nir_shader *to_nir(const struct tgsi_token *tokens)
nir_lower_vars_to_ssa(s);
nir_lower_alu_to_scalar(s);
+ nir_lower_phis_to_scalar(s);
progress |= nir_copy_prop(s);
progress |= nir_opt_dce(s);
@@ -170,7 +184,8 @@ static struct nir_shader *to_nir(const struct tgsi_token *tokens)
/* TODO nir doesn't lower everything for us yet, but ideally it would: */
static const struct tgsi_token *
-lower_tgsi(const struct tgsi_token *tokens, struct ir3_shader_variant *so)
+lower_tgsi(struct ir3_compile *ctx, const struct tgsi_token *tokens,
+ struct ir3_shader_variant *so)
{
struct tgsi_shader_info info;
struct tgsi_lowering_config lconfig = {
@@ -192,11 +207,7 @@ lower_tgsi(const struct tgsi_token *tokens, struct ir3_shader_variant *so)
break;
}
- if (!so->shader) {
- /* hack for standalone compiler which does not have
- * screen/context:
- */
- } else if (ir3_shader_gpuid(so->shader) >= 400) {
+ if (ctx->compiler->gpu_id >= 400) {
/* a4xx seems to have *no* sam.p */
lconfig.lower_TXP = ~0; /* lower all txp */
} else {
@@ -208,36 +219,26 @@ lower_tgsi(const struct tgsi_token *tokens, struct ir3_shader_variant *so)
}
static struct ir3_compile *
-compile_init(struct ir3_shader_variant *so,
+compile_init(struct ir3_compiler *compiler,
+ struct ir3_shader_variant *so,
const struct tgsi_token *tokens)
{
struct ir3_compile *ctx = rzalloc(NULL, struct ir3_compile);
const struct tgsi_token *lowered_tokens;
- if (!so->shader) {
- /* hack for standalone compiler which does not have
- * screen/context:
- */
- } else if (ir3_shader_gpuid(so->shader) >= 400) {
+ if (compiler->gpu_id >= 400) {
/* need special handling for "flat" */
ctx->flat_bypass = true;
ctx->levels_add_one = false;
+ ctx->unminify_coords = false;
} else {
/* no special handling for "flat" */
ctx->flat_bypass = false;
ctx->levels_add_one = true;
+ ctx->unminify_coords = true;
}
- switch (so->type) {
- case SHADER_FRAGMENT:
- case SHADER_COMPUTE:
- ctx->integer_s = so->key.finteger_s;
- break;
- case SHADER_VERTEX:
- ctx->integer_s = so->key.vinteger_s;
- break;
- }
-
+ ctx->compiler = compiler;
ctx->ir = so->ir;
ctx->so = so;
ctx->next_inloc = 8;
@@ -247,8 +248,10 @@ compile_init(struct ir3_shader_variant *so,
_mesa_hash_pointer, _mesa_key_pointer_equal);
ctx->addr_ht = _mesa_hash_table_create(ctx,
_mesa_hash_pointer, _mesa_key_pointer_equal);
+ ctx->block_ht = _mesa_hash_table_create(ctx,
+ _mesa_hash_pointer, _mesa_key_pointer_equal);
- lowered_tokens = lower_tgsi(tokens, so);
+ lowered_tokens = lower_tgsi(ctx, tokens, so);
if (!lowered_tokens)
lowered_tokens = tokens;
ctx->s = to_nir(lowered_tokens);
@@ -290,33 +293,206 @@ compile_free(struct ir3_compile *ctx)
ralloc_free(ctx);
}
-
+/* global per-array information: */
struct ir3_array {
unsigned length, aid;
+};
+
+/* per-block array state: */
+struct ir3_array_value {
+ /* TODO drop length/aid, and just have ptr back to ir3_array */
+ unsigned length, aid;
+ /* initial array element values are phi's, other than for the
+ * entry block. The phi src's get added later in a resolve step
+ * after we have visited all the blocks, to account for back
+ * edges in the cfg.
+ */
+ struct ir3_instruction **phis;
+ /* current array element values (as block is processed). When
+ * the array phi's are resolved, it will contain the array state
+ * at exit of block, so successor blocks can use it to add their
+ * phi srcs.
+ */
struct ir3_instruction *arr[];
};
+/* track array assignments per basic block. When an array is read
+ * outside of the same basic block, we can use NIR's dominance-frontier
+ * information to figure out where phi nodes are needed.
+ */
+struct ir3_nir_block_data {
+ unsigned foo;
+ /* indexed by array-id (aid): */
+ struct ir3_array_value *arrs[];
+};
+
+static struct ir3_nir_block_data *
+get_block_data(struct ir3_compile *ctx, struct ir3_block *block)
+{
+ if (!block->bd) {
+ struct ir3_nir_block_data *bd = ralloc_size(ctx, sizeof(*bd) +
+ ((ctx->num_arrays + 1) * sizeof(bd->arrs[0])));
+ block->bd = bd;
+ }
+ return block->bd;
+}
+
static void
declare_var(struct ir3_compile *ctx, nir_variable *var)
{
unsigned length = glsl_get_length(var->type) * 4; /* always vec4, at least with ttn */
- struct ir3_array *arr = ralloc_size(ctx, sizeof(*arr) +
- (length * sizeof(arr->arr[0])));
+ struct ir3_array *arr = ralloc(ctx, struct ir3_array);
arr->length = length;
arr->aid = ++ctx->num_arrays;
- /* Some shaders end up reading array elements without first writing..
- * so initialize things to prevent null instr ptrs later:
- */
- for (unsigned i = 0; i < length; i++)
- arr->arr[i] = create_immed(ctx->block, 0);
_mesa_hash_table_insert(ctx->var_ht, var, arr);
}
-static struct ir3_array *
+static nir_block *
+nir_block_pred(nir_block *block)
+{
+ assert(block->predecessors->entries < 2);
+ if (block->predecessors->entries == 0)
+ return NULL;
+ return (nir_block *)_mesa_set_next_entry(block->predecessors, NULL)->key;
+}
+
+static struct ir3_array_value *
get_var(struct ir3_compile *ctx, nir_variable *var)
{
struct hash_entry *entry = _mesa_hash_table_search(ctx->var_ht, var);
- return entry->data;
+ struct ir3_block *block = ctx->block;
+ struct ir3_nir_block_data *bd = get_block_data(ctx, block);
+ struct ir3_array *arr = entry->data;
+
+ if (!bd->arrs[arr->aid]) {
+ struct ir3_array_value *av = ralloc_size(bd, sizeof(*av) +
+ (arr->length * sizeof(av->arr[0])));
+ struct ir3_array_value *defn = NULL;
+ nir_block *pred_block;
+
+ av->length = arr->length;
+ av->aid = arr->aid;
+
+ /* For loops, we have to consider that we have not visited some
+ * of the blocks who should feed into the phi (ie. back-edges in
+ * the cfg).. for example:
+ *
+ * loop {
+ * block { load_var; ... }
+ * if then block {} else block {}
+ * block { store_var; ... }
+ * if then block {} else block {}
+ * block {...}
+ * }
+ *
+ * We can skip the phi if we can chase the block predecessors
+ * until finding the block previously defining the array without
+ * crossing a block that has more than one predecessor.
+ *
+ * Otherwise create phi's and resolve them as a post-pass after
+ * all the blocks have been visited (to handle back-edges).
+ */
+
+ for (pred_block = block->nblock;
+ pred_block && (pred_block->predecessors->entries < 2) && !defn;
+ pred_block = nir_block_pred(pred_block)) {
+ struct ir3_block *pblock = get_block(ctx, pred_block);
+ struct ir3_nir_block_data *pbd = pblock->bd;
+ if (!pbd)
+ continue;
+ defn = pbd->arrs[arr->aid];
+ }
+
+ if (defn) {
+ /* only one possible definer: */
+ for (unsigned i = 0; i < arr->length; i++)
+ av->arr[i] = defn->arr[i];
+ } else if (pred_block) {
+ /* not the first block, and multiple potential definers: */
+ av->phis = ralloc_size(av, arr->length * sizeof(av->phis[0]));
+
+ for (unsigned i = 0; i < arr->length; i++) {
+ struct ir3_instruction *phi;
+
+ phi = ir3_instr_create2(block, -1, OPC_META_PHI,
+ 1 + ctx->impl->num_blocks);
+ ir3_reg_create(phi, 0, 0); /* dst */
+
+ /* phi's should go at head of block: */
+ list_delinit(&phi->node);
+ list_add(&phi->node, &block->instr_list);
+
+ av->phis[i] = av->arr[i] = phi;
+ }
+ } else {
+ /* Some shaders end up reading array elements without
+ * first writing.. so initialize things to prevent null
+ * instr ptrs later:
+ */
+ for (unsigned i = 0; i < arr->length; i++)
+ av->arr[i] = create_immed(block, 0);
+ }
+
+ bd->arrs[arr->aid] = av;
+ }
+
+ return bd->arrs[arr->aid];
+}
+
+static void
+add_array_phi_srcs(struct ir3_compile *ctx, nir_block *nblock,
+ struct ir3_array_value *av, BITSET_WORD *visited)
+{
+ struct ir3_block *block;
+ struct ir3_nir_block_data *bd;
+
+ if (BITSET_TEST(visited, nblock->index))
+ return;
+
+ BITSET_SET(visited, nblock->index);
+
+ block = get_block(ctx, nblock);
+ bd = block->bd;
+
+ if (bd && bd->arrs[av->aid]) {
+ struct ir3_array_value *dav = bd->arrs[av->aid];
+ for (unsigned i = 0; i < av->length; i++) {
+ ir3_reg_create(av->phis[i], 0, IR3_REG_SSA)->instr =
+ dav->arr[i];
+ }
+ } else {
+ /* didn't find defn, recurse predecessors: */
+ struct set_entry *entry;
+ set_foreach(nblock->predecessors, entry) {
+ add_array_phi_srcs(ctx, (nir_block *)entry->key, av, visited);
+ }
+ }
+}
+
+static void
+resolve_array_phis(struct ir3_compile *ctx, struct ir3_block *block)
+{
+ struct ir3_nir_block_data *bd = block->bd;
+ unsigned bitset_words = BITSET_WORDS(ctx->impl->num_blocks);
+
+ if (!bd)
+ return;
+
+ /* TODO use nir dom_frontier to help us with this? */
+
+ for (unsigned i = 1; i <= ctx->num_arrays; i++) {
+ struct ir3_array_value *av = bd->arrs[i];
+ BITSET_WORD visited[bitset_words];
+ struct set_entry *entry;
+
+ if (!(av && av->phis))
+ continue;
+
+ memset(visited, 0, sizeof(visited));
+ set_foreach(block->nblock->predecessors, entry) {
+ add_array_phi_srcs(ctx, (nir_block *)entry->key, av, visited);
+ }
+ }
}
/* allocate a n element value array (to be populated by caller) and
@@ -393,7 +569,8 @@ create_addr(struct ir3_block *block, struct ir3_instruction *src)
instr->regs[1]->flags |= IR3_REG_HALF;
instr = ir3_MOV(block, instr, TYPE_S16);
- instr->regs[0]->flags |= IR3_REG_ADDR | IR3_REG_HALF;
+ instr->regs[0]->num = regid(REG_A0, 0);
+ instr->regs[0]->flags |= IR3_REG_HALF;
instr->regs[1]->flags |= IR3_REG_HALF;
return instr;
@@ -419,6 +596,22 @@ get_addr(struct ir3_compile *ctx, struct ir3_instruction *src)
}
static struct ir3_instruction *
+get_predicate(struct ir3_compile *ctx, struct ir3_instruction *src)
+{
+ struct ir3_block *b = ctx->block;
+ struct ir3_instruction *cond;
+
+ /* NOTE: only cmps.*.* can write p0.x: */
+ cond = ir3_CMPS_S(b, src, 0, create_immed(b, 0), 0);
+ cond->cat2.condition = IR3_COND_NE;
+
+ /* condition always goes in predicate register: */
+ cond->regs[0]->num = regid(REG_P0, 0);
+
+ return cond;
+}
+
+static struct ir3_instruction *
create_uniform(struct ir3_compile *ctx, unsigned n)
{
struct ir3_instruction *mov;
@@ -461,7 +654,7 @@ create_collect(struct ir3_block *block, struct ir3_instruction **arr,
return NULL;
collect = ir3_instr_create2(block, -1, OPC_META_FI, 1 + arrsz);
- ir3_reg_create(collect, 0, 0);
+ ir3_reg_create(collect, 0, 0); /* dst */
for (unsigned i = 0; i < arrsz; i++)
ir3_reg_create(collect, 0, IR3_REG_SSA)->instr = arr[i];
@@ -597,6 +790,7 @@ create_frag_face(struct ir3_compile *ctx, unsigned comp)
compile_assert(ctx, !ctx->frag_face);
ctx->frag_face = create_input(block, NULL, 0);
+ ctx->frag_face->regs[0]->flags |= IR3_REG_HALF;
/* for faceness, we always get -1 or 0 (int).. but TGSI expects
* positive vs negative float.. and piglit further seems to
@@ -628,10 +822,10 @@ create_frag_face(struct ir3_compile *ctx, unsigned comp)
*/
static void
split_dest(struct ir3_block *block, struct ir3_instruction **dst,
- struct ir3_instruction *src)
+ struct ir3_instruction *src, unsigned n)
{
struct ir3_instruction *prev = NULL;
- for (int i = 0, j = 0; i < 4; i++) {
+ for (int i = 0, j = 0; i < n; i++) {
struct ir3_instruction *split =
ir3_instr_create(block, -1, OPC_META_FO);
ir3_reg_create(split, 0, IR3_REG_SSA);
@@ -882,9 +1076,15 @@ emit_alu(struct ir3_compile *ctx, nir_alu_instr *alu)
case nir_op_imax:
dst[0] = ir3_MAX_S(b, src[0], 0, src[1], 0);
break;
+ case nir_op_umax:
+ dst[0] = ir3_MAX_U(b, src[0], 0, src[1], 0);
+ break;
case nir_op_imin:
dst[0] = ir3_MIN_S(b, src[0], 0, src[1], 0);
break;
+ case nir_op_umin:
+ dst[0] = ir3_MIN_U(b, src[0], 0, src[1], 0);
+ break;
case nir_op_imul:
/*
* dst = (al * bl) + (ah * bl << 16) + (al * bh << 16)
@@ -1030,7 +1230,7 @@ emit_intrinisic_load_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
{
nir_deref_var *dvar = intr->variables[0];
nir_deref_array *darr = nir_deref_as_array(dvar->deref.child);
- struct ir3_array *arr = get_var(ctx, dvar->var);
+ struct ir3_array_value *arr = get_var(ctx, dvar->var);
compile_assert(ctx, dvar->deref.child &&
(dvar->deref.child->deref_type == nir_deref_type_array));
@@ -1070,7 +1270,7 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
{
nir_deref_var *dvar = intr->variables[0];
nir_deref_array *darr = nir_deref_as_array(dvar->deref.child);
- struct ir3_array *arr = get_var(ctx, dvar->var);
+ struct ir3_array_value *arr = get_var(ctx, dvar->var);
struct ir3_instruction **src;
compile_assert(ctx, dvar->deref.child &&
@@ -1140,8 +1340,8 @@ static void add_sysval_input(struct ir3_compile *ctx, unsigned name,
so->inputs[n].interpolate = TGSI_INTERPOLATE_CONSTANT;
so->total_in++;
- ctx->block->ninputs = MAX2(ctx->block->ninputs, r + 1);
- ctx->block->inputs[r] = instr;
+ ctx->ir->ninputs = MAX2(ctx->ir->ninputs, r + 1);
+ ctx->ir->inputs[r] = instr;
}
static void
@@ -1154,18 +1354,18 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
if (info->has_dest) {
dst = get_dst(ctx, &intr->dest, intr->num_components);
+ } else {
+ dst = NULL;
}
switch (intr->intrinsic) {
case nir_intrinsic_load_uniform:
- compile_assert(ctx, intr->const_index[1] == 1);
for (int i = 0; i < intr->num_components; i++) {
unsigned n = idx * 4 + i;
dst[i] = create_uniform(ctx, n);
}
break;
case nir_intrinsic_load_uniform_indirect:
- compile_assert(ctx, intr->const_index[1] == 1);
src = get_src(ctx, &intr->src[0]);
for (int i = 0; i < intr->num_components; i++) {
unsigned n = idx * 4 + i;
@@ -1178,21 +1378,20 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
emit_intrinsic_load_ubo(ctx, intr, dst);
break;
case nir_intrinsic_load_input:
- compile_assert(ctx, intr->const_index[1] == 1);
for (int i = 0; i < intr->num_components; i++) {
unsigned n = idx * 4 + i;
- dst[i] = b->inputs[n];
+ dst[i] = ctx->ir->inputs[n];
}
break;
case nir_intrinsic_load_input_indirect:
- compile_assert(ctx, intr->const_index[1] == 1);
src = get_src(ctx, &intr->src[0]);
struct ir3_instruction *collect =
- create_collect(b, b->inputs, b->ninputs);
+ create_collect(b, ctx->ir->inputs, ctx->ir->ninputs);
struct ir3_instruction *addr = get_addr(ctx, src[0]);
for (int i = 0; i < intr->num_components; i++) {
unsigned n = idx * 4 + i;
- dst[i] = create_indirect_load(ctx, b->ninputs, n, addr, collect);
+ dst[i] = create_indirect_load(ctx, ctx->ir->ninputs,
+ n, addr, collect);
}
break;
case nir_intrinsic_load_var:
@@ -1202,11 +1401,10 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
emit_intrinisic_store_var(ctx, intr);
break;
case nir_intrinsic_store_output:
- compile_assert(ctx, intr->const_index[1] == 1);
src = get_src(ctx, &intr->src[0]);
for (int i = 0; i < intr->num_components; i++) {
unsigned n = idx * 4 + i;
- b->outputs[n] = src[i];
+ ctx->ir->outputs[n] = src[i];
}
break;
case nir_intrinsic_load_base_vertex:
@@ -1248,6 +1446,7 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
cond = create_immed(b, 1);
}
+ /* NOTE: only cmps.*.* can write p0.x: */
cond = ir3_CMPS_S(b, cond, 0, create_immed(b, 0), 0);
cond->cat2.condition = IR3_COND_NE;
@@ -1255,6 +1454,7 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
cond->regs[0]->num = regid(REG_P0, 0);
kill = ir3_KILL(b, cond, 0);
+ array_insert(ctx->ir->predicates, kill);
ctx->kill[ctx->kill_count++] = kill;
ctx->so->has_kill = true;
@@ -1318,6 +1518,8 @@ tex_info(nir_tex_instr *tex, unsigned *flagsp, unsigned *coordsp)
coords = 3;
flags |= IR3_INSTR_3D;
break;
+ default:
+ unreachable("bad sampler_dim");
}
if (tex->is_shadow)
@@ -1340,7 +1542,10 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
unsigned i, coords, flags;
unsigned nsrc0 = 0, nsrc1 = 0;
type_t type;
- opc_t opc;
+ opc_t opc = 0;
+
+ coord = off = ddx = ddy = NULL;
+ lod = proj = compare = NULL;
/* TODO: might just be one component for gathers? */
dst = get_dst(ctx, &tex->dest, 4);
@@ -1400,11 +1605,12 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
tex_info(tex, &flags, &coords);
/* scale up integer coords for TXF based on the LOD */
- if (opc == OPC_ISAML) {
+ if (ctx->unminify_coords && (opc == OPC_ISAML)) {
assert(has_lod);
for (i = 0; i < coords; i++)
coord[i] = ir3_SHL_B(b, coord[i], 0, lod, 0);
}
+
/*
* lay out the first argument in the proper order:
* - actual coordinates first
@@ -1484,6 +1690,8 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
case nir_type_bool:
type = TYPE_U32;
break;
+ default:
+ unreachable("bad dest_type");
}
sam = ir3_SAM(b, opc, type, TGSI_WRITEMASK_XYZW,
@@ -1491,7 +1699,7 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
create_collect(b, src0, nsrc0),
create_collect(b, src1, nsrc1));
- split_dest(b, dst, sam);
+ split_dest(b, dst, sam, 4);
}
static void
@@ -1508,7 +1716,7 @@ emit_tex_query_levels(struct ir3_compile *ctx, nir_tex_instr *tex)
/* even though there is only one component, since it ends
* up in .z rather than .x, we need a split_dest()
*/
- split_dest(b, dst, sam);
+ split_dest(b, dst, sam, 3);
/* The # of levels comes from getinfo.z. We need to add 1 to it, since
* the value in TEX_CONST_0 is zero-based.
@@ -1536,7 +1744,7 @@ emit_tex_txs(struct ir3_compile *ctx, nir_tex_instr *tex)
sam = ir3_SAM(b, OPC_GETSIZE, TYPE_U32, TGSI_WRITEMASK_XYZW, flags,
tex->sampler_index, tex->sampler_index, lod, NULL);
- split_dest(b, dst, sam);
+ split_dest(b, dst, sam, 4);
/* Array size actually ends up in .w rather than .z. This doesn't
* matter for miplevel 0, but for higher mips the value in z is
@@ -1553,6 +1761,71 @@ emit_tex_txs(struct ir3_compile *ctx, nir_tex_instr *tex)
}
static void
+emit_phi(struct ir3_compile *ctx, nir_phi_instr *nphi)
+{
+ struct ir3_instruction *phi, **dst;
+
+ /* NOTE: phi's should be lowered to scalar at this point */
+ compile_assert(ctx, nphi->dest.ssa.num_components == 1);
+
+ dst = get_dst(ctx, &nphi->dest, 1);
+
+ phi = ir3_instr_create2(ctx->block, -1, OPC_META_PHI,
+ 1 + exec_list_length(&nphi->srcs));
+ ir3_reg_create(phi, 0, 0); /* dst */
+ phi->phi.nphi = nphi;
+
+ dst[0] = phi;
+}
+
+/* phi instructions are left partially constructed. We don't resolve
+ * their srcs until the end of the block, since (eg. loops) one of
+ * the phi's srcs might be defined after the phi due to back edges in
+ * the CFG.
+ */
+static void
+resolve_phis(struct ir3_compile *ctx, struct ir3_block *block)
+{
+ list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+ nir_phi_instr *nphi;
+
+ /* phi's only come at start of block: */
+ if (!(is_meta(instr) && (instr->opc == OPC_META_PHI)))
+ break;
+
+ if (!instr->phi.nphi)
+ break;
+
+ nphi = instr->phi.nphi;
+ instr->phi.nphi = NULL;
+
+ foreach_list_typed(nir_phi_src, nsrc, node, &nphi->srcs) {
+ struct ir3_instruction *src = get_src(ctx, &nsrc->src)[0];
+ ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
+ }
+ }
+
+ resolve_array_phis(ctx, block);
+}
+
+static void
+emit_jump(struct ir3_compile *ctx, nir_jump_instr *jump)
+{
+ switch (jump->type) {
+ case nir_jump_break:
+ case nir_jump_continue:
+ /* I *think* we can simply just ignore this, and use the
+ * successor block link to figure out where we need to
+ * jump to for break/continue
+ */
+ break;
+ default:
+ compile_error(ctx, "Unhandled NIR jump type: %d\n", jump->type);
+ break;
+ }
+}
+
+static void
emit_instr(struct ir3_compile *ctx, nir_instr *instr)
{
switch (instr->type) {
@@ -1585,45 +1858,112 @@ emit_instr(struct ir3_compile *ctx, nir_instr *instr)
}
break;
}
- case nir_instr_type_call:
- case nir_instr_type_jump:
case nir_instr_type_phi:
+ emit_phi(ctx, nir_instr_as_phi(instr));
+ break;
+ case nir_instr_type_jump:
+ emit_jump(ctx, nir_instr_as_jump(instr));
+ break;
+ case nir_instr_type_call:
case nir_instr_type_parallel_copy:
compile_error(ctx, "Unhandled NIR instruction type: %d\n", instr->type);
break;
}
}
+static struct ir3_block *
+get_block(struct ir3_compile *ctx, nir_block *nblock)
+{
+ struct ir3_block *block;
+ struct hash_entry *entry;
+ entry = _mesa_hash_table_search(ctx->block_ht, nblock);
+ if (entry)
+ return entry->data;
+
+ block = ir3_block_create(ctx->ir);
+ block->nblock = nblock;
+ _mesa_hash_table_insert(ctx->block_ht, nblock, block);
+
+ return block;
+}
+
static void
-emit_block(struct ir3_compile *ctx, nir_block *block)
+emit_block(struct ir3_compile *ctx, nir_block *nblock)
{
- nir_foreach_instr(block, instr) {
+ struct ir3_block *block = get_block(ctx, nblock);
+
+ for (int i = 0; i < ARRAY_SIZE(block->successors); i++) {
+ if (nblock->successors[i]) {
+ block->successors[i] =
+ get_block(ctx, nblock->successors[i]);
+ }
+ }
+
+ ctx->block = block;
+ list_addtail(&block->node, &ctx->ir->block_list);
+
+ nir_foreach_instr(nblock, instr) {
emit_instr(ctx, instr);
if (ctx->error)
return;
}
}
+static void emit_cf_list(struct ir3_compile *ctx, struct exec_list *list);
+
static void
-emit_function(struct ir3_compile *ctx, nir_function_impl *impl)
+emit_if(struct ir3_compile *ctx, nir_if *nif)
+{
+ struct ir3_instruction *condition = get_src(ctx, &nif->condition)[0];
+
+ ctx->block->condition =
+ get_predicate(ctx, ir3_b2n(condition->block, condition));
+
+ emit_cf_list(ctx, &nif->then_list);
+ emit_cf_list(ctx, &nif->else_list);
+}
+
+static void
+emit_loop(struct ir3_compile *ctx, nir_loop *nloop)
+{
+ emit_cf_list(ctx, &nloop->body);
+}
+
+static void
+emit_cf_list(struct ir3_compile *ctx, struct exec_list *list)
{
- foreach_list_typed(nir_cf_node, node, node, &impl->body) {
+ foreach_list_typed(nir_cf_node, node, node, list) {
switch (node->type) {
case nir_cf_node_block:
emit_block(ctx, nir_cf_node_as_block(node));
break;
case nir_cf_node_if:
+ emit_if(ctx, nir_cf_node_as_if(node));
+ break;
case nir_cf_node_loop:
+ emit_loop(ctx, nir_cf_node_as_loop(node));
+ break;
case nir_cf_node_function:
compile_error(ctx, "TODO\n");
break;
}
- if (ctx->error)
- return;
}
}
static void
+emit_function(struct ir3_compile *ctx, nir_function_impl *impl)
+{
+ emit_cf_list(ctx, &impl->body);
+ emit_block(ctx, impl->end_block);
+
+ /* at this point, we should have a single empty block,
+ * into which we emit the 'end' instruction.
+ */
+ compile_assert(ctx, list_empty(&ctx->block->instr_list));
+ ir3_END(ctx->block);
+}
+
+static void
setup_input(struct ir3_compile *ctx, nir_variable *in)
{
struct ir3_shader_variant *so = ctx->so;
@@ -1708,7 +2048,7 @@ setup_input(struct ir3_compile *ctx, nir_variable *in)
instr = create_input(ctx->block, NULL, idx);
}
- ctx->block->inputs[idx] = instr;
+ ctx->ir->inputs[idx] = instr;
}
if (so->inputs[n].bary || (ctx->so->type == SHADER_VERTEX)) {
@@ -1775,15 +2115,26 @@ setup_output(struct ir3_compile *ctx, nir_variable *out)
for (int i = 0; i < ncomp; i++) {
unsigned idx = (n * 4) + i;
- ctx->block->outputs[idx] = create_immed(ctx->block, fui(0.0));
+ ctx->ir->outputs[idx] = create_immed(ctx->block, fui(0.0));
}
}
static void
emit_instructions(struct ir3_compile *ctx)
{
- unsigned ninputs = exec_list_length(&ctx->s->inputs) * 4;
- unsigned noutputs = exec_list_length(&ctx->s->outputs) * 4;
+ unsigned ninputs, noutputs;
+ nir_function_impl *fxn = NULL;
+
+ /* Find the main function: */
+ nir_foreach_overload(ctx->s, overload) {
+ compile_assert(ctx, strcmp(overload->function->name, "main") == 0);
+ compile_assert(ctx, overload->impl);
+ fxn = overload->impl;
+ break;
+ }
+
+ ninputs = exec_list_length(&ctx->s->inputs) * 4;
+ noutputs = exec_list_length(&ctx->s->outputs) * 4;
/* we need to allocate big enough outputs array so that
* we can stuff the kill's at the end. Likewise for vtx
@@ -1795,12 +2146,17 @@ emit_instructions(struct ir3_compile *ctx)
ninputs += 8;
}
- ctx->block = ir3_block_create(ctx->ir, 0, ninputs, noutputs);
+ ctx->ir = ir3_create(ctx->compiler, ninputs, noutputs);
+
+ /* Create inputs in first block: */
+ ctx->block = get_block(ctx, fxn->start_block);
+ ctx->in_block = ctx->block;
+ list_addtail(&ctx->block->node, &ctx->ir->block_list);
if (ctx->so->type == SHADER_FRAGMENT) {
- ctx->block->noutputs -= ARRAY_SIZE(ctx->kill);
+ ctx->ir->noutputs -= ARRAY_SIZE(ctx->kill);
} else if (ctx->so->type == SHADER_VERTEX) {
- ctx->block->ninputs -= 8;
+ ctx->ir->ninputs -= 8;
}
/* for fragment shader, we have a single input register (usually
@@ -1831,13 +2187,12 @@ emit_instructions(struct ir3_compile *ctx)
declare_var(ctx, var);
}
- /* Find the main function and emit the body: */
- nir_foreach_overload(ctx->s, overload) {
- compile_assert(ctx, strcmp(overload->function->name, "main") == 0);
- compile_assert(ctx, overload->impl);
- emit_function(ctx, overload->impl);
- if (ctx->error)
- return;
+ /* And emit the body: */
+ ctx->impl = fxn;
+ emit_function(ctx, fxn);
+
+ list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
+ resolve_phis(ctx, block);
}
}
@@ -1850,12 +2205,12 @@ static void
fixup_frag_inputs(struct ir3_compile *ctx)
{
struct ir3_shader_variant *so = ctx->so;
- struct ir3_block *block = ctx->block;
+ struct ir3 *ir = ctx->ir;
struct ir3_instruction **inputs;
struct ir3_instruction *instr;
int n, regid = 0;
- block->ninputs = 0;
+ ir->ninputs = 0;
n = 4; /* always have frag_pos */
n += COND(so->frag_face, 4);
@@ -1867,15 +2222,15 @@ fixup_frag_inputs(struct ir3_compile *ctx)
/* this ultimately gets assigned to hr0.x so doesn't conflict
* with frag_coord/frag_pos..
*/
- inputs[block->ninputs++] = ctx->frag_face;
+ inputs[ir->ninputs++] = ctx->frag_face;
ctx->frag_face->regs[0]->num = 0;
/* remaining channels not used, but let's avoid confusing
* other parts that expect inputs to come in groups of vec4
*/
- inputs[block->ninputs++] = NULL;
- inputs[block->ninputs++] = NULL;
- inputs[block->ninputs++] = NULL;
+ inputs[ir->ninputs++] = NULL;
+ inputs[ir->ninputs++] = NULL;
+ inputs[ir->ninputs++] = NULL;
}
/* since we don't know where to set the regid for frag_coord,
@@ -1889,63 +2244,45 @@ fixup_frag_inputs(struct ir3_compile *ctx)
ctx->frag_coord[2]->regs[0]->num = regid++;
ctx->frag_coord[3]->regs[0]->num = regid++;
- inputs[block->ninputs++] = ctx->frag_coord[0];
- inputs[block->ninputs++] = ctx->frag_coord[1];
- inputs[block->ninputs++] = ctx->frag_coord[2];
- inputs[block->ninputs++] = ctx->frag_coord[3];
+ inputs[ir->ninputs++] = ctx->frag_coord[0];
+ inputs[ir->ninputs++] = ctx->frag_coord[1];
+ inputs[ir->ninputs++] = ctx->frag_coord[2];
+ inputs[ir->ninputs++] = ctx->frag_coord[3];
}
/* we always have frag_pos: */
so->pos_regid = regid;
/* r0.x */
- instr = create_input(block, NULL, block->ninputs);
+ instr = create_input(ctx->in_block, NULL, ir->ninputs);
instr->regs[0]->num = regid++;
- inputs[block->ninputs++] = instr;
+ inputs[ir->ninputs++] = instr;
ctx->frag_pos->regs[1]->instr = instr;
/* r0.y */
- instr = create_input(block, NULL, block->ninputs);
+ instr = create_input(ctx->in_block, NULL, ir->ninputs);
instr->regs[0]->num = regid++;
- inputs[block->ninputs++] = instr;
+ inputs[ir->ninputs++] = instr;
ctx->frag_pos->regs[2]->instr = instr;
- block->inputs = inputs;
-}
-
-static void
-compile_dump(struct ir3_compile *ctx)
-{
- const char *name = (ctx->so->type == SHADER_VERTEX) ? "vert" : "frag";
- static unsigned n = 0;
- char fname[16];
- FILE *f;
- snprintf(fname, sizeof(fname), "%s-%04u.dot", name, n++);
- f = fopen(fname, "w");
- if (!f)
- return;
- ir3_block_depth(ctx->block);
- ir3_dump(ctx->ir, name, ctx->block, f);
- fclose(f);
+ ir->inputs = inputs;
}
int
-ir3_compile_shader_nir(struct ir3_shader_variant *so,
- const struct tgsi_token *tokens, struct ir3_shader_key key)
+ir3_compile_shader_nir(struct ir3_compiler *compiler,
+ struct ir3_shader_variant *so,
+ const struct tgsi_token *tokens,
+ struct ir3_shader_key key)
{
struct ir3_compile *ctx;
- struct ir3_block *block;
+ struct ir3 *ir;
struct ir3_instruction **inputs;
unsigned i, j, actual_in;
int ret = 0, max_bary;
assert(!so->ir);
- so->ir = ir3_create();
-
- assert(so->ir);
-
- ctx = compile_init(so, tokens);
+ ctx = compile_init(compiler, so, tokens);
if (!ctx) {
DBG("INIT failed!");
ret = -1;
@@ -1960,11 +2297,10 @@ ir3_compile_shader_nir(struct ir3_shader_variant *so,
goto out;
}
- block = ctx->block;
- so->ir->block = block;
+ ir = so->ir = ctx->ir;
/* keep track of the inputs from TGSI perspective.. */
- inputs = block->inputs;
+ inputs = ir->inputs;
/* but fixup actual inputs for frag shader: */
if (so->type == SHADER_FRAGMENT)
@@ -1981,26 +2317,39 @@ ir3_compile_shader_nir(struct ir3_shader_variant *so,
(name == TGSI_SEMANTIC_PSIZE))) {
if (i != j) {
so->outputs[j] = so->outputs[i];
- block->outputs[(j*4)+0] = block->outputs[(i*4)+0];
- block->outputs[(j*4)+1] = block->outputs[(i*4)+1];
- block->outputs[(j*4)+2] = block->outputs[(i*4)+2];
- block->outputs[(j*4)+3] = block->outputs[(i*4)+3];
+ ir->outputs[(j*4)+0] = ir->outputs[(i*4)+0];
+ ir->outputs[(j*4)+1] = ir->outputs[(i*4)+1];
+ ir->outputs[(j*4)+2] = ir->outputs[(i*4)+2];
+ ir->outputs[(j*4)+3] = ir->outputs[(i*4)+3];
}
j++;
}
}
so->outputs_count = j;
- block->noutputs = j * 4;
+ ir->noutputs = j * 4;
}
/* if we want half-precision outputs, mark the output registers
* as half:
*/
if (key.half_precision) {
- for (i = 0; i < block->noutputs; i++) {
- if (!block->outputs[i])
+ for (i = 0; i < ir->noutputs; i++) {
+ struct ir3_instruction *out = ir->outputs[i];
+ if (!out)
continue;
- block->outputs[i]->regs[0]->flags |= IR3_REG_HALF;
+ out->regs[0]->flags |= IR3_REG_HALF;
+ /* output could be a fanout (ie. texture fetch output)
+ * in which case we need to propagate the half-reg flag
+ * up to the definer so that RA sees it:
+ */
+ if (is_meta(out) && (out->opc == OPC_META_FO)) {
+ out = out->regs[1]->instr;
+ out->regs[0]->flags |= IR3_REG_HALF;
+ }
+
+ if (out->category == 1) {
+ out->cat1.dst_type = half_type(out->cat1.dst_type);
+ }
}
}
@@ -2010,42 +2359,34 @@ ir3_compile_shader_nir(struct ir3_shader_variant *so,
*/
if (so->type == SHADER_FRAGMENT) {
for (i = 0; i < ctx->kill_count; i++)
- block->outputs[block->noutputs++] = ctx->kill[i];
+ ir->outputs[ir->noutputs++] = ctx->kill[i];
}
- if (fd_mesa_debug & FD_DBG_OPTDUMP)
- compile_dump(ctx);
-
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
printf("BEFORE CP:\n");
- ir3_dump_instr_list(block->head);
+ ir3_print(ir);
}
- ir3_block_depth(block);
-
- ir3_block_cp(block);
+ ir3_cp(ir);
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
printf("BEFORE GROUPING:\n");
- ir3_dump_instr_list(block->head);
+ ir3_print(ir);
}
/* Group left/right neighbors, inserting mov's where needed to
* solve conflicts:
*/
- ir3_block_group(block);
-
- if (fd_mesa_debug & FD_DBG_OPTDUMP)
- compile_dump(ctx);
+ ir3_group(ir);
- ir3_block_depth(block);
+ ir3_depth(ir);
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
printf("AFTER DEPTH:\n");
- ir3_dump_instr_list(block->head);
+ ir3_print(ir);
}
- ret = ir3_block_sched(block);
+ ret = ir3_sched(ir);
if (ret) {
DBG("SCHED failed!");
goto out;
@@ -2053,10 +2394,10 @@ ir3_compile_shader_nir(struct ir3_shader_variant *so,
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
printf("AFTER SCHED:\n");
- ir3_dump_instr_list(block->head);
+ ir3_print(ir);
}
- ret = ir3_block_ra(block, so->type, so->frag_coord, so->frag_face);
+ ret = ir3_ra(ir, so->type, so->frag_coord, so->frag_face);
if (ret) {
DBG("RA failed!");
goto out;
@@ -2064,14 +2405,19 @@ ir3_compile_shader_nir(struct ir3_shader_variant *so,
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
printf("AFTER RA:\n");
- ir3_dump_instr_list(block->head);
+ ir3_print(ir);
}
- ir3_block_legalize(block, &so->has_samp, &max_bary);
+ ir3_legalize(ir, &so->has_samp, &max_bary);
+
+ if (fd_mesa_debug & FD_DBG_OPTMSGS) {
+ printf("AFTER LEGALIZE:\n");
+ ir3_print(ir);
+ }
/* fixup input/outputs: */
for (i = 0; i < so->outputs_count; i++) {
- so->outputs[i].regid = block->outputs[i*4]->regs[0]->num;
+ so->outputs[i].regid = ir->outputs[i*4]->regs[0]->num;
/* preserve hack for depth output.. tgsi writes depth to .z,
* but what we give the hw is the scalar register:
*/
@@ -2111,7 +2457,8 @@ ir3_compile_shader_nir(struct ir3_shader_variant *so,
out:
if (ret) {
- ir3_destroy(so->ir);
+ if (so->ir)
+ ir3_destroy(so->ir);
so->ir = NULL;
}
compile_free(ctx);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c
index fa7d363be7b..8c7c80f7aae 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c
@@ -41,7 +41,7 @@ static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags)
struct ir3_register *dst = instr->regs[0];
struct ir3_register *src = instr->regs[1];
struct ir3_instruction *src_instr = ssa(src);
- if (dst->flags & (IR3_REG_ADDR | IR3_REG_RELATIV))
+ if (dst->flags & IR3_REG_RELATIV)
return false;
if (src->flags & IR3_REG_RELATIV)
return false;
@@ -54,6 +54,13 @@ static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags)
/* TODO: remove this hack: */
if (is_meta(src_instr) && (src_instr->opc == OPC_META_FO))
return false;
+ /* TODO: we currently don't handle left/right neighbors
+ * very well when inserting parallel-copies into phi..
+ * to avoid problems don't eliminate a mov coming out
+ * of phi..
+ */
+ if (is_meta(src_instr) && (src_instr->opc == OPC_META_PHI))
+ return false;
return true;
}
return false;
@@ -354,13 +361,6 @@ instr_cp(struct ir3_instruction *instr, unsigned *flags)
{
struct ir3_register *reg;
- /* stay within the block.. don't try to operate across
- * basic block boundaries or we'll have problems when
- * dealing with multiple basic blocks:
- */
- if (is_meta(instr) && (instr->opc == OPC_META_INPUT))
- return instr;
-
if (is_eligible_mov(instr, !!flags)) {
struct ir3_register *reg = instr->regs[1];
struct ir3_instruction *src_instr = ssa(reg);
@@ -394,22 +394,22 @@ instr_cp(struct ir3_instruction *instr, unsigned *flags)
return instr;
}
-static void block_cp(struct ir3_block *block)
+void
+ir3_cp(struct ir3 *ir)
{
- unsigned i;
+ ir3_clear_mark(ir);
- for (i = 0; i < block->noutputs; i++) {
- if (block->outputs[i]) {
+ for (unsigned i = 0; i < ir->noutputs; i++) {
+ if (ir->outputs[i]) {
struct ir3_instruction *out =
- instr_cp(block->outputs[i], NULL);
+ instr_cp(ir->outputs[i], NULL);
- block->outputs[i] = out;
+ ir->outputs[i] = out;
}
}
-}
-void ir3_block_cp(struct ir3_block *block)
-{
- ir3_clear_mark(block->shader);
- block_cp(block);
+ list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+ if (block->condition)
+ block->condition = instr_cp(block->condition, NULL);
+ }
}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_depth.c b/src/gallium/drivers/freedreno/ir3/ir3_depth.c
index b899c66b37e..3a108243479 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_depth.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_depth.c
@@ -84,25 +84,25 @@ int ir3_delayslots(struct ir3_instruction *assigner,
}
}
-static void insert_by_depth(struct ir3_instruction *instr)
+void
+ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list)
{
- struct ir3_block *block = instr->block;
- struct ir3_instruction *n = block->head;
- struct ir3_instruction *p = NULL;
-
- while (n && (n != instr) && (n->depth > instr->depth)) {
- p = n;
- n = n->next;
+ /* remove from existing spot in list: */
+ list_delinit(&instr->node);
+
+ /* find where to re-insert instruction: */
+ list_for_each_entry (struct ir3_instruction, pos, list, node) {
+ if (pos->depth > instr->depth) {
+ list_add(&instr->node, &pos->node);
+ return;
+ }
}
-
- instr->next = n;
- if (p)
- p->next = instr;
- else
- block->head = instr;
+ /* if we get here, we didn't find an insertion spot: */
+ list_addtail(&instr->node, list);
}
-static void ir3_instr_depth(struct ir3_instruction *instr)
+static void
+ir3_instr_depth(struct ir3_instruction *instr)
{
struct ir3_instruction *src;
@@ -123,47 +123,54 @@ static void ir3_instr_depth(struct ir3_instruction *instr)
instr->depth = MAX2(instr->depth, sd);
}
- /* meta-instructions don't add cycles, other than PHI.. which
- * might translate to a real instruction..
- *
- * well, not entirely true, fan-in/out, etc might need to need
- * to generate some extra mov's in edge cases, etc.. probably
- * we might want to do depth calculation considering the worst
- * case for these??
- */
if (!is_meta(instr))
instr->depth++;
- insert_by_depth(instr);
+ ir3_insert_by_depth(instr, &instr->block->instr_list);
+}
+
+static void
+remove_unused_by_block(struct ir3_block *block)
+{
+ list_for_each_entry_safe (struct ir3_instruction, instr, &block->instr_list, node) {
+ if (!ir3_instr_check_mark(instr)) {
+ if (is_flow(instr) && (instr->opc == OPC_END))
+ continue;
+ /* mark it, in case it is input, so we can
+ * remove unused inputs:
+ */
+ instr->depth = DEPTH_UNUSED;
+ /* and remove from instruction list: */
+ list_delinit(&instr->node);
+ }
+ }
}
-void ir3_block_depth(struct ir3_block *block)
+void
+ir3_depth(struct ir3 *ir)
{
unsigned i;
- block->head = NULL;
+ ir3_clear_mark(ir);
+ for (i = 0; i < ir->noutputs; i++)
+ if (ir->outputs[i])
+ ir3_instr_depth(ir->outputs[i]);
- ir3_clear_mark(block->shader);
- for (i = 0; i < block->noutputs; i++)
- if (block->outputs[i])
- ir3_instr_depth(block->outputs[i]);
+ /* We also need to account for if-condition: */
+ list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+ if (block->condition)
+ ir3_instr_depth(block->condition);
+ }
/* mark un-used instructions: */
- for (i = 0; i < block->shader->instrs_count; i++) {
- struct ir3_instruction *instr = block->shader->instrs[i];
-
- /* just consider instructions within this block: */
- if (instr->block != block)
- continue;
-
- if (!ir3_instr_check_mark(instr))
- instr->depth = DEPTH_UNUSED;
+ list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+ remove_unused_by_block(block);
}
/* cleanup unused inputs: */
- for (i = 0; i < block->ninputs; i++) {
- struct ir3_instruction *in = block->inputs[i];
+ for (i = 0; i < ir->ninputs; i++) {
+ struct ir3_instruction *in = ir->inputs[i];
if (in && (in->depth == DEPTH_UNUSED))
- block->inputs[i] = NULL;
+ ir->inputs[i] = NULL;
}
}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_dump.c b/src/gallium/drivers/freedreno/ir3/ir3_dump.c
deleted file mode 100644
index 1614d637b13..00000000000
--- a/src/gallium/drivers/freedreno/ir3/ir3_dump.c
+++ /dev/null
@@ -1,456 +0,0 @@
-/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
-
-/*
- * Copyright (C) 2014 Rob Clark <[email protected]>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Authors:
- * Rob Clark <[email protected]>
- */
-
-#include <stdarg.h>
-
-#include "ir3.h"
-
-#define PTRID(x) ((unsigned long)(x))
-
-struct ir3_dump_ctx {
- FILE *f;
- bool verbose;
-};
-
-static void dump_instr_name(struct ir3_dump_ctx *ctx,
- struct ir3_instruction *instr)
-{
- /* for debugging: */
- if (ctx->verbose) {
-#ifdef DEBUG
- fprintf(ctx->f, "%04u:", instr->serialno);
-#endif
- fprintf(ctx->f, "%03u: ", instr->depth);
- }
-
- if (instr->flags & IR3_INSTR_SY)
- fprintf(ctx->f, "(sy)");
- if (instr->flags & IR3_INSTR_SS)
- fprintf(ctx->f, "(ss)");
-
- if (is_meta(instr)) {
- switch(instr->opc) {
- case OPC_META_PHI:
- fprintf(ctx->f, "&#934;");
- break;
- default:
- /* shouldn't hit here.. just for debugging: */
- switch (instr->opc) {
- case OPC_META_INPUT: fprintf(ctx->f, "_meta:in"); break;
- case OPC_META_OUTPUT: fprintf(ctx->f, "_meta:out"); break;
- case OPC_META_FO: fprintf(ctx->f, "_meta:fo"); break;
- case OPC_META_FI: fprintf(ctx->f, "_meta:fi"); break;
- case OPC_META_FLOW: fprintf(ctx->f, "_meta:flow"); break;
-
- default: fprintf(ctx->f, "_meta:%d", instr->opc); break;
- }
- break;
- }
- } else if (instr->category == 1) {
- static const char *type[] = {
- [TYPE_F16] = "f16",
- [TYPE_F32] = "f32",
- [TYPE_U16] = "u16",
- [TYPE_U32] = "u32",
- [TYPE_S16] = "s16",
- [TYPE_S32] = "s32",
- [TYPE_U8] = "u8",
- [TYPE_S8] = "s8",
- };
- if (instr->cat1.src_type == instr->cat1.dst_type)
- fprintf(ctx->f, "mov");
- else
- fprintf(ctx->f, "cov");
- fprintf(ctx->f, ".%s%s", type[instr->cat1.src_type], type[instr->cat1.dst_type]);
- } else {
- fprintf(ctx->f, "%s", ir3_instr_name(instr));
- if (instr->flags & IR3_INSTR_3D)
- fprintf(ctx->f, ".3d");
- if (instr->flags & IR3_INSTR_A)
- fprintf(ctx->f, ".a");
- if (instr->flags & IR3_INSTR_O)
- fprintf(ctx->f, ".o");
- if (instr->flags & IR3_INSTR_P)
- fprintf(ctx->f, ".p");
- if (instr->flags & IR3_INSTR_S)
- fprintf(ctx->f, ".s");
- if (instr->flags & IR3_INSTR_S2EN)
- fprintf(ctx->f, ".s2en");
- }
-}
-
-static void dump_reg_name(struct ir3_dump_ctx *ctx,
- struct ir3_register *reg, bool followssa)
-{
- if ((reg->flags & (IR3_REG_FABS | IR3_REG_SABS)) &&
- (reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)))
- fprintf(ctx->f, "(absneg)");
- else if (reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT))
- fprintf(ctx->f, "(neg)");
- else if (reg->flags & (IR3_REG_FABS | IR3_REG_SABS))
- fprintf(ctx->f, "(abs)");
-
- if (reg->flags & IR3_REG_IMMED) {
- fprintf(ctx->f, "imm[%f,%d,0x%x]", reg->fim_val, reg->iim_val, reg->iim_val);
- } else if (reg->flags & IR3_REG_SSA) {
- if (ctx->verbose) {
- fprintf(ctx->f, "_");
- if (followssa) {
- fprintf(ctx->f, "[");
- dump_instr_name(ctx, reg->instr);
- fprintf(ctx->f, "]");
- }
- }
- } else if (reg->flags & IR3_REG_RELATIV) {
- if (reg->flags & IR3_REG_HALF)
- fprintf(ctx->f, "h");
- if (reg->flags & IR3_REG_CONST)
- fprintf(ctx->f, "c<a0.x + %u>", reg->num);
- else
- fprintf(ctx->f, "\x1b[0;31mr<a0.x + %u>\x1b[0m (%u)", reg->num, reg->size);
- } else {
- if (reg->flags & IR3_REG_HALF)
- fprintf(ctx->f, "h");
- if (reg->flags & IR3_REG_CONST)
- fprintf(ctx->f, "c%u.%c", reg_num(reg), "xyzw"[reg_comp(reg)]);
- else
- fprintf(ctx->f, "\x1b[0;31mr%u.%c\x1b[0m", reg_num(reg), "xyzw"[reg_comp(reg)]);
- }
-}
-
-static void ir3_instr_dump(struct ir3_dump_ctx *ctx,
- struct ir3_instruction *instr);
-static void ir3_block_dump(struct ir3_dump_ctx *ctx,
- struct ir3_block *block, const char *name);
-
-static void dump_instr(struct ir3_dump_ctx *ctx,
- struct ir3_instruction *instr)
-{
- /* if we've already visited this instruction, bail now: */
- if (ir3_instr_check_mark(instr))
- return;
-
- /* some meta-instructions need to be handled specially: */
- if (is_meta(instr)) {
- if ((instr->opc == OPC_META_FO) ||
- (instr->opc == OPC_META_FI)) {
- struct ir3_instruction *src;
- foreach_ssa_src(src, instr)
- dump_instr(ctx, src);
- } else if (instr->opc == OPC_META_FLOW) {
- struct ir3_register *reg = instr->regs[1];
- ir3_block_dump(ctx, instr->flow.if_block, "if");
- if (instr->flow.else_block)
- ir3_block_dump(ctx, instr->flow.else_block, "else");
- if (reg->flags & IR3_REG_SSA)
- dump_instr(ctx, reg->instr);
- } else if (instr->opc == OPC_META_PHI) {
- /* treat like a normal instruction: */
- ir3_instr_dump(ctx, instr);
- }
- } else {
- ir3_instr_dump(ctx, instr);
- }
-}
-
-/* arrarraggh! if link is to something outside of the current block, we
- * need to defer emitting the link until the end of the block, since the
- * edge triggers pre-creation of the node it links to inside the cluster,
- * even though it is meant to be outside..
- */
-static struct {
- char buf[40960];
- unsigned n;
-} edge_buf;
-
-/* helper to print or defer: */
-static void printdef(struct ir3_dump_ctx *ctx,
- bool defer, const char *fmt, ...)
-{
- va_list ap;
- va_start(ap, fmt);
- if (defer) {
- unsigned n = edge_buf.n;
- n += vsnprintf(&edge_buf.buf[n], sizeof(edge_buf.buf) - n,
- fmt, ap);
- edge_buf.n = n;
- } else {
- vfprintf(ctx->f, fmt, ap);
- }
- va_end(ap);
-}
-
-static void dump_link2(struct ir3_dump_ctx *ctx,
- struct ir3_instruction *instr, const char *target, bool defer)
-{
- /* some meta-instructions need to be handled specially: */
- if (is_meta(instr)) {
- if (instr->opc == OPC_META_INPUT) {
- printdef(ctx, defer, "input%lx:<in%u>:w -> %s",
- PTRID(instr->inout.block),
- instr->regs[0]->num, target);
- } else if (instr->opc == OPC_META_FO) {
- struct ir3_register *reg = instr->regs[1];
- dump_link2(ctx, reg->instr, target, defer);
- printdef(ctx, defer, "[label=\".%c\"]",
- "xyzw"[instr->fo.off & 0x3]);
- } else if (instr->opc == OPC_META_FI) {
- struct ir3_instruction *src;
-
- foreach_ssa_src_n(src, i, instr) {
- dump_link2(ctx, src, target, defer);
- printdef(ctx, defer, "[label=\".%c\"]",
- "xyzw"[i & 0x3]);
- }
- } else if (instr->opc == OPC_META_OUTPUT) {
- printdef(ctx, defer, "output%lx:<out%u>:w -> %s",
- PTRID(instr->inout.block),
- instr->regs[0]->num, target);
- } else if (instr->opc == OPC_META_PHI) {
- /* treat like a normal instruction: */
- printdef(ctx, defer, "instr%lx:<dst0> -> %s", PTRID(instr), target);
- }
- } else {
- printdef(ctx, defer, "instr%lx:<dst0> -> %s", PTRID(instr), target);
- }
-}
-
-static void dump_link(struct ir3_dump_ctx *ctx,
- struct ir3_instruction *instr,
- struct ir3_block *block, const char *target)
-{
- bool defer = instr->block != block;
- dump_link2(ctx, instr, target, defer);
- printdef(ctx, defer, "\n");
-}
-
-static struct ir3_register *follow_flow(struct ir3_register *reg)
-{
- if (reg->flags & IR3_REG_SSA) {
- struct ir3_instruction *instr = reg->instr;
- /* go with the flow.. */
- if (is_meta(instr) && (instr->opc == OPC_META_FLOW))
- return instr->regs[1];
- }
- return reg;
-}
-
-static void ir3_instr_dump(struct ir3_dump_ctx *ctx,
- struct ir3_instruction *instr)
-{
- struct ir3_register *src;
-
- fprintf(ctx->f, "instr%lx [shape=record,style=filled,fillcolor=lightgrey,label=\"{",
- PTRID(instr));
- dump_instr_name(ctx, instr);
-
- /* destination register: */
- fprintf(ctx->f, "|<dst0>");
-
- /* source register(s): */
- foreach_src_n(src, i, instr) {
- struct ir3_register *reg = follow_flow(src);
-
- fprintf(ctx->f, "|");
-
- if (reg->flags & IR3_REG_SSA)
- fprintf(ctx->f, "<src%u> ", i);
-
- dump_reg_name(ctx, reg, true);
- }
-
- fprintf(ctx->f, "}\"];\n");
-
- /* and recursively dump dependent instructions: */
- foreach_src_n(src, i, instr) {
- struct ir3_register *reg = follow_flow(src);
- char target[32]; /* link target */
-
- if (!(reg->flags & IR3_REG_SSA))
- continue;
-
- snprintf(target, sizeof(target), "instr%lx:<src%u>",
- PTRID(instr), i);
-
- dump_instr(ctx, reg->instr);
- dump_link(ctx, reg->instr, instr->block, target);
- }
-}
-
-static void ir3_block_dump(struct ir3_dump_ctx *ctx,
- struct ir3_block *block, const char *name)
-{
- unsigned i, n;
-
- n = edge_buf.n;
-
- fprintf(ctx->f, "subgraph cluster%lx {\n", PTRID(block));
- fprintf(ctx->f, "label=\"%s\";\n", name);
-
- /* draw inputs: */
- fprintf(ctx->f, "input%lx [shape=record,label=\"inputs", PTRID(block));
- for (i = 0; i < block->ninputs; i++)
- if (block->inputs[i])
- fprintf(ctx->f, "|<in%u> i%u.%c", i, (i >> 2), "xyzw"[i & 0x3]);
- fprintf(ctx->f, "\"];\n");
-
- /* draw instruction graph: */
- for (i = 0; i < block->noutputs; i++)
- if (block->outputs[i])
- dump_instr(ctx, block->outputs[i]);
-
- /* draw outputs: */
- fprintf(ctx->f, "output%lx [shape=record,label=\"outputs", PTRID(block));
- for (i = 0; i < block->noutputs; i++)
- fprintf(ctx->f, "|<out%u> o%u.%c", i, (i >> 2), "xyzw"[i & 0x3]);
- fprintf(ctx->f, "\"];\n");
-
- /* and links to outputs: */
- for (i = 0; i < block->noutputs; i++) {
- char target[32]; /* link target */
-
- /* NOTE: there could be outputs that are never assigned,
- * so skip them
- */
- if (!block->outputs[i])
- continue;
-
- snprintf(target, sizeof(target), "output%lx:<out%u>:e",
- PTRID(block), i);
-
- dump_link(ctx, block->outputs[i], block, target);
- }
-
- fprintf(ctx->f, "}\n");
-
- /* and links to inputs: */
- if (block->parent) {
- for (i = 0; i < block->ninputs; i++) {
- char target[32]; /* link target */
-
- if (!block->inputs[i])
- continue;
-
- dump_instr(ctx, block->inputs[i]);
-
- snprintf(target, sizeof(target), "input%lx:<in%u>:e",
- PTRID(block), i);
-
- dump_link(ctx, block->inputs[i], block, target);
- }
- }
-
- /* dump deferred edges: */
- if (edge_buf.n > n) {
- fprintf(ctx->f, "%*s", edge_buf.n - n, &edge_buf.buf[n]);
- edge_buf.n = n;
- }
-}
-
-void ir3_dump(struct ir3 *shader, const char *name,
- struct ir3_block *block /* XXX maybe 'block' ptr should move to ir3? */,
- FILE *f)
-{
- struct ir3_dump_ctx ctx = {
- .f = f,
- };
- ir3_clear_mark(shader);
- fprintf(ctx.f, "digraph G {\n");
- fprintf(ctx.f, "rankdir=RL;\n");
- fprintf(ctx.f, "nodesep=0.25;\n");
- fprintf(ctx.f, "ranksep=1.5;\n");
- ir3_block_dump(&ctx, block, name);
- fprintf(ctx.f, "}\n");
-}
-
-/*
- * For Debugging:
- */
-
-void
-ir3_dump_instr_single(struct ir3_instruction *instr)
-{
- struct ir3_dump_ctx ctx = {
- .f = stdout,
- .verbose = true,
- };
- unsigned i;
-
- dump_instr_name(&ctx, instr);
- for (i = 0; i < instr->regs_count; i++) {
- struct ir3_register *reg = instr->regs[i];
- printf(i ? ", " : " ");
- dump_reg_name(&ctx, reg, !!i);
- }
-
- if (instr->address) {
- fprintf(ctx.f, ", address=_");
- fprintf(ctx.f, "[");
- dump_instr_name(&ctx, instr->address);
- fprintf(ctx.f, "]");
- }
-
- if (instr->fanin) {
- fprintf(ctx.f, ", fanin=_");
- fprintf(ctx.f, "[");
- dump_instr_name(&ctx, instr->fanin);
- fprintf(ctx.f, "]");
- }
-
- if (is_meta(instr)) {
- if (instr->opc == OPC_META_FO) {
- printf(", off=%d", instr->fo.off);
- } else if ((instr->opc == OPC_META_FI) && instr->fi.aid) {
- printf(", aid=%d", instr->fi.aid);
- }
- }
-
- printf("\n");
-}
-
-void
-ir3_dump_instr_list(struct ir3_instruction *instr)
-{
- struct ir3_block *block = instr->block;
- unsigned n = 0;
-
- while (instr) {
- ir3_dump_instr_single(instr);
- if (!is_meta(instr))
- n++;
- instr = instr->next;
- }
- printf("%u instructions\n", n);
-
- for (n = 0; n < block->noutputs; n++) {
- if (!block->outputs[n])
- continue;
- printf("out%d: ", n);
- ir3_dump_instr_single(block->outputs[n]);
- }
-}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_flatten.c b/src/gallium/drivers/freedreno/ir3/ir3_flatten.c
deleted file mode 100644
index 419cd9dfcd4..00000000000
--- a/src/gallium/drivers/freedreno/ir3/ir3_flatten.c
+++ /dev/null
@@ -1,152 +0,0 @@
-/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
-
-/*
- * Copyright (C) 2014 Rob Clark <[email protected]>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Authors:
- * Rob Clark <[email protected]>
- */
-
-#include <stdarg.h>
-
-#include "ir3.h"
-
-/*
- * Flatten: flatten out legs of if/else, etc
- *
- * TODO probably should use some heuristic to decide to not flatten
- * if one side of the other is too large / deeply nested / whatever?
- */
-
-struct ir3_flatten_ctx {
- struct ir3_block *block;
- unsigned cnt;
-};
-
-static struct ir3_register *unwrap(struct ir3_register *reg)
-{
-
- if (reg->flags & IR3_REG_SSA) {
- struct ir3_instruction *instr = reg->instr;
- if (is_meta(instr)) {
- switch (instr->opc) {
- case OPC_META_OUTPUT:
- case OPC_META_FLOW:
- if (instr->regs_count > 1)
- return instr->regs[1];
- return NULL;
- default:
- break;
- }
- }
- }
- return reg;
-}
-
-static void ir3_instr_flatten(struct ir3_flatten_ctx *ctx,
- struct ir3_instruction *instr)
-{
- struct ir3_instruction *src;
-
- /* if we've already visited this instruction, bail now: */
- if (ir3_instr_check_mark(instr))
- return;
-
- instr->block = ctx->block;
-
- /* TODO: maybe some threshold to decide whether to
- * flatten or not??
- */
- if (is_meta(instr)) {
- if (instr->opc == OPC_META_PHI) {
- struct ir3_register *cond, *t, *f;
-
- cond = unwrap(instr->regs[1]);
- t = unwrap(instr->regs[2]); /* true val */
- f = unwrap(instr->regs[3]); /* false val */
-
- /* must have cond, but t or f may be null if only written
- * one one side of the if/else (in which case we can just
- * convert the PHI to a simple move).
- */
- assert(cond);
- assert(t || f);
-
- if (t && f) {
- /* convert the PHI instruction to sel.{b16,b32} */
- instr->category = 3;
-
- /* instruction type based on dst size: */
- if (instr->regs[0]->flags & IR3_REG_HALF)
- instr->opc = OPC_SEL_B16;
- else
- instr->opc = OPC_SEL_B32;
-
- instr->regs[1] = t;
- instr->regs[2] = cond;
- instr->regs[3] = f;
- } else {
- /* convert to simple mov: */
- instr->category = 1;
- instr->cat1.dst_type = TYPE_F32;
- instr->cat1.src_type = TYPE_F32;
- instr->regs_count = 2;
- instr->regs[1] = t ? t : f;
- }
-
- ctx->cnt++;
- } else if ((instr->opc == OPC_META_INPUT) &&
- (instr->regs_count == 2)) {
- type_t ftype;
-
- if (instr->regs[0]->flags & IR3_REG_HALF)
- ftype = TYPE_F16;
- else
- ftype = TYPE_F32;
-
- /* convert meta:input to mov: */
- instr->category = 1;
- instr->cat1.src_type = ftype;
- instr->cat1.dst_type = ftype;
- }
- }
-
- /* recursively visit children: */
- foreach_ssa_src(src, instr)
- ir3_instr_flatten(ctx, src);
-}
-
-/* return >= 0 is # of phi's flattened, < 0 is error */
-int ir3_block_flatten(struct ir3_block *block)
-{
- struct ir3_flatten_ctx ctx = {
- .block = block,
- };
- unsigned i;
-
- ir3_clear_mark(block->shader);
- for(i = 0; i < block->noutputs; i++)
- if (block->outputs[i])
- ir3_instr_flatten(&ctx, block->outputs[i]);
-
- return ctx.cnt;
-}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_group.c b/src/gallium/drivers/freedreno/ir3/ir3_group.c
index 782f6e87e56..70d9b08e019 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_group.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_group.c
@@ -34,35 +34,6 @@
* Find/group instruction neighbors:
*/
-/* stop condition for iteration: */
-static bool check_stop(struct ir3_instruction *instr)
-{
- if (ir3_instr_check_mark(instr))
- return true;
-
- /* stay within the block.. don't try to operate across
- * basic block boundaries or we'll have problems when
- * dealing with multiple basic blocks:
- */
- if (is_meta(instr) && (instr->opc == OPC_META_INPUT))
- return true;
-
- return false;
-}
-
-static struct ir3_instruction * create_mov(struct ir3_instruction *instr)
-{
- struct ir3_instruction *mov;
-
- mov = ir3_instr_create(instr->block, 1, 0);
- mov->cat1.src_type = TYPE_F32;
- mov->cat1.dst_type = TYPE_F32;
- ir3_reg_create(mov, 0, 0); /* dst */
- ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = instr;
-
- return mov;
-}
-
/* bleh.. we need to do the same group_n() thing for both inputs/outputs
* (where we have a simple instr[] array), and fanin nodes (where we have
* an extra indirection via reg->instr).
@@ -78,7 +49,8 @@ static struct ir3_instruction *arr_get(void *arr, int idx)
}
static void arr_insert_mov_out(void *arr, int idx, struct ir3_instruction *instr)
{
- ((struct ir3_instruction **)arr)[idx] = create_mov(instr);
+ ((struct ir3_instruction **)arr)[idx] =
+ ir3_MOV(instr->block, instr, TYPE_F32);
}
static void arr_insert_mov_in(void *arr, int idx, struct ir3_instruction *instr)
{
@@ -111,14 +83,17 @@ static struct ir3_instruction *instr_get(void *arr, int idx)
{
return ssa(((struct ir3_instruction *)arr)->regs[idx+1]);
}
-static void instr_insert_mov(void *arr, int idx, struct ir3_instruction *instr)
+static void
+instr_insert_mov(void *arr, int idx, struct ir3_instruction *instr)
{
- ((struct ir3_instruction *)arr)->regs[idx+1]->instr = create_mov(instr);
+ ((struct ir3_instruction *)arr)->regs[idx+1]->instr =
+ ir3_MOV(instr->block, instr, TYPE_F32);
}
static struct group_ops instr_ops = { instr_get, instr_insert_mov };
-static void group_n(struct group_ops *ops, void *arr, unsigned n)
+static void
+group_n(struct group_ops *ops, void *arr, unsigned n)
{
unsigned i, j;
@@ -141,6 +116,10 @@ restart:
conflict = conflicts(instr->cp.left, left) ||
conflicts(instr->cp.right, right);
+ /* RA can't yet deal very well w/ group'd phi's: */
+ if (is_meta(instr) && (instr->opc == OPC_META_PHI))
+ conflict = true;
+
/* we also can't have an instr twice in the group: */
for (j = i + 1; (j < n) && !conflict; j++)
if (ops->get(arr, j) == instr)
@@ -181,11 +160,12 @@ restart:
}
}
-static void instr_find_neighbors(struct ir3_instruction *instr)
+static void
+instr_find_neighbors(struct ir3_instruction *instr)
{
struct ir3_instruction *src;
- if (check_stop(instr))
+ if (ir3_instr_check_mark(instr))
return;
if (is_meta(instr) && (instr->opc == OPC_META_FI))
@@ -200,7 +180,8 @@ static void instr_find_neighbors(struct ir3_instruction *instr)
* we need to insert dummy/padding instruction for grouping, and
* then take it back out again before anyone notices.
*/
-static void pad_and_group_input(struct ir3_instruction **input, unsigned n)
+static void
+pad_and_group_input(struct ir3_instruction **input, unsigned n)
{
int i, mask = 0;
struct ir3_block *block = NULL;
@@ -210,8 +191,8 @@ static void pad_and_group_input(struct ir3_instruction **input, unsigned n)
if (instr) {
block = instr->block;
} else if (block) {
- instr = ir3_instr_create(block, 0, OPC_NOP);
- ir3_reg_create(instr, 0, IR3_REG_SSA); /* dst */
+ instr = ir3_NOP(block);
+ ir3_reg_create(instr, 0, IR3_REG_SSA); /* dummy dst */
input[i] = instr;
mask |= (1 << i);
}
@@ -225,42 +206,41 @@ static void pad_and_group_input(struct ir3_instruction **input, unsigned n)
}
}
-static void block_find_neighbors(struct ir3_block *block)
+static void
+find_neighbors(struct ir3 *ir)
{
unsigned i;
- for (i = 0; i < block->noutputs; i++) {
- if (block->outputs[i]) {
- struct ir3_instruction *instr = block->outputs[i];
- instr_find_neighbors(instr);
- }
- }
-
/* shader inputs/outputs themselves must be contiguous as well:
+ *
+ * NOTE: group inputs first, since we only insert mov's
+ * *before* the conflicted instr (and that would go badly
+ * for inputs). By doing inputs first, we should never
+ * have a conflict on inputs.. pushing any conflict to
+ * resolve to the outputs, for stuff like:
+ *
+ * MOV OUT[n], IN[m].wzyx
+ *
+ * NOTE: we assume here inputs/outputs are grouped in vec4.
+ * This logic won't quite cut it if we don't align smaller
+ * on vec4 boundaries
*/
- if (!block->parent) {
- /* NOTE: group inputs first, since we only insert mov's
- * *before* the conflicted instr (and that would go badly
- * for inputs). By doing inputs first, we should never
- * have a conflict on inputs.. pushing any conflict to
- * resolve to the outputs, for stuff like:
- *
- * MOV OUT[n], IN[m].wzyx
- *
- * NOTE: we assume here inputs/outputs are grouped in vec4.
- * This logic won't quite cut it if we don't align smaller
- * on vec4 boundaries
- */
- for (i = 0; i < block->ninputs; i += 4)
- pad_and_group_input(&block->inputs[i], 4);
- for (i = 0; i < block->noutputs; i += 4)
- group_n(&arr_ops_out, &block->outputs[i], 4);
-
+ for (i = 0; i < ir->ninputs; i += 4)
+ pad_and_group_input(&ir->inputs[i], 4);
+ for (i = 0; i < ir->noutputs; i += 4)
+ group_n(&arr_ops_out, &ir->outputs[i], 4);
+
+ for (i = 0; i < ir->noutputs; i++) {
+ if (ir->outputs[i]) {
+ struct ir3_instruction *instr = ir->outputs[i];
+ instr_find_neighbors(instr);
+ }
}
}
-void ir3_block_group(struct ir3_block *block)
+void
+ir3_group(struct ir3 *ir)
{
- ir3_clear_mark(block->shader);
- block_find_neighbors(block);
+ ir3_clear_mark(ir);
+ find_neighbors(ir);
}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c
index 2455f7e4efc..f4a4223ae17 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c
@@ -26,7 +26,6 @@
* Rob Clark <[email protected]>
*/
-#include "pipe/p_shader_tokens.h"
#include "util/u_math.h"
#include "freedreno_util.h"
@@ -43,20 +42,31 @@
*/
struct ir3_legalize_ctx {
- struct ir3_block *block;
bool has_samp;
int max_bary;
};
-static void legalize(struct ir3_legalize_ctx *ctx)
+/* We want to evaluate each block from the position of any other
+ * predecessor block, in order that the flags set are the union
+ * of all possible program paths. For stopping condition, we
+ * want to stop when the pair of <pred-block, current-block> has
+ * been visited already.
+ *
+ * XXX is that completely true? We could have different needs_xyz
+ * flags set depending on path leading to pred-block.. we could
+ * do *most* of this based on chasing src instructions ptrs (and
+ * following all phi srcs).. except the write-after-read hazard.
+ *
+ * For now we just set ss/sy flag on first instruction on block,
+ * and handle everything within the block as before.
+ */
+
+static void
+legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
{
- struct ir3_block *block = ctx->block;
- struct ir3_instruction *n;
- struct ir3 *shader = block->shader;
- struct ir3_instruction *end =
- ir3_instr_create(block, 0, OPC_END);
struct ir3_instruction *last_input = NULL;
struct ir3_instruction *last_rel = NULL;
+ struct list_head instr_list;
regmask_t needs_ss_war; /* write after read */
regmask_t needs_ss;
regmask_t needs_sy;
@@ -65,9 +75,13 @@ static void legalize(struct ir3_legalize_ctx *ctx)
regmask_init(&needs_ss);
regmask_init(&needs_sy);
- shader->instrs_count = 0;
+ /* remove all the instructions from the list, we'll be adding
+ * them back in as we go
+ */
+ list_replace(&block->instr_list, &instr_list);
+ list_inithead(&block->instr_list);
- for (n = block->head; n; n = n->next) {
+ list_for_each_entry_safe (struct ir3_instruction, n, &instr_list, node) {
struct ir3_register *reg;
unsigned i;
@@ -134,18 +148,18 @@ static void legalize(struct ir3_legalize_ctx *ctx)
*/
if ((n->flags & IR3_INSTR_SS) && (n->category >= 5)) {
struct ir3_instruction *nop;
- nop = ir3_instr_create(block, 0, OPC_NOP);
+ nop = ir3_NOP(block);
nop->flags |= IR3_INSTR_SS;
n->flags &= ~IR3_INSTR_SS;
}
/* need to be able to set (ss) on first instruction: */
- if ((shader->instrs_count == 0) && (n->category >= 5))
- ir3_instr_create(block, 0, OPC_NOP);
+ if (list_empty(&block->instr_list) && (n->category >= 5))
+ ir3_NOP(block);
- if (is_nop(n) && shader->instrs_count) {
- struct ir3_instruction *last =
- shader->instrs[shader->instrs_count-1];
+ if (is_nop(n) && !list_empty(&block->instr_list)) {
+ struct ir3_instruction *last = list_last_entry(&block->instr_list,
+ struct ir3_instruction, node);
if (is_nop(last) && (last->repeat < 5)) {
last->repeat++;
last->flags |= n->flags;
@@ -153,7 +167,7 @@ static void legalize(struct ir3_legalize_ctx *ctx)
}
}
- shader->instrs[shader->instrs_count++] = n;
+ list_addtail(&n->node, &block->instr_list);
if (is_sfu(n))
regmask_set(&needs_ss, n->regs[0]);
@@ -192,35 +206,20 @@ static void legalize(struct ir3_legalize_ctx *ctx)
* the (ei) flag:
*/
if (is_mem(last_input) && (last_input->opc == OPC_LDLV)) {
- int i, cnt;
-
- /* note that ir3_instr_create() inserts into
- * shader->instrs[] and increments the count..
- * so we need to bump up the cnt initially (to
- * avoid it clobbering the last real instr) and
- * restore it after.
- */
- cnt = ++shader->instrs_count;
+ struct ir3_instruction *baryf;
- /* inserting instructions would be a bit nicer if list.. */
- for (i = cnt - 2; i >= 0; i--) {
- if (shader->instrs[i] == last_input) {
+ /* (ss)bary.f (ei)r63.x, 0, r0.x */
+ baryf = ir3_instr_create(block, 2, OPC_BARY_F);
+ baryf->flags |= IR3_INSTR_SS;
+ ir3_reg_create(baryf, regid(63, 0), 0);
+ ir3_reg_create(baryf, 0, IR3_REG_IMMED)->iim_val = 0;
+ ir3_reg_create(baryf, regid(0, 0), 0);
- /* (ss)bary.f (ei)r63.x, 0, r0.x */
- last_input = ir3_instr_create(block, 2, OPC_BARY_F);
- last_input->flags |= IR3_INSTR_SS;
- ir3_reg_create(last_input, regid(63, 0), 0);
- ir3_reg_create(last_input, 0, IR3_REG_IMMED)->iim_val = 0;
- ir3_reg_create(last_input, regid(0, 0), 0);
+ /* insert the dummy bary.f after last_input: */
+ list_delinit(&baryf->node);
+ list_add(&baryf->node, &last_input->node);
- shader->instrs[i + 1] = last_input;
-
- break;
- }
- shader->instrs[i + 1] = shader->instrs[i];
- }
-
- shader->instrs_count = cnt;
+ last_input = baryf;
}
last_input->regs[0]->flags |= IR3_REG_EI;
}
@@ -228,21 +227,177 @@ static void legalize(struct ir3_legalize_ctx *ctx)
if (last_rel)
last_rel->flags |= IR3_INSTR_UL;
- shader->instrs[shader->instrs_count++] = end;
+ list_first_entry(&block->instr_list, struct ir3_instruction, node)
+ ->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
+}
+
+/* NOTE: branch instructions are always the last instruction(s)
+ * in the block. We take advantage of this as we resolve the
+ * branches, since "if (foo) break;" constructs turn into
+ * something like:
+ *
+ * block3 {
+ * ...
+ * 0029:021: mov.s32s32 r62.x, r1.y
+ * 0082:022: br !p0.x, target=block5
+ * 0083:023: br p0.x, target=block4
+ * // succs: if _[0029:021: mov.s32s32] block4; else block5;
+ * }
+ * block4 {
+ * 0084:024: jump, target=block6
+ * // succs: block6;
+ * }
+ * block5 {
+ * 0085:025: jump, target=block7
+ * // succs: block7;
+ * }
+ *
+ * ie. only instruction in block4/block5 is a jump, so when
+ * resolving branches we can easily detect this by checking
+ * that the first instruction in the target block is itself
+ * a jump, and setup the br directly to the jump's target
+ * (and strip back out the now unreached jump)
+ *
+ * TODO sometimes we end up with things like:
+ *
+ * br !p0.x, #2
+ * br p0.x, #12
+ * add.u r0.y, r0.y, 1
+ *
+ * If we swapped the order of the branches, we could drop one.
+ */
+static struct ir3_block *
+resolve_dest_block(struct ir3_block *block)
+{
+ /* special case for last block: */
+ if (!block->successors[0])
+ return block;
+
+ /* NOTE that we may or may not have inserted the jump
+ * in the target block yet, so conditions to resolve
+ * the dest to the dest block's successor are:
+ *
+ * (1) successor[1] == NULL &&
+ * (2) (block-is-empty || only-instr-is-jump)
+ */
+ if (block->successors[1] == NULL) {
+ if (list_empty(&block->instr_list)) {
+ return block->successors[0];
+ } else if (list_length(&block->instr_list) == 1) {
+ struct ir3_instruction *instr = list_first_entry(
+ &block->instr_list, struct ir3_instruction, node);
+ if (is_flow(instr) && (instr->opc == OPC_JUMP))
+ return block->successors[0];
+ }
+ }
+ return block;
+}
+
+static bool
+resolve_jump(struct ir3_instruction *instr)
+{
+ struct ir3_block *tblock =
+ resolve_dest_block(instr->cat0.target);
+ struct ir3_instruction *target;
+
+ if (tblock != instr->cat0.target) {
+ list_delinit(&instr->cat0.target->node);
+ instr->cat0.target = tblock;
+ return true;
+ }
+
+ target = list_first_entry(&tblock->instr_list,
+ struct ir3_instruction, node);
+
+ if ((!target) || (target->ip == (instr->ip + 1))) {
+ list_delinit(&instr->node);
+ return true;
+ } else {
+ instr->cat0.immed =
+ (int)target->ip - (int)instr->ip;
+ }
+ return false;
+}
+
+/* resolve jumps, removing jumps/branches to immediately following
+ * instruction which we end up with from earlier stages. Since
+ * removing an instruction can invalidate earlier instruction's
+ * branch offsets, we need to do this iteratively until no more
+ * branches are removed.
+ */
+static bool
+resolve_jumps(struct ir3 *ir)
+{
+ list_for_each_entry (struct ir3_block, block, &ir->block_list, node)
+ list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node)
+ if (is_flow(instr) && instr->cat0.target)
+ if (resolve_jump(instr))
+ return true;
+
+ return false;
+}
- shader->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
+/* we want to mark points where divergent flow control re-converges
+ * with (jp) flags. For now, since we don't do any optimization for
+ * things that start out as a 'do {} while()', re-convergence points
+ * will always be a branch or jump target. Note that this is overly
+ * conservative, since unconditional jump targets are not convergence
+ * points, we are just assuming that the other path to reach the jump
+ * target was divergent. If we were clever enough to optimize the
+ * jump at end of a loop back to a conditional branch into a single
+ * conditional branch, ie. like:
+ *
+ * add.f r1.w, r0.x, (neg)(r)c2.x <= loop start
+ * mul.f r1.z, r1.z, r0.x
+ * mul.f r1.y, r1.y, r0.x
+ * mul.f r0.z, r1.x, r0.x
+ * mul.f r0.w, r0.y, r0.x
+ * cmps.f.ge r0.x, (r)c2.y, (r)r1.w
+ * add.s r0.x, (r)r0.x, (r)-1
+ * sel.f32 r0.x, (r)c3.y, (r)r0.x, c3.x
+ * cmps.f.eq p0.x, r0.x, c3.y
+ * mov.f32f32 r0.x, r1.w
+ * mov.f32f32 r0.y, r0.w
+ * mov.f32f32 r1.x, r0.z
+ * (rpt2)nop
+ * br !p0.x, #-13
+ * (jp)mul.f r0.x, c263.y, r1.y
+ *
+ * Then we'd have to be more clever, as the convergence point is no
+ * longer a branch or jump target.
+ */
+static void
+mark_convergence_points(struct ir3 *ir)
+{
+ list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+ list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+ if (is_flow(instr) && instr->cat0.target) {
+ struct ir3_instruction *target =
+ list_first_entry(&instr->cat0.target->instr_list,
+ struct ir3_instruction, node);
+ target->flags |= IR3_INSTR_JP;
+ }
+ }
+ }
}
-void ir3_block_legalize(struct ir3_block *block,
- bool *has_samp, int *max_bary)
+void
+ir3_legalize(struct ir3 *ir, bool *has_samp, int *max_bary)
{
struct ir3_legalize_ctx ctx = {
- .block = block,
.max_bary = -1,
};
- legalize(&ctx);
+ list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+ legalize_block(&ctx, block);
+ }
*has_samp = ctx.has_samp;
*max_bary = ctx.max_bary;
+
+ do {
+ ir3_count_instructions(ir);
+ } while(resolve_jumps(ir));
+
+ mark_convergence_points(ir);
}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c b/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c
index ae36019ed5f..dc9e4626f27 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c
@@ -74,14 +74,13 @@ valid_dest(nir_block *block, nir_dest *dest)
* (so this is run iteratively in a loop). Therefore if
* we get this far, it should not have any if_uses:
*/
- assert(dest->ssa.if_uses->entries == 0);
+ assert(list_empty(&dest->ssa.if_uses));
/* The only uses of this definition must be phi's in the
* successor or in the current block
*/
- struct set_entry *entry;
- set_foreach(dest->ssa.uses, entry) {
- const nir_instr *dest_instr = entry->key;
+ nir_foreach_use(&dest->ssa, use) {
+ nir_instr *dest_instr = use->parent_instr;
if (dest_instr->block == block)
continue;
if ((dest_instr->type == nir_instr_type_phi) &&
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_print.c b/src/gallium/drivers/freedreno/ir3/ir3_print.c
new file mode 100644
index 00000000000..f377982dd5e
--- /dev/null
+++ b/src/gallium/drivers/freedreno/ir3/ir3_print.c
@@ -0,0 +1,237 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <[email protected]>
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "ir3.h"
+
+#define PTRID(x) ((unsigned long)(x))
+
+static void print_instr_name(struct ir3_instruction *instr)
+{
+#ifdef DEBUG
+ printf("%04u:", instr->serialno);
+#endif
+ printf("%03u: ", instr->depth);
+
+ if (instr->flags & IR3_INSTR_SY)
+ printf("(sy)");
+ if (instr->flags & IR3_INSTR_SS)
+ printf("(ss)");
+
+ if (is_meta(instr)) {
+ switch(instr->opc) {
+ case OPC_META_PHI:
+ printf("&#934;");
+ break;
+ default:
+ /* shouldn't hit here.. just for debugging: */
+ switch (instr->opc) {
+ case OPC_META_INPUT: printf("_meta:in"); break;
+ case OPC_META_FO: printf("_meta:fo"); break;
+ case OPC_META_FI: printf("_meta:fi"); break;
+
+ default: printf("_meta:%d", instr->opc); break;
+ }
+ break;
+ }
+ } else if (instr->category == 1) {
+ static const char *type[] = {
+ [TYPE_F16] = "f16",
+ [TYPE_F32] = "f32",
+ [TYPE_U16] = "u16",
+ [TYPE_U32] = "u32",
+ [TYPE_S16] = "s16",
+ [TYPE_S32] = "s32",
+ [TYPE_U8] = "u8",
+ [TYPE_S8] = "s8",
+ };
+ if (instr->cat1.src_type == instr->cat1.dst_type)
+ printf("mov");
+ else
+ printf("cov");
+ printf(".%s%s", type[instr->cat1.src_type], type[instr->cat1.dst_type]);
+ } else {
+ printf("%s", ir3_instr_name(instr));
+ if (instr->flags & IR3_INSTR_3D)
+ printf(".3d");
+ if (instr->flags & IR3_INSTR_A)
+ printf(".a");
+ if (instr->flags & IR3_INSTR_O)
+ printf(".o");
+ if (instr->flags & IR3_INSTR_P)
+ printf(".p");
+ if (instr->flags & IR3_INSTR_S)
+ printf(".s");
+ if (instr->flags & IR3_INSTR_S2EN)
+ printf(".s2en");
+ }
+}
+
+static void print_reg_name(struct ir3_register *reg, bool followssa)
+{
+ if ((reg->flags & (IR3_REG_FABS | IR3_REG_SABS)) &&
+ (reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)))
+ printf("(absneg)");
+ else if (reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT))
+ printf("(neg)");
+ else if (reg->flags & (IR3_REG_FABS | IR3_REG_SABS))
+ printf("(abs)");
+
+ if (reg->flags & IR3_REG_IMMED) {
+ printf("imm[%f,%d,0x%x]", reg->fim_val, reg->iim_val, reg->iim_val);
+ } else if (reg->flags & IR3_REG_SSA) {
+ printf("_");
+ if (followssa) {
+ printf("[");
+ print_instr_name(reg->instr);
+ printf("]");
+ }
+ } else if (reg->flags & IR3_REG_RELATIV) {
+ if (reg->flags & IR3_REG_HALF)
+ printf("h");
+ if (reg->flags & IR3_REG_CONST)
+ printf("c<a0.x + %u>", reg->num);
+ else
+ printf("\x1b[0;31mr<a0.x + %u>\x1b[0m (%u)", reg->num, reg->size);
+ } else {
+ if (reg->flags & IR3_REG_HALF)
+ printf("h");
+ if (reg->flags & IR3_REG_CONST)
+ printf("c%u.%c", reg_num(reg), "xyzw"[reg_comp(reg)]);
+ else
+ printf("\x1b[0;31mr%u.%c\x1b[0m", reg_num(reg), "xyzw"[reg_comp(reg)]);
+ }
+}
+
+static void
+tab(int lvl)
+{
+ for (int i = 0; i < lvl; i++)
+ printf("\t");
+}
+
+static uint32_t
+block_id(struct ir3_block *block)
+{
+#ifdef DEBUG
+ return block->serialno;
+#else
+ return (uint32_t)(uint64_t)block;
+#endif
+}
+
+static void
+print_instr(struct ir3_instruction *instr, int lvl)
+{
+ unsigned i;
+
+ tab(lvl);
+
+ print_instr_name(instr);
+ for (i = 0; i < instr->regs_count; i++) {
+ struct ir3_register *reg = instr->regs[i];
+ printf(i ? ", " : " ");
+ print_reg_name(reg, !!i);
+ }
+
+ if (instr->address) {
+ printf(", address=_");
+ printf("[");
+ print_instr_name(instr->address);
+ printf("]");
+ }
+
+ if (instr->fanin) {
+ printf(", fanin=_");
+ printf("[");
+ print_instr_name(instr->fanin);
+ printf("]");
+ }
+
+ if (is_meta(instr)) {
+ if (instr->opc == OPC_META_FO) {
+ printf(", off=%d", instr->fo.off);
+ } else if ((instr->opc == OPC_META_FI) && instr->fi.aid) {
+ printf(", aid=%d", instr->fi.aid);
+ }
+ }
+
+ if (is_flow(instr) && instr->cat0.target) {
+ /* the predicate register src is implied: */
+ if (instr->opc == OPC_BR) {
+ printf(" %sp0.x", instr->cat0.inv ? "!" : "");
+ }
+ printf(", target=block%u", block_id(instr->cat0.target));
+ }
+
+ printf("\n");
+}
+
+void ir3_print_instr(struct ir3_instruction *instr)
+{
+ print_instr(instr, 0);
+}
+
+static void
+print_block(struct ir3_block *block, int lvl)
+{
+ tab(lvl); printf("block%u {\n", block_id(block));
+ list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+ print_instr(instr, lvl+1);
+ }
+ if (block->successors[1]) {
+ /* leading into if/else: */
+ tab(lvl+1);
+ printf("/* succs: if _[");
+ print_instr_name(block->condition);
+ printf("] block%u; else block%u; */\n",
+ block_id(block->successors[0]),
+ block_id(block->successors[1]));
+ } else if (block->successors[0]) {
+ tab(lvl+1);
+ printf("/* succs: block%u; */\n",
+ block_id(block->successors[0]));
+ }
+ tab(lvl); printf("}\n");
+}
+
+void
+ir3_print(struct ir3 *ir)
+{
+ list_for_each_entry (struct ir3_block, block, &ir->block_list, node)
+ print_block(block, 0);
+
+ for (unsigned i = 0; i < ir->noutputs; i++) {
+ if (!ir->outputs[i])
+ continue;
+ printf("out%d: ", i);
+ print_instr(ir->outputs[i], 0);
+ }
+}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
index a4235a77a15..e5aba859fab 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
@@ -26,284 +26,702 @@
* Rob Clark <[email protected]>
*/
-#include "pipe/p_shader_tokens.h"
#include "util/u_math.h"
+#include "util/register_allocate.h"
+#include "util/ralloc.h"
+#include "util/bitset.h"
#include "ir3.h"
+#include "ir3_compiler.h"
/*
* Register Assignment:
*
- * NOTE: currently only works on a single basic block.. need to think
- * about how multiple basic blocks are going to get scheduled. But
- * I think I want to re-arrange how blocks work, ie. get rid of the
- * block nesting thing..
+ * Uses the register_allocate util, which implements graph coloring
+ * algo with interference classes. To handle the cases where we need
+ * consecutive registers (for example, texture sample instructions),
+ * we model these as larger (double/quad/etc) registers which conflict
+ * with the corresponding registers in other classes.
*
- * NOTE: we could do register coalescing (eliminate moves) as part of
- * the RA step.. OTOH I think we need to do scheduling before register
- * assignment. And if we remove a mov that effects scheduling (unless
- * we leave a placeholder nop, which seems lame), so I'm not really
- * sure how practical this is to do both in a single stage. But OTOH
- * I'm not really sure a sane way for the CP stage to realize when it
- * cannot remove a mov due to multi-register constraints..
+ * Additionally we create additional classes for half-regs, which
+ * do not conflict with the full-reg classes. We do need at least
+ * sizes 1-4 (to deal w/ texture sample instructions output to half-
+ * reg). At the moment we don't create the higher order half-reg
+ * classes as half-reg frequently does not have enough precision
+ * for texture coords at higher resolutions.
*
- * NOTE: http://scopesconf.org/scopes-01/paper/session1_2.ps.gz has
- * some ideas to handle array allocation with a more conventional
- * graph coloring algorithm for register assignment, which might be
- * a good alternative to the current algo. However afaict it cannot
- * handle overlapping arrays, which is a scenario that we have to
- * deal with
+ * There are some additional cases that we need to handle specially,
+ * as the graph coloring algo doesn't understand "partial writes".
+ * For example, a sequence like:
+ *
+ * add r0.z, ...
+ * sam (f32)(xy)r0.x, ...
+ * ...
+ * sam (f32)(xyzw)r0.w, r0.x, ... ; 3d texture, so r0.xyz are coord
+ *
+ * In this scenario, we treat r0.xyz as class size 3, which is written
+ * (from a use/def perspective) at the 'add' instruction and ignore the
+ * subsequent partial writes to r0.xy. So the 'add r0.z, ...' is the
+ * defining instruction, as it is the first to partially write r0.xyz.
+ *
+ * Note i965 has a similar scenario, which they solve with a virtual
+ * LOAD_PAYLOAD instruction which gets turned into multiple MOV's after
+ * register assignment. But for us that is horrible from a scheduling
+ * standpoint. Instead what we do is use idea of 'definer' instruction.
+ * Ie. the first instruction (lowest ip) to write to the array is the
+ * one we consider from use/def perspective when building interference
+ * graph. (Other instructions which write other array elements just
+ * define the variable some more.)
+ */
+
+static const unsigned class_sizes[] = {
+ 1, 2, 3, 4,
+ 4 + 4, /* txd + 1d/2d */
+ 4 + 6, /* txd + 3d */
+ /* temporary: until we can assign arrays, create classes so we
+ * can round up array to fit. NOTE with tgsi arrays should
+ * really all be multiples of four:
+ */
+ 4 * 4,
+ 4 * 8,
+ 4 * 16,
+ 4 * 32,
+
+};
+#define class_count ARRAY_SIZE(class_sizes)
+
+static const unsigned half_class_sizes[] = {
+ 1, 2, 3, 4,
+};
+#define half_class_count ARRAY_SIZE(half_class_sizes)
+#define total_class_count (class_count + half_class_count)
+
+/* Below a0.x are normal regs. RA doesn't need to assign a0.x/p0.x. */
+#define NUM_REGS (4 * (REG_A0 - 1))
+/* Number of virtual regs in a given class: */
+#define CLASS_REGS(i) (NUM_REGS - (class_sizes[i] - 1))
+#define HALF_CLASS_REGS(i) (NUM_REGS - (half_class_sizes[i] - 1))
+
+/* register-set, created one time, used for all shaders: */
+struct ir3_ra_reg_set {
+ struct ra_regs *regs;
+ unsigned int classes[class_count];
+ unsigned int half_classes[half_class_count];
+ /* maps flat virtual register space to base gpr: */
+ uint16_t *ra_reg_to_gpr;
+ /* maps cls,gpr to flat virtual register space: */
+ uint16_t **gpr_to_ra_reg;
+};
+
+/* One-time setup of RA register-set, which describes all the possible
+ * "virtual" registers and their interferences. Ie. double register
+ * occupies (and conflicts with) two single registers, and so forth.
+ * Since registers do not need to be aligned to their class size, they
+ * can conflict with other registers in the same class too. Ie:
+ *
+ * Single (base) | Double
+ * --------------+---------------
+ * R0 | D0
+ * R1 | D0 D1
+ * R2 | D1 D2
+ * R3 | D2
+ * .. and so on..
+ *
+ * (NOTE the disassembler uses notation like r0.x/y/z/w but those are
+ * really just four scalar registers. Don't let that confuse you.)
*/
+struct ir3_ra_reg_set *
+ir3_ra_alloc_reg_set(void *memctx)
+{
+ struct ir3_ra_reg_set *set = rzalloc(memctx, struct ir3_ra_reg_set);
+ unsigned ra_reg_count, reg, first_half_reg;
+ unsigned int **q_values;
+
+ /* calculate # of regs across all classes: */
+ ra_reg_count = 0;
+ for (unsigned i = 0; i < class_count; i++)
+ ra_reg_count += CLASS_REGS(i);
+ for (unsigned i = 0; i < half_class_count; i++)
+ ra_reg_count += HALF_CLASS_REGS(i);
+
+ /* allocate and populate q_values: */
+ q_values = ralloc_array(set, unsigned *, total_class_count);
+ for (unsigned i = 0; i < class_count; i++) {
+ q_values[i] = rzalloc_array(q_values, unsigned, total_class_count);
+
+ /* From register_allocate.c:
+ *
+ * q(B,C) (indexed by C, B is this register class) in
+ * Runeson/Nyström paper. This is "how many registers of B could
+ * the worst choice register from C conflict with".
+ *
+ * If we just let the register allocation algorithm compute these
+ * values, it is extremely expensive. However, since all of our
+ * registers are laid out, we can very easily compute them
+ * ourselves. View the register from C as fixed starting at GRF n
+ * somewhere in the middle, and the register from B as sliding back
+ * and forth. Then the first register to conflict from B is the
+ * one starting at n - class_size[B] + 1 and the last register to
+ * conflict will start at n + class_size[B] - 1. Therefore, the
+ * number of conflicts from B is class_size[B] + class_size[C] - 1.
+ *
+ * +-+-+-+-+-+-+ +-+-+-+-+-+-+
+ * B | | | | | |n| --> | | | | | | |
+ * +-+-+-+-+-+-+ +-+-+-+-+-+-+
+ * +-+-+-+-+-+
+ * C |n| | | | |
+ * +-+-+-+-+-+
+ *
+ * (Idea copied from brw_fs_reg_allocate.cpp)
+ */
+ for (unsigned j = 0; j < class_count; j++)
+ q_values[i][j] = class_sizes[i] + class_sizes[j] - 1;
+ }
+
+ for (unsigned i = class_count; i < total_class_count; i++) {
+ q_values[i] = ralloc_array(q_values, unsigned, total_class_count);
+
+ /* see comment above: */
+ for (unsigned j = class_count; j < total_class_count; j++) {
+ q_values[i][j] = half_class_sizes[i - class_count] +
+ half_class_sizes[j - class_count] - 1;
+ }
+ }
+ /* allocate the reg-set.. */
+ set->regs = ra_alloc_reg_set(set, ra_reg_count);
+ set->ra_reg_to_gpr = ralloc_array(set, uint16_t, ra_reg_count);
+ set->gpr_to_ra_reg = ralloc_array(set, uint16_t *, total_class_count);
+
+ /* .. and classes */
+ reg = 0;
+ for (unsigned i = 0; i < class_count; i++) {
+ set->classes[i] = ra_alloc_reg_class(set->regs);
+
+ set->gpr_to_ra_reg[i] = ralloc_array(set, uint16_t, CLASS_REGS(i));
+
+ for (unsigned j = 0; j < CLASS_REGS(i); j++) {
+ ra_class_add_reg(set->regs, set->classes[i], reg);
+
+ set->ra_reg_to_gpr[reg] = j;
+ set->gpr_to_ra_reg[i][j] = reg;
+
+ for (unsigned br = j; br < j + class_sizes[i]; br++)
+ ra_add_transitive_reg_conflict(set->regs, br, reg);
+
+ reg++;
+ }
+ }
+
+ first_half_reg = reg;
+
+ for (unsigned i = 0; i < half_class_count; i++) {
+ set->half_classes[i] = ra_alloc_reg_class(set->regs);
+
+ set->gpr_to_ra_reg[class_count + i] =
+ ralloc_array(set, uint16_t, CLASS_REGS(i));
+
+ for (unsigned j = 0; j < HALF_CLASS_REGS(i); j++) {
+ ra_class_add_reg(set->regs, set->half_classes[i], reg);
+
+ set->ra_reg_to_gpr[reg] = j;
+ set->gpr_to_ra_reg[class_count + i][j] = reg;
+
+ for (unsigned br = j; br < j + half_class_sizes[i]; br++)
+ ra_add_transitive_reg_conflict(set->regs, br + first_half_reg, reg);
+
+ reg++;
+ }
+ }
+
+ ra_set_finalize(set->regs, q_values);
+
+ ralloc_free(q_values);
+
+ return set;
+}
+
+/* register-assign context, per-shader */
struct ir3_ra_ctx {
- struct ir3_block *block;
+ struct ir3 *ir;
enum shader_t type;
- bool frag_coord;
bool frag_face;
- int cnt;
- bool error;
- struct {
- unsigned base;
- unsigned size;
- } arrays[MAX_ARRAYS];
+
+ struct ir3_ra_reg_set *set;
+ struct ra_graph *g;
+ unsigned alloc_count;
+ unsigned class_alloc_count[total_class_count];
+ unsigned class_base[total_class_count];
+ unsigned instr_cnt;
+ unsigned *def, *use; /* def/use table */
};
-#ifdef DEBUG
-# include "freedreno_util.h"
-# define ra_debug (fd_mesa_debug & FD_DBG_OPTMSGS)
-#else
-# define ra_debug 0
-#endif
-
-#define ra_dump_list(msg, n) do { \
- if (ra_debug) { \
- debug_printf("-- " msg); \
- ir3_dump_instr_list(n); \
- } \
- } while (0)
-
-#define ra_dump_instr(msg, n) do { \
- if (ra_debug) { \
- debug_printf(">> " msg); \
- ir3_dump_instr_single(n); \
- } \
- } while (0)
-
-#define ra_assert(ctx, x) do { \
- debug_assert(x); \
- if (!(x)) { \
- debug_printf("RA: failed assert: %s\n", #x); \
- (ctx)->error = true; \
- }; \
- } while (0)
-
-
-/* sorta ugly way to retrofit half-precision support.. rather than
- * passing extra param around, just OR in a high bit. All the low
- * value arithmetic (ie. +/- offset within a contiguous vec4, etc)
- * will continue to work as long as you don't underflow (and that
- * would go badly anyways).
- */
-#define REG_HALF 0x8000
+/* additional block-data (per-block) */
+struct ir3_ra_block_data {
+ BITSET_WORD *def; /* variables defined before used in block */
+ BITSET_WORD *use; /* variables used before defined in block */
+ BITSET_WORD *livein; /* which defs reach entry point of block */
+ BITSET_WORD *liveout; /* which defs reach exit point of block */
+};
+
+static bool
+is_half(struct ir3_instruction *instr)
+{
+ return !!(instr->regs[0]->flags & IR3_REG_HALF);
+}
-#define REG(n, wm, f) (struct ir3_register){ \
- .flags = (f), \
- .num = (n), \
- .wrmask = TGSI_WRITEMASK_ ## wm, \
+static int
+size_to_class(unsigned sz, bool half)
+{
+ if (half) {
+ for (unsigned i = 0; i < half_class_count; i++)
+ if (half_class_sizes[i] >= sz)
+ return i + class_count;
+ } else {
+ for (unsigned i = 0; i < class_count; i++)
+ if (class_sizes[i] >= sz)
+ return i;
}
+ debug_assert(0);
+ return -1;
+}
-/* check that the register exists, is a GPR and is not special (a0/p0) */
-static struct ir3_register * reg_check(struct ir3_instruction *instr, unsigned n)
+static bool
+is_temp(struct ir3_register *reg)
{
- if ((n < instr->regs_count) && reg_gpr(instr->regs[n]) &&
- !(instr->regs[n]->flags & IR3_REG_SSA))
- return instr->regs[n];
- return NULL;
+ if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED))
+ return false;
+ if (reg->flags & IR3_REG_RELATIV) // TODO
+ return false;
+ if ((reg->num == regid(REG_A0, 0)) ||
+ (reg->num == regid(REG_P0, 0)))
+ return false;
+ return true;
}
-/* figure out if an unassigned src register points back to the instr we
- * are assigning:
- */
-static bool instr_used_by(struct ir3_instruction *instr,
- struct ir3_register *src)
+static bool
+writes_gpr(struct ir3_instruction *instr)
{
- struct ir3_instruction *src_instr = ssa(src);
- unsigned i;
- if (instr == src_instr)
- return true;
- if (src_instr && is_meta(src_instr))
- for (i = 1; i < src_instr->regs_count; i++)
- if (instr_used_by(instr, src_instr->regs[i]))
- return true;
-
- return false;
+ if (is_store(instr))
+ return false;
+ /* is dest a normal temp register: */
+ return is_temp(instr->regs[0]);
}
-static bool instr_is_output(struct ir3_instruction *instr)
+static struct ir3_instruction *
+get_definer(struct ir3_instruction *instr, int *sz, int *off)
{
- struct ir3_block *block = instr->block;
- unsigned i;
+ struct ir3_instruction *d = NULL;
+ if (is_meta(instr) && (instr->opc == OPC_META_FI)) {
+ /* What about the case where collect is subset of array, we
+ * need to find the distance between where actual array starts
+ * and fanin.. that probably doesn't happen currently.
+ */
+ struct ir3_register *src;
- for (i = 0; i < block->noutputs; i++)
- if (instr == block->outputs[i])
- return true;
+ /* note: don't use foreach_ssa_src as this gets called once
+ * while assigning regs (which clears SSA flag)
+ */
+ foreach_src(src, instr) {
+ if (!src->instr)
+ continue;
+ if ((!d) || (src->instr->ip < d->ip))
+ d = src->instr;
+ }
- return false;
-}
+ *sz = instr->regs_count - 1;
+ *off = 0;
-static void mark_sources(struct ir3_instruction *instr,
- struct ir3_instruction *n, regmask_t *liveregs, regmask_t *written)
-{
- unsigned i;
+ } else if (instr->cp.right || instr->cp.left) {
+ /* covers also the meta:fo case, which ends up w/ single
+ * scalar instructions for each component:
+ */
+ struct ir3_instruction *f = ir3_neighbor_first(instr);
+
+ /* by definition, the entire sequence forms one linked list
+ * of single scalar register nodes (even if some of them may
+ * be fanouts from a texture sample (for example) instr. We
+ * just need to walk the list finding the first element of
+ * the group defined (lowest ip)
+ */
+ int cnt = 0;
+
+ d = f;
+ while (f) {
+ if (f->ip < d->ip)
+ d = f;
+ if (f == instr)
+ *off = cnt;
+ f = f->cp.right;
+ cnt++;
+ }
+
+ *sz = cnt;
+
+ } else {
+ /* second case is looking directly at the instruction which
+ * produces multiple values (eg, texture sample), rather
+ * than the fanout nodes that point back to that instruction.
+ * This isn't quite right, because it may be part of a larger
+ * group, such as:
+ *
+ * sam (f32)(xyzw)r0.x, ...
+ * add r1.x, ...
+ * add r1.y, ...
+ * sam (f32)(xyzw)r2.x, r0.w <-- (r0.w, r1.x, r1.y)
+ *
+ * need to come up with a better way to handle that case.
+ */
+ if (instr->address) {
+ *sz = instr->regs[0]->size;
+ } else {
+ *sz = util_last_bit(instr->regs[0]->wrmask);
+ }
+ *off = 0;
+ d = instr;
+ }
+
+ if (d->regs[0]->flags & IR3_REG_PHI_SRC) {
+ struct ir3_instruction *phi = d->regs[0]->instr;
+ struct ir3_instruction *dd;
+ int dsz, doff;
+
+ dd = get_definer(phi, &dsz, &doff);
+
+ *sz = MAX2(*sz, dsz);
+ *off = doff;
+
+ if (dd->ip < d->ip) {
+ d = dd;
+ }
+ }
- for (i = 1; i < n->regs_count; i++) {
- struct ir3_register *r = reg_check(n, i);
- if (r)
- regmask_set_if_not(liveregs, r, written);
+ if (is_meta(d) && (d->opc == OPC_META_PHI)) {
+ /* we have already inserted parallel-copies into
+ * the phi, so we don't need to chase definers
+ */
+ struct ir3_register *src;
- /* if any src points back to the instruction(s) in
- * the block of neighbors that we are assigning then
- * mark any written (clobbered) registers as live:
+ /* note: don't use foreach_ssa_src as this gets called once
+ * while assigning regs (which clears SSA flag)
*/
- if (instr_used_by(instr, n->regs[i]))
- regmask_or(liveregs, liveregs, written);
+ foreach_src(src, d) {
+ if (!src->instr)
+ continue;
+ if (src->instr->ip < d->ip)
+ d = src->instr;
+ }
}
+ if (is_meta(d) && (d->opc == OPC_META_FO)) {
+ struct ir3_instruction *dd;
+ int dsz, doff;
+
+ dd = get_definer(d->regs[1]->instr, &dsz, &doff);
+
+ /* by definition, should come before: */
+ debug_assert(dd->ip < d->ip);
+
+ *sz = MAX2(*sz, dsz);
+
+ /* Fanout's are grouped, so *off should already valid */
+
+ d = dd;
+ }
+
+ return d;
}
-/* live means read before written */
-static void compute_liveregs(struct ir3_ra_ctx *ctx,
- struct ir3_instruction *instr, regmask_t *liveregs)
+/* give each instruction a name (and ip), and count up the # of names
+ * of each class
+ */
+static void
+ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
{
- struct ir3_block *block = instr->block;
- struct ir3_instruction *n;
- regmask_t written;
- unsigned i;
+ list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+ struct ir3_instruction *defn;
+ int cls, sz, off;
- regmask_init(&written);
+ ctx->instr_cnt++;
- for (n = instr->next; n; n = n->next) {
- struct ir3_register *r;
-
- if (is_meta(n))
+ if (instr->regs_count == 0)
continue;
- /* check first src's read: */
- mark_sources(instr, n, liveregs, &written);
+ if (!writes_gpr(instr))
+ continue;
- /* for instructions that write to an array, we need to
- * capture the dependency on the array elements:
- */
- if (n->fanin)
- mark_sources(instr, n->fanin, liveregs, &written);
+ defn = get_definer(instr, &sz, &off);
- /* meta-instructions don't actually get scheduled,
- * so don't let it's write confuse us.. what we
- * really care about is when the src to the meta
- * instr was written:
- */
- if (is_meta(n))
+ if (defn != instr)
continue;
- /* then dst written (if assigned already): */
- r = reg_check(n, 0);
- if (r) {
- /* if an instruction *is* an output, then it is live */
- if (!instr_is_output(n))
- regmask_set(&written, r);
+ /* arrays which don't fit in one of the pre-defined class
+ * sizes are pre-colored:
+ *
+ * TODO but we still need to allocate names for them, don't we??
+ */
+ cls = size_to_class(sz, is_half(defn));
+ if (cls >= 0) {
+ instr->name = ctx->class_alloc_count[cls]++;
+ ctx->alloc_count++;
}
-
}
+}
- /* be sure to account for output registers too: */
- for (i = 0; i < block->noutputs; i++) {
- struct ir3_register *r;
- if (!block->outputs[i])
- continue;
- r = reg_check(block->outputs[i], 0);
- if (r)
- regmask_set_if_not(liveregs, r, &written);
+static void
+ra_init(struct ir3_ra_ctx *ctx)
+{
+ ir3_clear_mark(ctx->ir);
+ ir3_count_instructions(ctx->ir);
+
+ list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
+ ra_block_name_instructions(ctx, block);
}
- /* if instruction is output, we need a reg that isn't written
- * before the end.. equiv to the instr_used_by() check above
- * in the loop body
- * TODO maybe should follow fanin/fanout?
+ /* figure out the base register name for each class. The
+ * actual ra name is class_base[cls] + instr->name;
*/
- if (instr_is_output(instr))
- regmask_or(liveregs, liveregs, &written);
+ ctx->class_base[0] = 0;
+ for (unsigned i = 1; i < total_class_count; i++) {
+ ctx->class_base[i] = ctx->class_base[i-1] +
+ ctx->class_alloc_count[i-1];
+ }
+
+ ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count);
+ ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
+ ctx->use = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
+}
+
+static unsigned
+ra_name(struct ir3_ra_ctx *ctx, int cls, struct ir3_instruction *defn)
+{
+ unsigned name;
+ debug_assert(cls >= 0);
+ name = ctx->class_base[cls] + defn->name;
+ debug_assert(name < ctx->alloc_count);
+ return name;
}
-static int find_available(regmask_t *liveregs, int size, bool half)
+static void
+ra_destroy(struct ir3_ra_ctx *ctx)
{
- unsigned i;
- unsigned f = half ? IR3_REG_HALF : 0;
- for (i = 0; i < MAX_REG - size; i++) {
- if (!regmask_get(liveregs, &REG(i, X, f))) {
- unsigned start = i++;
- for (; (i < MAX_REG) && ((i - start) < size); i++)
- if (regmask_get(liveregs, &REG(i, X, f)))
- break;
- if ((i - start) >= size)
- return start;
+ ralloc_free(ctx->g);
+}
+
+static void
+ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
+{
+ struct ir3_ra_block_data *bd;
+ unsigned bitset_words = BITSET_WORDS(ctx->alloc_count);
+
+ bd = rzalloc(ctx->g, struct ir3_ra_block_data);
+
+ bd->def = rzalloc_array(bd, BITSET_WORD, bitset_words);
+ bd->use = rzalloc_array(bd, BITSET_WORD, bitset_words);
+ bd->livein = rzalloc_array(bd, BITSET_WORD, bitset_words);
+ bd->liveout = rzalloc_array(bd, BITSET_WORD, bitset_words);
+
+ block->bd = bd;
+
+ list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+ struct ir3_instruction *src;
+
+ if (instr->regs_count == 0)
+ continue;
+
+ /* There are a couple special cases to deal with here:
+ *
+ * fanout: used to split values from a higher class to a lower
+ * class, for example split the results of a texture fetch
+ * into individual scalar values; We skip over these from
+ * a 'def' perspective, and for a 'use' we walk the chain
+ * up to the defining instruction.
+ *
+ * fanin: used to collect values from lower class and assemble
+ * them together into a higher class, for example arguments
+ * to texture sample instructions; We consider these to be
+ * defined at the earliest fanin source.
+ *
+ * phi: used to merge values from different flow control paths
+ * to the same reg. Consider defined at earliest phi src,
+ * and update all the other phi src's (which may come later
+ * in the program) as users to extend the var's live range.
+ *
+ * Most of this, other than phi, is completely handled in the
+ * get_definer() helper.
+ *
+ * In either case, we trace the instruction back to the original
+ * definer and consider that as the def/use ip.
+ */
+
+ if (writes_gpr(instr)) {
+ struct ir3_instruction *defn;
+ int cls, sz, off;
+
+ defn = get_definer(instr, &sz, &off);
+ if (defn == instr) {
+ /* arrays which don't fit in one of the pre-defined class
+ * sizes are pre-colored:
+ */
+ cls = size_to_class(sz, is_half(defn));
+ if (cls >= 0) {
+ unsigned name = ra_name(ctx, cls, defn);
+
+ ctx->def[name] = defn->ip;
+ ctx->use[name] = defn->ip;
+
+ /* since we are in SSA at this point: */
+ debug_assert(!BITSET_TEST(bd->use, name));
+
+ BITSET_SET(bd->def, name);
+
+ if (is_half(defn)) {
+ ra_set_node_class(ctx->g, name,
+ ctx->set->half_classes[cls - class_count]);
+ } else {
+ ra_set_node_class(ctx->g, name,
+ ctx->set->classes[cls]);
+ }
+
+ /* extend the live range for phi srcs, which may come
+ * from the bottom of the loop
+ */
+ if (defn->regs[0]->flags & IR3_REG_PHI_SRC) {
+ struct ir3_instruction *phi = defn->regs[0]->instr;
+ foreach_ssa_src(src, phi) {
+ /* if src is after phi, then we need to extend
+ * the liverange to the end of src's block:
+ */
+ if (src->ip > phi->ip) {
+ struct ir3_instruction *last =
+ list_last_entry(&src->block->instr_list,
+ struct ir3_instruction, node);
+ ctx->use[name] = MAX2(ctx->use[name], last->ip);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ foreach_ssa_src(src, instr) {
+ if (writes_gpr(src)) {
+ struct ir3_instruction *srcdefn;
+ int cls, sz, off;
+
+ srcdefn = get_definer(src, &sz, &off);
+ cls = size_to_class(sz, is_half(srcdefn));
+ if (cls >= 0) {
+ unsigned name = ra_name(ctx, cls, srcdefn);
+ ctx->use[name] = MAX2(ctx->use[name], instr->ip);
+ if (!BITSET_TEST(bd->def, name))
+ BITSET_SET(bd->use, name);
+ }
+ }
}
}
- assert(0);
- return -1;
}
-static int alloc_block(struct ir3_ra_ctx *ctx,
- struct ir3_instruction *instr, int size)
+static bool
+ra_compute_livein_liveout(struct ir3_ra_ctx *ctx)
{
- struct ir3_register *dst = instr->regs[0];
- struct ir3_instruction *n;
- regmask_t liveregs;
- unsigned name;
+ unsigned bitset_words = BITSET_WORDS(ctx->alloc_count);
+ bool progress = false;
- /* should only ever be called w/ head of neighbor list: */
- debug_assert(!instr->cp.left);
+ list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
+ struct ir3_ra_block_data *bd = block->bd;
- regmask_init(&liveregs);
+ /* update livein: */
+ for (unsigned i = 0; i < bitset_words; i++) {
+ BITSET_WORD new_livein =
+ (bd->use[i] | (bd->liveout[i] & ~bd->def[i]));
- for (n = instr; n; n = n->cp.right)
- compute_liveregs(ctx, n, &liveregs);
+ if (new_livein & ~bd->livein[i]) {
+ bd->livein[i] |= new_livein;
+ progress = true;
+ }
+ }
- /* because we do assignment on fanout nodes for wrmask!=0x1, we
- * need to handle this special case, where the fanout nodes all
- * appear after one or more of the consumers of the src node:
- *
- * 0098:009: sam _, r2.x
- * 0028:010: mul.f r3.z, r4.x, c13.x
- * ; we start assigning here for '0098:009: sam'.. but
- * ; would miss the usage at '0028:010: mul.f'
- * 0101:009: _meta:fo _, _[0098:009: sam], off=2
- */
- if (is_meta(instr) && (instr->opc == OPC_META_FO))
- compute_liveregs(ctx, instr->regs[1]->instr, &liveregs);
+ /* update liveout: */
+ for (unsigned j = 0; j < ARRAY_SIZE(block->successors); j++) {
+ struct ir3_block *succ = block->successors[j];
+ struct ir3_ra_block_data *succ_bd;
+
+ if (!succ)
+ continue;
- name = find_available(&liveregs, size,
- !!(dst->flags & IR3_REG_HALF));
+ succ_bd = succ->bd;
- if (dst->flags & IR3_REG_HALF)
- name |= REG_HALF;
+ for (unsigned i = 0; i < bitset_words; i++) {
+ BITSET_WORD new_liveout =
+ (succ_bd->livein[i] & ~bd->liveout[i]);
- return name;
+ if (new_liveout) {
+ bd->liveout[i] |= new_liveout;
+ progress = true;
+ }
+ }
+ }
+ }
+
+ return progress;
}
-static type_t half_type(type_t type)
+static void
+ra_add_interference(struct ir3_ra_ctx *ctx)
{
- switch (type) {
- case TYPE_F32: return TYPE_F16;
- case TYPE_U32: return TYPE_U16;
- case TYPE_S32: return TYPE_S16;
- /* instructions may already be fixed up: */
- case TYPE_F16:
- case TYPE_U16:
- case TYPE_S16:
- return type;
- default:
- assert(0);
- return ~0;
+ struct ir3 *ir = ctx->ir;
+
+ /* compute live ranges (use/def) on a block level, also updating
+ * block's def/use bitmasks (used below to calculate per-block
+ * livein/liveout):
+ */
+ list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+ ra_block_compute_live_ranges(ctx, block);
+ }
+
+ /* update per-block livein/liveout: */
+ while (ra_compute_livein_liveout(ctx)) {}
+
+ /* extend start/end ranges based on livein/liveout info from cfg: */
+ unsigned bitset_words = BITSET_WORDS(ctx->alloc_count);
+ list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+ struct ir3_ra_block_data *bd = block->bd;
+
+ for (unsigned i = 0; i < bitset_words; i++) {
+ if (BITSET_TEST(bd->livein, i)) {
+ ctx->def[i] = MIN2(ctx->def[i], block->start_ip);
+ ctx->use[i] = MAX2(ctx->use[i], block->start_ip);
+ }
+
+ if (BITSET_TEST(bd->liveout, i)) {
+ ctx->def[i] = MIN2(ctx->def[i], block->end_ip);
+ ctx->use[i] = MAX2(ctx->use[i], block->end_ip);
+ }
+ }
+ }
+
+ /* need to fix things up to keep outputs live: */
+ for (unsigned i = 0; i < ir->noutputs; i++) {
+ struct ir3_instruction *instr = ir->outputs[i];
+ struct ir3_instruction *defn;
+ int cls, sz, off;
+
+ defn = get_definer(instr, &sz, &off);
+ cls = size_to_class(sz, is_half(defn));
+ if (cls >= 0) {
+ unsigned name = ra_name(ctx, cls, defn);
+ ctx->use[name] = ctx->instr_cnt;
+ }
+ }
+
+ for (unsigned i = 0; i < ctx->alloc_count; i++) {
+ for (unsigned j = 0; j < ctx->alloc_count; j++) {
+ if (!((ctx->def[i] >= ctx->use[j]) ||
+ (ctx->def[j] >= ctx->use[i]))) {
+ ra_add_node_interference(ctx->g, i, j);
+ }
+ }
}
}
@@ -358,302 +776,124 @@ static void fixup_half_instr_src(struct ir3_instruction *instr)
}
}
-static void reg_assign(struct ir3_instruction *instr,
- unsigned r, unsigned name)
+static void
+reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg,
+ struct ir3_instruction *instr)
{
- struct ir3_register *reg = instr->regs[r];
-
- reg->flags &= ~IR3_REG_SSA;
- reg->num = name & ~REG_HALF;
-
- if (name & REG_HALF) {
- reg->flags |= IR3_REG_HALF;
- /* if dst reg being assigned, patch up the instr: */
- if (reg == instr->regs[0])
- fixup_half_instr_dst(instr);
- else
- fixup_half_instr_src(instr);
- }
-}
-
-static void instr_assign(struct ir3_ra_ctx *ctx,
- struct ir3_instruction *instr, unsigned name);
+ struct ir3_instruction *defn;
+ int cls, sz, off;
-static void instr_assign_src(struct ir3_ra_ctx *ctx,
- struct ir3_instruction *instr, unsigned r, unsigned name)
-{
- struct ir3_register *reg = instr->regs[r];
+ defn = get_definer(instr, &sz, &off);
+ cls = size_to_class(sz, is_half(defn));
+ if (cls >= 0) {
+ unsigned name = ra_name(ctx, cls, defn);
+ unsigned r = ra_get_node_reg(ctx->g, name);
+ unsigned num = ctx->set->ra_reg_to_gpr[r] + off;
- if (reg->flags & IR3_REG_RELATIV)
- name += reg->offset;
+ if (reg->flags & IR3_REG_RELATIV)
+ num += reg->offset;
- reg_assign(instr, r, name);
+ reg->num = num;
+ reg->flags &= ~(IR3_REG_SSA | IR3_REG_PHI_SRC);
- if (is_meta(instr)) {
- switch (instr->opc) {
- case OPC_META_INPUT:
- /* shader-input does not have a src, only block input: */
- debug_assert(instr->regs_count == 2);
- instr_assign(ctx, instr, name);
- return;
- case OPC_META_FO:
- instr_assign(ctx, instr, name + instr->fo.off);
- return;
- case OPC_META_FI:
- instr_assign(ctx, instr, name - (r - 1));
- return;
- default:
- break;
- }
+ if (is_half(defn))
+ reg->flags |= IR3_REG_HALF;
}
}
-static void instr_assign_srcs(struct ir3_ra_ctx *ctx,
- struct ir3_instruction *instr, unsigned name)
+static void
+ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block)
{
- struct ir3_instruction *n, *src;
+ list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+ struct ir3_register *reg;
- for (n = instr->next; n && !ctx->error; n = n->next) {
- foreach_ssa_src_n(src, i, n) {
- unsigned r = i + 1;
-
- /* skip address / etc (non real sources): */
- if (r >= n->regs_count)
- continue;
+ if (instr->regs_count == 0)
+ continue;
- if (src == instr)
- instr_assign_src(ctx, n, r, name);
+ if (writes_gpr(instr)) {
+ reg_assign(ctx, instr->regs[0], instr);
+ if (instr->regs[0]->flags & IR3_REG_HALF)
+ fixup_half_instr_dst(instr);
}
- }
-}
-
-static void instr_assign(struct ir3_ra_ctx *ctx,
- struct ir3_instruction *instr, unsigned name)
-{
- struct ir3_register *reg = instr->regs[0];
-
- if (reg->flags & IR3_REG_RELATIV)
- return;
-
- /* check if already assigned: */
- if (!(reg->flags & IR3_REG_SSA)) {
- /* ... and if so, sanity check: */
- ra_assert(ctx, reg->num == (name & ~REG_HALF));
- return;
- }
-
- /* rename this instructions dst register: */
- reg_assign(instr, 0, name);
-
- /* and rename any subsequent use of result of this instr: */
- instr_assign_srcs(ctx, instr, name);
-
- /* To simplify the neighbor logic, and to "avoid" dealing with
- * instructions which write more than one output, we actually
- * do register assignment for instructions that produce multiple
- * outputs on the fanout nodes and propagate up the assignment
- * to the actual instruction:
- */
- if (is_meta(instr) && (instr->opc == OPC_META_FO)) {
- struct ir3_instruction *src;
- debug_assert(name >= instr->fo.off);
-
- foreach_ssa_src(src, instr)
- instr_assign(ctx, src, name - instr->fo.off);
- }
-}
+ foreach_src_n(reg, n, instr) {
+ struct ir3_instruction *src = reg->instr;
+ if (!src)
+ continue;
-/* check neighbor list to see if it is already partially (or completely)
- * assigned, in which case register block is already allocated and we
- * just need to complete the assignment:
- */
-static int check_partial_assignment(struct ir3_ra_ctx *ctx,
- struct ir3_instruction *instr)
-{
- struct ir3_instruction *n;
- int off = 0;
-
- debug_assert(!instr->cp.left);
-
- for (n = instr; n; n = n->cp.right) {
- struct ir3_register *dst = n->regs[0];
- if ((n->depth != DEPTH_UNUSED) &&
- !(dst->flags & IR3_REG_SSA)) {
- int name = dst->num - off;
- debug_assert(name >= 0);
- return name;
+ reg_assign(ctx, instr->regs[n+1], src);
+ if (instr->regs[n+1]->flags & IR3_REG_HALF)
+ fixup_half_instr_src(instr);
}
- off++;
}
-
- return -1;
}
-/* allocate register name(s) for a list of neighboring instructions;
- * instr should point to leftmost neighbor (head of list)
- */
-static void instr_alloc_and_assign(struct ir3_ra_ctx *ctx,
- struct ir3_instruction *instr)
+static int
+ra_alloc(struct ir3_ra_ctx *ctx)
{
- struct ir3_instruction *n;
- struct ir3_register *dst;
- int name;
-
- debug_assert(!instr->cp.left);
-
- if (instr->regs_count == 0)
- return;
-
- dst = instr->regs[0];
-
- /* For indirect dst, take the register assignment from the
- * fanin and propagate it forward.
- */
- if (dst->flags & IR3_REG_RELATIV) {
- /* NOTE can be grouped, if for example outputs:
- * for now disable cp if indirect writes
- */
- instr_alloc_and_assign(ctx, instr->fanin);
-
- dst->num += instr->fanin->regs[0]->num;
- dst->flags &= ~IR3_REG_SSA;
-
- instr_assign_srcs(ctx, instr, instr->fanin->regs[0]->num);
-
- return;
- }
-
- /* for instructions w/ fanouts, do the actual register assignment
- * on the group of fanout neighbor nodes and propagate the reg
- * name back up to the texture instruction.
- */
- if (dst->wrmask != 0x1)
- return;
-
- name = check_partial_assignment(ctx, instr);
-
- /* allocate register(s): */
- if (name >= 0) {
- /* already partially assigned, just finish the job */
- } else if (reg_gpr(dst)) {
- int size;
- /* number of consecutive registers to assign: */
- size = ir3_neighbor_count(instr);
- if (dst->wrmask != 0x1)
- size = MAX2(size, ffs(~dst->wrmask) - 1);
- name = alloc_block(ctx, instr, size);
- } else if (dst->flags & IR3_REG_ADDR) {
- debug_assert(!instr->cp.right);
- dst->flags &= ~IR3_REG_ADDR;
- name = regid(REG_A0, 0) | REG_HALF;
- } else {
- debug_assert(!instr->cp.right);
- /* predicate register (p0).. etc */
- name = regid(REG_P0, 0);
- debug_assert(dst->num == name);
- }
-
- ra_assert(ctx, name >= 0);
-
- for (n = instr; n && !ctx->error; n = n->cp.right) {
- instr_assign(ctx, n, name);
- name++;
- }
-}
-
-static void instr_assign_array(struct ir3_ra_ctx *ctx,
- struct ir3_instruction *instr)
-{
- struct ir3_instruction *src;
- int name, aid = instr->fi.aid;
-
- if (ctx->arrays[aid].base == ~0) {
- int size = instr->regs_count - 1;
- ctx->arrays[aid].base = alloc_block(ctx, instr, size);
- ctx->arrays[aid].size = size;
- }
-
- name = ctx->arrays[aid].base;
-
- foreach_ssa_src_n(src, i, instr) {
- unsigned r = i + 1;
-
- /* skip address / etc (non real sources): */
- if (r >= instr->regs_count)
- break;
-
- instr_assign(ctx, src, name);
- name++;
- }
-
-}
-
-static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block)
-{
- struct ir3_instruction *n;
-
/* frag shader inputs get pre-assigned, since we have some
* constraints/unknowns about setup for some of these regs:
*/
- if ((ctx->type == SHADER_FRAGMENT) && !block->parent) {
+ if (ctx->type == SHADER_FRAGMENT) {
+ struct ir3 *ir = ctx->ir;
unsigned i = 0, j;
- if (ctx->frag_face && (i < block->ninputs) && block->inputs[i]) {
+ if (ctx->frag_face && (i < ir->ninputs) && ir->inputs[i]) {
+ struct ir3_instruction *instr = ir->inputs[i];
+ int cls = size_to_class(1, true);
+ unsigned name = ra_name(ctx, cls, instr);
+ unsigned reg = ctx->set->gpr_to_ra_reg[cls][0];
+
/* if we have frag_face, it gets hr0.x */
- instr_assign(ctx, block->inputs[i], REG_HALF | 0);
+ ra_set_node_reg(ctx->g, name, reg);
i += 4;
}
- for (j = 0; i < block->ninputs; i++, j++)
- if (block->inputs[i])
- instr_assign(ctx, block->inputs[i], j);
- }
- ra_dump_list("-------\n", block->head);
+ for (j = 0; i < ir->ninputs; i++) {
+ struct ir3_instruction *instr = ir->inputs[i];
+ if (instr) {
+ struct ir3_instruction *defn;
+ int cls, sz, off;
- /* first pass, assign arrays: */
- for (n = block->head; n && !ctx->error; n = n->next) {
- if (is_meta(n) && (n->opc == OPC_META_FI) && n->fi.aid) {
- debug_assert(!n->cp.left); /* don't think this should happen */
- ra_dump_instr("ASSIGN ARRAY: ", n);
- instr_assign_array(ctx, n);
- ra_dump_list("-------\n", block->head);
+ defn = get_definer(instr, &sz, &off);
+ if (defn == instr) {
+ unsigned name, reg;
+
+ cls = size_to_class(sz, is_half(defn));
+ name = ra_name(ctx, cls, defn);
+ reg = ctx->set->gpr_to_ra_reg[cls][j];
+
+ ra_set_node_reg(ctx->g, name, reg);
+ j += sz;
+ }
+ }
}
}
- for (n = block->head; n && !ctx->error; n = n->next) {
- ra_dump_instr("ASSIGN: ", n);
- instr_alloc_and_assign(ctx, ir3_neighbor_first(n));
- ra_dump_list("-------\n", block->head);
+ if (!ra_allocate(ctx->g))
+ return -1;
+
+ list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
+ ra_block_alloc(ctx, block);
}
- return ctx->error ? -1 : 0;
+ return 0;
}
-int ir3_block_ra(struct ir3_block *block, enum shader_t type,
+int ir3_ra(struct ir3 *ir, enum shader_t type,
bool frag_coord, bool frag_face)
{
- struct ir3_instruction *n;
struct ir3_ra_ctx ctx = {
- .block = block,
+ .ir = ir,
.type = type,
- .frag_coord = frag_coord,
.frag_face = frag_face,
+ .set = ir->compiler->set,
};
int ret;
- memset(&ctx.arrays, ~0, sizeof(ctx.arrays));
-
- /* mark dst registers w/ SSA flag so we can see which
- * have been assigned so far:
- * NOTE: we really should set SSA flag consistently on
- * every dst register in the frontend.
- */
- for (n = block->head; n; n = n->next)
- if (n->regs_count > 0)
- n->regs[0]->flags |= IR3_REG_SSA;
-
- ir3_clear_mark(block->shader);
- ret = block_ra(&ctx, block);
+ ra_init(&ctx);
+ ra_add_interference(&ctx);
+ ret = ra_alloc(&ctx);
+ ra_destroy(&ctx);
return ret;
}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_sched.c b/src/gallium/drivers/freedreno/ir3/ir3_sched.c
index a790cba129b..49a4426d163 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_sched.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_sched.c
@@ -31,23 +31,14 @@
#include "ir3.h"
-enum {
- SCHEDULED = -1,
- DELAYED = -2,
-};
-
/*
* Instruction Scheduling:
*
- * Using the depth sorted list from depth pass, attempt to recursively
- * schedule deepest unscheduled path. The first instruction that cannot
- * be scheduled, returns the required delay slots it needs, at which
- * point we return back up to the top and attempt to schedule by next
- * highest depth. After a sufficient number of instructions have been
- * scheduled, return back to beginning of list and start again. If you
- * reach the end of depth sorted list without being able to insert any
- * instruction, insert nop's. Repeat until no more unscheduled
- * instructions.
+ * A priority-queue based scheduling algo. Add eligible instructions,
+ * ie. ones with all their dependencies scheduled, to the priority
+ * (depth) sorted queue (list). Pop highest priority instruction off
+ * the queue and schedule it, add newly eligible instructions to the
+ * priority queue, rinse, repeat.
*
* There are a few special cases that need to be handled, since sched
* is currently independent of register allocation. Usages of address
@@ -60,90 +51,33 @@ enum {
*/
struct ir3_sched_ctx {
- struct ir3_instruction *scheduled; /* last scheduled instr */
+ struct ir3_block *block; /* the current block */
+ struct ir3_instruction *scheduled; /* last scheduled instr XXX remove*/
struct ir3_instruction *addr; /* current a0.x user, if any */
struct ir3_instruction *pred; /* current p0.x user, if any */
- unsigned cnt;
bool error;
};
-static struct ir3_instruction *
-deepest(struct ir3_instruction **srcs, unsigned nsrcs)
-{
- struct ir3_instruction *d = NULL;
- unsigned i = 0, id = 0;
-
- while ((i < nsrcs) && !(d = srcs[id = i]))
- i++;
-
- if (!d)
- return NULL;
-
- for (; i < nsrcs; i++)
- if (srcs[i] && (srcs[i]->depth > d->depth))
- d = srcs[id = i];
-
- srcs[id] = NULL;
-
- return d;
-}
-
-static unsigned distance(struct ir3_sched_ctx *ctx,
- struct ir3_instruction *instr, unsigned maxd)
-{
- struct ir3_instruction *n = ctx->scheduled;
- unsigned d = 0;
- while (n && (n != instr) && (d < maxd)) {
- if (is_alu(n) || is_flow(n))
- d++;
- n = n->next;
- }
- return d;
-}
-
-/* TODO maybe we want double linked list? */
-static struct ir3_instruction * prev(struct ir3_instruction *instr)
-{
- struct ir3_instruction *p = instr->block->head;
- while (p && (p->next != instr))
- p = p->next;
- return p;
-}
-
static bool is_sfu_or_mem(struct ir3_instruction *instr)
{
return is_sfu(instr) || is_mem(instr);
}
-static void schedule(struct ir3_sched_ctx *ctx,
- struct ir3_instruction *instr, bool remove)
+static void
+schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
{
- struct ir3_block *block = instr->block;
+ debug_assert(ctx->block == instr->block);
/* maybe there is a better way to handle this than just stuffing
* a nop.. ideally we'd know about this constraint in the
* scheduling and depth calculation..
*/
if (ctx->scheduled && is_sfu_or_mem(ctx->scheduled) && is_sfu_or_mem(instr))
- schedule(ctx, ir3_instr_create(block, 0, OPC_NOP), false);
+ ir3_NOP(ctx->block);
/* remove from depth list:
*/
- if (remove) {
- struct ir3_instruction *p = prev(instr);
-
- /* NOTE: this can happen for inputs which are not
- * read.. in that case there is no need to schedule
- * the input, so just bail:
- */
- if (instr != (p ? p->next : block->head))
- return;
-
- if (p)
- p->next = instr->next;
- else
- block->head = instr->next;
- }
+ list_delinit(&instr->node);
if (writes_addr(instr)) {
assert(ctx->addr == NULL);
@@ -157,18 +91,30 @@ static void schedule(struct ir3_sched_ctx *ctx,
instr->flags |= IR3_INSTR_MARK;
- instr->next = ctx->scheduled;
+ list_addtail(&instr->node, &instr->block->instr_list);
ctx->scheduled = instr;
-
- ctx->cnt++;
}
-/*
- * Delay-slot calculation. Follows fanin/fanout.
- */
+static unsigned
+distance(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr,
+ unsigned maxd)
+{
+ struct list_head *instr_list = &ctx->block->instr_list;
+ unsigned d = 0;
+
+ list_for_each_entry_rev (struct ir3_instruction, n, instr_list, node) {
+ if ((n == instr) || (d >= maxd))
+ break;
+ if (is_alu(n) || is_flow(n))
+ d++;
+ }
+
+ return d;
+}
/* calculate delay for specified src: */
-static unsigned delay_calc_srcn(struct ir3_sched_ctx *ctx,
+static unsigned
+delay_calc_srcn(struct ir3_sched_ctx *ctx,
struct ir3_instruction *assigner,
struct ir3_instruction *consumer, unsigned srcn)
{
@@ -177,7 +123,10 @@ static unsigned delay_calc_srcn(struct ir3_sched_ctx *ctx,
if (is_meta(assigner)) {
struct ir3_instruction *src;
foreach_ssa_src(src, assigner) {
- unsigned d = delay_calc_srcn(ctx, src, consumer, srcn);
+ unsigned d;
+ if (src->block != assigner->block)
+ break;
+ d = delay_calc_srcn(ctx, src, consumer, srcn);
delay = MAX2(delay, d);
}
} else {
@@ -189,48 +138,87 @@ static unsigned delay_calc_srcn(struct ir3_sched_ctx *ctx,
}
/* calculate delay for instruction (maximum of delay for all srcs): */
-static unsigned delay_calc(struct ir3_sched_ctx *ctx,
- struct ir3_instruction *instr)
+static unsigned
+delay_calc(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
{
unsigned delay = 0;
struct ir3_instruction *src;
foreach_ssa_src_n(src, i, instr) {
- unsigned d = delay_calc_srcn(ctx, src, instr, i);
+ unsigned d;
+ if (src->block != instr->block)
+ continue;
+ d = delay_calc_srcn(ctx, src, instr, i);
delay = MAX2(delay, d);
}
return delay;
}
-/* A negative return value signals that an instruction has been newly
- * SCHEDULED (or DELAYED due to address or predicate register already
- * in use), return back up to the top of the stack (to block_sched())
+struct ir3_sched_notes {
+ /* there is at least one kill which could be scheduled, except
+ * for unscheduled bary.f's:
+ */
+ bool blocked_kill;
+ /* there is at least one instruction that could be scheduled,
+ * except for conflicting address/predicate register usage:
+ */
+ bool addr_conflict, pred_conflict;
+};
+
+static bool is_scheduled(struct ir3_instruction *instr)
+{
+ return !!(instr->flags & IR3_INSTR_MARK);
+}
+
+static bool
+check_conflict(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
+ struct ir3_instruction *instr)
+{
+ /* if this is a write to address/predicate register, and that
+ * register is currently in use, we need to defer until it is
+ * free:
+ */
+ if (writes_addr(instr) && ctx->addr) {
+ assert(ctx->addr != instr);
+ notes->addr_conflict = true;
+ return true;
+ }
+
+ if (writes_pred(instr) && ctx->pred) {
+ assert(ctx->pred != instr);
+ notes->pred_conflict = true;
+ return true;
+ }
+
+ return false;
+}
+
+/* is this instruction ready to be scheduled? Return negative for not
+ * ready (updating notes if needed), or >= 0 to indicate number of
+ * delay slots needed.
*/
-static int trysched(struct ir3_sched_ctx *ctx,
+static int
+instr_eligibility(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
struct ir3_instruction *instr)
{
- struct ir3_instruction *srcs[64];
struct ir3_instruction *src;
- unsigned delay, nsrcs = 0;
+ unsigned delay = 0;
- /* if already scheduled: */
- if (instr->flags & IR3_INSTR_MARK)
+ /* Phi instructions can have a dependency on something not
+ * scheduled yet (for ex, loops). But OTOH we don't really
+ * care. By definition phi's should appear at the top of
+ * the block, and it's sources should be values from the
+ * previously executing block, so they are always ready to
+ * be scheduled:
+ */
+ if (is_meta(instr) && (instr->opc == OPC_META_PHI))
return 0;
- /* figure out our src's, copy 'em out into an array for sorting: */
foreach_ssa_src(src, instr) {
- debug_assert(nsrcs < ARRAY_SIZE(srcs));
- srcs[nsrcs++] = src;
- }
-
- /* for each src register in sorted order:
- */
- delay = 0;
- while ((src = deepest(srcs, nsrcs))) {
- delay = trysched(ctx, src);
- if (delay)
- return delay;
+ /* if dependency not scheduled, we aren't ready yet: */
+ if (!is_scheduled(src))
+ return -1;
}
/* all our dependents are scheduled, figure out if
@@ -255,216 +243,276 @@ static int trysched(struct ir3_sched_ctx *ctx,
*/
if (is_kill(instr)) {
struct ir3 *ir = instr->block->shader;
- unsigned i;
- for (i = 0; i < ir->baryfs_count; i++) {
+ for (unsigned i = 0; i < ir->baryfs_count; i++) {
struct ir3_instruction *baryf = ir->baryfs[i];
if (baryf->depth == DEPTH_UNUSED)
continue;
- delay = trysched(ctx, baryf);
- if (delay)
- return delay;
+ if (!is_scheduled(baryf)) {
+ notes->blocked_kill = true;
+ return -1;
+ }
}
}
- /* if this is a write to address/predicate register, and that
- * register is currently in use, we need to defer until it is
- * free:
- */
- if (writes_addr(instr) && ctx->addr) {
- assert(ctx->addr != instr);
- return DELAYED;
- }
- if (writes_pred(instr) && ctx->pred) {
- assert(ctx->pred != instr);
- return DELAYED;
- }
+ if (check_conflict(ctx, notes, instr))
+ return -1;
- schedule(ctx, instr, true);
- return SCHEDULED;
+ return 0;
}
-static struct ir3_instruction * reverse(struct ir3_instruction *instr)
+/* move eligible instructions to the priority list: */
+static unsigned
+add_eligible_instrs(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
+ struct list_head *prio_queue, struct list_head *unscheduled_list)
{
- struct ir3_instruction *reversed = NULL;
- while (instr) {
- struct ir3_instruction *next = instr->next;
- instr->next = reversed;
- reversed = instr;
- instr = next;
+ unsigned min_delay = ~0;
+
+ list_for_each_entry_safe (struct ir3_instruction, instr, unscheduled_list, node) {
+ int e = instr_eligibility(ctx, notes, instr);
+ if (e < 0)
+ continue;
+ min_delay = MIN2(min_delay, e);
+ if (e == 0) {
+ /* remove from unscheduled list and into priority queue: */
+ list_delinit(&instr->node);
+ ir3_insert_by_depth(instr, prio_queue);
+ }
}
- return reversed;
-}
-static bool uses_current_addr(struct ir3_sched_ctx *ctx,
- struct ir3_instruction *instr)
-{
- return instr->address && (ctx->addr == instr->address);
+ return min_delay;
}
-static bool uses_current_pred(struct ir3_sched_ctx *ctx,
- struct ir3_instruction *instr)
+/* "spill" the address register by remapping any unscheduled
+ * instructions which depend on the current address register
+ * to a clone of the instruction which wrote the address reg.
+ */
+static void
+split_addr(struct ir3_sched_ctx *ctx)
{
- struct ir3_instruction *src;
- foreach_ssa_src(src, instr)
- if (ctx->pred == src)
- return true;
- return false;
+ struct ir3 *ir = ctx->addr->block->shader;
+ struct ir3_instruction *new_addr = NULL;
+ unsigned i;
+
+ debug_assert(ctx->addr);
+
+ for (i = 0; i < ir->indirects_count; i++) {
+ struct ir3_instruction *indirect = ir->indirects[i];
+
+ /* skip instructions already scheduled: */
+ if (indirect->flags & IR3_INSTR_MARK)
+ continue;
+
+ /* remap remaining instructions using current addr
+ * to new addr:
+ */
+ if (indirect->address == ctx->addr) {
+ if (!new_addr) {
+ new_addr = ir3_instr_clone(ctx->addr);
+ /* original addr is scheduled, but new one isn't: */
+ new_addr->flags &= ~IR3_INSTR_MARK;
+ }
+ indirect->address = new_addr;
+ }
+ }
+
+ /* all remaining indirects remapped to new addr: */
+ ctx->addr = NULL;
}
-/* when we encounter an instruction that writes to the address register
- * when it is in use, we delay that instruction and try to schedule all
- * other instructions using the current address register:
+/* "spill" the predicate register by remapping any unscheduled
+ * instructions which depend on the current predicate register
+ * to a clone of the instruction which wrote the predicate reg.
*/
-static int block_sched_undelayed(struct ir3_sched_ctx *ctx,
- struct ir3_block *block)
+static void
+split_pred(struct ir3_sched_ctx *ctx)
{
- struct ir3_instruction *instr = block->head;
- bool addr_in_use = false;
- bool pred_in_use = false;
- bool all_delayed = true;
- unsigned cnt = ~0, attempted = 0;
-
- while (instr) {
- struct ir3_instruction *next = instr->next;
- bool addr = uses_current_addr(ctx, instr);
- bool pred = uses_current_pred(ctx, instr);
-
- if (addr || pred) {
- int ret = trysched(ctx, instr);
-
- if (ret != DELAYED)
- all_delayed = false;
-
- if (ret == SCHEDULED)
- cnt = 0;
- else if (ret > 0)
- cnt = MIN2(cnt, ret);
- if (addr)
- addr_in_use = true;
- if (pred)
- pred_in_use = true;
-
- attempted++;
- }
+ struct ir3 *ir = ctx->pred->block->shader;
+ struct ir3_instruction *new_pred = NULL;
+ unsigned i;
- instr = next;
- }
+ debug_assert(ctx->pred);
- if (!addr_in_use)
- ctx->addr = NULL;
+ for (i = 0; i < ir->predicates_count; i++) {
+ struct ir3_instruction *predicated = ir->predicates[i];
- if (!pred_in_use)
- ctx->pred = NULL;
+ /* skip instructions already scheduled: */
+ if (predicated->flags & IR3_INSTR_MARK)
+ continue;
- /* detect if we've gotten ourselves into an impossible situation
- * and bail if needed
- */
- if (all_delayed && (attempted > 0)) {
- if (pred_in_use) {
- /* TODO we probably need to keep a list of instructions
- * that reference predicate, similar to indirects
- */
- ctx->error = true;
- return DELAYED;
- }
- if (addr_in_use) {
- struct ir3 *ir = ctx->addr->block->shader;
- struct ir3_instruction *new_addr =
- ir3_instr_clone(ctx->addr);
- unsigned i;
-
- /* original addr is scheduled, but new one isn't: */
- new_addr->flags &= ~IR3_INSTR_MARK;
-
- for (i = 0; i < ir->indirects_count; i++) {
- struct ir3_instruction *indirect = ir->indirects[i];
-
- /* skip instructions already scheduled: */
- if (indirect->flags & IR3_INSTR_MARK)
- continue;
-
- /* remap remaining instructions using current addr
- * to new addr:
- */
- if (indirect->address == ctx->addr)
- indirect->address = new_addr;
+ /* remap remaining instructions using current pred
+ * to new pred:
+ *
+ * TODO is there ever a case when pred isn't first
+ * (and only) src?
+ */
+ if (ssa(predicated->regs[1]) == ctx->pred) {
+ if (!new_pred) {
+ new_pred = ir3_instr_clone(ctx->pred);
+ /* original pred is scheduled, but new one isn't: */
+ new_pred->flags &= ~IR3_INSTR_MARK;
}
-
- /* all remaining indirects remapped to new addr: */
- ctx->addr = NULL;
-
- /* not really, but this will trigger us to go back to
- * main trysched() loop now that we've resolved the
- * conflict by duplicating the instr that writes to
- * the address register.
- */
- return SCHEDULED;
+ predicated->regs[1]->instr = new_pred;
}
}
- return cnt;
+ /* all remaining predicated remapped to new pred: */
+ ctx->pred = NULL;
}
-static void block_sched(struct ir3_sched_ctx *ctx, struct ir3_block *block)
+static void
+sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
{
- struct ir3_instruction *instr;
+ struct list_head unscheduled_list, prio_queue;
- /* schedule all the shader input's (meta-instr) first so that
- * the RA step sees that the input registers contain a value
- * from the start of the shader:
+ ctx->block = block;
+
+ /* move all instructions to the unscheduled list, and
+ * empty the block's instruction list (to which we will
+ * be inserting).
*/
- if (!block->parent) {
- unsigned i;
- for (i = 0; i < block->ninputs; i++) {
- struct ir3_instruction *in = block->inputs[i];
- if (in)
- schedule(ctx, in, true);
+ list_replace(&block->instr_list, &unscheduled_list);
+ list_inithead(&block->instr_list);
+ list_inithead(&prio_queue);
+
+ /* first a pre-pass to schedule all meta:input/phi instructions
+ * (which need to appear first so that RA knows the register is
+ * occupied):
+ */
+ list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) {
+ if (is_meta(instr) && ((instr->opc == OPC_META_INPUT) ||
+ (instr->opc == OPC_META_PHI)))
+ schedule(ctx, instr);
+ }
+
+ while (!(list_empty(&unscheduled_list) &&
+ list_empty(&prio_queue))) {
+ struct ir3_sched_notes notes = {0};
+ unsigned delay;
+
+ delay = add_eligible_instrs(ctx, &notes, &prio_queue, &unscheduled_list);
+
+ if (!list_empty(&prio_queue)) {
+ struct ir3_instruction *instr = list_last_entry(&prio_queue,
+ struct ir3_instruction, node);
+ /* ugg, this is a bit ugly, but between the time when
+ * the instruction became eligible and now, a new
+ * conflict may have arisen..
+ */
+ if (check_conflict(ctx, &notes, instr)) {
+ list_del(&instr->node);
+ list_addtail(&instr->node, &unscheduled_list);
+ continue;
+ }
+
+ schedule(ctx, instr);
+ } else if (delay == ~0) {
+ /* nothing available to schedule.. if we are blocked on
+ * address/predicate register conflict, then break the
+ * deadlock by cloning the instruction that wrote that
+ * reg:
+ */
+ if (notes.addr_conflict) {
+ split_addr(ctx);
+ } else if (notes.pred_conflict) {
+ split_pred(ctx);
+ } else {
+ debug_assert(0);
+ ctx->error = true;
+ return;
+ }
+ } else {
+ /* and if we run out of instructions that can be scheduled,
+ * then it is time for nop's:
+ */
+ debug_assert(delay <= 6);
+ while (delay > 0) {
+ ir3_NOP(block);
+ delay--;
+ }
}
}
- while ((instr = block->head) && !ctx->error) {
- /* NOTE: always grab next *before* trysched(), in case the
- * instruction is actually scheduled (and therefore moved
- * from depth list into scheduled list)
- */
- struct ir3_instruction *next = instr->next;
- int cnt = trysched(ctx, instr);
+ /* And lastly, insert branch/jump instructions to take us to
+ * the next block. Later we'll strip back out the branches
+ * that simply jump to next instruction.
+ */
+ if (block->successors[1]) {
+ /* if/else, conditional branches to "then" or "else": */
+ struct ir3_instruction *br;
+ unsigned delay = 6;
- if (cnt == DELAYED)
- cnt = block_sched_undelayed(ctx, block);
+ debug_assert(ctx->pred);
+ debug_assert(block->condition);
- /* -1 is signal to return up stack, but to us means same as 0: */
- cnt = MAX2(0, cnt);
- cnt += ctx->cnt;
- instr = next;
+ delay -= distance(ctx, ctx->pred, delay);
- /* if deepest remaining instruction cannot be scheduled, try
- * the increasingly more shallow instructions until needed
- * number of delay slots is filled:
- */
- while (instr && (cnt > ctx->cnt)) {
- next = instr->next;
- trysched(ctx, instr);
- instr = next;
+ while (delay > 0) {
+ ir3_NOP(block);
+ delay--;
}
- /* and if we run out of instructions that can be scheduled,
- * then it is time for nop's:
+ /* create "else" branch first (since "then" block should
+ * frequently/always end up being a fall-thru):
+ */
+ br = ir3_BR(block);
+ br->cat0.inv = true;
+ br->cat0.target = block->successors[1];
+
+ /* NOTE: we have to hard code delay of 6 above, since
+ * we want to insert the nop's before constructing the
+ * branch. Throw in an assert so we notice if this
+ * ever breaks on future generation:
*/
- while (cnt > ctx->cnt)
- schedule(ctx, ir3_instr_create(block, 0, OPC_NOP), false);
+ debug_assert(ir3_delayslots(ctx->pred, br, 0) == 6);
+
+ br = ir3_BR(block);
+ br->cat0.target = block->successors[0];
+
+ } else if (block->successors[0]) {
+ /* otherwise unconditional jump to next block: */
+ struct ir3_instruction *jmp;
+
+ jmp = ir3_JUMP(block);
+ jmp->cat0.target = block->successors[0];
}
- /* at this point, scheduled list is in reverse order, so fix that: */
- block->head = reverse(ctx->scheduled);
+ /* NOTE: if we kept track of the predecessors, we could do a better
+ * job w/ (jp) flags.. every node w/ > 1 predecessor is a join point.
+ * Note that as we eliminate blocks which contain only an unconditional
+ * jump we probably need to propagate (jp) flag..
+ */
}
-int ir3_block_sched(struct ir3_block *block)
+/* this is needed to ensure later RA stage succeeds: */
+static void
+sched_insert_parallel_copies(struct ir3_block *block)
+{
+ list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+ if (is_meta(instr) && (instr->opc == OPC_META_PHI)) {
+ struct ir3_register *reg;
+ foreach_src(reg, instr) {
+ struct ir3_instruction *src = reg->instr;
+ struct ir3_instruction *mov =
+ ir3_MOV(src->block, src, TYPE_U32);
+ mov->regs[0]->flags |= IR3_REG_PHI_SRC;
+ mov->regs[0]->instr = instr;
+ reg->instr = mov;
+ }
+ }
+ }
+}
+
+int ir3_sched(struct ir3 *ir)
{
struct ir3_sched_ctx ctx = {0};
- ir3_clear_mark(block->shader);
- block_sched(&ctx, block);
+ list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+ sched_insert_parallel_copies(block);
+ }
+ ir3_clear_mark(ir);
+ list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+ sched_block(&ctx, block);
+ }
if (ctx.error)
return -1;
return 0;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
index 9bf4e64c7f1..b5b038100cc 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -127,7 +127,7 @@ static void
assemble_variant(struct ir3_shader_variant *v)
{
struct fd_context *ctx = fd_context(v->shader->pctx);
- uint32_t gpu_id = ir3_shader_gpuid(v->shader);
+ uint32_t gpu_id = v->shader->compiler->gpu_id;
uint32_t sz, *bin;
bin = ir3_shader_assemble(v, gpu_id);
@@ -146,17 +146,6 @@ assemble_variant(struct ir3_shader_variant *v)
v->ir = NULL;
}
-/* reset before attempting to compile again.. */
-static void reset_variant(struct ir3_shader_variant *v, const char *msg)
-{
- debug_error(msg);
- v->inputs_count = 0;
- v->outputs_count = 0;
- v->total_in = 0;
- v->has_samp = false;
- v->immediates_count = 0;
-}
-
static struct ir3_shader_variant *
create_variant(struct ir3_shader *shader, struct ir3_shader_key key)
{
@@ -177,22 +166,7 @@ create_variant(struct ir3_shader *shader, struct ir3_shader_key key)
tgsi_dump(tokens, 0);
}
- if (fd_mesa_debug & FD_DBG_NIR) {
- ret = ir3_compile_shader_nir(v, tokens, key);
- if (ret)
- reset_variant(v, "NIR compiler failed, fallback to TGSI!");
- } else {
- ret = -1;
- }
-
- if (ret) {
- ret = ir3_compile_shader(v, tokens, key, true);
- if (ret) {
- reset_variant(v, "new compiler failed, trying without copy propagation!");
- ret = ir3_compile_shader(v, tokens, key, false);
- }
- }
-
+ ret = ir3_compile_shader_nir(shader->compiler, v, tokens, key);
if (ret) {
debug_error("compile failed!");
goto fail;
@@ -217,13 +191,6 @@ fail:
return NULL;
}
-uint32_t
-ir3_shader_gpuid(struct ir3_shader *shader)
-{
- struct fd_context *ctx = fd_context(shader->pctx);
- return ctx->screen->gpu_id;
-}
-
struct ir3_shader_variant *
ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key)
{
@@ -286,6 +253,7 @@ ir3_shader_create(struct pipe_context *pctx, const struct tgsi_token *tokens,
enum shader_t type)
{
struct ir3_shader *shader = CALLOC_STRUCT(ir3_shader);
+ shader->compiler = fd_context(pctx)->screen->compiler;
shader->pctx = pctx;
shader->type = type;
shader->tokens = tgsi_dup_tokens(tokens);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index e5410bf88b2..9f1b0769180 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -86,10 +86,6 @@ struct ir3_shader_key {
* shader:
*/
uint16_t fsaturate_s, fsaturate_t, fsaturate_r;
-
- /* bitmask of sampler which produces integer outputs:
- */
- uint16_t vinteger_s, finteger_s;
};
static inline bool
@@ -196,6 +192,8 @@ struct ir3_shader_variant {
struct ir3_shader {
enum shader_t type;
+ struct ir3_compiler *compiler;
+
struct pipe_context *pctx;
const struct tgsi_token *tokens;
@@ -212,7 +210,6 @@ void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id);
struct ir3_shader * ir3_shader_create(struct pipe_context *pctx,
const struct tgsi_token *tokens, enum shader_t type);
void ir3_shader_destroy(struct ir3_shader *shader);
-uint32_t ir3_shader_gpuid(struct ir3_shader *shader);
struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader,
struct ir3_shader_key key);
@@ -220,6 +217,8 @@ struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader,
* Helper/util:
*/
+#include "pipe/p_shader_tokens.h"
+
static inline int
ir3_find_output(const struct ir3_shader_variant *so, ir3_semantic semantic)
{
diff --git a/src/gallium/drivers/i915/i915_fpc_optimize.c b/src/gallium/drivers/i915/i915_fpc_optimize.c
index e0134a7c4ee..83bb64918d4 100644
--- a/src/gallium/drivers/i915/i915_fpc_optimize.c
+++ b/src/gallium/drivers/i915/i915_fpc_optimize.c
@@ -552,7 +552,7 @@ static boolean i915_fpc_useless_mov(union tgsi_full_token *tgsi_current)
if ( current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
op_has_dst(current.FullInstruction.Instruction.Opcode) &&
- current.FullInstruction.Instruction.Saturate == TGSI_SAT_NONE &&
+ !current.FullInstruction.Instruction.Saturate &&
current.FullInstruction.Src[0].Register.Absolute == 0 &&
current.FullInstruction.Src[0].Register.Negate == 0 &&
is_unswizzled(&current.FullInstruction.Src[0], current.FullInstruction.Dst[0].Register.WriteMask) &&
@@ -582,7 +582,7 @@ static void i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_contex
next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
op_has_dst(current->FullInstruction.Instruction.Opcode) &&
- next->FullInstruction.Instruction.Saturate == TGSI_SAT_NONE &&
+ !next->FullInstruction.Instruction.Saturate &&
next->FullInstruction.Src[0].Register.Absolute == 0 &&
next->FullInstruction.Src[0].Register.Negate == 0 &&
unused_from(ctx, &current->FullInstruction.Dst[0], index) &&
diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c
index b74f8239bb4..38a33888166 100644
--- a/src/gallium/drivers/i915/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915/i915_fpc_translate.c
@@ -329,7 +329,7 @@ get_result_flags(const struct i915_full_instruction *inst)
= inst->Dst[0].Register.WriteMask;
uint flags = 0x0;
- if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE)
+ if (inst->Instruction.Saturate)
flags |= A0_DEST_SATURATE;
if (writeMask & TGSI_WRITEMASK_X)
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index 7216160bb22..0590da07b9a 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -165,6 +165,7 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0;
default:
debug_printf("%s: Unknown cap %u.\n", __FUNCTION__, cap);
@@ -241,6 +242,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
+ case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
return 0;
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources
index 91a6f65f2e9..e1bbb9a0781 100644
--- a/src/gallium/drivers/ilo/Makefile.sources
+++ b/src/gallium/drivers/ilo/Makefile.sources
@@ -15,14 +15,34 @@ C_SOURCES := \
core/ilo_debug.h \
core/ilo_dev.c \
core/ilo_dev.h \
- core/ilo_format.c \
- core/ilo_format.h \
- core/ilo_fence.h \
core/ilo_image.c \
core/ilo_image.h \
- core/ilo_state_3d.h \
- core/ilo_state_3d_bottom.c \
- core/ilo_state_3d_top.c \
+ core/ilo_state_cc.c \
+ core/ilo_state_cc.h \
+ core/ilo_state_compute.c \
+ core/ilo_state_compute.h \
+ core/ilo_state_raster.c \
+ core/ilo_state_raster.h \
+ core/ilo_state_sampler.c \
+ core/ilo_state_sampler.h \
+ core/ilo_state_sbe.c \
+ core/ilo_state_sbe.h \
+ core/ilo_state_shader.c \
+ core/ilo_state_shader_ps.c \
+ core/ilo_state_shader.h \
+ core/ilo_state_sol.c \
+ core/ilo_state_sol.h \
+ core/ilo_state_surface.c \
+ core/ilo_state_surface_format.c \
+ core/ilo_state_surface.h \
+ core/ilo_state_urb.c \
+ core/ilo_state_urb.h \
+ core/ilo_state_vf.c \
+ core/ilo_state_vf.h \
+ core/ilo_state_viewport.c \
+ core/ilo_state_viewport.h \
+ core/ilo_state_zs.c \
+ core/ilo_state_zs.h \
core/intel_winsys.h \
ilo_blit.c \
ilo_blit.h \
@@ -38,6 +58,8 @@ C_SOURCES := \
ilo_cp.h \
ilo_draw.c \
ilo_draw.h \
+ ilo_format.c \
+ ilo_format.h \
ilo_gpgpu.c \
ilo_gpgpu.h \
ilo_public.h \
diff --git a/src/gallium/drivers/ilo/core/ilo_buffer.h b/src/gallium/drivers/ilo/core/ilo_buffer.h
index 50f97d10bd7..ca3c61ff890 100644
--- a/src/gallium/drivers/ilo/core/ilo_buffer.h
+++ b/src/gallium/drivers/ilo/core/ilo_buffer.h
@@ -31,11 +31,13 @@
#include "intel_winsys.h"
#include "ilo_core.h"
+#include "ilo_debug.h"
#include "ilo_dev.h"
struct ilo_buffer {
unsigned bo_size;
+ /* managed by users */
struct intel_bo *bo;
};
@@ -43,6 +45,8 @@ static inline void
ilo_buffer_init(struct ilo_buffer *buf, const struct ilo_dev *dev,
unsigned size, uint32_t bind, uint32_t flags)
{
+ assert(ilo_is_zeroed(buf, sizeof(*buf)));
+
buf->bo_size = size;
/*
@@ -55,36 +59,6 @@ ilo_buffer_init(struct ilo_buffer *buf, const struct ilo_dev *dev,
*/
if (bind & PIPE_BIND_SAMPLER_VIEW)
buf->bo_size = align(buf->bo_size, 256) + 16;
-
- if ((bind & PIPE_BIND_VERTEX_BUFFER) && ilo_dev_gen(dev) < ILO_GEN(7.5)) {
- /*
- * As noted in ilo_format_translate(), we treat some 3-component formats
- * as 4-component formats to work around hardware limitations. Imagine
- * the case where the vertex buffer holds a single
- * PIPE_FORMAT_R16G16B16_FLOAT vertex, and buf->bo_size is 6. The
- * hardware would fail to fetch it at boundary check because the vertex
- * buffer is expected to hold a PIPE_FORMAT_R16G16B16A16_FLOAT vertex
- * and that takes at least 8 bytes.
- *
- * For the workaround to work, we should add 2 to the bo size. But that
- * would waste a page when the bo size is already page aligned. Let's
- * round it to page size for now and revisit this when needed.
- */
- buf->bo_size = align(buf->bo_size, 4096);
- }
-}
-
-static inline void
-ilo_buffer_cleanup(struct ilo_buffer *buf)
-{
- intel_bo_unref(buf->bo);
-}
-
-static inline void
-ilo_buffer_set_bo(struct ilo_buffer *buf, struct intel_bo *bo)
-{
- intel_bo_unref(buf->bo);
- buf->bo = intel_bo_ref(bo);
}
#endif /* ILO_BUFFER_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_builder.c b/src/gallium/drivers/ilo/core/ilo_builder.c
index 3c5eef9bcbc..4e05a3aca1e 100644
--- a/src/gallium/drivers/ilo/core/ilo_builder.c
+++ b/src/gallium/drivers/ilo/core/ilo_builder.c
@@ -333,7 +333,7 @@ ilo_builder_init(struct ilo_builder *builder,
{
int i;
- memset(builder, 0, sizeof(*builder));
+ assert(ilo_is_zeroed(builder, sizeof(*builder)));
builder->dev = dev;
builder->winsys = winsys;
diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d.h b/src/gallium/drivers/ilo/core/ilo_builder_3d.h
index 6cf1732ee1c..fb8b53cbe23 100644
--- a/src/gallium/drivers/ilo/core/ilo_builder_3d.h
+++ b/src/gallium/drivers/ilo/core/ilo_builder_3d.h
@@ -35,45 +35,45 @@
#include "ilo_builder_3d_top.h"
#include "ilo_builder_3d_bottom.h"
+struct gen6_3dprimitive_info {
+ enum gen_3dprim_type topology;
+ bool indexed;
+
+ uint32_t vertex_count;
+ uint32_t vertex_start;
+ uint32_t instance_count;
+ uint32_t instance_start;
+ int32_t vertex_base;
+};
+
static inline void
gen6_3DPRIMITIVE(struct ilo_builder *builder,
- const struct pipe_draw_info *info,
- const struct ilo_ib_state *ib)
+ const struct gen6_3dprimitive_info *info)
{
const uint8_t cmd_len = 6;
- const int prim = gen6_3d_translate_pipe_prim(info->mode);
- const int vb_access = (info->indexed) ?
- GEN6_3DPRIM_DW0_ACCESS_RANDOM : GEN6_3DPRIM_DW0_ACCESS_SEQUENTIAL;
- const uint32_t vb_start = info->start +
- ((info->indexed) ? ib->draw_start_offset : 0);
uint32_t *dw;
ILO_DEV_ASSERT(builder->dev, 6, 6);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) |
- vb_access |
- prim << GEN6_3DPRIM_DW0_TYPE__SHIFT |
- (cmd_len - 2);
- dw[1] = info->count;
- dw[2] = vb_start;
+ dw[0] = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) | (cmd_len - 2) |
+ info->topology << GEN6_3DPRIM_DW0_TYPE__SHIFT;
+ if (info->indexed)
+ dw[0] |= GEN6_3DPRIM_DW0_ACCESS_RANDOM;
+
+ dw[1] = info->vertex_count;
+ dw[2] = info->vertex_start;
dw[3] = info->instance_count;
- dw[4] = info->start_instance;
- dw[5] = info->index_bias;
+ dw[4] = info->instance_start;
+ dw[5] = info->vertex_base;
}
static inline void
gen7_3DPRIMITIVE(struct ilo_builder *builder,
- const struct pipe_draw_info *info,
- const struct ilo_ib_state *ib)
+ const struct gen6_3dprimitive_info *info)
{
const uint8_t cmd_len = 7;
- const int prim = gen6_3d_translate_pipe_prim(info->mode);
- const int vb_access = (info->indexed) ?
- GEN7_3DPRIM_DW1_ACCESS_RANDOM : GEN7_3DPRIM_DW1_ACCESS_SEQUENTIAL;
- const uint32_t vb_start = info->start +
- ((info->indexed) ? ib->draw_start_offset : 0);
uint32_t *dw;
ILO_DEV_ASSERT(builder->dev, 7, 8);
@@ -81,12 +81,16 @@ gen7_3DPRIMITIVE(struct ilo_builder *builder,
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) | (cmd_len - 2);
- dw[1] = vb_access | prim;
- dw[2] = info->count;
- dw[3] = vb_start;
+
+ dw[1] = info->topology << GEN7_3DPRIM_DW1_TYPE__SHIFT;
+ if (info->indexed)
+ dw[1] |= GEN7_3DPRIM_DW1_ACCESS_RANDOM;
+
+ dw[2] = info->vertex_count;
+ dw[3] = info->vertex_start;
dw[4] = info->instance_count;
- dw[5] = info->start_instance;
- dw[6] = info->index_bias;
+ dw[5] = info->instance_start;
+ dw[6] = info->vertex_base;
}
#endif /* ILO_BUILDER_3D_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h
index 16ec4afd15b..6d9e3699125 100644
--- a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h
+++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h
@@ -29,335 +29,121 @@
#define ILO_BUILDER_3D_BOTTOM_H
#include "genhw/genhw.h"
-#include "../ilo_shader.h"
#include "intel_winsys.h"
#include "ilo_core.h"
#include "ilo_dev.h"
-#include "ilo_format.h"
+#include "ilo_state_cc.h"
+#include "ilo_state_raster.h"
+#include "ilo_state_sbe.h"
+#include "ilo_state_shader.h"
+#include "ilo_state_viewport.h"
+#include "ilo_state_zs.h"
#include "ilo_builder.h"
#include "ilo_builder_3d_top.h"
static inline void
gen6_3DSTATE_CLIP(struct ilo_builder *builder,
- const struct ilo_rasterizer_state *rasterizer,
- const struct ilo_shader_state *fs,
- bool enable_guardband,
- int num_viewports)
-{
- const uint8_t cmd_len = 4;
- uint32_t dw1, dw2, dw3, *dw;
- int interps;
-
- ILO_DEV_ASSERT(builder->dev, 6, 8);
-
- dw1 = rasterizer->clip.payload[0];
- dw2 = rasterizer->clip.payload[1];
- dw3 = rasterizer->clip.payload[2];
-
- if (enable_guardband && rasterizer->clip.can_enable_guardband)
- dw2 |= GEN6_CLIP_DW2_GB_TEST_ENABLE;
-
- interps = (fs) ? ilo_shader_get_kernel_param(fs,
- ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) : 0;
-
- if (interps & (GEN6_INTERP_NONPERSPECTIVE_PIXEL |
- GEN6_INTERP_NONPERSPECTIVE_CENTROID |
- GEN6_INTERP_NONPERSPECTIVE_SAMPLE))
- dw2 |= GEN6_CLIP_DW2_NONPERSPECTIVE_BARYCENTRIC_ENABLE;
-
- dw3 |= GEN6_CLIP_DW3_RTAINDEX_FORCED_ZERO |
- (num_viewports - 1);
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
-
- dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CLIP) | (cmd_len - 2);
- dw[1] = dw1;
- dw[2] = dw2;
- dw[3] = dw3;
-}
-
-static inline void
-gen6_disable_3DSTATE_CLIP(struct ilo_builder *builder)
+ const struct ilo_state_raster *rs)
{
const uint8_t cmd_len = 4;
uint32_t *dw;
- ILO_DEV_ASSERT(builder->dev, 6, 7.5);
+ ILO_DEV_ASSERT(builder->dev, 6, 8);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CLIP) | (cmd_len - 2);
- dw[1] = 0;
- dw[2] = 0;
- dw[3] = 0;
-}
-
-static inline void
-gen7_internal_3dstate_sf(struct ilo_builder *builder,
- uint8_t cmd_len, uint32_t *dw,
- const struct ilo_rasterizer_sf *sf,
- int num_samples)
-{
- ILO_DEV_ASSERT(builder->dev, 6, 7.5);
-
- assert(cmd_len == 7);
-
- dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2);
-
- if (!sf) {
- dw[1] = 0;
- dw[2] = (num_samples > 1) ? GEN7_SF_DW2_MSRASTMODE_ON_PATTERN : 0;
- dw[3] = 0;
- dw[4] = 0;
- dw[5] = 0;
- dw[6] = 0;
-
- return;
- }
-
- /* see rasterizer_init_sf_gen6() */
- STATIC_ASSERT(Elements(sf->payload) >= 3);
- dw[1] = sf->payload[0];
- dw[2] = sf->payload[1];
- dw[3] = sf->payload[2];
-
- if (num_samples > 1)
- dw[2] |= sf->dw_msaa;
-
- dw[4] = sf->dw_depth_offset_const;
- dw[5] = sf->dw_depth_offset_scale;
- dw[6] = sf->dw_depth_offset_clamp;
-}
-
-static inline void
-gen8_internal_3dstate_sbe(struct ilo_builder *builder,
- uint8_t cmd_len, uint32_t *dw,
- const struct ilo_shader_state *fs,
- int sprite_coord_mode)
-{
- const struct ilo_kernel_routing *routing;
- int vue_offset, vue_len, out_count;
-
- ILO_DEV_ASSERT(builder->dev, 6, 8);
-
- assert(cmd_len == 4);
-
- dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SBE) | (cmd_len - 2);
-
- if (!fs) {
- dw[1] = 1 << GEN7_SBE_DW1_URB_READ_LEN__SHIFT;
- dw[2] = 0;
- dw[3] = 0;
- return;
- }
-
- routing = ilo_shader_get_kernel_routing(fs);
-
- vue_offset = routing->source_skip;
- assert(vue_offset % 2 == 0);
- vue_offset /= 2;
-
- vue_len = (routing->source_len + 1) / 2;
- if (!vue_len)
- vue_len = 1;
-
- out_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
- assert(out_count <= 32);
-
- dw[1] = out_count << GEN7_SBE_DW1_ATTR_COUNT__SHIFT |
- vue_len << GEN7_SBE_DW1_URB_READ_LEN__SHIFT;
-
- if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
- dw[1] |= GEN8_SBE_DW1_USE_URB_READ_LEN |
- GEN8_SBE_DW1_USE_URB_READ_OFFSET |
- vue_offset << GEN8_SBE_DW1_URB_READ_OFFSET__SHIFT;
- } else {
- dw[1] |= vue_offset << GEN7_SBE_DW1_URB_READ_OFFSET__SHIFT;
- }
-
- if (routing->swizzle_enable)
- dw[1] |= GEN7_SBE_DW1_ATTR_SWIZZLE_ENABLE;
-
- switch (sprite_coord_mode) {
- case PIPE_SPRITE_COORD_UPPER_LEFT:
- dw[1] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_UPPERLEFT;
- break;
- case PIPE_SPRITE_COORD_LOWER_LEFT:
- dw[1] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_LOWERLEFT;
- break;
- }
-
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 268:
- *
- * "This field (Point Sprite Texture Coordinate Enable) must be
- * programmed to 0 when non-point primitives are rendered."
- *
- * TODO We do not check that yet.
- */
- dw[2] = routing->point_sprite_enable;
-
- dw[3] = routing->const_interp_enable;
-}
-
-static inline void
-gen8_internal_3dstate_sbe_swiz(struct ilo_builder *builder,
- uint8_t cmd_len, uint32_t *dw,
- const struct ilo_shader_state *fs)
-{
- const struct ilo_kernel_routing *routing;
-
- ILO_DEV_ASSERT(builder->dev, 6, 8);
-
- assert(cmd_len == 11);
-
- dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_SBE_SWIZ) | (cmd_len - 2);
-
- if (!fs) {
- memset(&dw[1], 0, sizeof(*dw) * (cmd_len - 1));
- return;
- }
-
- routing = ilo_shader_get_kernel_routing(fs);
-
- STATIC_ASSERT(sizeof(routing->swizzles) >= sizeof(*dw) * 8);
- memcpy(&dw[1], routing->swizzles, sizeof(*dw) * 8);
-
- /* WrapShortest enables */
- dw[9] = 0;
- dw[10] = 0;
+ /* see raster_set_gen6_3DSTATE_CLIP() */
+ dw[1] = rs->clip[0];
+ dw[2] = rs->clip[1];
+ dw[3] = rs->clip[2];
}
static inline void
gen6_3DSTATE_SF(struct ilo_builder *builder,
- const struct ilo_rasterizer_state *rasterizer,
- const struct ilo_shader_state *fs,
- int sample_count)
+ const struct ilo_state_raster *rs,
+ const struct ilo_state_sbe *sbe)
{
const uint8_t cmd_len = 20;
- uint32_t gen8_3dstate_sbe[4], gen8_3dstate_sbe_swiz[11];
- uint32_t gen7_3dstate_sf[7];
- const struct ilo_rasterizer_sf *sf;
- int sprite_coord_mode;
uint32_t *dw;
ILO_DEV_ASSERT(builder->dev, 6, 6);
- sf = (rasterizer) ? &rasterizer->sf : NULL;
- sprite_coord_mode = (rasterizer) ? rasterizer->state.sprite_coord_mode : 0;
-
- gen8_internal_3dstate_sbe(builder, Elements(gen8_3dstate_sbe),
- gen8_3dstate_sbe, fs, sprite_coord_mode);
- gen8_internal_3dstate_sbe_swiz(builder, Elements(gen8_3dstate_sbe_swiz),
- gen8_3dstate_sbe_swiz, fs);
- gen7_internal_3dstate_sf(builder, Elements(gen7_3dstate_sf),
- gen7_3dstate_sf, sf, sample_count);
-
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2);
- dw[1] = gen8_3dstate_sbe[1];
- memcpy(&dw[2], &gen7_3dstate_sf[1], sizeof(*dw) * 6);
- memcpy(&dw[8], &gen8_3dstate_sbe_swiz[1], sizeof(*dw) * 8);
- dw[16] = gen8_3dstate_sbe[2];
- dw[17] = gen8_3dstate_sbe[3];
- dw[18] = gen8_3dstate_sbe_swiz[9];
- dw[19] = gen8_3dstate_sbe_swiz[10];
+ /* see sbe_set_gen8_3DSTATE_SBE() */
+ dw[1] = sbe->sbe[0];
+
+ /* see raster_set_gen7_3DSTATE_SF() */
+ dw[2] = rs->sf[0];
+ dw[3] = rs->sf[1];
+ dw[4] = rs->sf[2];
+ dw[5] = rs->raster[1];
+ dw[6] = rs->raster[2];
+ dw[7] = rs->raster[3];
+
+ /* see sbe_set_gen8_3DSTATE_SBE_SWIZ() */
+ memcpy(&dw[8], sbe->swiz, sizeof(*dw) * 8);
+
+ dw[16] = sbe->sbe[1];
+ dw[17] = sbe->sbe[2];
+ /* WrapShortest enables */
+ dw[18] = 0;
+ dw[19] = 0;
}
static inline void
gen7_3DSTATE_SF(struct ilo_builder *builder,
- const struct ilo_rasterizer_sf *sf,
- enum pipe_format zs_format,
- int sample_count)
+ const struct ilo_state_raster *rs)
{
- const uint8_t cmd_len = 7;
+ const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 4 : 7;
uint32_t *dw;
- ILO_DEV_ASSERT(builder->dev, 7, 7.5);
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
-
- gen7_internal_3dstate_sf(builder, cmd_len, dw, sf, sample_count);
-
- if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) {
- int hw_format;
-
- /* separate stencil */
- switch (zs_format) {
- case PIPE_FORMAT_Z16_UNORM:
- hw_format = GEN6_ZFORMAT_D16_UNORM;
- break;
- case PIPE_FORMAT_Z32_FLOAT:
- case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- hw_format = GEN6_ZFORMAT_D32_FLOAT;
- break;
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- hw_format = GEN6_ZFORMAT_D24_UNORM_X8_UINT;
- break;
- default:
- /* FLOAT surface is assumed when there is no depth buffer */
- hw_format = GEN6_ZFORMAT_D32_FLOAT;
- break;
- }
-
- dw[1] |= hw_format << GEN7_SF_DW1_DEPTH_FORMAT__SHIFT;
- }
-}
-
-static inline void
-gen8_3DSTATE_SF(struct ilo_builder *builder,
- const struct ilo_rasterizer_sf *sf)
-{
- const uint8_t cmd_len = 4;
- uint32_t *dw;
-
- ILO_DEV_ASSERT(builder->dev, 8, 8);
+ ILO_DEV_ASSERT(builder->dev, 7, 8);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2);
- /* see rasterizer_init_sf_gen8() */
- STATIC_ASSERT(Elements(sf->payload) >= 3);
- dw[1] = sf->payload[0];
- dw[2] = sf->payload[1];
- dw[3] = sf->payload[2];
+ /* see raster_set_gen7_3DSTATE_SF() or raster_set_gen8_3DSTATE_SF() */
+ dw[1] = rs->sf[0];
+ dw[2] = rs->sf[1];
+ dw[3] = rs->sf[2];
+ if (ilo_dev_gen(builder->dev) < ILO_GEN(8)) {
+ dw[4] = rs->raster[1];
+ dw[5] = rs->raster[2];
+ dw[6] = rs->raster[3];
+ }
}
static inline void
gen7_3DSTATE_SBE(struct ilo_builder *builder,
- const struct ilo_shader_state *fs,
- int sprite_coord_mode)
+ const struct ilo_state_sbe *sbe)
{
const uint8_t cmd_len = 14;
- uint32_t gen8_3dstate_sbe[4], gen8_3dstate_sbe_swiz[11];
uint32_t *dw;
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
- gen8_internal_3dstate_sbe(builder, Elements(gen8_3dstate_sbe),
- gen8_3dstate_sbe, fs, sprite_coord_mode);
- gen8_internal_3dstate_sbe_swiz(builder, Elements(gen8_3dstate_sbe_swiz),
- gen8_3dstate_sbe_swiz, fs);
-
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SBE) | (cmd_len - 2);
- dw[1] = gen8_3dstate_sbe[1];
- memcpy(&dw[2], &gen8_3dstate_sbe_swiz[1], sizeof(*dw) * 8);
- dw[10] = gen8_3dstate_sbe[2];
- dw[11] = gen8_3dstate_sbe[3];
- dw[12] = gen8_3dstate_sbe_swiz[9];
- dw[13] = gen8_3dstate_sbe_swiz[10];
+ /* see sbe_set_gen8_3DSTATE_SBE() and sbe_set_gen8_3DSTATE_SBE_SWIZ() */
+ dw[1] = sbe->sbe[0];
+ memcpy(&dw[2], sbe->swiz, sizeof(*dw) * 8);
+ dw[10] = sbe->sbe[1];
+ dw[11] = sbe->sbe[2];
+
+ /* WrapShortest enables */
+ dw[12] = 0;
+ dw[13] = 0;
}
static inline void
gen8_3DSTATE_SBE(struct ilo_builder *builder,
- const struct ilo_shader_state *fs,
- int sprite_coord_mode)
+ const struct ilo_state_sbe *sbe)
{
const uint8_t cmd_len = 4;
uint32_t *dw;
@@ -366,12 +152,16 @@ gen8_3DSTATE_SBE(struct ilo_builder *builder,
ilo_builder_batch_pointer(builder, cmd_len, &dw);
- gen8_internal_3dstate_sbe(builder, cmd_len, dw, fs, sprite_coord_mode);
+ /* see sbe_set_gen8_3DSTATE_SBE() */
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SBE) | (cmd_len - 2);
+ dw[1] = sbe->sbe[0];
+ dw[2] = sbe->sbe[1];
+ dw[3] = sbe->sbe[2];
}
static inline void
gen8_3DSTATE_SBE_SWIZ(struct ilo_builder *builder,
- const struct ilo_shader_state *fs)
+ const struct ilo_state_sbe *sbe)
{
const uint8_t cmd_len = 11;
uint32_t *dw;
@@ -380,12 +170,17 @@ gen8_3DSTATE_SBE_SWIZ(struct ilo_builder *builder,
ilo_builder_batch_pointer(builder, cmd_len, &dw);
- gen8_internal_3dstate_sbe_swiz(builder, cmd_len, dw, fs);
+ dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_SBE_SWIZ) | (cmd_len - 2);
+ /* see sbe_set_gen8_3DSTATE_SBE_SWIZ() */
+ memcpy(&dw[1], sbe->swiz, sizeof(*dw) * 8);
+ /* WrapShortest enables */
+ dw[9] = 0;
+ dw[10] = 0;
}
static inline void
gen8_3DSTATE_RASTER(struct ilo_builder *builder,
- const struct ilo_rasterizer_sf *sf)
+ const struct ilo_state_raster *rs)
{
const uint8_t cmd_len = 5;
uint32_t *dw;
@@ -395,232 +190,108 @@ gen8_3DSTATE_RASTER(struct ilo_builder *builder,
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_RASTER) | (cmd_len - 2);
- dw[1] = sf->dw_raster;
- dw[2] = sf->dw_depth_offset_const;
- dw[3] = sf->dw_depth_offset_scale;
- dw[4] = sf->dw_depth_offset_clamp;
+ /* see raster_set_gen8_3DSTATE_RASTER() */
+ dw[1] = rs->raster[0];
+ dw[2] = rs->raster[1];
+ dw[3] = rs->raster[2];
+ dw[4] = rs->raster[3];
}
static inline void
gen6_3DSTATE_WM(struct ilo_builder *builder,
- const struct ilo_shader_state *fs,
- const struct ilo_rasterizer_state *rasterizer,
- bool dual_blend, bool cc_may_kill)
+ const struct ilo_state_raster *rs,
+ const struct ilo_state_ps *ps,
+ uint32_t kernel_offset)
{
const uint8_t cmd_len = 9;
- const int num_samples = 1;
- const struct ilo_shader_cso *cso;
- uint32_t dw2, dw4, dw5, dw6, *dw;
+ uint32_t *dw;
ILO_DEV_ASSERT(builder->dev, 6, 6);
- cso = ilo_shader_get_kernel_cso(fs);
- dw2 = cso->payload[0];
- dw4 = cso->payload[1];
- dw5 = cso->payload[2];
- dw6 = cso->payload[3];
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 248:
- *
- * "This bit (Statistics Enable) must be disabled if either of these
- * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer Resolve
- * Enable or Depth Buffer Resolve Enable."
- */
- dw4 |= GEN6_WM_DW4_STATISTICS;
-
- if (cc_may_kill)
- dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL | GEN6_WM_DW5_PS_DISPATCH_ENABLE;
-
- if (dual_blend)
- dw5 |= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND;
-
- dw5 |= rasterizer->wm.payload[0];
-
- dw6 |= rasterizer->wm.payload[1];
-
- if (num_samples > 1) {
- dw6 |= rasterizer->wm.dw_msaa_rast |
- rasterizer->wm.dw_msaa_disp;
- }
-
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
- dw[1] = ilo_shader_get_kernel_offset(fs);
- dw[2] = dw2;
- dw[3] = 0; /* scratch */
- dw[4] = dw4;
- dw[5] = dw5;
- dw[6] = dw6;
+ dw[1] = kernel_offset;
+ /* see raster_set_gen6_3dstate_wm() and ps_set_gen6_3dstate_wm() */
+ dw[2] = ps->ps[0];
+ dw[3] = ps->ps[1];
+ dw[4] = rs->wm[0] | ps->ps[2];
+ dw[5] = rs->wm[1] | ps->ps[3];
+ dw[6] = rs->wm[2] | ps->ps[4];
dw[7] = 0; /* kernel 1 */
dw[8] = 0; /* kernel 2 */
}
static inline void
-gen6_hiz_3DSTATE_WM(struct ilo_builder *builder, uint32_t hiz_op)
-{
- const uint8_t cmd_len = 9;
- const int max_threads = (builder->dev->gt == 2) ? 80 : 40;
- uint32_t *dw;
-
- ILO_DEV_ASSERT(builder->dev, 6, 6);
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
-
- dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
- dw[1] = 0;
- dw[2] = 0;
- dw[3] = 0;
- dw[4] = hiz_op;
- /* honor the valid range even if dispatching is disabled */
- dw[5] = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT;
- dw[6] = 0;
- dw[7] = 0;
- dw[8] = 0;
-}
-
-static inline void
gen7_3DSTATE_WM(struct ilo_builder *builder,
- const struct ilo_shader_state *fs,
- const struct ilo_rasterizer_state *rasterizer,
- bool cc_may_kill)
+ const struct ilo_state_raster *rs,
+ const struct ilo_state_ps *ps)
{
const uint8_t cmd_len = 3;
- const int num_samples = 1;
- const struct ilo_shader_cso *cso;
- uint32_t dw1, dw2, *dw;
+ uint32_t *dw;
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
- /* see rasterizer_init_wm_gen7() */
- dw1 = rasterizer->wm.payload[0];
- dw2 = rasterizer->wm.payload[1];
-
- /* see fs_init_cso_gen7() */
- cso = ilo_shader_get_kernel_cso(fs);
- dw1 |= cso->payload[3];
-
- dw1 |= GEN7_WM_DW1_STATISTICS;
-
- if (cc_may_kill)
- dw1 |= GEN7_WM_DW1_PS_DISPATCH_ENABLE | GEN7_WM_DW1_PS_KILL_PIXEL;
-
- if (num_samples > 1) {
- dw1 |= rasterizer->wm.dw_msaa_rast;
- dw2 |= rasterizer->wm.dw_msaa_disp;
- }
-
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
- dw[1] = dw1;
- dw[2] = dw2;
+ /* see raster_set_gen8_3DSTATE_WM() and ps_set_gen7_3dstate_wm() */
+ dw[1] = rs->wm[0] | ps->ps[0];
+ dw[2] = ps->ps[1];
}
static inline void
gen8_3DSTATE_WM(struct ilo_builder *builder,
- const struct ilo_shader_state *fs,
- const struct ilo_rasterizer_state *rasterizer)
+ const struct ilo_state_raster *rs)
{
const uint8_t cmd_len = 2;
- const struct ilo_shader_cso *cso;
- uint32_t dw1, interps, *dw;
+ uint32_t *dw;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- /* see rasterizer_get_wm_gen8() */
- dw1 = rasterizer->wm.payload[0];
- dw1 |= GEN7_WM_DW1_STATISTICS;
-
- /* see fs_init_cso_gen8() */
- cso = ilo_shader_get_kernel_cso(fs);
- interps = cso->payload[4];
-
- assert(!(dw1 & interps));
-
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
- dw[1] = dw1 | interps;
-}
-
-static inline void
-gen7_hiz_3DSTATE_WM(struct ilo_builder *builder, uint32_t hiz_op)
-{
- const uint8_t cmd_len = 3;
- uint32_t *dw;
-
- ILO_DEV_ASSERT(builder->dev, 7, 7.5);
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
- dw[1] = hiz_op;
- dw[2] = 0;
+ /* see raster_set_gen8_3DSTATE_WM() */
+ dw[1] = rs->wm[0];
}
static inline void
gen8_3DSTATE_WM_DEPTH_STENCIL(struct ilo_builder *builder,
- const struct ilo_dsa_state *dsa)
+ const struct ilo_state_cc *cc)
{
const uint8_t cmd_len = 3;
- uint32_t dw1, dw2, *dw;
+ uint32_t *dw;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- dw1 = dsa->payload[0];
- dw2 = dsa->payload[1];
-
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_WM_DEPTH_STENCIL) | (cmd_len - 2);
- dw[1] = dw1;
- dw[2] = dw2;
+ /* see cc_set_gen8_3DSTATE_WM_DEPTH_STENCIL() */
+ dw[1] = cc->ds[0];
+ dw[2] = cc->ds[1];
}
static inline void
-gen8_3DSTATE_WM_HZ_OP(struct ilo_builder *builder, uint32_t op,
- uint16_t width, uint16_t height, int sample_count)
+gen8_3DSTATE_WM_HZ_OP(struct ilo_builder *builder,
+ const struct ilo_state_raster *rs,
+ uint16_t width, uint16_t height)
{
const uint8_t cmd_len = 5;
- const uint32_t sample_mask = ((1 << sample_count) - 1) | 0x1;
- uint32_t dw1, *dw;
+ uint32_t *dw;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- dw1 = op;
-
- switch (sample_count) {
- case 0:
- case 1:
- dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_1;
- break;
- case 2:
- dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_2;
- break;
- case 4:
- dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_4;
- break;
- case 8:
- dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_8;
- break;
- case 16:
- dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_16;
- break;
- default:
- assert(!"unsupported sample count");
- dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_1;
- break;
- }
-
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_WM_HZ_OP) | (cmd_len - 2);
- dw[1] = dw1;
+ /* see raster_set_gen8_3dstate_wm_hz_op() */
+ dw[1] = rs->wm[1];
dw[2] = 0;
- /* exclusive? */
+ /* exclusive */
dw[3] = height << 16 | width;
- dw[4] = sample_mask;
+ dw[4] = rs->wm[2];
}
static inline void
@@ -656,100 +327,48 @@ gen8_3DSTATE_WM_CHROMAKEY(struct ilo_builder *builder)
static inline void
gen7_3DSTATE_PS(struct ilo_builder *builder,
- const struct ilo_shader_state *fs,
- bool dual_blend)
+ const struct ilo_state_ps *ps,
+ uint32_t kernel_offset)
{
const uint8_t cmd_len = 8;
- const struct ilo_shader_cso *cso;
- uint32_t dw2, dw4, dw5, *dw;
+ uint32_t *dw;
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
- /* see fs_init_cso_gen7() */
- cso = ilo_shader_get_kernel_cso(fs);
- dw2 = cso->payload[0];
- dw4 = cso->payload[1];
- dw5 = cso->payload[2];
-
- if (dual_blend)
- dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND;
-
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
- dw[1] = ilo_shader_get_kernel_offset(fs);
- dw[2] = dw2;
- dw[3] = 0; /* scratch */
- dw[4] = dw4;
- dw[5] = dw5;
+ dw[1] = kernel_offset;
+ /* see ps_set_gen7_3DSTATE_PS() */
+ dw[2] = ps->ps[2];
+ dw[3] = ps->ps[3];
+ dw[4] = ps->ps[4];
+ dw[5] = ps->ps[5];
dw[6] = 0; /* kernel 1 */
dw[7] = 0; /* kernel 2 */
}
static inline void
-gen7_disable_3DSTATE_PS(struct ilo_builder *builder)
-{
- const uint8_t cmd_len = 8;
- int max_threads;
- uint32_t dw4, *dw;
-
- ILO_DEV_ASSERT(builder->dev, 7, 7.5);
-
- /* GPU hangs if none of the dispatch enable bits is set */
- dw4 = GEN6_PS_DISPATCH_8 << GEN7_PS_DW4_DISPATCH_MODE__SHIFT;
-
- /* see brwCreateContext() */
- switch (ilo_dev_gen(builder->dev)) {
- case ILO_GEN(7.5):
- max_threads = (builder->dev->gt == 3) ? 408 :
- (builder->dev->gt == 2) ? 204 : 102;
- dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT;
- break;
- case ILO_GEN(7):
- default:
- max_threads = (builder->dev->gt == 2) ? 172 : 48;
- dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
- break;
- }
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
-
- dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
- dw[1] = 0;
- dw[2] = 0;
- dw[3] = 0;
- dw[4] = dw4;
- dw[5] = 0;
- dw[6] = 0;
- dw[7] = 0;
-}
-
-static inline void
gen8_3DSTATE_PS(struct ilo_builder *builder,
- const struct ilo_shader_state *fs)
+ const struct ilo_state_ps *ps,
+ uint32_t kernel_offset)
{
const uint8_t cmd_len = 12;
- const struct ilo_shader_cso *cso;
- uint32_t dw3, dw6, dw7, *dw;
+ uint32_t *dw;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- /* see fs_init_cso_gen8() */
- cso = ilo_shader_get_kernel_cso(fs);
- dw3 = cso->payload[0];
- dw6 = cso->payload[1];
- dw7 = cso->payload[2];
-
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
- dw[1] = ilo_shader_get_kernel_offset(fs);
+ dw[1] = kernel_offset;
dw[2] = 0;
- dw[3] = dw3;
- dw[4] = 0; /* scratch */
+ /* see ps_set_gen8_3DSTATE_PS() */
+ dw[3] = ps->ps[0];
+ dw[4] = ps->ps[1];
dw[5] = 0;
- dw[6] = dw6;
- dw[7] = dw7;
+ dw[6] = ps->ps[2];
+ dw[7] = ps->ps[3];
dw[8] = 0; /* kernel 1 */
dw[9] = 0;
dw[10] = 0; /* kernel 2 */
@@ -758,66 +377,34 @@ gen8_3DSTATE_PS(struct ilo_builder *builder,
static inline void
gen8_3DSTATE_PS_EXTRA(struct ilo_builder *builder,
- const struct ilo_shader_state *fs,
- bool cc_may_kill, bool per_sample)
+ const struct ilo_state_ps *ps)
{
const uint8_t cmd_len = 2;
- const struct ilo_shader_cso *cso;
- uint32_t dw1, *dw;
+ uint32_t *dw;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- /* see fs_init_cso_gen8() */
- cso = ilo_shader_get_kernel_cso(fs);
- dw1 = cso->payload[3];
-
- if (cc_may_kill)
- dw1 |= GEN8_PSX_DW1_DISPATCH_ENABLE | GEN8_PSX_DW1_KILL_PIXEL;
- if (per_sample)
- dw1 |= GEN8_PSX_DW1_PER_SAMPLE;
-
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_PS_EXTRA) | (cmd_len - 2);
- dw[1] = dw1;
+ /* see ps_set_gen8_3DSTATE_PS_EXTRA() */
+ dw[1] = ps->ps[4];
}
static inline void
gen8_3DSTATE_PS_BLEND(struct ilo_builder *builder,
- const struct ilo_blend_state *blend,
- const struct ilo_fb_state *fb,
- const struct ilo_dsa_state *dsa)
+ const struct ilo_state_cc *cc)
{
const uint8_t cmd_len = 2;
- uint32_t dw1, *dw;
+ uint32_t *dw;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- dw1 = 0;
- if (blend->alpha_to_coverage && fb->num_samples > 1)
- dw1 |= GEN8_PS_BLEND_DW1_ALPHA_TO_COVERAGE;
-
- if (fb->state.nr_cbufs && fb->state.cbufs[0]) {
- const struct ilo_fb_blend_caps *caps = &fb->blend_caps[0];
-
- dw1 |= GEN8_PS_BLEND_DW1_WRITABLE_RT;
- if (caps->can_blend) {
- if (caps->dst_alpha_forced_one)
- dw1 |= blend->dw_ps_blend_dst_alpha_forced_one;
- else
- dw1 |= blend->dw_ps_blend;
- }
-
- if (caps->can_alpha_test)
- dw1 |= dsa->dw_ps_blend_alpha;
- } else {
- dw1 |= dsa->dw_ps_blend_alpha;
- }
-
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_PS_BLEND) | (cmd_len - 2);
- dw[1] = dw1;
+ /* see cc_set_gen8_3DSTATE_PS_BLEND() */
+ dw[1] = cc->blend[0];
}
static inline void
@@ -862,101 +449,49 @@ gen7_3DSTATE_SAMPLER_STATE_POINTERS_PS(struct ilo_builder *builder,
static inline void
gen6_3DSTATE_MULTISAMPLE(struct ilo_builder *builder,
- int num_samples, const uint32_t *pattern,
- bool pixel_location_center)
+ const struct ilo_state_raster *rs,
+ const struct ilo_state_sample_pattern *pattern,
+ uint8_t sample_count)
{
const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 4 : 3;
- uint32_t dw1, dw2, dw3, *dw;
+ const uint32_t *packed = (const uint32_t *)
+ ilo_state_sample_pattern_get_packed_offsets(pattern,
+ builder->dev, sample_count);
+ uint32_t *dw;
ILO_DEV_ASSERT(builder->dev, 6, 7.5);
- dw1 = (pixel_location_center) ? GEN6_MULTISAMPLE_DW1_PIXLOC_CENTER :
- GEN6_MULTISAMPLE_DW1_PIXLOC_UL_CORNER;
-
- switch (num_samples) {
- case 0:
- case 1:
- dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1;
- dw2 = 0;
- dw3 = 0;
- break;
- case 4:
- dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_4;
- dw2 = pattern[0];
- dw3 = 0;
- break;
- case 8:
- assert(ilo_dev_gen(builder->dev) >= ILO_GEN(7));
- dw1 |= GEN7_MULTISAMPLE_DW1_NUMSAMPLES_8;
- dw2 = pattern[0];
- dw3 = pattern[1];
- break;
- default:
- assert(!"unsupported sample count");
- dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1;
- dw2 = 0;
- dw3 = 0;
- break;
- }
-
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_MULTISAMPLE) | (cmd_len - 2);
- dw[1] = dw1;
- dw[2] = dw2;
+ /* see raster_set_gen8_3DSTATE_MULTISAMPLE() */
+ dw[1] = rs->sample[0];
+
+ /* see sample_pattern_set_gen8_3DSTATE_SAMPLE_PATTERN() */
+ dw[2] = (sample_count >= 4) ? packed[0] : 0;
if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
- dw[3] = dw3;
+ dw[3] = (sample_count >= 8) ? packed[1] : 0;
}
static inline void
gen8_3DSTATE_MULTISAMPLE(struct ilo_builder *builder,
- int num_samples,
- bool pixel_location_center)
+ const struct ilo_state_raster *rs)
{
const uint8_t cmd_len = 2;
- uint32_t dw1, *dw;
+ uint32_t *dw;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- dw1 = (pixel_location_center) ? GEN6_MULTISAMPLE_DW1_PIXLOC_CENTER :
- GEN6_MULTISAMPLE_DW1_PIXLOC_UL_CORNER;
-
- switch (num_samples) {
- case 0:
- case 1:
- dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1;
- break;
- case 2:
- dw1 |= GEN8_MULTISAMPLE_DW1_NUMSAMPLES_2;
- break;
- case 4:
- dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_4;
- break;
- case 8:
- dw1 |= GEN7_MULTISAMPLE_DW1_NUMSAMPLES_8;
- break;
- case 16:
- dw1 |= GEN8_MULTISAMPLE_DW1_NUMSAMPLES_16;
- break;
- default:
- assert(!"unsupported sample count");
- dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1;
- break;
- }
-
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_MULTISAMPLE) | (cmd_len - 2);
- dw[1] = dw1;
+ /* see raster_set_gen8_3DSTATE_MULTISAMPLE() */
+ dw[1] = rs->sample[0];
}
static inline void
gen8_3DSTATE_SAMPLE_PATTERN(struct ilo_builder *builder,
- const uint32_t *pattern_1x,
- const uint32_t *pattern_2x,
- const uint32_t *pattern_4x,
- const uint32_t *pattern_8x,
- const uint32_t *pattern_16x)
+ const struct ilo_state_sample_pattern *pattern)
{
const uint8_t cmd_len = 9;
uint32_t *dw;
@@ -966,61 +501,32 @@ gen8_3DSTATE_SAMPLE_PATTERN(struct ilo_builder *builder,
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_SAMPLE_PATTERN) | (cmd_len - 2);
- dw[1] = pattern_16x[3];
- dw[2] = pattern_16x[2];
- dw[3] = pattern_16x[1];
- dw[4] = pattern_16x[0];
- dw[5] = pattern_8x[1];
- dw[6] = pattern_8x[0];
- dw[7] = pattern_4x[0];
- dw[8] = pattern_1x[0] << 16 |
- pattern_2x[0];
+ dw[1] = 0;
+ dw[2] = 0;
+ dw[3] = 0;
+ dw[4] = 0;
+ /* see sample_pattern_set_gen8_3DSTATE_SAMPLE_PATTERN() */
+ dw[5] = ((const uint32_t *) pattern->pattern_8x)[1];
+ dw[6] = ((const uint32_t *) pattern->pattern_8x)[0];
+ dw[7] = ((const uint32_t *) pattern->pattern_4x)[0];
+ dw[8] = pattern->pattern_1x[0] << 16 |
+ ((const uint16_t *) pattern->pattern_2x)[0];
}
static inline void
gen6_3DSTATE_SAMPLE_MASK(struct ilo_builder *builder,
- unsigned sample_mask)
+ const struct ilo_state_raster *rs)
{
const uint8_t cmd_len = 2;
- const unsigned valid_mask = 0xf;
uint32_t *dw;
- ILO_DEV_ASSERT(builder->dev, 6, 6);
-
- sample_mask &= valid_mask;
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
-
- dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLE_MASK) | (cmd_len - 2);
- dw[1] = sample_mask;
-}
-
-static inline void
-gen7_3DSTATE_SAMPLE_MASK(struct ilo_builder *builder,
- unsigned sample_mask,
- int num_samples)
-{
- const uint8_t cmd_len = 2;
- const unsigned valid_mask = ((1 << num_samples) - 1) | 0x1;
- uint32_t *dw;
-
- ILO_DEV_ASSERT(builder->dev, 7, 8);
-
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 294:
- *
- * "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field
- * (Sample Mask) must be zero.
- *
- * If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field
- * must be zero."
- */
- sample_mask &= valid_mask;
+ ILO_DEV_ASSERT(builder->dev, 6, 8);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLE_MASK) | (cmd_len - 2);
- dw[1] = sample_mask;
+ /* see raster_set_gen6_3DSTATE_SAMPLE_MASK() */
+ dw[1] = rs->sample[1];
}
static inline void
@@ -1070,95 +576,75 @@ gen6_3DSTATE_DRAWING_RECTANGLE(struct ilo_builder *builder,
static inline void
gen6_3DSTATE_POLY_STIPPLE_OFFSET(struct ilo_builder *builder,
- int x_offset, int y_offset)
+ const struct ilo_state_poly_stipple *stipple)
{
const uint8_t cmd_len = 2;
uint32_t *dw;
ILO_DEV_ASSERT(builder->dev, 6, 8);
- assert(x_offset >= 0 && x_offset <= 31);
- assert(y_offset >= 0 && y_offset <= 31);
-
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_POLY_STIPPLE_OFFSET) | (cmd_len - 2);
- dw[1] = x_offset << 8 | y_offset;
+ /* constant */
+ dw[1] = 0;
}
static inline void
gen6_3DSTATE_POLY_STIPPLE_PATTERN(struct ilo_builder *builder,
- const struct pipe_poly_stipple *pattern)
+ const struct ilo_state_poly_stipple *stipple)
{
const uint8_t cmd_len = 33;
uint32_t *dw;
- int i;
ILO_DEV_ASSERT(builder->dev, 6, 8);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_POLY_STIPPLE_PATTERN) | (cmd_len - 2);
- dw++;
-
- STATIC_ASSERT(Elements(pattern->stipple) == 32);
- for (i = 0; i < 32; i++)
- dw[i] = pattern->stipple[i];
+ /* see poly_stipple_set_gen6_3DSTATE_POLY_STIPPLE_PATTERN() */
+ memcpy(&dw[1], stipple->stipple, sizeof(stipple->stipple));
}
static inline void
gen6_3DSTATE_LINE_STIPPLE(struct ilo_builder *builder,
- unsigned pattern, unsigned factor)
+ const struct ilo_state_line_stipple *stipple)
{
const uint8_t cmd_len = 3;
- unsigned inverse;
uint32_t *dw;
ILO_DEV_ASSERT(builder->dev, 6, 8);
- assert((pattern & 0xffff) == pattern);
- assert(factor >= 1 && factor <= 256);
-
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_LINE_STIPPLE) | (cmd_len - 2);
- dw[1] = pattern;
-
- if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) {
- /* in U1.16 */
- inverse = 65536 / factor;
-
- dw[2] = inverse << GEN7_LINE_STIPPLE_DW2_INVERSE_REPEAT_COUNT__SHIFT |
- factor;
- }
- else {
- /* in U1.13 */
- inverse = 8192 / factor;
-
- dw[2] = inverse << GEN6_LINE_STIPPLE_DW2_INVERSE_REPEAT_COUNT__SHIFT |
- factor;
- }
+ /* see line_stipple_set_gen6_3DSTATE_LINE_STIPPLE() */
+ dw[1] = stipple->stipple[0];
+ dw[2] = stipple->stipple[1];
}
static inline void
-gen6_3DSTATE_AA_LINE_PARAMETERS(struct ilo_builder *builder)
+gen6_3DSTATE_AA_LINE_PARAMETERS(struct ilo_builder *builder,
+ const struct ilo_state_raster *rs)
{
const uint8_t cmd_len = 3;
- const uint32_t dw[3] = {
- GEN6_RENDER_CMD(3D, 3DSTATE_AA_LINE_PARAMETERS) | (cmd_len - 2),
- 0 << GEN6_AA_LINE_DW1_BIAS__SHIFT | 0,
- 0 << GEN6_AA_LINE_DW2_CAP_BIAS__SHIFT | 0,
- };
+ uint32_t *dw;
ILO_DEV_ASSERT(builder->dev, 6, 8);
- ilo_builder_batch_write(builder, cmd_len, dw);
+ ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+ dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_AA_LINE_PARAMETERS) | (cmd_len - 2);
+ /* constant */
+ dw[1] = 0 << GEN6_AA_LINE_DW1_BIAS__SHIFT |
+ 0 << GEN6_AA_LINE_DW1_SLOPE__SHIFT;
+ dw[2] = 0 << GEN6_AA_LINE_DW2_CAP_BIAS__SHIFT |
+ 0 << GEN6_AA_LINE_DW2_CAP_SLOPE__SHIFT;
}
static inline void
gen6_3DSTATE_DEPTH_BUFFER(struct ilo_builder *builder,
- const struct ilo_zs_surface *zs,
- bool aligned_8x4)
+ const struct ilo_state_zs *zs)
{
const uint32_t cmd = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ?
GEN7_RENDER_CMD(3D, 3DSTATE_DEPTH_BUFFER) :
@@ -1172,44 +658,49 @@ gen6_3DSTATE_DEPTH_BUFFER(struct ilo_builder *builder,
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = cmd | (cmd_len - 2);
- dw[1] = zs->payload[0];
- dw[2] = 0;
- /* see ilo_gpe_init_zs_surface() */
+ /*
+ * see zs_set_gen6_3DSTATE_DEPTH_BUFFER() and
+ * zs_set_gen7_3DSTATE_DEPTH_BUFFER()
+ */
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
+ dw[1] = zs->depth[0];
+ dw[2] = 0;
dw[3] = 0;
- dw[4] = (aligned_8x4) ? zs->dw_aligned_8x4 : zs->payload[2];
- dw[5] = zs->payload[3];
- dw[6] = zs->payload[4];
- dw[7] = zs->payload[5];
+ dw[4] = zs->depth[2];
+ dw[5] = zs->depth[3];
+ dw[6] = 0;
+ dw[7] = zs->depth[4];
dw[5] |= builder->mocs << GEN8_DEPTH_DW5_MOCS__SHIFT;
- if (zs->bo) {
- ilo_builder_batch_reloc64(builder, pos + 2, zs->bo,
- zs->payload[1], INTEL_RELOC_WRITE);
+ if (zs->depth_bo) {
+ ilo_builder_batch_reloc64(builder, pos + 2, zs->depth_bo,
+ zs->depth[1], (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
}
} else {
- dw[3] = (aligned_8x4) ? zs->dw_aligned_8x4 : zs->payload[2];
- dw[4] = zs->payload[3];
- dw[5] = zs->payload[4];
- dw[6] = zs->payload[5];
+ dw[1] = zs->depth[0];
+ dw[2] = 0;
+ dw[3] = zs->depth[2];
+ dw[4] = zs->depth[3];
+ dw[5] = 0;
+ dw[6] = zs->depth[4];
if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
dw[4] |= builder->mocs << GEN7_DEPTH_DW4_MOCS__SHIFT;
else
dw[6] |= builder->mocs << GEN6_DEPTH_DW6_MOCS__SHIFT;
- if (zs->bo) {
- ilo_builder_batch_reloc(builder, pos + 2, zs->bo,
- zs->payload[1], INTEL_RELOC_WRITE);
+ if (zs->depth_bo) {
+ ilo_builder_batch_reloc(builder, pos + 2, zs->depth_bo,
+ zs->depth[1], (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
}
}
}
static inline void
gen6_3DSTATE_STENCIL_BUFFER(struct ilo_builder *builder,
- const struct ilo_zs_surface *zs)
+ const struct ilo_state_zs *zs)
{
const uint32_t cmd = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ?
GEN7_RENDER_CMD(3D, 3DSTATE_STENCIL_BUFFER) :
@@ -1223,33 +714,36 @@ gen6_3DSTATE_STENCIL_BUFFER(struct ilo_builder *builder,
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = cmd | (cmd_len - 2);
- /* see ilo_gpe_init_zs_surface() */
- dw[1] = zs->payload[6];
- dw[2] = 0;
+ /* see zs_set_gen6_3DSTATE_STENCIL_BUFFER() */
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
- dw[1] |= builder->mocs << GEN8_STENCIL_DW1_MOCS__SHIFT;
-
+ dw[1] = zs->stencil[0];
+ dw[2] = 0;
dw[3] = 0;
- dw[4] = zs->payload[8];
+ dw[4] = zs->stencil[2];
- if (zs->separate_s8_bo) {
- ilo_builder_batch_reloc64(builder, pos + 2,
- zs->separate_s8_bo, zs->payload[7], INTEL_RELOC_WRITE);
+ dw[1] |= builder->mocs << GEN8_STENCIL_DW1_MOCS__SHIFT;
+
+ if (zs->stencil_bo) {
+ ilo_builder_batch_reloc64(builder, pos + 2, zs->stencil_bo,
+ zs->stencil[1], (zs->s_readonly) ? 0 : INTEL_RELOC_WRITE);
}
} else {
+ dw[1] = zs->stencil[0];
+ dw[2] = 0;
+
dw[1] |= builder->mocs << GEN6_STENCIL_DW1_MOCS__SHIFT;
- if (zs->separate_s8_bo) {
- ilo_builder_batch_reloc(builder, pos + 2,
- zs->separate_s8_bo, zs->payload[7], INTEL_RELOC_WRITE);
+ if (zs->stencil_bo) {
+ ilo_builder_batch_reloc(builder, pos + 2, zs->stencil_bo,
+ zs->stencil[1], (zs->s_readonly) ? 0 : INTEL_RELOC_WRITE);
}
}
}
static inline void
gen6_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_builder *builder,
- const struct ilo_zs_surface *zs)
+ const struct ilo_state_zs *zs)
{
const uint32_t cmd = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ?
GEN7_RENDER_CMD(3D, 3DSTATE_HIER_DEPTH_BUFFER) :
@@ -1263,26 +757,29 @@ gen6_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_builder *builder,
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = cmd | (cmd_len - 2);
- /* see ilo_gpe_init_zs_surface() */
- dw[1] = zs->payload[9];
- dw[2] = 0;
+ /* see zs_set_gen6_3DSTATE_HIER_DEPTH_BUFFER() */
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
- dw[1] |= builder->mocs << GEN8_HIZ_DW1_MOCS__SHIFT;
-
+ dw[1] = zs->hiz[0];
+ dw[2] = 0;
dw[3] = 0;
- dw[4] = zs->payload[11];
+ dw[4] = zs->hiz[2];
+
+ dw[1] |= builder->mocs << GEN8_HIZ_DW1_MOCS__SHIFT;
if (zs->hiz_bo) {
- ilo_builder_batch_reloc64(builder, pos + 2,
- zs->hiz_bo, zs->payload[10], INTEL_RELOC_WRITE);
+ ilo_builder_batch_reloc64(builder, pos + 2, zs->hiz_bo,
+ zs->hiz[1], (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
}
} else {
+ dw[1] = zs->hiz[0];
+ dw[2] = 0;
+
dw[1] |= builder->mocs << GEN6_HIZ_DW1_MOCS__SHIFT;
if (zs->hiz_bo) {
- ilo_builder_batch_reloc(builder, pos + 2,
- zs->hiz_bo, zs->payload[10], INTEL_RELOC_WRITE);
+ ilo_builder_batch_reloc(builder, pos + 2, zs->hiz_bo,
+ zs->hiz[1], (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
}
}
}
@@ -1440,34 +937,24 @@ gen7_3DSTATE_BLEND_STATE_POINTERS(struct ilo_builder *builder,
static inline uint32_t
gen6_CLIP_VIEWPORT(struct ilo_builder *builder,
- const struct ilo_viewport_cso *viewports,
- unsigned num_viewports)
+ const struct ilo_state_viewport *vp)
{
const int state_align = 32;
- const int state_len = 4 * num_viewports;
+ const int state_len = 4 * vp->count;
uint32_t state_offset, *dw;
- unsigned i;
+ int i;
ILO_DEV_ASSERT(builder->dev, 6, 6);
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 193:
- *
- * "The viewport-related state is stored as an array of up to 16
- * elements..."
- */
- assert(num_viewports && num_viewports <= 16);
-
state_offset = ilo_builder_dynamic_pointer(builder,
ILO_BUILDER_ITEM_CLIP_VIEWPORT, state_align, state_len, &dw);
- for (i = 0; i < num_viewports; i++) {
- const struct ilo_viewport_cso *vp = &viewports[i];
-
- dw[0] = fui(vp->min_gbx);
- dw[1] = fui(vp->max_gbx);
- dw[2] = fui(vp->min_gby);
- dw[3] = fui(vp->max_gby);
+ for (i = 0; i < vp->count; i++) {
+ /* see viewport_matrix_set_gen7_SF_CLIP_VIEWPORT() */
+ dw[0] = vp->sf_clip[i][8];
+ dw[1] = vp->sf_clip[i][9];
+ dw[2] = vp->sf_clip[i][10];
+ dw[3] = vp->sf_clip[i][11];
dw += 4;
}
@@ -1477,38 +964,21 @@ gen6_CLIP_VIEWPORT(struct ilo_builder *builder,
static inline uint32_t
gen6_SF_VIEWPORT(struct ilo_builder *builder,
- const struct ilo_viewport_cso *viewports,
- unsigned num_viewports)
+ const struct ilo_state_viewport *vp)
{
const int state_align = 32;
- const int state_len = 8 * num_viewports;
+ const int state_len = 8 * vp->count;
uint32_t state_offset, *dw;
- unsigned i;
+ int i;
ILO_DEV_ASSERT(builder->dev, 6, 6);
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 262:
- *
- * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
- * stored as an array of up to 16 elements..."
- */
- assert(num_viewports && num_viewports <= 16);
-
state_offset = ilo_builder_dynamic_pointer(builder,
ILO_BUILDER_ITEM_SF_VIEWPORT, state_align, state_len, &dw);
- for (i = 0; i < num_viewports; i++) {
- const struct ilo_viewport_cso *vp = &viewports[i];
-
- dw[0] = fui(vp->m00);
- dw[1] = fui(vp->m11);
- dw[2] = fui(vp->m22);
- dw[3] = fui(vp->m30);
- dw[4] = fui(vp->m31);
- dw[5] = fui(vp->m32);
- dw[6] = 0;
- dw[7] = 0;
+ for (i = 0; i < vp->count; i++) {
+ /* see viewport_matrix_set_gen7_SF_CLIP_VIEWPORT() */
+ memcpy(dw, vp->sf_clip[i], sizeof(*dw) * 8);
dw += 8;
}
@@ -1518,298 +988,103 @@ gen6_SF_VIEWPORT(struct ilo_builder *builder,
static inline uint32_t
gen7_SF_CLIP_VIEWPORT(struct ilo_builder *builder,
- const struct ilo_viewport_cso *viewports,
- unsigned num_viewports)
+ const struct ilo_state_viewport *vp)
{
const int state_align = 64;
- const int state_len = 16 * num_viewports;
- uint32_t state_offset, *dw;
- unsigned i;
+ const int state_len = 16 * vp->count;
ILO_DEV_ASSERT(builder->dev, 7, 8);
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 270:
- *
- * "The viewport-specific state used by both the SF and CL units
- * (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each
- * of which contains the DWords described below. The start of each
- * element is spaced 16 DWords apart. The location of first element of
- * the array, as specified by both Pointer to SF_VIEWPORT and Pointer
- * to CLIP_VIEWPORT, is aligned to a 64-byte boundary."
- */
- assert(num_viewports && num_viewports <= 16);
-
- state_offset = ilo_builder_dynamic_pointer(builder,
- ILO_BUILDER_ITEM_SF_VIEWPORT, state_align, state_len, &dw);
-
- for (i = 0; i < num_viewports; i++) {
- const struct ilo_viewport_cso *vp = &viewports[i];
-
- dw[0] = fui(vp->m00);
- dw[1] = fui(vp->m11);
- dw[2] = fui(vp->m22);
- dw[3] = fui(vp->m30);
- dw[4] = fui(vp->m31);
- dw[5] = fui(vp->m32);
- dw[6] = 0;
- dw[7] = 0;
-
- dw[8] = fui(vp->min_gbx);
- dw[9] = fui(vp->max_gbx);
- dw[10] = fui(vp->min_gby);
- dw[11] = fui(vp->max_gby);
-
- if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
- dw[12] = fui(vp->min_x);
- dw[13] = fui(vp->max_x - 1.0f);
- dw[14] = fui(vp->min_y);
- dw[15] = fui(vp->max_y - 1.0f);
- } else {
- dw[12] = 0;
- dw[13] = 0;
- dw[14] = 0;
- dw[15] = 0;
- }
-
- dw += 16;
- }
-
- return state_offset;
+ /* see viewport_matrix_set_gen7_SF_CLIP_VIEWPORT() */
+ return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_SF_VIEWPORT,
+ state_align, state_len, (const uint32_t *) vp->sf_clip);
}
static inline uint32_t
gen6_CC_VIEWPORT(struct ilo_builder *builder,
- const struct ilo_viewport_cso *viewports,
- unsigned num_viewports)
+ const struct ilo_state_viewport *vp)
{
const int state_align = 32;
- const int state_len = 2 * num_viewports;
- uint32_t state_offset, *dw;
- unsigned i;
+ const int state_len = 2 * vp->count;
ILO_DEV_ASSERT(builder->dev, 6, 8);
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 385:
- *
- * "The viewport state is stored as an array of up to 16 elements..."
- */
- assert(num_viewports && num_viewports <= 16);
-
- state_offset = ilo_builder_dynamic_pointer(builder,
- ILO_BUILDER_ITEM_CC_VIEWPORT, state_align, state_len, &dw);
-
- for (i = 0; i < num_viewports; i++) {
- const struct ilo_viewport_cso *vp = &viewports[i];
-
- dw[0] = fui(vp->min_z);
- dw[1] = fui(vp->max_z);
-
- dw += 2;
- }
-
- return state_offset;
+ /* see viewport_matrix_set_gen6_CC_VIEWPORT() */
+ return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_CC_VIEWPORT,
+ state_align, state_len, (const uint32_t *) vp->cc);
}
static inline uint32_t
gen6_SCISSOR_RECT(struct ilo_builder *builder,
- const struct ilo_scissor_state *scissor,
- unsigned num_viewports)
+ const struct ilo_state_viewport *vp)
{
const int state_align = 32;
- const int state_len = 2 * num_viewports;
+ const int state_len = 2 * vp->count;
ILO_DEV_ASSERT(builder->dev, 6, 8);
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 263:
- *
- * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
- * stored as an array of up to 16 elements..."
- */
- assert(num_viewports && num_viewports <= 16);
- assert(Elements(scissor->payload) >= state_len);
-
+ /* see viewport_scissor_set_gen6_SCISSOR_RECT() */
return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_SCISSOR_RECT,
- state_align, state_len, scissor->payload);
+ state_align, state_len, (const uint32_t *) vp->scissor);
}
static inline uint32_t
gen6_COLOR_CALC_STATE(struct ilo_builder *builder,
- const struct pipe_stencil_ref *stencil_ref,
- ubyte alpha_ref,
- const struct pipe_blend_color *blend_color)
+ const struct ilo_state_cc *cc)
{
const int state_align = 64;
const int state_len = 6;
- uint32_t state_offset, *dw;
ILO_DEV_ASSERT(builder->dev, 6, 8);
- state_offset = ilo_builder_dynamic_pointer(builder,
- ILO_BUILDER_ITEM_COLOR_CALC, state_align, state_len, &dw);
-
- dw[0] = stencil_ref->ref_value[0] << 24 |
- stencil_ref->ref_value[1] << 16 |
- GEN6_CC_DW0_ALPHATEST_UNORM8;
- dw[1] = alpha_ref;
- dw[2] = fui(blend_color->color[0]);
- dw[3] = fui(blend_color->color[1]);
- dw[4] = fui(blend_color->color[2]);
- dw[5] = fui(blend_color->color[3]);
-
- return state_offset;
+ /* see cc_params_set_gen6_COLOR_CALC_STATE() */
+ return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_COLOR_CALC,
+ state_align, state_len, cc->cc);
}
static inline uint32_t
gen6_DEPTH_STENCIL_STATE(struct ilo_builder *builder,
- const struct ilo_dsa_state *dsa)
+ const struct ilo_state_cc *cc)
{
const int state_align = 64;
const int state_len = 3;
ILO_DEV_ASSERT(builder->dev, 6, 7.5);
- STATIC_ASSERT(Elements(dsa->payload) >= state_len);
-
+ /* see cc_set_gen6_DEPTH_STENCIL_STATE() */
return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_DEPTH_STENCIL,
- state_align, state_len, dsa->payload);
+ state_align, state_len, cc->ds);
}
static inline uint32_t
gen6_BLEND_STATE(struct ilo_builder *builder,
- const struct ilo_blend_state *blend,
- const struct ilo_fb_state *fb,
- const struct ilo_dsa_state *dsa)
+ const struct ilo_state_cc *cc)
{
const int state_align = 64;
- int state_len;
- uint32_t state_offset, *dw;
- unsigned num_targets, i;
+ const int state_len = 2 * cc->blend_state_count;
ILO_DEV_ASSERT(builder->dev, 6, 7.5);
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 376:
- *
- * "The blend state is stored as an array of up to 8 elements..."
- */
- num_targets = fb->state.nr_cbufs;
- assert(num_targets <= 8);
-
- if (!num_targets) {
- if (!dsa->dw_blend_alpha)
- return 0;
- /* to be able to reference alpha func */
- num_targets = 1;
- }
-
- state_len = 2 * num_targets;
-
- state_offset = ilo_builder_dynamic_pointer(builder,
- ILO_BUILDER_ITEM_BLEND, state_align, state_len, &dw);
-
- for (i = 0; i < num_targets; i++) {
- const struct ilo_blend_cso *cso = &blend->cso[i];
-
- dw[0] = cso->payload[0];
- dw[1] = cso->payload[1] | blend->dw_shared;
-
- if (i < fb->state.nr_cbufs && fb->state.cbufs[i]) {
- const struct ilo_fb_blend_caps *caps = &fb->blend_caps[i];
-
- if (caps->can_blend) {
- if (caps->dst_alpha_forced_one)
- dw[0] |= cso->dw_blend_dst_alpha_forced_one;
- else
- dw[0] |= cso->dw_blend;
- }
-
- if (caps->can_logicop)
- dw[1] |= blend->dw_logicop;
-
- if (caps->can_alpha_test)
- dw[1] |= dsa->dw_blend_alpha;
- } else {
- dw[1] |= GEN6_RT_DW1_WRITE_DISABLE_A |
- GEN6_RT_DW1_WRITE_DISABLE_R |
- GEN6_RT_DW1_WRITE_DISABLE_G |
- GEN6_RT_DW1_WRITE_DISABLE_B |
- dsa->dw_blend_alpha;
- }
+ if (!state_len)
+ return 0;
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 356:
- *
- * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
- * Dither both must be disabled."
- *
- * There is no such limitation on GEN7, or for AlphaToOne. But GL
- * requires that anyway.
- */
- if (fb->num_samples > 1)
- dw[1] |= blend->dw_alpha_mod;
-
- dw += 2;
- }
-
- return state_offset;
+ /* see cc_set_gen6_BLEND_STATE() */
+ return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_BLEND,
+ state_align, state_len, cc->blend);
}
static inline uint32_t
gen8_BLEND_STATE(struct ilo_builder *builder,
- const struct ilo_blend_state *blend,
- const struct ilo_fb_state *fb,
- const struct ilo_dsa_state *dsa)
+ const struct ilo_state_cc *cc)
{
const int state_align = 64;
- const int state_len = 1 + 2 * fb->state.nr_cbufs;
- uint32_t state_offset, *dw;
- unsigned i;
+ const int state_len = 1 + 2 * cc->blend_state_count;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- assert(fb->state.nr_cbufs <= 8);
-
- state_offset = ilo_builder_dynamic_pointer(builder,
- ILO_BUILDER_ITEM_BLEND, state_align, state_len, &dw);
-
- dw[0] = blend->dw_shared;
- if (fb->num_samples > 1)
- dw[0] |= blend->dw_alpha_mod;
- if (!fb->state.nr_cbufs || fb->blend_caps[0].can_alpha_test)
- dw[0] |= dsa->dw_blend_alpha;
- dw++;
-
- for (i = 0; i < fb->state.nr_cbufs; i++) {
- const struct ilo_fb_blend_caps *caps = &fb->blend_caps[i];
- const struct ilo_blend_cso *cso = &blend->cso[i];
-
- dw[0] = cso->payload[0];
- dw[1] = cso->payload[1];
-
- if (fb->state.cbufs[i]) {
- if (caps->can_blend) {
- if (caps->dst_alpha_forced_one)
- dw[0] |= cso->dw_blend_dst_alpha_forced_one;
- else
- dw[0] |= cso->dw_blend;
- }
-
- if (caps->can_logicop)
- dw[1] |= blend->dw_logicop;
- } else {
- dw[0] |= GEN8_RT_DW0_WRITE_DISABLE_A |
- GEN8_RT_DW0_WRITE_DISABLE_R |
- GEN8_RT_DW0_WRITE_DISABLE_G |
- GEN8_RT_DW0_WRITE_DISABLE_B;
- }
-
- dw += 2;
- }
-
- return state_offset;
+ /* see cc_set_gen8_BLEND_STATE() */
+ return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_BLEND,
+ state_align, state_len, &cc->blend[1]);
}
#endif /* ILO_BUILDER_3D_BOTTOM_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
index 05dbce7c905..8d30095e6f6 100644
--- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
+++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
@@ -29,303 +29,167 @@
#define ILO_BUILDER_3D_TOP_H
#include "genhw/genhw.h"
-#include "../ilo_resource.h"
-#include "../ilo_shader.h"
#include "intel_winsys.h"
#include "ilo_core.h"
#include "ilo_dev.h"
-#include "ilo_state_3d.h"
+#include "ilo_state_sampler.h"
+#include "ilo_state_shader.h"
+#include "ilo_state_sol.h"
+#include "ilo_state_surface.h"
+#include "ilo_state_urb.h"
+#include "ilo_state_vf.h"
#include "ilo_builder.h"
static inline void
gen6_3DSTATE_URB(struct ilo_builder *builder,
- int vs_total_size, int gs_total_size,
- int vs_entry_size, int gs_entry_size)
+ const struct ilo_state_urb *urb)
{
const uint8_t cmd_len = 3;
- const int row_size = 128; /* 1024 bits */
- int vs_alloc_size, gs_alloc_size;
- int vs_num_entries, gs_num_entries;
uint32_t *dw;
- ILO_DEV_ASSERT(builder->dev, 6, 6);
-
- /* in 1024-bit URB rows */
- vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
- gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;
-
- /* the valid range is [1, 5] */
- if (!vs_alloc_size)
- vs_alloc_size = 1;
- if (!gs_alloc_size)
- gs_alloc_size = 1;
- assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);
-
- /* the valid range is [24, 256] in multiples of 4 */
- vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
- if (vs_num_entries > 256)
- vs_num_entries = 256;
- assert(vs_num_entries >= 24);
-
- /* the valid range is [0, 256] in multiples of 4 */
- gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
- if (gs_num_entries > 256)
- gs_num_entries = 256;
-
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_URB) | (cmd_len - 2);
- dw[1] = (vs_alloc_size - 1) << GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT |
- vs_num_entries << GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT;
- dw[2] = gs_num_entries << GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT |
- (gs_alloc_size - 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT;
+ /* see urb_set_gen6_3DSTATE_URB() */
+ dw[1] = urb->urb[0];
+ dw[2] = urb->urb[1];
}
static inline void
-gen7_3dstate_push_constant_alloc(struct ilo_builder *builder,
- int subop, int offset, int size)
+gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(struct ilo_builder *builder,
+ const struct ilo_state_urb *urb)
{
- const uint32_t cmd = GEN6_RENDER_TYPE_RENDER |
- GEN6_RENDER_SUBTYPE_3D |
- subop;
const uint8_t cmd_len = 2;
- const int slice_count = ((ilo_dev_gen(builder->dev) == ILO_GEN(7.5) &&
- builder->dev->gt == 3) ||
- ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 2 : 1;
uint32_t *dw;
- int end;
-
- ILO_DEV_ASSERT(builder->dev, 7, 8);
-
- /* VS, HS, DS, GS, and PS variants */
- assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS &&
- subop <= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS);
-
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 68:
- *
- * "(A table that says the maximum size of each constant buffer is
- * 16KB")
- *
- * From the Ivy Bridge PRM, volume 2 part 1, page 115:
- *
- * "The sum of the Constant Buffer Offset and the Constant Buffer Size
- * may not exceed the maximum value of the Constant Buffer Size."
- *
- * Thus, the valid range of buffer end is [0KB, 16KB].
- */
- end = (offset + size) / 1024;
- if (end > 16 * slice_count) {
- assert(!"invalid constant buffer end");
- end = 16 * slice_count;
- }
-
- /* the valid range of buffer offset is [0KB, 15KB] */
- offset = (offset + 1023) / 1024;
- if (offset > 15 * slice_count) {
- assert(!"invalid constant buffer offset");
- offset = 15 * slice_count;
- }
-
- if (offset > end) {
- assert(!size);
- offset = end;
- }
-
- /* the valid range of buffer size is [0KB, 15KB] */
- size = end - offset;
- if (size > 15 * slice_count) {
- assert(!"invalid constant buffer size");
- size = 15 * slice_count;
- }
-
- assert(offset % slice_count == 0 && size % slice_count == 0);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = cmd | (cmd_len - 2);
- dw[1] = offset << GEN7_PCB_ALLOC_DW1_OFFSET__SHIFT |
- size;
-}
-
-static inline void
-gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(struct ilo_builder *builder,
- int offset, int size)
-{
- gen7_3dstate_push_constant_alloc(builder,
- GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS, offset, size);
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_VS) |
+ (cmd_len - 2);
+ /* see urb_set_gen7_3dstate_push_constant_alloc() */
+ dw[1] = urb->pcb[0];
}
static inline void
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_HS(struct ilo_builder *builder,
- int offset, int size)
+ const struct ilo_state_urb *urb)
{
- gen7_3dstate_push_constant_alloc(builder,
- GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_HS, offset, size);
+ const uint8_t cmd_len = 2;
+ uint32_t *dw;
+
+ ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_HS) |
+ (cmd_len - 2);
+ /* see urb_set_gen7_3dstate_push_constant_alloc() */
+ dw[1] = urb->pcb[1];
}
static inline void
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_DS(struct ilo_builder *builder,
- int offset, int size)
+ const struct ilo_state_urb *urb)
{
- gen7_3dstate_push_constant_alloc(builder,
- GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_DS, offset, size);
+ const uint8_t cmd_len = 2;
+ uint32_t *dw;
+
+ ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_DS) |
+ (cmd_len - 2);
+ /* see urb_set_gen7_3dstate_push_constant_alloc() */
+ dw[1] = urb->pcb[2];
}
static inline void
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_GS(struct ilo_builder *builder,
- int offset, int size)
+ const struct ilo_state_urb *urb)
{
- gen7_3dstate_push_constant_alloc(builder,
- GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_GS, offset, size);
-}
+ const uint8_t cmd_len = 2;
+ uint32_t *dw;
-static inline void
-gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(struct ilo_builder *builder,
- int offset, int size)
-{
- gen7_3dstate_push_constant_alloc(builder,
- GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS, offset, size);
+ ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_GS) |
+ (cmd_len - 2);
+ /* see urb_set_gen7_3dstate_push_constant_alloc() */
+ dw[1] = urb->pcb[3];
}
static inline void
-gen7_3dstate_urb(struct ilo_builder *builder,
- int subop, int offset, int size,
- int entry_size)
+gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(struct ilo_builder *builder,
+ const struct ilo_state_urb *urb)
{
- const uint32_t cmd = GEN6_RENDER_TYPE_RENDER |
- GEN6_RENDER_SUBTYPE_3D |
- subop;
const uint8_t cmd_len = 2;
- const int row_size = 64; /* 512 bits */
- int alloc_size, num_entries, min_entries, max_entries;
uint32_t *dw;
- ILO_DEV_ASSERT(builder->dev, 7, 8);
-
- /* VS, HS, DS, and GS variants */
- assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_URB_VS &&
- subop <= GEN7_RENDER_OPCODE_3DSTATE_URB_GS);
-
- /* in multiples of 8KB */
- assert(offset % 8192 == 0);
- offset /= 8192;
-
- /* in multiple of 512-bit rows */
- alloc_size = (entry_size + row_size - 1) / row_size;
- if (!alloc_size)
- alloc_size = 1;
-
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 34:
- *
- * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
- * cause performance to decrease due to banking in the URB. Element
- * sizes of 16 to 20 should be programmed with six 512-bit URB rows."
- */
- if (subop == GEN7_RENDER_OPCODE_3DSTATE_URB_VS && alloc_size == 5)
- alloc_size = 6;
-
- /* in multiples of 8 */
- num_entries = (size / row_size / alloc_size) & ~7;
-
- switch (subop) {
- case GEN7_RENDER_OPCODE_3DSTATE_URB_VS:
- switch (ilo_dev_gen(builder->dev)) {
- case ILO_GEN(8):
- max_entries = 2560;
- min_entries = 64;
- break;
- case ILO_GEN(7.5):
- max_entries = (builder->dev->gt >= 2) ? 1664 : 640;
- min_entries = (builder->dev->gt >= 2) ? 64 : 32;
- break;
- case ILO_GEN(7):
- default:
- max_entries = (builder->dev->gt == 2) ? 704 : 512;
- min_entries = 32;
- break;
- }
-
- assert(num_entries >= min_entries);
- if (num_entries > max_entries)
- num_entries = max_entries;
- break;
- case GEN7_RENDER_OPCODE_3DSTATE_URB_HS:
- max_entries = (builder->dev->gt == 2) ? 64 : 32;
- if (num_entries > max_entries)
- num_entries = max_entries;
- break;
- case GEN7_RENDER_OPCODE_3DSTATE_URB_DS:
- if (num_entries)
- assert(num_entries >= 138);
- break;
- case GEN7_RENDER_OPCODE_3DSTATE_URB_GS:
- switch (ilo_dev_gen(builder->dev)) {
- case ILO_GEN(8):
- max_entries = 960;
- break;
- case ILO_GEN(7.5):
- max_entries = (builder->dev->gt >= 2) ? 640 : 256;
- break;
- case ILO_GEN(7):
- default:
- max_entries = (builder->dev->gt == 2) ? 320 : 192;
- break;
- }
-
- if (num_entries > max_entries)
- num_entries = max_entries;
- break;
- default:
- break;
- }
-
ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = cmd | (cmd_len - 2);
- dw[1] = offset << GEN7_URB_DW1_OFFSET__SHIFT |
- (alloc_size - 1) << GEN7_URB_DW1_ENTRY_SIZE__SHIFT |
- num_entries;
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_PS) |
+ (cmd_len - 2);
+ /* see urb_set_gen7_3dstate_push_constant_alloc() */
+ dw[1] = urb->pcb[4];
}
static inline void
gen7_3DSTATE_URB_VS(struct ilo_builder *builder,
- int offset, int size, int entry_size)
+ const struct ilo_state_urb *urb)
{
- gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_VS,
- offset, size, entry_size);
+ const uint8_t cmd_len = 2;
+ uint32_t *dw;
+
+ ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_VS) | (cmd_len - 2);
+ /* see urb_set_gen7_3dstate_push_constant_alloc() */
+ dw[1] = urb->urb[0];
}
static inline void
gen7_3DSTATE_URB_HS(struct ilo_builder *builder,
- int offset, int size, int entry_size)
+ const struct ilo_state_urb *urb)
{
- gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_HS,
- offset, size, entry_size);
+ const uint8_t cmd_len = 2;
+ uint32_t *dw;
+
+ ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_HS) | (cmd_len - 2);
+ /* see urb_set_gen7_3dstate_push_constant_alloc() */
+ dw[1] = urb->urb[1];
}
static inline void
gen7_3DSTATE_URB_DS(struct ilo_builder *builder,
- int offset, int size, int entry_size)
+ const struct ilo_state_urb *urb)
{
- gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_DS,
- offset, size, entry_size);
+ const uint8_t cmd_len = 2;
+ uint32_t *dw;
+
+ ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_DS) | (cmd_len - 2);
+ /* see urb_set_gen7_3dstate_push_constant_alloc() */
+ dw[1] = urb->urb[2];
}
static inline void
gen7_3DSTATE_URB_GS(struct ilo_builder *builder,
- int offset, int size, int entry_size)
+ const struct ilo_state_urb *urb)
{
- gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_GS,
- offset, size, entry_size);
+ const uint8_t cmd_len = 2;
+ uint32_t *dw;
+
+ ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_GS) | (cmd_len - 2);
+ /* see urb_set_gen7_3dstate_push_constant_alloc() */
+ dw[1] = urb->urb[3];
}
static inline void
gen75_3DSTATE_VF(struct ilo_builder *builder,
- bool enable_cut_index,
- uint32_t cut_index)
+ const struct ilo_state_vf *vf)
{
const uint8_t cmd_len = 2;
uint32_t *dw;
@@ -334,11 +198,10 @@ gen75_3DSTATE_VF(struct ilo_builder *builder,
ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = GEN75_RENDER_CMD(3D, 3DSTATE_VF) | (cmd_len - 2);
- if (enable_cut_index)
- dw[0] |= GEN75_VF_DW0_CUT_INDEX_ENABLE;
-
- dw[1] = cut_index;
+ /* see vf_params_set_gen75_3DSTATE_VF() */
+ dw[0] = GEN75_RENDER_CMD(3D, 3DSTATE_VF) | (cmd_len - 2) |
+ vf->cut[0];
+ dw[1] = vf->cut[1];
}
static inline void
@@ -354,40 +217,11 @@ gen6_3DSTATE_VF_STATISTICS(struct ilo_builder *builder,
ilo_builder_batch_write(builder, cmd_len, &dw0);
}
-/**
- * Translate a pipe primitive type to the matching hardware primitive type.
- */
-static inline int
-gen6_3d_translate_pipe_prim(unsigned prim)
-{
- static const int prim_mapping[ILO_PRIM_MAX] = {
- [PIPE_PRIM_POINTS] = GEN6_3DPRIM_POINTLIST,
- [PIPE_PRIM_LINES] = GEN6_3DPRIM_LINELIST,
- [PIPE_PRIM_LINE_LOOP] = GEN6_3DPRIM_LINELOOP,
- [PIPE_PRIM_LINE_STRIP] = GEN6_3DPRIM_LINESTRIP,
- [PIPE_PRIM_TRIANGLES] = GEN6_3DPRIM_TRILIST,
- [PIPE_PRIM_TRIANGLE_STRIP] = GEN6_3DPRIM_TRISTRIP,
- [PIPE_PRIM_TRIANGLE_FAN] = GEN6_3DPRIM_TRIFAN,
- [PIPE_PRIM_QUADS] = GEN6_3DPRIM_QUADLIST,
- [PIPE_PRIM_QUAD_STRIP] = GEN6_3DPRIM_QUADSTRIP,
- [PIPE_PRIM_POLYGON] = GEN6_3DPRIM_POLYGON,
- [PIPE_PRIM_LINES_ADJACENCY] = GEN6_3DPRIM_LINELIST_ADJ,
- [PIPE_PRIM_LINE_STRIP_ADJACENCY] = GEN6_3DPRIM_LINESTRIP_ADJ,
- [PIPE_PRIM_TRIANGLES_ADJACENCY] = GEN6_3DPRIM_TRILIST_ADJ,
- [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = GEN6_3DPRIM_TRISTRIP_ADJ,
- [ILO_PRIM_RECTANGLES] = GEN6_3DPRIM_RECTLIST,
- };
-
- assert(prim_mapping[prim]);
-
- return prim_mapping[prim];
-}
-
static inline void
-gen8_3DSTATE_VF_TOPOLOGY(struct ilo_builder *builder, unsigned pipe_prim)
+gen8_3DSTATE_VF_TOPOLOGY(struct ilo_builder *builder,
+ enum gen_3dprim_type topology)
{
const uint8_t cmd_len = 2;
- const int prim = gen6_3d_translate_pipe_prim(pipe_prim);
uint32_t *dw;
ILO_DEV_ASSERT(builder->dev, 8, 8);
@@ -395,12 +229,13 @@ gen8_3DSTATE_VF_TOPOLOGY(struct ilo_builder *builder, unsigned pipe_prim)
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_VF_TOPOLOGY) | (cmd_len - 2);
- dw[1] = prim;
+ dw[1] = topology << GEN8_TOPOLOGY_DW1_TYPE__SHIFT;
}
static inline void
gen8_3DSTATE_VF_INSTANCING(struct ilo_builder *builder,
- int vb_index, uint32_t step_rate)
+ const struct ilo_state_vf *vf,
+ uint32_t attr)
{
const uint8_t cmd_len = 3;
uint32_t *dw;
@@ -410,16 +245,20 @@ gen8_3DSTATE_VF_INSTANCING(struct ilo_builder *builder,
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_VF_INSTANCING) | (cmd_len - 2);
- dw[1] = vb_index;
- if (step_rate)
- dw[1] |= GEN8_INSTANCING_DW1_ENABLE;
- dw[2] = step_rate;
+ dw[1] = attr << GEN8_INSTANCING_DW1_VE_INDEX__SHIFT;
+ dw[2] = 0;
+ /* see vf_set_gen8_3DSTATE_VF_INSTANCING() */
+ if (attr >= vf->internal_ve_count) {
+ attr -= vf->internal_ve_count;
+
+ dw[1] |= vf->user_instancing[attr][0];
+ dw[2] |= vf->user_instancing[attr][1];
+ }
}
static inline void
gen8_3DSTATE_VF_SGVS(struct ilo_builder *builder,
- bool vid_enable, int vid_ve, int vid_comp,
- bool iid_enable, int iid_ve, int iid_comp)
+ const struct ilo_state_vf *vf)
{
const uint8_t cmd_len = 2;
uint32_t *dw;
@@ -429,29 +268,19 @@ gen8_3DSTATE_VF_SGVS(struct ilo_builder *builder,
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_VF_SGVS) | (cmd_len - 2);
- dw[1] = 0;
-
- if (iid_enable) {
- dw[1] |= GEN8_SGVS_DW1_IID_ENABLE |
- vid_comp << GEN8_SGVS_DW1_IID_VE_COMP__SHIFT |
- vid_ve << GEN8_SGVS_DW1_IID_VE_INDEX__SHIFT;
- }
-
- if (vid_enable) {
- dw[1] |= GEN8_SGVS_DW1_VID_ENABLE |
- vid_comp << GEN8_SGVS_DW1_VID_VE_COMP__SHIFT |
- vid_ve << GEN8_SGVS_DW1_VID_VE_INDEX__SHIFT;
- }
+ /* see vf_params_set_gen8_3DSTATE_VF_SGVS() */
+ dw[1] = vf->sgvs[0];
}
static inline void
gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder,
- const struct ilo_ve_state *ve,
- const struct ilo_vb_state *vb)
+ const struct ilo_state_vf *vf,
+ const struct ilo_state_vertex_buffer *vb,
+ unsigned vb_count)
{
uint8_t cmd_len;
uint32_t *dw;
- unsigned pos, hw_idx;
+ unsigned pos, i;
ILO_DEV_ASSERT(builder->dev, 6, 8);
@@ -460,67 +289,52 @@ gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder,
*
* "From 1 to 33 VBs can be specified..."
*/
- assert(ve->vb_count <= 33);
+ assert(vb_count <= 33);
- if (!ve->vb_count)
+ if (!vb_count)
return;
- cmd_len = 1 + 4 * ve->vb_count;
+ cmd_len = 1 + 4 * vb_count;
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_BUFFERS) | (cmd_len - 2);
dw++;
pos++;
- for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
- const unsigned instance_divisor = ve->instance_divisors[hw_idx];
- const unsigned pipe_idx = ve->vb_mapping[hw_idx];
- const struct pipe_vertex_buffer *cso = &vb->states[pipe_idx];
+ for (i = 0; i < vb_count; i++) {
+ const struct ilo_state_vertex_buffer *b = &vb[i];
- dw[0] = hw_idx << GEN6_VB_DW0_INDEX__SHIFT;
+ /* see vertex_buffer_set_gen8_vertex_buffer_state() */
+ dw[0] = b->vb[0] |
+ i << GEN6_VB_DW0_INDEX__SHIFT;
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8))
dw[0] |= builder->mocs << GEN8_VB_DW0_MOCS__SHIFT;
else
dw[0] |= builder->mocs << GEN6_VB_DW0_MOCS__SHIFT;
- if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
- dw[0] |= GEN7_VB_DW0_ADDR_MODIFIED;
-
- if (instance_divisor)
- dw[0] |= GEN6_VB_DW0_ACCESS_INSTANCEDATA;
- else
- dw[0] |= GEN6_VB_DW0_ACCESS_VERTEXDATA;
-
- /* use null vb if there is no buffer or the stride is out of range */
- if (!cso->buffer || cso->stride > 2048) {
- dw[0] |= GEN6_VB_DW0_IS_NULL;
- dw[1] = 0;
- dw[2] = 0;
- dw[3] = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ?
- 0 : instance_divisor;
-
- continue;
- }
-
- dw[0] |= cso->stride << GEN6_VB_DW0_PITCH__SHIFT;
+ dw[1] = 0;
+ dw[2] = 0;
+ dw[3] = 0;
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
- const struct ilo_buffer *buf = ilo_buffer(cso->buffer);
- const uint32_t start_offset = cso->buffer_offset;
+ if (b->need_bo)
+ ilo_builder_batch_reloc64(builder, pos + 1, b->bo, b->vb[1], 0);
- ilo_builder_batch_reloc64(builder, pos + 1,
- buf->bo, start_offset, 0);
- dw[3] = buf->bo_size;
+ dw[3] |= b->vb[2];
} else {
- const struct ilo_buffer *buf = ilo_buffer(cso->buffer);
- const uint32_t start_offset = cso->buffer_offset;
- const uint32_t end_offset = buf->bo_size - 1;
+ const int8_t elem = vf->vb_to_first_elem[i];
- dw[3] = instance_divisor;
+ /* see vf_set_gen6_vertex_buffer_state() */
+ if (elem >= 0) {
+ dw[0] |= vf->user_instancing[elem][0];
+ dw[3] |= vf->user_instancing[elem][1];
+ }
- ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0);
- ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0);
+ if (b->need_bo) {
+ ilo_builder_batch_reloc(builder, pos + 1, b->bo, b->vb[1], 0);
+ ilo_builder_batch_reloc(builder, pos + 2, b->bo, b->vb[2], 0);
+ }
}
dw += 4;
@@ -563,248 +377,189 @@ gen6_user_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder,
static inline void
gen6_3DSTATE_VERTEX_ELEMENTS(struct ilo_builder *builder,
- const struct ilo_ve_state *ve)
+ const struct ilo_state_vf *vf)
{
uint8_t cmd_len;
uint32_t *dw;
- unsigned i;
ILO_DEV_ASSERT(builder->dev, 6, 8);
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 92:
- *
- * "At least one VERTEX_ELEMENT_STATE structure must be included."
- *
- * From the Sandy Bridge PRM, volume 2 part 1, page 93:
- *
- * "Up to 34 (DevSNB+) vertex elements are supported."
- */
- assert(ve->count + ve->prepend_nosrc_cso >= 1);
- assert(ve->count + ve->prepend_nosrc_cso <= 34);
-
- STATIC_ASSERT(Elements(ve->cso[0].payload) == 2);
+ cmd_len = 1 + 2 * (vf->internal_ve_count + vf->user_ve_count);
- cmd_len = 1 + 2 * (ve->count + ve->prepend_nosrc_cso);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) | (cmd_len - 2);
dw++;
- if (ve->prepend_nosrc_cso) {
- memcpy(dw, ve->nosrc_cso.payload, sizeof(ve->nosrc_cso.payload));
- dw += 2;
- }
-
- for (i = 0; i < ve->count - ve->last_cso_edgeflag; i++) {
- memcpy(dw, ve->cso[i].payload, sizeof(ve->cso[i].payload));
- dw += 2;
+ /*
+ * see vf_params_set_gen6_internal_ve() and
+ * vf_set_gen6_3DSTATE_VERTEX_ELEMENTS()
+ */
+ if (vf->internal_ve_count) {
+ memcpy(dw, vf->internal_ve,
+ sizeof(vf->internal_ve[0]) * vf->internal_ve_count);
+ dw += 2 * vf->internal_ve_count;
}
- if (ve->last_cso_edgeflag)
- memcpy(dw, ve->edgeflag_cso.payload, sizeof(ve->edgeflag_cso.payload));
+ memcpy(dw, vf->user_ve, sizeof(vf->user_ve[0]) * vf->user_ve_count);
}
static inline void
gen6_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder,
- const struct ilo_ib_state *ib,
- bool enable_cut_index)
+ const struct ilo_state_vf *vf,
+ const struct ilo_state_index_buffer *ib)
{
const uint8_t cmd_len = 3;
- struct ilo_buffer *buf = ilo_buffer(ib->hw_resource);
- uint32_t start_offset, end_offset;
- int format;
- uint32_t *dw;
+ uint32_t dw0, *dw;
unsigned pos;
ILO_DEV_ASSERT(builder->dev, 6, 7.5);
- if (!buf)
- return;
-
- /* this is moved to the new 3DSTATE_VF */
- if (ilo_dev_gen(builder->dev) >= ILO_GEN(7.5))
- assert(!enable_cut_index);
-
- switch (ib->hw_index_size) {
- case 4:
- format = GEN6_IB_DW0_FORMAT_DWORD;
- break;
- case 2:
- format = GEN6_IB_DW0_FORMAT_WORD;
- break;
- case 1:
- format = GEN6_IB_DW0_FORMAT_BYTE;
- break;
- default:
- assert(!"unknown index size");
- format = GEN6_IB_DW0_FORMAT_BYTE;
- break;
- }
+ dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_INDEX_BUFFER) | (cmd_len - 2) |
+ builder->mocs << GEN6_IB_DW0_MOCS__SHIFT;
/*
- * set start_offset to 0 here and adjust pipe_draw_info::start with
- * ib->draw_start_offset in 3DPRIMITIVE
+ * see index_buffer_set_gen8_3DSTATE_INDEX_BUFFER() and
+ * vf_params_set_gen6_3dstate_index_buffer()
*/
- start_offset = 0;
- end_offset = buf->bo_size;
-
- /* end_offset must also be aligned and is inclusive */
- end_offset -= (end_offset % ib->hw_index_size);
- end_offset--;
+ dw0 |= ib->ib[0];
+ if (ilo_dev_gen(builder->dev) <= ILO_GEN(7))
+ dw0 |= vf->cut[0];
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_INDEX_BUFFER) | (cmd_len - 2) |
- builder->mocs << GEN6_IB_DW0_MOCS__SHIFT |
- format;
- if (enable_cut_index)
- dw[0] |= GEN6_IB_DW0_CUT_INDEX_ENABLE;
-
- ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0);
- ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0);
+ dw[0] = dw0;
+ if (ib->need_bo) {
+ ilo_builder_batch_reloc(builder, pos + 1, ib->bo, ib->ib[1], 0);
+ ilo_builder_batch_reloc(builder, pos + 2, ib->bo, ib->ib[2], 0);
+ } else {
+ dw[1] = 0;
+ dw[2] = 0;
+ }
}
static inline void
gen8_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder,
- const struct ilo_ib_state *ib)
+ const struct ilo_state_vf *vf,
+ const struct ilo_state_index_buffer *ib)
{
const uint8_t cmd_len = 5;
- struct ilo_buffer *buf = ilo_buffer(ib->hw_resource);
- int format;
uint32_t *dw;
unsigned pos;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- if (!buf)
- return;
-
- switch (ib->hw_index_size) {
- case 4:
- format = GEN8_IB_DW1_FORMAT_DWORD;
- break;
- case 2:
- format = GEN8_IB_DW1_FORMAT_WORD;
- break;
- case 1:
- format = GEN8_IB_DW1_FORMAT_BYTE;
- break;
- default:
- assert(!"unknown index size");
- format = GEN8_IB_DW1_FORMAT_BYTE;
- break;
- }
-
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_INDEX_BUFFER) | (cmd_len - 2);
- dw[1] = format |
+ /* see index_buffer_set_gen8_3DSTATE_INDEX_BUFFER() */
+ dw[1] = ib->ib[0] |
builder->mocs << GEN8_IB_DW1_MOCS__SHIFT;
- dw[4] = buf->bo_size;
- /* ignore ib->offset here in favor of adjusting 3DPRIMITIVE */
- ilo_builder_batch_reloc64(builder, pos + 2, buf->bo, 0, 0);
+ if (ib->need_bo) {
+ ilo_builder_batch_reloc64(builder, pos + 2, ib->bo, ib->ib[1], 0);
+ } else {
+ dw[2] = 0;
+ dw[3] = 0;
+ }
+
+ dw[4] = ib->ib[2];
}
static inline void
gen6_3DSTATE_VS(struct ilo_builder *builder,
- const struct ilo_shader_state *vs)
+ const struct ilo_state_vs *vs,
+ uint32_t kernel_offset)
{
const uint8_t cmd_len = 6;
- const struct ilo_shader_cso *cso;
- uint32_t dw2, dw4, dw5, *dw;
+ uint32_t *dw;
ILO_DEV_ASSERT(builder->dev, 6, 7.5);
- cso = ilo_shader_get_kernel_cso(vs);
- dw2 = cso->payload[0];
- dw4 = cso->payload[1];
- dw5 = cso->payload[2];
-
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
- dw[1] = ilo_shader_get_kernel_offset(vs);
- dw[2] = dw2;
- dw[3] = 0; /* scratch */
- dw[4] = dw4;
- dw[5] = dw5;
+ dw[1] = kernel_offset;
+ /* see vs_set_gen6_3DSTATE_VS() */
+ dw[2] = vs->vs[0];
+ dw[3] = vs->vs[1];
+ dw[4] = vs->vs[2];
+ dw[5] = vs->vs[3];
}
static inline void
gen8_3DSTATE_VS(struct ilo_builder *builder,
- const struct ilo_shader_state *vs,
- uint32_t clip_plane_enable)
+ const struct ilo_state_vs *vs,
+ uint32_t kernel_offset)
{
const uint8_t cmd_len = 9;
- const struct ilo_shader_cso *cso;
- uint32_t dw3, dw6, dw7, dw8, *dw;
+ uint32_t *dw;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- cso = ilo_shader_get_kernel_cso(vs);
- dw3 = cso->payload[0];
- dw6 = cso->payload[1];
- dw7 = cso->payload[2];
- dw8 = clip_plane_enable << GEN8_VS_DW8_UCP_CLIP_ENABLES__SHIFT;
-
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
- dw[1] = ilo_shader_get_kernel_offset(vs);
+ dw[1] = kernel_offset;
dw[2] = 0;
- dw[3] = dw3;
- dw[4] = 0; /* scratch */
+ /* see vs_set_gen6_3DSTATE_VS() */
+ dw[3] = vs->vs[0];
+ dw[4] = vs->vs[1];
dw[5] = 0;
- dw[6] = dw6;
- dw[7] = dw7;
- dw[8] = dw8;
+ dw[6] = vs->vs[2];
+ dw[7] = vs->vs[3];
+ dw[8] = vs->vs[4];
}
static inline void
-gen6_disable_3DSTATE_VS(struct ilo_builder *builder)
+gen7_3DSTATE_HS(struct ilo_builder *builder,
+ const struct ilo_state_hs *hs,
+ uint32_t kernel_offset)
{
- const uint8_t cmd_len = 6;
+ const uint8_t cmd_len = 7;
uint32_t *dw;
- ILO_DEV_ASSERT(builder->dev, 6, 7.5);
+ ILO_DEV_ASSERT(builder->dev, 7, 7.5);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
- dw[1] = 0;
- dw[2] = 0;
- dw[3] = 0;
- dw[4] = 0;
- dw[5] = 0;
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2);
+ /* see hs_set_gen7_3DSTATE_HS() */
+ dw[1] = hs->hs[0];
+ dw[2] = hs->hs[1];
+ dw[3] = kernel_offset;
+ dw[4] = hs->hs[2];
+ dw[5] = hs->hs[3];
+ dw[6] = 0;
}
static inline void
-gen7_disable_3DSTATE_HS(struct ilo_builder *builder)
+gen8_3DSTATE_HS(struct ilo_builder *builder,
+ const struct ilo_state_hs *hs,
+ uint32_t kernel_offset)
{
- const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 9 : 7;
+ const uint8_t cmd_len = 9;
uint32_t *dw;
- ILO_DEV_ASSERT(builder->dev, 7, 8);
+ ILO_DEV_ASSERT(builder->dev, 8, 8);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2);
- dw[1] = 0;
- dw[2] = 0;
- dw[3] = 0;
+ /* see hs_set_gen7_3DSTATE_HS() */
+ dw[1] = hs->hs[0];
+ dw[2] = hs->hs[1];
+ dw[3] = kernel_offset;
dw[4] = 0;
- dw[5] = 0;
+ dw[5] = hs->hs[2];
dw[6] = 0;
- if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
- dw[7] = 0;
- dw[8] = 0;
- }
+ dw[7] = hs->hs[3];
+ dw[8] = 0;
}
static inline void
-gen7_3DSTATE_TE(struct ilo_builder *builder)
+gen7_3DSTATE_TE(struct ilo_builder *builder,
+ const struct ilo_state_ds *ds)
{
const uint8_t cmd_len = 4;
uint32_t *dw;
@@ -814,108 +569,61 @@ gen7_3DSTATE_TE(struct ilo_builder *builder)
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_TE) | (cmd_len - 2);
- dw[1] = 0;
- dw[2] = 0;
- dw[3] = 0;
+ /* see ds_set_gen7_3DSTATE_TE() */
+ dw[1] = ds->te[0];
+ dw[2] = ds->te[1];
+ dw[3] = ds->te[2];
}
static inline void
-gen7_disable_3DSTATE_DS(struct ilo_builder *builder)
+gen7_3DSTATE_DS(struct ilo_builder *builder,
+ const struct ilo_state_ds *ds,
+ uint32_t kernel_offset)
{
- const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 9 : 6;
+ const uint8_t cmd_len = 6;
uint32_t *dw;
- ILO_DEV_ASSERT(builder->dev, 7, 8);
+ ILO_DEV_ASSERT(builder->dev, 7, 7.5);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2);
- dw[1] = 0;
- dw[2] = 0;
- dw[3] = 0;
- dw[4] = 0;
- dw[5] = 0;
- if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
- dw[6] = 0;
- dw[7] = 0;
- dw[8] = 0;
- }
-}
-
-static inline void
-gen6_3DSTATE_GS(struct ilo_builder *builder,
- const struct ilo_shader_state *gs)
-{
- const uint8_t cmd_len = 7;
- const struct ilo_shader_cso *cso;
- uint32_t dw2, dw4, dw5, dw6, *dw;
-
- ILO_DEV_ASSERT(builder->dev, 6, 6);
-
- cso = ilo_shader_get_kernel_cso(gs);
- dw2 = cso->payload[0];
- dw4 = cso->payload[1];
- dw5 = cso->payload[2];
- dw6 = cso->payload[3];
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
-
- dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
- dw[1] = ilo_shader_get_kernel_offset(gs);
- dw[2] = dw2;
- dw[3] = 0; /* scratch */
- dw[4] = dw4;
- dw[5] = dw5;
- dw[6] = dw6;
+ /* see ds_set_gen7_3DSTATE_DS() */
+ dw[1] = kernel_offset;
+ dw[2] = ds->ds[0];
+ dw[3] = ds->ds[1];
+ dw[4] = ds->ds[2];
+ dw[5] = ds->ds[3];
}
static inline void
-gen6_so_3DSTATE_GS(struct ilo_builder *builder,
- const struct ilo_shader_state *vs,
- int verts_per_prim)
+gen8_3DSTATE_DS(struct ilo_builder *builder,
+ const struct ilo_state_ds *ds,
+ uint32_t kernel_offset)
{
- const uint8_t cmd_len = 7;
- struct ilo_shader_cso cso;
- enum ilo_kernel_param param;
- uint32_t dw2, dw4, dw5, dw6, *dw;
-
- ILO_DEV_ASSERT(builder->dev, 6, 6);
-
- assert(ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO));
-
- switch (verts_per_prim) {
- case 1:
- param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET;
- break;
- case 2:
- param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET;
- break;
- default:
- param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;
- break;
- }
+ const uint8_t cmd_len = 9;
+ uint32_t *dw;
- /* cannot use VS's CSO */
- ilo_gpe_init_gs_cso(builder->dev, vs, &cso);
- dw2 = cso.payload[0];
- dw4 = cso.payload[1];
- dw5 = cso.payload[2];
- dw6 = cso.payload[3];
+ ILO_DEV_ASSERT(builder->dev, 8, 8);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
- dw[1] = ilo_shader_get_kernel_offset(vs) +
- ilo_shader_get_kernel_param(vs, param);
- dw[2] = dw2;
- dw[3] = 0;
- dw[4] = dw4;
- dw[5] = dw5;
- dw[6] = dw6;
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2);
+ /* see ds_set_gen7_3DSTATE_DS() */
+ dw[1] = kernel_offset;
+ dw[2] = 0;
+ dw[3] = ds->ds[0];
+ dw[4] = ds->ds[1];
+ dw[5] = 0;
+ dw[6] = ds->ds[2];
+ dw[7] = ds->ds[3];
+ dw[8] = ds->ds[4];
}
static inline void
-gen6_disable_3DSTATE_GS(struct ilo_builder *builder)
+gen6_3DSTATE_GS(struct ilo_builder *builder,
+ const struct ilo_state_gs *gs,
+ uint32_t kernel_offset)
{
const uint8_t cmd_len = 7;
uint32_t *dw;
@@ -925,13 +633,13 @@ gen6_disable_3DSTATE_GS(struct ilo_builder *builder)
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
- dw[1] = 0;
- dw[2] = 0;
- dw[3] = 0;
- /* honor the valid range of URB read length */
- dw[4] = 1 << GEN6_GS_DW4_URB_READ_LEN__SHIFT;
- dw[5] = GEN6_GS_DW5_STATISTICS;
- dw[6] = 0;
+ dw[1] = kernel_offset;
+ /* see gs_set_gen6_3DSTATE_GS() */
+ dw[2] = gs->gs[0];
+ dw[3] = gs->gs[1];
+ dw[4] = gs->gs[2];
+ dw[5] = gs->gs[3];
+ dw[6] = gs->gs[4];
}
static inline void
@@ -960,183 +668,90 @@ gen6_3DSTATE_GS_SVB_INDEX(struct ilo_builder *builder,
static inline void
gen7_3DSTATE_GS(struct ilo_builder *builder,
- const struct ilo_shader_state *gs)
+ const struct ilo_state_gs *gs,
+ uint32_t kernel_offset)
{
const uint8_t cmd_len = 7;
- const struct ilo_shader_cso *cso;
- uint32_t dw2, dw4, dw5, *dw;
+ uint32_t *dw;
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
- cso = ilo_shader_get_kernel_cso(gs);
- dw2 = cso->payload[0];
- dw4 = cso->payload[1];
- dw5 = cso->payload[2];
-
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
- dw[1] = ilo_shader_get_kernel_offset(gs);
- dw[2] = dw2;
- dw[3] = 0; /* scratch */
- dw[4] = dw4;
- dw[5] = dw5;
+ dw[1] = kernel_offset;
+ /* see gs_set_gen7_3DSTATE_GS() */
+ dw[2] = gs->gs[0];
+ dw[3] = gs->gs[1];
+ dw[4] = gs->gs[2];
+ dw[5] = gs->gs[3];
dw[6] = 0;
}
static inline void
-gen7_disable_3DSTATE_GS(struct ilo_builder *builder)
+gen8_3DSTATE_GS(struct ilo_builder *builder,
+ const struct ilo_state_gs *gs,
+ uint32_t kernel_offset)
{
- const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 10 : 7;
+ const uint8_t cmd_len = 10;
uint32_t *dw;
- ILO_DEV_ASSERT(builder->dev, 7, 8);
+ ILO_DEV_ASSERT(builder->dev, 8, 8);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
- dw[1] = 0;
+ dw[1] = kernel_offset;
dw[2] = 0;
- dw[3] = 0;
- dw[4] = 0;
-
- if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
- dw[7] = GEN8_GS_DW7_STATISTICS;
- dw[8] = 0;
- dw[9] = 0;
- } else {
- dw[5] = GEN7_GS_DW5_STATISTICS;
- dw[6] = 0;
- }
+ /* see gs_set_gen7_3DSTATE_GS() */
+ dw[3] = gs->gs[0];
+ dw[4] = gs->gs[1];
+ dw[5] = 0;
+ dw[6] = gs->gs[2];
+ dw[7] = gs->gs[3];
+ dw[8] = 0;
+ dw[9] = gs->gs[4];
}
static inline void
gen7_3DSTATE_STREAMOUT(struct ilo_builder *builder,
- int render_stream,
- bool render_disable,
- int vertex_attrib_count,
- const int *buf_strides)
+ const struct ilo_state_sol *sol)
{
const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 5 : 3;
uint32_t *dw;
- int buf_mask;
ILO_DEV_ASSERT(builder->dev, 7, 8);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_STREAMOUT) | (cmd_len - 2);
-
- dw[1] = render_stream << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT;
- if (render_disable)
- dw[1] |= GEN7_SO_DW1_RENDER_DISABLE;
-
- if (buf_strides) {
- buf_mask = ((bool) buf_strides[3]) << 3 |
- ((bool) buf_strides[2]) << 2 |
- ((bool) buf_strides[1]) << 1 |
- ((bool) buf_strides[0]);
- if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
- dw[3] = buf_strides[1] << 16 | buf_strides[0];
- dw[4] = buf_strides[3] << 16 | buf_strides[1];
- }
- } else {
- buf_mask = 0;
- }
-
- if (buf_mask) {
- int read_len;
-
- dw[1] |= GEN7_SO_DW1_SO_ENABLE |
- GEN7_SO_DW1_STATISTICS;
- /* API_OPENGL */
- if (true)
- dw[1] |= GEN7_SO_DW1_REORDER_TRAILING;
- if (ilo_dev_gen(builder->dev) < ILO_GEN(8))
- dw[1] |= buf_mask << GEN7_SO_DW1_BUFFER_ENABLES__SHIFT;
-
- read_len = (vertex_attrib_count + 1) / 2;
- if (!read_len)
- read_len = 1;
-
- dw[2] = 0 << GEN7_SO_DW2_STREAM3_READ_OFFSET__SHIFT |
- (read_len - 1) << GEN7_SO_DW2_STREAM3_READ_LEN__SHIFT |
- 0 << GEN7_SO_DW2_STREAM2_READ_OFFSET__SHIFT |
- (read_len - 1) << GEN7_SO_DW2_STREAM2_READ_LEN__SHIFT |
- 0 << GEN7_SO_DW2_STREAM1_READ_OFFSET__SHIFT |
- (read_len - 1) << GEN7_SO_DW2_STREAM1_READ_LEN__SHIFT |
- 0 << GEN7_SO_DW2_STREAM0_READ_OFFSET__SHIFT |
- (read_len - 1) << GEN7_SO_DW2_STREAM0_READ_LEN__SHIFT;
- } else {
- dw[2] = 0;
+ /* see sol_set_gen7_3DSTATE_STREAMOUT() */
+ dw[1] = sol->streamout[0];
+ dw[2] = sol->streamout[1];
+ if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
+ dw[3] = sol->strides[1] << GEN8_SO_DW3_BUFFER1_PITCH__SHIFT |
+ sol->strides[0] << GEN8_SO_DW3_BUFFER0_PITCH__SHIFT;
+ dw[4] = sol->strides[3] << GEN8_SO_DW4_BUFFER3_PITCH__SHIFT |
+ sol->strides[2] << GEN8_SO_DW4_BUFFER2_PITCH__SHIFT;
}
}
static inline void
gen7_3DSTATE_SO_DECL_LIST(struct ilo_builder *builder,
- const struct pipe_stream_output_info *so_info)
+ const struct ilo_state_sol *sol)
{
/*
* Note that "DWord Length" has 9 bits for this command and the type of
* cmd_len cannot be uint8_t.
*/
uint16_t cmd_len;
- struct {
- int buf_selects;
- int decl_count;
- uint16_t decls[128];
- } streams[4];
- unsigned buf_offsets[PIPE_MAX_SO_BUFFERS];
- int hw_decl_count, i;
+ int cmd_decl_count;
uint32_t *dw;
ILO_DEV_ASSERT(builder->dev, 7, 8);
- memset(streams, 0, sizeof(streams));
- memset(buf_offsets, 0, sizeof(buf_offsets));
-
- for (i = 0; i < so_info->num_outputs; i++) {
- unsigned decl, st, buf, reg, mask;
-
- st = so_info->output[i].stream;
- buf = so_info->output[i].output_buffer;
-
- /* pad with holes */
- while (buf_offsets[buf] < so_info->output[i].dst_offset) {
- int num_dwords;
-
- num_dwords = so_info->output[i].dst_offset - buf_offsets[buf];
- if (num_dwords > 4)
- num_dwords = 4;
-
- decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
- GEN7_SO_DECL_HOLE_FLAG |
- ((1 << num_dwords) - 1) << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
-
- assert(streams[st].decl_count < Elements(streams[st].decls));
- streams[st].decls[streams[st].decl_count++] = decl;
- buf_offsets[buf] += num_dwords;
- }
- assert(buf_offsets[buf] == so_info->output[i].dst_offset);
-
- reg = so_info->output[i].register_index;
- mask = ((1 << so_info->output[i].num_components) - 1) <<
- so_info->output[i].start_component;
-
- decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
- reg << GEN7_SO_DECL_REG_INDEX__SHIFT |
- mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
-
- assert(streams[st].decl_count < Elements(streams[st].decls));
-
- streams[st].buf_selects |= 1 << buf;
- streams[st].decls[streams[st].decl_count++] = decl;
- buf_offsets[buf] += so_info->output[i].num_components;
- }
-
if (ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) {
- hw_decl_count = MAX4(streams[0].decl_count, streams[1].decl_count,
- streams[2].decl_count, streams[3].decl_count);
+ cmd_decl_count = sol->decl_count;
} else {
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 201:
@@ -1145,100 +760,97 @@ gen7_3DSTATE_SO_DECL_LIST(struct ilo_builder *builder,
* whenever this command is issued. The "Num Entries [n]" fields
* still contain the actual numbers of valid decls."
*/
- hw_decl_count = 128;
+ cmd_decl_count = 128;
}
- cmd_len = 3 + 2 * hw_decl_count;
+ cmd_len = 3 + 2 * cmd_decl_count;
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SO_DECL_LIST) | (cmd_len - 2);
- dw[1] = streams[3].buf_selects << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT |
- streams[2].buf_selects << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT |
- streams[1].buf_selects << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT |
- streams[0].buf_selects << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT;
- dw[2] = streams[3].decl_count << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT |
- streams[2].decl_count << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT |
- streams[1].decl_count << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT |
- streams[0].decl_count << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT;
- dw += 3;
-
- for (i = 0; i < hw_decl_count; i++) {
- dw[0] = streams[1].decls[i] << 16 | streams[0].decls[i];
- dw[1] = streams[3].decls[i] << 16 | streams[2].decls[i];
- dw += 2;
+ /* see sol_set_gen7_3DSTATE_SO_DECL_LIST() */
+ dw[1] = sol->so_decl[0];
+ dw[2] = sol->so_decl[1];
+ memcpy(&dw[3], sol->decl, sizeof(sol->decl[0]) * sol->decl_count);
+
+ if (sol->decl_count < cmd_decl_count) {
+      memset(&dw[3 + 2 * sol->decl_count], 0, sizeof(sol->decl[0]) *
+            (cmd_decl_count - sol->decl_count));
}
}
static inline void
-gen7_3DSTATE_SO_BUFFER(struct ilo_builder *builder, int index, int stride,
- const struct pipe_stream_output_target *so_target)
+gen7_3DSTATE_SO_BUFFER(struct ilo_builder *builder,
+ const struct ilo_state_sol *sol,
+ const struct ilo_state_sol_buffer *sb,
+ uint8_t buffer)
{
- const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 8 : 4;
- struct ilo_buffer *buf;
- int start, end;
+ const uint8_t cmd_len = 4;
uint32_t *dw;
unsigned pos;
- ILO_DEV_ASSERT(builder->dev, 7, 8);
-
- buf = ilo_buffer(so_target->buffer);
-
- /* DWord-aligned */
- assert(stride % 4 == 0);
- assert(so_target->buffer_offset % 4 == 0);
+ ILO_DEV_ASSERT(builder->dev, 7, 7.5);
- stride &= ~3;
- start = so_target->buffer_offset & ~3;
- end = (start + so_target->buffer_size) & ~3;
+ assert(buffer < ILO_STATE_SOL_MAX_BUFFER_COUNT);
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SO_BUFFER) | (cmd_len - 2);
- dw[1] = index << GEN7_SO_BUF_DW1_INDEX__SHIFT |
- stride;
-
- if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
- dw[1] |= builder->mocs << GEN8_SO_BUF_DW1_MOCS__SHIFT;
-
- dw[4] = end - start;
- dw[5] = 0;
- dw[6] = 0;
- dw[7] = 0;
-
- ilo_builder_batch_reloc64(builder, pos + 2,
- buf->bo, start, INTEL_RELOC_WRITE);
+ /* see sol_buffer_set_gen7_3dstate_so_buffer() */
+ dw[1] = buffer << GEN7_SO_BUF_DW1_INDEX__SHIFT |
+ builder->mocs << GEN7_SO_BUF_DW1_MOCS__SHIFT |
+ sol->strides[buffer] << GEN7_SO_BUF_DW1_PITCH__SHIFT;
+
+ if (sb->need_bo) {
+ ilo_builder_batch_reloc(builder, pos + 2, sb->bo,
+ sb->so_buf[0], INTEL_RELOC_WRITE);
+ ilo_builder_batch_reloc(builder, pos + 3, sb->bo,
+ sb->so_buf[1], INTEL_RELOC_WRITE);
} else {
- dw[1] |= builder->mocs << GEN7_SO_BUF_DW1_MOCS__SHIFT;
-
- ilo_builder_batch_reloc(builder, pos + 2,
- buf->bo, start, INTEL_RELOC_WRITE);
- ilo_builder_batch_reloc(builder, pos + 3,
- buf->bo, end, INTEL_RELOC_WRITE);
+ dw[2] = 0;
+ dw[3] = 0;
}
}
static inline void
-gen7_disable_3DSTATE_SO_BUFFER(struct ilo_builder *builder, int index)
+gen8_3DSTATE_SO_BUFFER(struct ilo_builder *builder,
+ const struct ilo_state_sol *sol,
+ const struct ilo_state_sol_buffer *sb,
+ uint8_t buffer)
{
- const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 8 : 4;
+ const uint8_t cmd_len = 8;
uint32_t *dw;
+ unsigned pos;
- ILO_DEV_ASSERT(builder->dev, 7, 8);
+ ILO_DEV_ASSERT(builder->dev, 8, 8);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SO_BUFFER) | (cmd_len - 2);
- dw[1] = index << GEN7_SO_BUF_DW1_INDEX__SHIFT;
- dw[2] = 0;
- dw[3] = 0;
+ /* see sol_buffer_set_gen8_3dstate_so_buffer() */
+ dw[1] = sb->so_buf[0] |
+ buffer << GEN7_SO_BUF_DW1_INDEX__SHIFT |
+ builder->mocs << GEN8_SO_BUF_DW1_MOCS__SHIFT;
+
+ if (sb->need_bo) {
+ ilo_builder_batch_reloc64(builder, pos + 2, sb->bo,
+ sb->so_buf[1], INTEL_RELOC_WRITE);
+ } else {
+ dw[2] = 0;
+ dw[3] = 0;
+ }
- if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
- dw[4] = 0;
+ dw[4] = sb->so_buf[2];
+
+ if (sb->need_write_offset_bo) {
+ ilo_builder_batch_reloc64(builder, pos + 5, sb->write_offset_bo,
+ sizeof(uint32_t) * buffer, INTEL_RELOC_WRITE);
+ } else {
dw[5] = 0;
dw[6] = 0;
- dw[7] = 0;
}
+
+ dw[7] = sb->so_buf[3];
}
static inline void
@@ -1627,8 +1239,7 @@ gen6_BINDING_TABLE_STATE(struct ilo_builder *builder,
static inline uint32_t
gen6_SURFACE_STATE(struct ilo_builder *builder,
- const struct ilo_view_surface *surf,
- bool for_render)
+ const struct ilo_state_surface *surf)
{
int state_align, state_len;
uint32_t state_offset, *dw;
@@ -1641,7 +1252,7 @@ gen6_SURFACE_STATE(struct ilo_builder *builder,
state_offset = ilo_builder_surface_pointer(builder,
ILO_BUILDER_ITEM_SURFACE, state_align, state_len, &dw);
- memcpy(dw, surf->payload, state_len << 2);
+ memcpy(dw, surf->surface, state_len << 2);
if (surf->bo) {
const uint32_t mocs = (surf->scanout) ?
@@ -1650,7 +1261,7 @@ gen6_SURFACE_STATE(struct ilo_builder *builder,
dw[1] |= mocs << GEN8_SURFACE_DW1_MOCS__SHIFT;
ilo_builder_surface_reloc64(builder, state_offset, 8, surf->bo,
- surf->payload[8], (for_render) ? INTEL_RELOC_WRITE : 0);
+ surf->surface[8], (surf->readonly) ? 0 : INTEL_RELOC_WRITE);
}
} else {
state_align = 32;
@@ -1658,7 +1269,7 @@ gen6_SURFACE_STATE(struct ilo_builder *builder,
state_offset = ilo_builder_surface_pointer(builder,
ILO_BUILDER_ITEM_SURFACE, state_align, state_len, &dw);
- memcpy(dw, surf->payload, state_len << 2);
+ memcpy(dw, surf->surface, state_len << 2);
if (surf->bo) {
/*
@@ -1668,7 +1279,7 @@ gen6_SURFACE_STATE(struct ilo_builder *builder,
dw[5] |= builder->mocs << GEN6_SURFACE_DW5_MOCS__SHIFT;
ilo_builder_surface_reloc(builder, state_offset, 1, surf->bo,
- surf->payload[1], (for_render) ? INTEL_RELOC_WRITE : 0);
+ surf->surface[1], (surf->readonly) ? 0 : INTEL_RELOC_WRITE);
}
}
@@ -1676,55 +1287,13 @@ gen6_SURFACE_STATE(struct ilo_builder *builder,
}
static inline uint32_t
-gen6_so_SURFACE_STATE(struct ilo_builder *builder,
- const struct pipe_stream_output_target *so,
- const struct pipe_stream_output_info *so_info,
- int so_index)
-{
- struct ilo_buffer *buf = ilo_buffer(so->buffer);
- unsigned bo_offset, struct_size;
- enum pipe_format elem_format;
- struct ilo_view_surface surf;
-
- ILO_DEV_ASSERT(builder->dev, 6, 6);
-
- bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
- struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4;
-
- switch (so_info->output[so_index].num_components) {
- case 1:
- elem_format = PIPE_FORMAT_R32_FLOAT;
- break;
- case 2:
- elem_format = PIPE_FORMAT_R32G32_FLOAT;
- break;
- case 3:
- elem_format = PIPE_FORMAT_R32G32B32_FLOAT;
- break;
- case 4:
- elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
- break;
- default:
- assert(!"unexpected SO components length");
- elem_format = PIPE_FORMAT_R32_FLOAT;
- break;
- }
-
- ilo_gpe_init_view_surface_for_buffer(builder->dev, buf, bo_offset,
- so->buffer_size, struct_size, elem_format, false, true, &surf);
-
- return gen6_SURFACE_STATE(builder, &surf, false);
-}
-
-static inline uint32_t
gen6_SAMPLER_STATE(struct ilo_builder *builder,
- const struct ilo_sampler_cso * const *samplers,
- const struct pipe_sampler_view * const *views,
+ const struct ilo_state_sampler *samplers,
const uint32_t *sampler_border_colors,
- int num_samplers)
+ int sampler_count)
{
const int state_align = 32;
- const int state_len = 4 * num_samplers;
+ const int state_len = 4 * sampler_count;
uint32_t state_offset, *dw;
int i;
@@ -1735,9 +1304,9 @@ gen6_SAMPLER_STATE(struct ilo_builder *builder,
*
* "The sampler state is stored as an array of up to 16 elements..."
*/
- assert(num_samplers <= 16);
+ assert(sampler_count <= 16);
- if (!num_samplers)
+ if (!sampler_count)
return 0;
/*
@@ -1749,86 +1318,19 @@ gen6_SAMPLER_STATE(struct ilo_builder *builder,
*
* It also applies to other shader stages.
*/
- ilo_builder_dynamic_pad_top(builder, 4 * (4 - (num_samplers % 4)));
+ ilo_builder_dynamic_pad_top(builder, 4 * (4 - (sampler_count % 4)));
state_offset = ilo_builder_dynamic_pointer(builder,
ILO_BUILDER_ITEM_SAMPLER, state_align, state_len, &dw);
- for (i = 0; i < num_samplers; i++) {
- const struct ilo_sampler_cso *sampler = samplers[i];
- const struct pipe_sampler_view *view = views[i];
- const uint32_t border_color = sampler_border_colors[i];
- uint32_t dw_filter, dw_wrap;
-
- /* there may be holes */
- if (!sampler || !view) {
- /* disabled sampler */
- dw[0] = 1 << 31;
- dw[1] = 0;
- dw[2] = 0;
- dw[3] = 0;
- dw += 4;
-
- continue;
- }
-
- /* determine filter and wrap modes */
- switch (view->texture->target) {
- case PIPE_TEXTURE_1D:
- dw_filter = (sampler->anisotropic) ?
- sampler->dw_filter_aniso : sampler->dw_filter;
- dw_wrap = sampler->dw_wrap_1d;
- break;
- case PIPE_TEXTURE_3D:
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 103:
- *
- * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
- * surfaces of type SURFTYPE_3D."
- */
- dw_filter = sampler->dw_filter;
- dw_wrap = sampler->dw_wrap;
- break;
- case PIPE_TEXTURE_CUBE:
- dw_filter = (sampler->anisotropic) ?
- sampler->dw_filter_aniso : sampler->dw_filter;
- dw_wrap = sampler->dw_wrap_cube;
- break;
- default:
- dw_filter = (sampler->anisotropic) ?
- sampler->dw_filter_aniso : sampler->dw_filter;
- dw_wrap = sampler->dw_wrap;
- break;
- }
+ for (i = 0; i < sampler_count; i++) {
+ /* see sampler_set_gen6_SAMPLER_STATE() */
+ dw[0] = samplers[i].sampler[0];
+ dw[1] = samplers[i].sampler[1];
+ dw[3] = samplers[i].sampler[2];
- dw[0] = sampler->payload[0];
- dw[1] = sampler->payload[1];
- assert(!(border_color & 0x1f));
- dw[2] = border_color;
- dw[3] = sampler->payload[2];
-
- dw[0] |= dw_filter;
-
- if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) {
- dw[3] |= dw_wrap;
- }
- else {
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 21:
- *
- * "[DevSNB] Errata: Incorrect behavior is observed in cases
- * where the min and mag mode filters are different and
- * SurfMinLOD is nonzero. The determination of MagMode uses the
- * following equation instead of the one in the above
- * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
- *
- * As a way to work around that, we set Base to
- * view->u.tex.first_level.
- */
- dw[0] |= view->u.tex.first_level << 22;
-
- dw[1] |= dw_wrap;
- }
+ assert(!(sampler_border_colors[i] & 0x1f));
+ dw[2] = sampler_border_colors[i];
dw += 4;
}
@@ -1838,7 +1340,7 @@ gen6_SAMPLER_STATE(struct ilo_builder *builder,
static inline uint32_t
gen6_SAMPLER_BORDER_COLOR_STATE(struct ilo_builder *builder,
- const struct ilo_sampler_cso *sampler)
+ const struct ilo_state_sampler_border *border)
{
const int state_align =
(ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 64 : 32;
@@ -1846,11 +1348,12 @@ gen6_SAMPLER_BORDER_COLOR_STATE(struct ilo_builder *builder,
ILO_DEV_ASSERT(builder->dev, 6, 8);
- assert(Elements(sampler->payload) >= 3 + state_len);
-
- /* see ilo_gpe_init_sampler_cso() */
+ /*
+ * see border_set_gen6_SAMPLER_BORDER_COLOR_STATE() and
+ * border_set_gen7_SAMPLER_BORDER_COLOR_STATE()
+ */
return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_BLOB,
- state_align, state_len, &sampler->payload[3]);
+ state_align, state_len, border->color);
}
static inline uint32_t
diff --git a/src/gallium/drivers/ilo/core/ilo_builder_decode.c b/src/gallium/drivers/ilo/core/ilo_builder_decode.c
index cedaab1559d..c5a98c91204 100644
--- a/src/gallium/drivers/ilo/core/ilo_builder_decode.c
+++ b/src/gallium/drivers/ilo/core/ilo_builder_decode.c
@@ -319,7 +319,7 @@ writer_decode_color_calc(const struct ilo_builder *builder,
"stencil ref %d, bf stencil ref %d\n",
GEN_EXTRACT(dw, GEN6_CC_DW0_ALPHATEST) ? "FLOAT32" : "UNORM8",
(bool) (dw & GEN6_CC_DW0_ROUND_DISABLE_DISABLE),
- GEN_EXTRACT(dw, GEN6_CC_DW0_STENCIL0_REF),
+ GEN_EXTRACT(dw, GEN6_CC_DW0_STENCIL_REF),
GEN_EXTRACT(dw, GEN6_CC_DW0_STENCIL1_REF));
writer_dw(builder, which, item->offset, 1, "CC\n");
@@ -347,13 +347,13 @@ writer_decode_depth_stencil(const struct ilo_builder *builder,
dw = writer_dw(builder, which, item->offset, 0, "D_S");
ilo_printf("stencil %sable, func %d, write %sable\n",
(dw & GEN6_ZS_DW0_STENCIL_TEST_ENABLE) ? "en" : "dis",
- GEN_EXTRACT(dw, GEN6_ZS_DW0_STENCIL0_FUNC),
+ GEN_EXTRACT(dw, GEN6_ZS_DW0_STENCIL_FUNC),
(dw & GEN6_ZS_DW0_STENCIL_WRITE_ENABLE) ? "en" : "dis");
dw = writer_dw(builder, which, item->offset, 1, "D_S");
ilo_printf("stencil test mask 0x%x, write mask 0x%x\n",
- GEN_EXTRACT(dw, GEN6_ZS_DW1_STENCIL0_VALUEMASK),
- GEN_EXTRACT(dw, GEN6_ZS_DW1_STENCIL0_WRITEMASK));
+ GEN_EXTRACT(dw, GEN6_ZS_DW1_STENCIL_TEST_MASK),
+ GEN_EXTRACT(dw, GEN6_ZS_DW1_STENCIL_WRITE_MASK));
dw = writer_dw(builder, which, item->offset, 2, "D_S");
ilo_printf("depth test %sable, func %d, write %sable\n",
diff --git a/src/gallium/drivers/ilo/core/ilo_builder_media.h b/src/gallium/drivers/ilo/core/ilo_builder_media.h
index 7fbe6d41635..7197104a23e 100644
--- a/src/gallium/drivers/ilo/core/ilo_builder_media.h
+++ b/src/gallium/drivers/ilo/core/ilo_builder_media.h
@@ -29,57 +29,30 @@
#define ILO_BUILDER_MEDIA_H
#include "genhw/genhw.h"
-#include "../ilo_shader.h"
#include "intel_winsys.h"
#include "ilo_core.h"
#include "ilo_dev.h"
+#include "ilo_state_compute.h"
#include "ilo_builder.h"
-struct gen6_idrt_data {
- const struct ilo_shader_state *cs;
-
- uint32_t sampler_offset;
- uint32_t binding_table_offset;
-
- unsigned curbe_size;
- unsigned thread_group_size;
-};
-
static inline void
gen6_MEDIA_VFE_STATE(struct ilo_builder *builder,
- unsigned curbe_alloc, bool use_slm)
+ const struct ilo_state_compute *compute)
{
const uint8_t cmd_len = 8;
- const unsigned idrt_alloc =
- ((ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) ? 64 : 32) * 32;
- int max_threads;
uint32_t *dw;
- ILO_DEV_ASSERT(builder->dev, 7, 7.5);
-
- max_threads = builder->dev->thread_count;
-
- curbe_alloc = align(curbe_alloc, 32);
- assert(idrt_alloc + curbe_alloc <= builder->dev->urb_size / (use_slm + 1));
+ ILO_DEV_ASSERT(builder->dev, 6, 7.5);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(MEDIA, MEDIA_VFE_STATE) | (cmd_len - 2);
- dw[1] = 0; /* scratch */
-
- dw[2] = (max_threads - 1) << GEN6_VFE_DW2_MAX_THREADS__SHIFT |
- 0 << GEN6_VFE_DW2_URB_ENTRY_COUNT__SHIFT |
- GEN6_VFE_DW2_RESET_GATEWAY_TIMER |
- GEN6_VFE_DW2_BYPASS_GATEWAY_CONTROL;
- if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
- dw[2] |= GEN7_VFE_DW2_GPGPU_MODE;
-
+ /* see compute_set_gen6_MEDIA_VFE_STATE() */
+ dw[1] = compute->vfe[0];
+ dw[2] = compute->vfe[1];
dw[3] = 0;
-
- dw[4] = 0 << GEN6_VFE_DW4_URB_ENTRY_SIZE__SHIFT |
- (curbe_alloc / 32);
-
+ dw[4] = compute->vfe[2];
dw[5] = 0;
dw[6] = 0;
dw[7] = 0;
@@ -194,8 +167,10 @@ gen7_GPGPU_WALKER(struct ilo_builder *builder,
static inline uint32_t
gen6_INTERFACE_DESCRIPTOR_DATA(struct ilo_builder *builder,
- const struct gen6_idrt_data *data,
- int idrt_count)
+ const struct ilo_state_compute *compute,
+ const uint32_t *kernel_offsets,
+ const uint32_t *sampler_offsets,
+ const uint32_t *binding_table_offsets)
{
/*
* From the Sandy Bridge PRM, volume 2 part 2, page 34:
@@ -211,61 +186,26 @@ gen6_INTERFACE_DESCRIPTOR_DATA(struct ilo_builder *builder,
* aligned address of the Interface Descriptor data."
*/
const int state_align = 32;
- const int state_len = (32 / 4) * idrt_count;
+ const int state_len = (32 / 4) * compute->idrt_count;
uint32_t state_offset, *dw;
int i;
- ILO_DEV_ASSERT(builder->dev, 7, 7.5);
+ ILO_DEV_ASSERT(builder->dev, 6, 7.5);
state_offset = ilo_builder_dynamic_pointer(builder,
ILO_BUILDER_ITEM_INTERFACE_DESCRIPTOR, state_align, state_len, &dw);
- for (i = 0; i < idrt_count; i++) {
- const struct gen6_idrt_data *idrt = &data[i];
- const struct ilo_shader_state *cs = idrt->cs;
- unsigned sampler_count, bt_size, slm_size;
-
- sampler_count =
- ilo_shader_get_kernel_param(cs, ILO_KERNEL_SAMPLER_COUNT);
- assert(sampler_count <= 16);
- sampler_count = (sampler_count + 3) / 4;
-
- bt_size =
- ilo_shader_get_kernel_param(cs, ILO_KERNEL_SURFACE_TOTAL_COUNT);
- if (bt_size > 31)
- bt_size = 31;
-
- slm_size = ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_LOCAL_SIZE);
-
- assert(idrt->curbe_size / 32 <= 63);
-
- dw[0] = ilo_shader_get_kernel_offset(idrt->cs);
+ for (i = 0; i < compute->idrt_count; i++) {
+ /* see compute_set_gen6_INTERFACE_DESCRIPTOR_DATA() */
+ dw[0] = compute->idrt[i][0] + kernel_offsets[i];
dw[1] = 0;
- dw[2] = idrt->sampler_offset |
- sampler_count << GEN6_IDRT_DW2_SAMPLER_COUNT__SHIFT;
- dw[3] = idrt->binding_table_offset |
- bt_size << GEN6_IDRT_DW3_BINDING_TABLE_SIZE__SHIFT;
-
- dw[4] = (idrt->curbe_size / 32) << GEN6_IDRT_DW4_CURBE_READ_LEN__SHIFT |
- 0 << GEN6_IDRT_DW4_CURBE_READ_OFFSET__SHIFT;
-
- if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) {
- dw[5] = GEN7_IDRT_DW5_ROUNDING_MODE_RTNE;
-
- if (slm_size) {
- assert(slm_size <= 64 * 1024);
- slm_size = util_next_power_of_two((slm_size + 4095) / 4096);
-
- dw[5] |= GEN7_IDRT_DW5_BARRIER_ENABLE |
- slm_size << GEN7_IDRT_DW5_SLM_SIZE__SHIFT |
- idrt->thread_group_size <<
- GEN7_IDRT_DW5_THREAD_GROUP_SIZE__SHIFT;
- }
- } else {
- dw[5] = 0;
- }
-
- dw[6] = 0;
+ dw[2] = compute->idrt[i][1] |
+ sampler_offsets[i];
+ dw[3] = compute->idrt[i][2] |
+ binding_table_offsets[i];
+ dw[4] = compute->idrt[i][3];
+ dw[5] = compute->idrt[i][4];
+ dw[6] = compute->idrt[i][5];
dw[7] = 0;
dw += 8;
diff --git a/src/gallium/drivers/ilo/core/ilo_core.h b/src/gallium/drivers/ilo/core/ilo_core.h
index 3587d3930f3..0a7f7d9d3fe 100644
--- a/src/gallium/drivers/ilo/core/ilo_core.h
+++ b/src/gallium/drivers/ilo/core/ilo_core.h
@@ -40,7 +40,4 @@
#include "util/u_memory.h"
#include "util/u_pointer.h"
-#define ILO_PRIM_RECTANGLES PIPE_PRIM_MAX
-#define ILO_PRIM_MAX (PIPE_PRIM_MAX + 1)
-
#endif /* ILO_CORE_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_debug.h b/src/gallium/drivers/ilo/core/ilo_debug.h
index d9c460498ff..9833233d796 100644
--- a/src/gallium/drivers/ilo/core/ilo_debug.h
+++ b/src/gallium/drivers/ilo/core/ilo_debug.h
@@ -100,4 +100,21 @@ ilo_warn(const char *format, ...)
#endif
}
+static inline bool
+ilo_is_zeroed(const void *ptr, size_t size)
+{
+#ifdef DEBUG
+ size_t i;
+
+ for (i = 0; i < size; i++) {
+ if (*((const char *) ptr) != 0)
+ return false;
+ }
+
+ return true;
+#else
+ return true;
+#endif
+}
+
#endif /* ILO_DEBUG_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_dev.c b/src/gallium/drivers/ilo/core/ilo_dev.c
index 7a774fa1591..925322abba4 100644
--- a/src/gallium/drivers/ilo/core/ilo_dev.c
+++ b/src/gallium/drivers/ilo/core/ilo_dev.c
@@ -32,14 +32,15 @@
#include "ilo_dev.h"
/**
- * Initialize the \p dev from \p winsys. \p winsys is considered owned by \p
- * dev and will be destroyed in \p ilo_dev_cleanup().
+ * Initialize the \p dev from \p winsys.
*/
bool
ilo_dev_init(struct ilo_dev *dev, struct intel_winsys *winsys)
{
const struct intel_winsys_info *info;
+ assert(ilo_is_zeroed(dev, sizeof(*dev)));
+
info = intel_winsys_get_info(winsys);
dev->winsys = winsys;
@@ -178,9 +179,3 @@ ilo_dev_init(struct ilo_dev *dev, struct intel_winsys *winsys)
return true;
}
-
-void
-ilo_dev_cleanup(struct ilo_dev *dev)
-{
- intel_winsys_destroy(dev->winsys);
-}
diff --git a/src/gallium/drivers/ilo/core/ilo_dev.h b/src/gallium/drivers/ilo/core/ilo_dev.h
index 4eb5d59dc86..a9f9b176e16 100644
--- a/src/gallium/drivers/ilo/core/ilo_dev.h
+++ b/src/gallium/drivers/ilo/core/ilo_dev.h
@@ -63,9 +63,6 @@ struct ilo_dev {
bool
ilo_dev_init(struct ilo_dev *dev, struct intel_winsys *winsys);
-void
-ilo_dev_cleanup(struct ilo_dev *dev);
-
static inline int
ilo_dev_gen(const struct ilo_dev *dev)
{
diff --git a/src/gallium/drivers/ilo/core/ilo_fence.h b/src/gallium/drivers/ilo/core/ilo_fence.h
deleted file mode 100644
index 00d555aa95b..00000000000
--- a/src/gallium/drivers/ilo/core/ilo_fence.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (C) 2012-2013 LunarG, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Chia-I Wu <[email protected]>
- */
-
-#ifndef ILO_FENCE_H
-#define ILO_FENCE_H
-
-#include "intel_winsys.h"
-
-#include "ilo_core.h"
-#include "ilo_dev.h"
-
-struct ilo_fence {
- struct intel_bo *seq_bo;
-};
-
-static inline void
-ilo_fence_init(struct ilo_fence *fence, const struct ilo_dev *dev)
-{
- /* no-op */
-}
-
-static inline void
-ilo_fence_cleanup(struct ilo_fence *fence)
-{
- intel_bo_unref(fence->seq_bo);
-}
-
-/**
- * Set the sequence bo for waiting. The fence is considered signaled when
- * there is no sequence bo.
- */
-static inline void
-ilo_fence_set_seq_bo(struct ilo_fence *fence, struct intel_bo *seq_bo)
-{
- intel_bo_unref(fence->seq_bo);
- fence->seq_bo = intel_bo_ref(seq_bo);
-}
-
-/**
- * Wait for the fence to be signaled or until \p timeout nanoseconds has
- * passed. It will wait indefinitely when \p timeout is negative.
- */
-static inline bool
-ilo_fence_wait(struct ilo_fence *fence, int64_t timeout)
-{
- return (!fence->seq_bo || intel_bo_wait(fence->seq_bo, timeout) == 0);
-}
-
-#endif /* ILO_FENCE_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_format.c b/src/gallium/drivers/ilo/core/ilo_format.c
deleted file mode 100644
index 280e499d54a..00000000000
--- a/src/gallium/drivers/ilo/core/ilo_format.c
+++ /dev/null
@@ -1,755 +0,0 @@
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (C) 2012-2013 LunarG, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Chia-I Wu <[email protected]>
- */
-
-#include "genhw/genhw.h"
-#include "ilo_format.h"
-
-struct ilo_vf_cap {
- int vertex_element;
-};
-
-struct ilo_sol_cap {
- int buffer;
-};
-
-struct ilo_sampler_cap {
- int sampling;
- int filtering;
- int shadow_map;
- int chroma_key;
-};
-
-struct ilo_dp_cap {
- int rt_write;
- int rt_write_blending;
- int typed_write;
- int media_color_processing;
-};
-
-/*
- * This table is based on:
- *
- * - the Sandy Bridge PRM, volume 4 part 1, page 88-97
- * - the Ivy Bridge PRM, volume 2 part 1, page 97-99
- * - the Haswell PRM, volume 7, page 467-470
- */
-static const struct ilo_vf_cap ilo_vf_caps[] = {
-#define CAP(vertex_element) { ILO_GEN(vertex_element) }
- [GEN6_FORMAT_R32G32B32A32_FLOAT] = CAP( 1),
- [GEN6_FORMAT_R32G32B32A32_SINT] = CAP( 1),
- [GEN6_FORMAT_R32G32B32A32_UINT] = CAP( 1),
- [GEN6_FORMAT_R32G32B32A32_UNORM] = CAP( 1),
- [GEN6_FORMAT_R32G32B32A32_SNORM] = CAP( 1),
- [GEN6_FORMAT_R64G64_FLOAT] = CAP( 1),
- [GEN6_FORMAT_R32G32B32A32_SSCALED] = CAP( 1),
- [GEN6_FORMAT_R32G32B32A32_USCALED] = CAP( 1),
- [GEN6_FORMAT_R32G32B32A32_SFIXED] = CAP(7.5),
- [GEN6_FORMAT_R32G32B32_FLOAT] = CAP( 1),
- [GEN6_FORMAT_R32G32B32_SINT] = CAP( 1),
- [GEN6_FORMAT_R32G32B32_UINT] = CAP( 1),
- [GEN6_FORMAT_R32G32B32_UNORM] = CAP( 1),
- [GEN6_FORMAT_R32G32B32_SNORM] = CAP( 1),
- [GEN6_FORMAT_R32G32B32_SSCALED] = CAP( 1),
- [GEN6_FORMAT_R32G32B32_USCALED] = CAP( 1),
- [GEN6_FORMAT_R32G32B32_SFIXED] = CAP(7.5),
- [GEN6_FORMAT_R16G16B16A16_UNORM] = CAP( 1),
- [GEN6_FORMAT_R16G16B16A16_SNORM] = CAP( 1),
- [GEN6_FORMAT_R16G16B16A16_SINT] = CAP( 1),
- [GEN6_FORMAT_R16G16B16A16_UINT] = CAP( 1),
- [GEN6_FORMAT_R16G16B16A16_FLOAT] = CAP( 1),
- [GEN6_FORMAT_R32G32_FLOAT] = CAP( 1),
- [GEN6_FORMAT_R32G32_SINT] = CAP( 1),
- [GEN6_FORMAT_R32G32_UINT] = CAP( 1),
- [GEN6_FORMAT_R32G32_UNORM] = CAP( 1),
- [GEN6_FORMAT_R32G32_SNORM] = CAP( 1),
- [GEN6_FORMAT_R64_FLOAT] = CAP( 1),
- [GEN6_FORMAT_R16G16B16A16_SSCALED] = CAP( 1),
- [GEN6_FORMAT_R16G16B16A16_USCALED] = CAP( 1),
- [GEN6_FORMAT_R32G32_SSCALED] = CAP( 1),
- [GEN6_FORMAT_R32G32_USCALED] = CAP( 1),
- [GEN6_FORMAT_R32G32_SFIXED] = CAP(7.5),
- [GEN6_FORMAT_B8G8R8A8_UNORM] = CAP( 1),
- [GEN6_FORMAT_R10G10B10A2_UNORM] = CAP( 1),
- [GEN6_FORMAT_R10G10B10A2_UINT] = CAP( 1),
- [GEN6_FORMAT_R10G10B10_SNORM_A2_UNORM] = CAP( 1),
- [GEN6_FORMAT_R8G8B8A8_UNORM] = CAP( 1),
- [GEN6_FORMAT_R8G8B8A8_SNORM] = CAP( 1),
- [GEN6_FORMAT_R8G8B8A8_SINT] = CAP( 1),
- [GEN6_FORMAT_R8G8B8A8_UINT] = CAP( 1),
- [GEN6_FORMAT_R16G16_UNORM] = CAP( 1),
- [GEN6_FORMAT_R16G16_SNORM] = CAP( 1),
- [GEN6_FORMAT_R16G16_SINT] = CAP( 1),
- [GEN6_FORMAT_R16G16_UINT] = CAP( 1),
- [GEN6_FORMAT_R16G16_FLOAT] = CAP( 1),
- [GEN6_FORMAT_B10G10R10A2_UNORM] = CAP(7.5),
- [GEN6_FORMAT_R11G11B10_FLOAT] = CAP( 1),
- [GEN6_FORMAT_R32_SINT] = CAP( 1),
- [GEN6_FORMAT_R32_UINT] = CAP( 1),
- [GEN6_FORMAT_R32_FLOAT] = CAP( 1),
- [GEN6_FORMAT_R32_UNORM] = CAP( 1),
- [GEN6_FORMAT_R32_SNORM] = CAP( 1),
- [GEN6_FORMAT_R10G10B10X2_USCALED] = CAP( 1),
- [GEN6_FORMAT_R8G8B8A8_SSCALED] = CAP( 1),
- [GEN6_FORMAT_R8G8B8A8_USCALED] = CAP( 1),
- [GEN6_FORMAT_R16G16_SSCALED] = CAP( 1),
- [GEN6_FORMAT_R16G16_USCALED] = CAP( 1),
- [GEN6_FORMAT_R32_SSCALED] = CAP( 1),
- [GEN6_FORMAT_R32_USCALED] = CAP( 1),
- [GEN6_FORMAT_R8G8_UNORM] = CAP( 1),
- [GEN6_FORMAT_R8G8_SNORM] = CAP( 1),
- [GEN6_FORMAT_R8G8_SINT] = CAP( 1),
- [GEN6_FORMAT_R8G8_UINT] = CAP( 1),
- [GEN6_FORMAT_R16_UNORM] = CAP( 1),
- [GEN6_FORMAT_R16_SNORM] = CAP( 1),
- [GEN6_FORMAT_R16_SINT] = CAP( 1),
- [GEN6_FORMAT_R16_UINT] = CAP( 1),
- [GEN6_FORMAT_R16_FLOAT] = CAP( 1),
- [GEN6_FORMAT_R8G8_SSCALED] = CAP( 1),
- [GEN6_FORMAT_R8G8_USCALED] = CAP( 1),
- [GEN6_FORMAT_R16_SSCALED] = CAP( 1),
- [GEN6_FORMAT_R16_USCALED] = CAP( 1),
- [GEN6_FORMAT_R8_UNORM] = CAP( 1),
- [GEN6_FORMAT_R8_SNORM] = CAP( 1),
- [GEN6_FORMAT_R8_SINT] = CAP( 1),
- [GEN6_FORMAT_R8_UINT] = CAP( 1),
- [GEN6_FORMAT_R8_SSCALED] = CAP( 1),
- [GEN6_FORMAT_R8_USCALED] = CAP( 1),
- [GEN6_FORMAT_R8G8B8_UNORM] = CAP( 1),
- [GEN6_FORMAT_R8G8B8_SNORM] = CAP( 1),
- [GEN6_FORMAT_R8G8B8_SSCALED] = CAP( 1),
- [GEN6_FORMAT_R8G8B8_USCALED] = CAP( 1),
- [GEN6_FORMAT_R64G64B64A64_FLOAT] = CAP( 1),
- [GEN6_FORMAT_R64G64B64_FLOAT] = CAP( 1),
- [GEN6_FORMAT_R16G16B16_FLOAT] = CAP( 6),
- [GEN6_FORMAT_R16G16B16_UNORM] = CAP( 1),
- [GEN6_FORMAT_R16G16B16_SNORM] = CAP( 1),
- [GEN6_FORMAT_R16G16B16_SSCALED] = CAP( 1),
- [GEN6_FORMAT_R16G16B16_USCALED] = CAP( 1),
- [GEN6_FORMAT_R16G16B16_UINT] = CAP(7.5),
- [GEN6_FORMAT_R16G16B16_SINT] = CAP(7.5),
- [GEN6_FORMAT_R32_SFIXED] = CAP(7.5),
- [GEN6_FORMAT_R10G10B10A2_SNORM] = CAP(7.5),
- [GEN6_FORMAT_R10G10B10A2_USCALED] = CAP(7.5),
- [GEN6_FORMAT_R10G10B10A2_SSCALED] = CAP(7.5),
- [GEN6_FORMAT_R10G10B10A2_SINT] = CAP(7.5),
- [GEN6_FORMAT_B10G10R10A2_SNORM] = CAP(7.5),
- [GEN6_FORMAT_B10G10R10A2_USCALED] = CAP(7.5),
- [GEN6_FORMAT_B10G10R10A2_SSCALED] = CAP(7.5),
- [GEN6_FORMAT_B10G10R10A2_UINT] = CAP(7.5),
- [GEN6_FORMAT_B10G10R10A2_SINT] = CAP(7.5),
- [GEN6_FORMAT_R8G8B8_UINT] = CAP(7.5),
- [GEN6_FORMAT_R8G8B8_SINT] = CAP(7.5),
-#undef CAP
-};
-
-/*
- * This table is based on:
- *
- * - the Sandy Bridge PRM, volume 4 part 1, page 88-97
- * - the Ivy Bridge PRM, volume 2 part 1, page 195
- * - the Haswell PRM, volume 7, page 535
- */
-static const struct ilo_sol_cap ilo_sol_caps[] = {
-#define CAP(buffer) { ILO_GEN(buffer) }
- [GEN6_FORMAT_R32G32B32A32_FLOAT] = CAP( 1),
- [GEN6_FORMAT_R32G32B32A32_SINT] = CAP( 1),
- [GEN6_FORMAT_R32G32B32A32_UINT] = CAP( 1),
- [GEN6_FORMAT_R32G32B32_FLOAT] = CAP( 1),
- [GEN6_FORMAT_R32G32B32_SINT] = CAP( 1),
- [GEN6_FORMAT_R32G32B32_UINT] = CAP( 1),
- [GEN6_FORMAT_R32G32_FLOAT] = CAP( 1),
- [GEN6_FORMAT_R32G32_SINT] = CAP( 1),
- [GEN6_FORMAT_R32G32_UINT] = CAP( 1),
- [GEN6_FORMAT_R32_SINT] = CAP( 1),
- [GEN6_FORMAT_R32_UINT] = CAP( 1),
- [GEN6_FORMAT_R32_FLOAT] = CAP( 1),
-#undef CAP
-};
-
-/*
- * This table is based on:
- *
- * - the Sandy Bridge PRM, volume 4 part 1, page 88-97
- * - the Ivy Bridge PRM, volume 4 part 1, page 84-87
- */
-static const struct ilo_sampler_cap ilo_sampler_caps[] = {
-#define CAP(sampling, filtering, shadow_map, chroma_key) \
- { ILO_GEN(sampling), ILO_GEN(filtering), ILO_GEN(shadow_map), ILO_GEN(chroma_key) }
- [GEN6_FORMAT_R32G32B32A32_FLOAT] = CAP( 1, 5, 0, 0),
- [GEN6_FORMAT_R32G32B32A32_SINT] = CAP( 1, 0, 0, 0),
- [GEN6_FORMAT_R32G32B32A32_UINT] = CAP( 1, 0, 0, 0),
- [GEN6_FORMAT_R32G32B32X32_FLOAT] = CAP( 1, 5, 0, 0),
- [GEN6_FORMAT_R32G32B32_FLOAT] = CAP( 1, 5, 0, 0),
- [GEN6_FORMAT_R32G32B32_SINT] = CAP( 1, 0, 0, 0),
- [GEN6_FORMAT_R32G32B32_UINT] = CAP( 1, 0, 0, 0),
- [GEN6_FORMAT_R16G16B16A16_UNORM] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_R16G16B16A16_SNORM] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_R16G16B16A16_SINT] = CAP( 1, 0, 0, 0),
- [GEN6_FORMAT_R16G16B16A16_UINT] = CAP( 1, 0, 0, 0),
- [GEN6_FORMAT_R16G16B16A16_FLOAT] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_R32G32_FLOAT] = CAP( 1, 5, 0, 0),
- [GEN6_FORMAT_R32G32_SINT] = CAP( 1, 0, 0, 0),
- [GEN6_FORMAT_R32G32_UINT] = CAP( 1, 0, 0, 0),
- [GEN6_FORMAT_R32_FLOAT_X8X24_TYPELESS] = CAP( 1, 5, 1, 0),
- [GEN6_FORMAT_X32_TYPELESS_G8X24_UINT] = CAP( 1, 0, 0, 0),
- [GEN6_FORMAT_L32A32_FLOAT] = CAP( 1, 5, 0, 0),
- [GEN6_FORMAT_R16G16B16X16_UNORM] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_R16G16B16X16_FLOAT] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_A32X32_FLOAT] = CAP( 1, 5, 0, 0),
- [GEN6_FORMAT_L32X32_FLOAT] = CAP( 1, 5, 0, 0),
- [GEN6_FORMAT_I32X32_FLOAT] = CAP( 1, 5, 0, 0),
- [GEN6_FORMAT_B8G8R8A8_UNORM] = CAP( 1, 1, 0, 1),
- [GEN6_FORMAT_B8G8R8A8_UNORM_SRGB] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_R10G10B10A2_UNORM] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_R10G10B10A2_UNORM_SRGB] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_R10G10B10A2_UINT] = CAP( 1, 0, 0, 0),
- [GEN6_FORMAT_R10G10B10_SNORM_A2_UNORM] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_R8G8B8A8_UNORM] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_R8G8B8A8_UNORM_SRGB] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_R8G8B8A8_SNORM] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_R8G8B8A8_SINT] = CAP( 1, 0, 0, 0),
- [GEN6_FORMAT_R8G8B8A8_UINT] = CAP( 1, 0, 0, 0),
- [GEN6_FORMAT_R16G16_UNORM] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_R16G16_SNORM] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_R16G16_SINT] = CAP( 1, 0, 0, 0),
- [GEN6_FORMAT_R16G16_UINT] = CAP( 1, 0, 0, 0),
- [GEN6_FORMAT_R16G16_FLOAT] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_B10G10R10A2_UNORM] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_B10G10R10A2_UNORM_SRGB] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_R11G11B10_FLOAT] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_R32_SINT] = CAP( 1, 0, 0, 0),
- [GEN6_FORMAT_R32_UINT] = CAP( 1, 0, 0, 0),
- [GEN6_FORMAT_R32_FLOAT] = CAP( 1, 5, 1, 0),
- [GEN6_FORMAT_R24_UNORM_X8_TYPELESS] = CAP( 1, 5, 1, 0),
- [GEN6_FORMAT_X24_TYPELESS_G8_UINT] = CAP( 1, 0, 0, 0),
- [GEN6_FORMAT_L16A16_UNORM] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_I24X8_UNORM] = CAP( 1, 5, 1, 0),
- [GEN6_FORMAT_L24X8_UNORM] = CAP( 1, 5, 1, 0),
- [GEN6_FORMAT_A24X8_UNORM] = CAP( 1, 5, 1, 0),
- [GEN6_FORMAT_I32_FLOAT] = CAP( 1, 5, 1, 0),
- [GEN6_FORMAT_L32_FLOAT] = CAP( 1, 5, 1, 0),
- [GEN6_FORMAT_A32_FLOAT] = CAP( 1, 5, 1, 0),
- [GEN6_FORMAT_B8G8R8X8_UNORM] = CAP( 1, 1, 0, 1),
- [GEN6_FORMAT_B8G8R8X8_UNORM_SRGB] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_R8G8B8X8_UNORM] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_R8G8B8X8_UNORM_SRGB] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_R9G9B9E5_SHAREDEXP] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_B10G10R10X2_UNORM] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_L16A16_FLOAT] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_B5G6R5_UNORM] = CAP( 1, 1, 0, 1),
- [GEN6_FORMAT_B5G6R5_UNORM_SRGB] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_B5G5R5A1_UNORM] = CAP( 1, 1, 0, 1),
- [GEN6_FORMAT_B5G5R5A1_UNORM_SRGB] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_B4G4R4A4_UNORM] = CAP( 1, 1, 0, 1),
- [GEN6_FORMAT_B4G4R4A4_UNORM_SRGB] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_R8G8_UNORM] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_R8G8_SNORM] = CAP( 1, 1, 0, 1),
- [GEN6_FORMAT_R8G8_SINT] = CAP( 1, 0, 0, 0),
- [GEN6_FORMAT_R8G8_UINT] = CAP( 1, 0, 0, 0),
- [GEN6_FORMAT_R16_UNORM] = CAP( 1, 1, 1, 0),
- [GEN6_FORMAT_R16_SNORM] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_R16_SINT] = CAP( 1, 0, 0, 0),
- [GEN6_FORMAT_R16_UINT] = CAP( 1, 0, 0, 0),
- [GEN6_FORMAT_R16_FLOAT] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_A8P8_UNORM_PALETTE0] = CAP( 5, 5, 0, 0),
- [GEN6_FORMAT_A8P8_UNORM_PALETTE1] = CAP( 5, 5, 0, 0),
- [GEN6_FORMAT_I16_UNORM] = CAP( 1, 1, 1, 0),
- [GEN6_FORMAT_L16_UNORM] = CAP( 1, 1, 1, 0),
- [GEN6_FORMAT_A16_UNORM] = CAP( 1, 1, 1, 0),
- [GEN6_FORMAT_L8A8_UNORM] = CAP( 1, 1, 0, 1),
- [GEN6_FORMAT_I16_FLOAT] = CAP( 1, 1, 1, 0),
- [GEN6_FORMAT_L16_FLOAT] = CAP( 1, 1, 1, 0),
- [GEN6_FORMAT_A16_FLOAT] = CAP( 1, 1, 1, 0),
- [GEN6_FORMAT_L8A8_UNORM_SRGB] = CAP(4.5, 4.5, 0, 0),
- [GEN6_FORMAT_R5G5_SNORM_B6_UNORM] = CAP( 1, 1, 0, 1),
- [GEN6_FORMAT_P8A8_UNORM_PALETTE0] = CAP( 5, 5, 0, 0),
- [GEN6_FORMAT_P8A8_UNORM_PALETTE1] = CAP( 5, 5, 0, 0),
- [GEN6_FORMAT_R8_UNORM] = CAP( 1, 1, 0, 4.5),
- [GEN6_FORMAT_R8_SNORM] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_R8_SINT] = CAP( 1, 0, 0, 0),
- [GEN6_FORMAT_R8_UINT] = CAP( 1, 0, 0, 0),
- [GEN6_FORMAT_A8_UNORM] = CAP( 1, 1, 0, 1),
- [GEN6_FORMAT_I8_UNORM] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_L8_UNORM] = CAP( 1, 1, 0, 1),
- [GEN6_FORMAT_P4A4_UNORM_PALETTE0] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_A4P4_UNORM_PALETTE0] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_P8_UNORM_PALETTE0] = CAP(4.5, 4.5, 0, 0),
- [GEN6_FORMAT_L8_UNORM_SRGB] = CAP(4.5, 4.5, 0, 0),
- [GEN6_FORMAT_P8_UNORM_PALETTE1] = CAP(4.5, 4.5, 0, 0),
- [GEN6_FORMAT_P4A4_UNORM_PALETTE1] = CAP(4.5, 4.5, 0, 0),
- [GEN6_FORMAT_A4P4_UNORM_PALETTE1] = CAP(4.5, 4.5, 0, 0),
- [GEN6_FORMAT_DXT1_RGB_SRGB] = CAP(4.5, 4.5, 0, 0),
- [GEN6_FORMAT_R1_UNORM] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_YCRCB_NORMAL] = CAP( 1, 1, 0, 1),
- [GEN6_FORMAT_YCRCB_SWAPUVY] = CAP( 1, 1, 0, 1),
- [GEN6_FORMAT_P2_UNORM_PALETTE0] = CAP(4.5, 4.5, 0, 0),
- [GEN6_FORMAT_P2_UNORM_PALETTE1] = CAP(4.5, 4.5, 0, 0),
- [GEN6_FORMAT_BC1_UNORM] = CAP( 1, 1, 0, 1),
- [GEN6_FORMAT_BC2_UNORM] = CAP( 1, 1, 0, 1),
- [GEN6_FORMAT_BC3_UNORM] = CAP( 1, 1, 0, 1),
- [GEN6_FORMAT_BC4_UNORM] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_BC5_UNORM] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_BC1_UNORM_SRGB] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_BC2_UNORM_SRGB] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_BC3_UNORM_SRGB] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_MONO8] = CAP( 1, 0, 0, 0),
- [GEN6_FORMAT_YCRCB_SWAPUV] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_YCRCB_SWAPY] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_DXT1_RGB] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_FXT1] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_BC4_SNORM] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_BC5_SNORM] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_R16G16B16_FLOAT] = CAP( 5, 5, 0, 0),
- [GEN6_FORMAT_BC6H_SF16] = CAP( 7, 7, 0, 0),
- [GEN6_FORMAT_BC7_UNORM] = CAP( 7, 7, 0, 0),
- [GEN6_FORMAT_BC7_UNORM_SRGB] = CAP( 7, 7, 0, 0),
- [GEN6_FORMAT_BC6H_UF16] = CAP( 7, 7, 0, 0),
-#undef CAP
-};
-
-/*
- * This table is based on:
- *
- * - the Sandy Bridge PRM, volume 4 part 1, page 88-97
- * - the Ivy Bridge PRM, volume 4 part 1, page 172, 252-253, and 277-278
- * - the Haswell PRM, volume 7, page 262-264
- */
-static const struct ilo_dp_cap ilo_dp_caps[] = {
-#define CAP(rt_write, rt_write_blending, typed_write, media_color_processing) \
- { ILO_GEN(rt_write), ILO_GEN(rt_write_blending), ILO_GEN(typed_write), ILO_GEN(media_color_processing) }
- [GEN6_FORMAT_R32G32B32A32_FLOAT] = CAP( 1, 1, 7, 0),
- [GEN6_FORMAT_R32G32B32A32_SINT] = CAP( 1, 0, 7, 0),
- [GEN6_FORMAT_R32G32B32A32_UINT] = CAP( 1, 0, 7, 0),
- [GEN6_FORMAT_R16G16B16A16_UNORM] = CAP( 1, 4.5, 7, 6),
- [GEN6_FORMAT_R16G16B16A16_SNORM] = CAP( 1, 6, 7, 0),
- [GEN6_FORMAT_R16G16B16A16_SINT] = CAP( 1, 0, 7, 0),
- [GEN6_FORMAT_R16G16B16A16_UINT] = CAP( 1, 0, 7, 0),
- [GEN6_FORMAT_R16G16B16A16_FLOAT] = CAP( 1, 1, 7, 0),
- [GEN6_FORMAT_R32G32_FLOAT] = CAP( 1, 1, 7, 0),
- [GEN6_FORMAT_R32G32_SINT] = CAP( 1, 0, 7, 0),
- [GEN6_FORMAT_R32G32_UINT] = CAP( 1, 0, 7, 0),
- [GEN6_FORMAT_B8G8R8A8_UNORM] = CAP( 1, 1, 7, 6),
- [GEN6_FORMAT_B8G8R8A8_UNORM_SRGB] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_R10G10B10A2_UNORM] = CAP( 1, 1, 7, 6),
- [GEN6_FORMAT_R10G10B10A2_UNORM_SRGB] = CAP( 0, 0, 0, 6),
- [GEN6_FORMAT_R10G10B10A2_UINT] = CAP( 1, 0, 7, 0),
- [GEN6_FORMAT_R8G8B8A8_UNORM] = CAP( 1, 1, 7, 6),
- [GEN6_FORMAT_R8G8B8A8_UNORM_SRGB] = CAP( 1, 1, 0, 6),
- [GEN6_FORMAT_R8G8B8A8_SNORM] = CAP( 1, 6, 7, 0),
- [GEN6_FORMAT_R8G8B8A8_SINT] = CAP( 1, 0, 7, 0),
- [GEN6_FORMAT_R8G8B8A8_UINT] = CAP( 1, 0, 7, 0),
- [GEN6_FORMAT_R16G16_UNORM] = CAP( 1, 4.5, 7, 0),
- [GEN6_FORMAT_R16G16_SNORM] = CAP( 1, 6, 7, 0),
- [GEN6_FORMAT_R16G16_SINT] = CAP( 1, 0, 7, 0),
- [GEN6_FORMAT_R16G16_UINT] = CAP( 1, 0, 7, 0),
- [GEN6_FORMAT_R16G16_FLOAT] = CAP( 1, 1, 7, 0),
- [GEN6_FORMAT_B10G10R10A2_UNORM] = CAP( 1, 1, 7, 6),
- [GEN6_FORMAT_B10G10R10A2_UNORM_SRGB] = CAP( 1, 1, 0, 6),
- [GEN6_FORMAT_R11G11B10_FLOAT] = CAP( 1, 1, 7, 0),
- [GEN6_FORMAT_R32_SINT] = CAP( 1, 0, 7, 0),
- [GEN6_FORMAT_R32_UINT] = CAP( 1, 0, 7, 0),
- [GEN6_FORMAT_R32_FLOAT] = CAP( 1, 1, 7, 0),
- [GEN6_FORMAT_B8G8R8X8_UNORM] = CAP( 0, 0, 0, 6),
- [GEN6_FORMAT_B5G6R5_UNORM] = CAP( 1, 1, 7, 0),
- [GEN6_FORMAT_B5G6R5_UNORM_SRGB] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_B5G5R5A1_UNORM] = CAP( 1, 1, 7, 0),
- [GEN6_FORMAT_B5G5R5A1_UNORM_SRGB] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_B4G4R4A4_UNORM] = CAP( 1, 1, 7, 0),
- [GEN6_FORMAT_B4G4R4A4_UNORM_SRGB] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_R8G8_UNORM] = CAP( 1, 1, 7, 0),
- [GEN6_FORMAT_R8G8_SNORM] = CAP( 1, 6, 7, 0),
- [GEN6_FORMAT_R8G8_SINT] = CAP( 1, 0, 7, 0),
- [GEN6_FORMAT_R8G8_UINT] = CAP( 1, 0, 7, 0),
- [GEN6_FORMAT_R16_UNORM] = CAP( 1, 4.5, 7, 7),
- [GEN6_FORMAT_R16_SNORM] = CAP( 1, 6, 7, 0),
- [GEN6_FORMAT_R16_SINT] = CAP( 1, 0, 7, 0),
- [GEN6_FORMAT_R16_UINT] = CAP( 1, 0, 7, 0),
- [GEN6_FORMAT_R16_FLOAT] = CAP( 1, 1, 7, 0),
- [GEN6_FORMAT_B5G5R5X1_UNORM] = CAP( 1, 1, 7, 0),
- [GEN6_FORMAT_B5G5R5X1_UNORM_SRGB] = CAP( 1, 1, 0, 0),
- [GEN6_FORMAT_R8_UNORM] = CAP( 1, 1, 7, 0),
- [GEN6_FORMAT_R8_SNORM] = CAP( 1, 6, 7, 0),
- [GEN6_FORMAT_R8_SINT] = CAP( 1, 0, 7, 0),
- [GEN6_FORMAT_R8_UINT] = CAP( 1, 0, 7, 0),
- [GEN6_FORMAT_A8_UNORM] = CAP( 1, 1, 7, 0),
- [GEN6_FORMAT_YCRCB_NORMAL] = CAP( 1, 0, 0, 6),
- [GEN6_FORMAT_YCRCB_SWAPUVY] = CAP( 1, 0, 0, 6),
- [GEN6_FORMAT_YCRCB_SWAPUV] = CAP( 1, 0, 0, 6),
- [GEN6_FORMAT_YCRCB_SWAPY] = CAP( 1, 0, 0, 6),
-#undef CAP
-};
-
-bool
-ilo_format_support_vb(const struct ilo_dev *dev,
- enum pipe_format format)
-{
- const int idx = ilo_format_translate(dev, format, PIPE_BIND_VERTEX_BUFFER);
- const struct ilo_vf_cap *cap = (idx >= 0 && idx < Elements(ilo_vf_caps)) ?
- &ilo_vf_caps[idx] : NULL;
-
- return (cap && cap->vertex_element &&
- ilo_dev_gen(dev) >= cap->vertex_element);
-}
-
-bool
-ilo_format_support_sol(const struct ilo_dev *dev,
- enum pipe_format format)
-{
- const int idx = ilo_format_translate(dev, format, PIPE_BIND_STREAM_OUTPUT);
- const struct ilo_sol_cap *cap = (idx >= 0 && idx < Elements(ilo_sol_caps)) ?
- &ilo_sol_caps[idx] : NULL;
-
- return (cap && cap->buffer && ilo_dev_gen(dev) >= cap->buffer);
-}
-
-bool
-ilo_format_support_sampler(const struct ilo_dev *dev,
- enum pipe_format format)
-{
- const int idx = ilo_format_translate(dev, format, PIPE_BIND_SAMPLER_VIEW);
- const struct ilo_sampler_cap *cap = (idx >= 0 &&
- idx < Elements(ilo_sampler_caps)) ? &ilo_sampler_caps[idx] : NULL;
-
- if (!cap || !cap->sampling)
- return false;
-
- assert(!cap->filtering || cap->filtering >= cap->sampling);
-
- if (util_format_is_pure_integer(format))
- return (ilo_dev_gen(dev) >= cap->sampling);
- else if (cap->filtering)
- return (ilo_dev_gen(dev) >= cap->filtering);
- else
- return false;
-}
-
-bool
-ilo_format_support_rt(const struct ilo_dev *dev,
- enum pipe_format format)
-{
- const int idx = ilo_format_translate(dev, format, PIPE_BIND_RENDER_TARGET);
- const struct ilo_dp_cap *cap = (idx >= 0 && idx < Elements(ilo_dp_caps)) ?
- &ilo_dp_caps[idx] : NULL;
-
- if (!cap || !cap->rt_write)
- return false;
-
- assert(!cap->rt_write_blending || cap->rt_write_blending >= cap->rt_write);
-
- if (util_format_is_pure_integer(format))
- return (ilo_dev_gen(dev) >= cap->rt_write);
- else if (cap->rt_write_blending)
- return (ilo_dev_gen(dev) >= cap->rt_write_blending);
- else
- return false;
-}
-
-bool
-ilo_format_support_zs(const struct ilo_dev *dev,
- enum pipe_format format)
-{
- switch (format) {
- case PIPE_FORMAT_Z16_UNORM:
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z32_FLOAT:
- case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- return true;
- case PIPE_FORMAT_S8_UINT:
- /* TODO separate stencil */
- default:
- return false;
- }
-}
-
-/**
- * Translate a color (non-depth/stencil) pipe format to the matching hardware
- * format. Return -1 on errors.
- */
-int
-ilo_format_translate_color(const struct ilo_dev *dev,
- enum pipe_format format)
-{
- static const int format_mapping[PIPE_FORMAT_COUNT] = {
- [PIPE_FORMAT_NONE] = 0,
- [PIPE_FORMAT_B8G8R8A8_UNORM] = GEN6_FORMAT_B8G8R8A8_UNORM,
- [PIPE_FORMAT_B8G8R8X8_UNORM] = GEN6_FORMAT_B8G8R8X8_UNORM,
- [PIPE_FORMAT_A8R8G8B8_UNORM] = 0,
- [PIPE_FORMAT_X8R8G8B8_UNORM] = 0,
- [PIPE_FORMAT_B5G5R5A1_UNORM] = GEN6_FORMAT_B5G5R5A1_UNORM,
- [PIPE_FORMAT_B4G4R4A4_UNORM] = GEN6_FORMAT_B4G4R4A4_UNORM,
- [PIPE_FORMAT_B5G6R5_UNORM] = GEN6_FORMAT_B5G6R5_UNORM,
- [PIPE_FORMAT_R10G10B10A2_UNORM] = GEN6_FORMAT_R10G10B10A2_UNORM,
- [PIPE_FORMAT_L8_UNORM] = GEN6_FORMAT_L8_UNORM,
- [PIPE_FORMAT_A8_UNORM] = GEN6_FORMAT_A8_UNORM,
- [PIPE_FORMAT_I8_UNORM] = GEN6_FORMAT_I8_UNORM,
- [PIPE_FORMAT_L8A8_UNORM] = GEN6_FORMAT_L8A8_UNORM,
- [PIPE_FORMAT_L16_UNORM] = GEN6_FORMAT_L16_UNORM,
- [PIPE_FORMAT_UYVY] = GEN6_FORMAT_YCRCB_SWAPUVY,
- [PIPE_FORMAT_YUYV] = GEN6_FORMAT_YCRCB_NORMAL,
- [PIPE_FORMAT_Z16_UNORM] = 0,
- [PIPE_FORMAT_Z32_UNORM] = 0,
- [PIPE_FORMAT_Z32_FLOAT] = 0,
- [PIPE_FORMAT_Z24_UNORM_S8_UINT] = 0,
- [PIPE_FORMAT_S8_UINT_Z24_UNORM] = 0,
- [PIPE_FORMAT_Z24X8_UNORM] = 0,
- [PIPE_FORMAT_X8Z24_UNORM] = 0,
- [PIPE_FORMAT_S8_UINT] = 0,
- [PIPE_FORMAT_R64_FLOAT] = GEN6_FORMAT_R64_FLOAT,
- [PIPE_FORMAT_R64G64_FLOAT] = GEN6_FORMAT_R64G64_FLOAT,
- [PIPE_FORMAT_R64G64B64_FLOAT] = GEN6_FORMAT_R64G64B64_FLOAT,
- [PIPE_FORMAT_R64G64B64A64_FLOAT] = GEN6_FORMAT_R64G64B64A64_FLOAT,
- [PIPE_FORMAT_R32_FLOAT] = GEN6_FORMAT_R32_FLOAT,
- [PIPE_FORMAT_R32G32_FLOAT] = GEN6_FORMAT_R32G32_FLOAT,
- [PIPE_FORMAT_R32G32B32_FLOAT] = GEN6_FORMAT_R32G32B32_FLOAT,
- [PIPE_FORMAT_R32G32B32A32_FLOAT] = GEN6_FORMAT_R32G32B32A32_FLOAT,
- [PIPE_FORMAT_R32_UNORM] = GEN6_FORMAT_R32_UNORM,
- [PIPE_FORMAT_R32G32_UNORM] = GEN6_FORMAT_R32G32_UNORM,
- [PIPE_FORMAT_R32G32B32_UNORM] = GEN6_FORMAT_R32G32B32_UNORM,
- [PIPE_FORMAT_R32G32B32A32_UNORM] = GEN6_FORMAT_R32G32B32A32_UNORM,
- [PIPE_FORMAT_R32_USCALED] = GEN6_FORMAT_R32_USCALED,
- [PIPE_FORMAT_R32G32_USCALED] = GEN6_FORMAT_R32G32_USCALED,
- [PIPE_FORMAT_R32G32B32_USCALED] = GEN6_FORMAT_R32G32B32_USCALED,
- [PIPE_FORMAT_R32G32B32A32_USCALED] = GEN6_FORMAT_R32G32B32A32_USCALED,
- [PIPE_FORMAT_R32_SNORM] = GEN6_FORMAT_R32_SNORM,
- [PIPE_FORMAT_R32G32_SNORM] = GEN6_FORMAT_R32G32_SNORM,
- [PIPE_FORMAT_R32G32B32_SNORM] = GEN6_FORMAT_R32G32B32_SNORM,
- [PIPE_FORMAT_R32G32B32A32_SNORM] = GEN6_FORMAT_R32G32B32A32_SNORM,
- [PIPE_FORMAT_R32_SSCALED] = GEN6_FORMAT_R32_SSCALED,
- [PIPE_FORMAT_R32G32_SSCALED] = GEN6_FORMAT_R32G32_SSCALED,
- [PIPE_FORMAT_R32G32B32_SSCALED] = GEN6_FORMAT_R32G32B32_SSCALED,
- [PIPE_FORMAT_R32G32B32A32_SSCALED] = GEN6_FORMAT_R32G32B32A32_SSCALED,
- [PIPE_FORMAT_R16_UNORM] = GEN6_FORMAT_R16_UNORM,
- [PIPE_FORMAT_R16G16_UNORM] = GEN6_FORMAT_R16G16_UNORM,
- [PIPE_FORMAT_R16G16B16_UNORM] = GEN6_FORMAT_R16G16B16_UNORM,
- [PIPE_FORMAT_R16G16B16A16_UNORM] = GEN6_FORMAT_R16G16B16A16_UNORM,
- [PIPE_FORMAT_R16_USCALED] = GEN6_FORMAT_R16_USCALED,
- [PIPE_FORMAT_R16G16_USCALED] = GEN6_FORMAT_R16G16_USCALED,
- [PIPE_FORMAT_R16G16B16_USCALED] = GEN6_FORMAT_R16G16B16_USCALED,
- [PIPE_FORMAT_R16G16B16A16_USCALED] = GEN6_FORMAT_R16G16B16A16_USCALED,
- [PIPE_FORMAT_R16_SNORM] = GEN6_FORMAT_R16_SNORM,
- [PIPE_FORMAT_R16G16_SNORM] = GEN6_FORMAT_R16G16_SNORM,
- [PIPE_FORMAT_R16G16B16_SNORM] = GEN6_FORMAT_R16G16B16_SNORM,
- [PIPE_FORMAT_R16G16B16A16_SNORM] = GEN6_FORMAT_R16G16B16A16_SNORM,
- [PIPE_FORMAT_R16_SSCALED] = GEN6_FORMAT_R16_SSCALED,
- [PIPE_FORMAT_R16G16_SSCALED] = GEN6_FORMAT_R16G16_SSCALED,
- [PIPE_FORMAT_R16G16B16_SSCALED] = GEN6_FORMAT_R16G16B16_SSCALED,
- [PIPE_FORMAT_R16G16B16A16_SSCALED] = GEN6_FORMAT_R16G16B16A16_SSCALED,
- [PIPE_FORMAT_R8_UNORM] = GEN6_FORMAT_R8_UNORM,
- [PIPE_FORMAT_R8G8_UNORM] = GEN6_FORMAT_R8G8_UNORM,
- [PIPE_FORMAT_R8G8B8_UNORM] = GEN6_FORMAT_R8G8B8_UNORM,
- [PIPE_FORMAT_R8G8B8A8_UNORM] = GEN6_FORMAT_R8G8B8A8_UNORM,
- [PIPE_FORMAT_X8B8G8R8_UNORM] = 0,
- [PIPE_FORMAT_R8_USCALED] = GEN6_FORMAT_R8_USCALED,
- [PIPE_FORMAT_R8G8_USCALED] = GEN6_FORMAT_R8G8_USCALED,
- [PIPE_FORMAT_R8G8B8_USCALED] = GEN6_FORMAT_R8G8B8_USCALED,
- [PIPE_FORMAT_R8G8B8A8_USCALED] = GEN6_FORMAT_R8G8B8A8_USCALED,
- [PIPE_FORMAT_R8_SNORM] = GEN6_FORMAT_R8_SNORM,
- [PIPE_FORMAT_R8G8_SNORM] = GEN6_FORMAT_R8G8_SNORM,
- [PIPE_FORMAT_R8G8B8_SNORM] = GEN6_FORMAT_R8G8B8_SNORM,
- [PIPE_FORMAT_R8G8B8A8_SNORM] = GEN6_FORMAT_R8G8B8A8_SNORM,
- [PIPE_FORMAT_R8_SSCALED] = GEN6_FORMAT_R8_SSCALED,
- [PIPE_FORMAT_R8G8_SSCALED] = GEN6_FORMAT_R8G8_SSCALED,
- [PIPE_FORMAT_R8G8B8_SSCALED] = GEN6_FORMAT_R8G8B8_SSCALED,
- [PIPE_FORMAT_R8G8B8A8_SSCALED] = GEN6_FORMAT_R8G8B8A8_SSCALED,
- [PIPE_FORMAT_R32_FIXED] = GEN6_FORMAT_R32_SFIXED,
- [PIPE_FORMAT_R32G32_FIXED] = GEN6_FORMAT_R32G32_SFIXED,
- [PIPE_FORMAT_R32G32B32_FIXED] = GEN6_FORMAT_R32G32B32_SFIXED,
- [PIPE_FORMAT_R32G32B32A32_FIXED] = GEN6_FORMAT_R32G32B32A32_SFIXED,
- [PIPE_FORMAT_R16_FLOAT] = GEN6_FORMAT_R16_FLOAT,
- [PIPE_FORMAT_R16G16_FLOAT] = GEN6_FORMAT_R16G16_FLOAT,
- [PIPE_FORMAT_R16G16B16_FLOAT] = GEN6_FORMAT_R16G16B16_FLOAT,
- [PIPE_FORMAT_R16G16B16A16_FLOAT] = GEN6_FORMAT_R16G16B16A16_FLOAT,
- [PIPE_FORMAT_L8_SRGB] = GEN6_FORMAT_L8_UNORM_SRGB,
- [PIPE_FORMAT_L8A8_SRGB] = GEN6_FORMAT_L8A8_UNORM_SRGB,
- [PIPE_FORMAT_R8G8B8_SRGB] = GEN6_FORMAT_R8G8B8_UNORM_SRGB,
- [PIPE_FORMAT_A8B8G8R8_SRGB] = 0,
- [PIPE_FORMAT_X8B8G8R8_SRGB] = 0,
- [PIPE_FORMAT_B8G8R8A8_SRGB] = GEN6_FORMAT_B8G8R8A8_UNORM_SRGB,
- [PIPE_FORMAT_B8G8R8X8_SRGB] = GEN6_FORMAT_B8G8R8X8_UNORM_SRGB,
- [PIPE_FORMAT_A8R8G8B8_SRGB] = 0,
- [PIPE_FORMAT_X8R8G8B8_SRGB] = 0,
- [PIPE_FORMAT_R8G8B8A8_SRGB] = GEN6_FORMAT_R8G8B8A8_UNORM_SRGB,
- [PIPE_FORMAT_DXT1_RGB] = GEN6_FORMAT_DXT1_RGB,
- [PIPE_FORMAT_DXT1_RGBA] = GEN6_FORMAT_BC1_UNORM,
- [PIPE_FORMAT_DXT3_RGBA] = GEN6_FORMAT_BC2_UNORM,
- [PIPE_FORMAT_DXT5_RGBA] = GEN6_FORMAT_BC3_UNORM,
- [PIPE_FORMAT_DXT1_SRGB] = GEN6_FORMAT_DXT1_RGB_SRGB,
- [PIPE_FORMAT_DXT1_SRGBA] = GEN6_FORMAT_BC1_UNORM_SRGB,
- [PIPE_FORMAT_DXT3_SRGBA] = GEN6_FORMAT_BC2_UNORM_SRGB,
- [PIPE_FORMAT_DXT5_SRGBA] = GEN6_FORMAT_BC3_UNORM_SRGB,
- [PIPE_FORMAT_RGTC1_UNORM] = GEN6_FORMAT_BC4_UNORM,
- [PIPE_FORMAT_RGTC1_SNORM] = GEN6_FORMAT_BC4_SNORM,
- [PIPE_FORMAT_RGTC2_UNORM] = GEN6_FORMAT_BC5_UNORM,
- [PIPE_FORMAT_RGTC2_SNORM] = GEN6_FORMAT_BC5_SNORM,
- [PIPE_FORMAT_R8G8_B8G8_UNORM] = 0,
- [PIPE_FORMAT_G8R8_G8B8_UNORM] = 0,
- [PIPE_FORMAT_R8SG8SB8UX8U_NORM] = 0,
- [PIPE_FORMAT_R5SG5SB6U_NORM] = 0,
- [PIPE_FORMAT_A8B8G8R8_UNORM] = 0,
- [PIPE_FORMAT_B5G5R5X1_UNORM] = GEN6_FORMAT_B5G5R5X1_UNORM,
- [PIPE_FORMAT_R10G10B10A2_USCALED] = GEN6_FORMAT_R10G10B10A2_USCALED,
- [PIPE_FORMAT_R11G11B10_FLOAT] = GEN6_FORMAT_R11G11B10_FLOAT,
- [PIPE_FORMAT_R9G9B9E5_FLOAT] = GEN6_FORMAT_R9G9B9E5_SHAREDEXP,
- [PIPE_FORMAT_Z32_FLOAT_S8X24_UINT] = 0,
- [PIPE_FORMAT_R1_UNORM] = GEN6_FORMAT_R1_UNORM,
- [PIPE_FORMAT_R10G10B10X2_USCALED] = GEN6_FORMAT_R10G10B10X2_USCALED,
- [PIPE_FORMAT_R10G10B10X2_SNORM] = 0,
- [PIPE_FORMAT_L4A4_UNORM] = 0,
- [PIPE_FORMAT_B10G10R10A2_UNORM] = GEN6_FORMAT_B10G10R10A2_UNORM,
- [PIPE_FORMAT_R10SG10SB10SA2U_NORM] = 0,
- [PIPE_FORMAT_R8G8Bx_SNORM] = 0,
- [PIPE_FORMAT_R8G8B8X8_UNORM] = GEN6_FORMAT_R8G8B8X8_UNORM,
- [PIPE_FORMAT_B4G4R4X4_UNORM] = 0,
- [PIPE_FORMAT_X24S8_UINT] = 0,
- [PIPE_FORMAT_S8X24_UINT] = 0,
- [PIPE_FORMAT_X32_S8X24_UINT] = 0,
- [PIPE_FORMAT_B2G3R3_UNORM] = 0,
- [PIPE_FORMAT_L16A16_UNORM] = GEN6_FORMAT_L16A16_UNORM,
- [PIPE_FORMAT_A16_UNORM] = GEN6_FORMAT_A16_UNORM,
- [PIPE_FORMAT_I16_UNORM] = GEN6_FORMAT_I16_UNORM,
- [PIPE_FORMAT_LATC1_UNORM] = 0,
- [PIPE_FORMAT_LATC1_SNORM] = 0,
- [PIPE_FORMAT_LATC2_UNORM] = 0,
- [PIPE_FORMAT_LATC2_SNORM] = 0,
- [PIPE_FORMAT_A8_SNORM] = 0,
- [PIPE_FORMAT_L8_SNORM] = 0,
- [PIPE_FORMAT_L8A8_SNORM] = 0,
- [PIPE_FORMAT_I8_SNORM] = 0,
- [PIPE_FORMAT_A16_SNORM] = 0,
- [PIPE_FORMAT_L16_SNORM] = 0,
- [PIPE_FORMAT_L16A16_SNORM] = 0,
- [PIPE_FORMAT_I16_SNORM] = 0,
- [PIPE_FORMAT_A16_FLOAT] = GEN6_FORMAT_A16_FLOAT,
- [PIPE_FORMAT_L16_FLOAT] = GEN6_FORMAT_L16_FLOAT,
- [PIPE_FORMAT_L16A16_FLOAT] = GEN6_FORMAT_L16A16_FLOAT,
- [PIPE_FORMAT_I16_FLOAT] = GEN6_FORMAT_I16_FLOAT,
- [PIPE_FORMAT_A32_FLOAT] = GEN6_FORMAT_A32_FLOAT,
- [PIPE_FORMAT_L32_FLOAT] = GEN6_FORMAT_L32_FLOAT,
- [PIPE_FORMAT_L32A32_FLOAT] = GEN6_FORMAT_L32A32_FLOAT,
- [PIPE_FORMAT_I32_FLOAT] = GEN6_FORMAT_I32_FLOAT,
- [PIPE_FORMAT_YV12] = 0,
- [PIPE_FORMAT_YV16] = 0,
- [PIPE_FORMAT_IYUV] = 0,
- [PIPE_FORMAT_NV12] = 0,
- [PIPE_FORMAT_NV21] = 0,
- [PIPE_FORMAT_A4R4_UNORM] = 0,
- [PIPE_FORMAT_R4A4_UNORM] = 0,
- [PIPE_FORMAT_R8A8_UNORM] = 0,
- [PIPE_FORMAT_A8R8_UNORM] = 0,
- [PIPE_FORMAT_R10G10B10A2_SSCALED] = GEN6_FORMAT_R10G10B10A2_SSCALED,
- [PIPE_FORMAT_R10G10B10A2_SNORM] = GEN6_FORMAT_R10G10B10A2_SNORM,
- [PIPE_FORMAT_B10G10R10A2_USCALED] = GEN6_FORMAT_B10G10R10A2_USCALED,
- [PIPE_FORMAT_B10G10R10A2_SSCALED] = GEN6_FORMAT_B10G10R10A2_SSCALED,
- [PIPE_FORMAT_B10G10R10A2_SNORM] = GEN6_FORMAT_B10G10R10A2_SNORM,
- [PIPE_FORMAT_R8_UINT] = GEN6_FORMAT_R8_UINT,
- [PIPE_FORMAT_R8G8_UINT] = GEN6_FORMAT_R8G8_UINT,
- [PIPE_FORMAT_R8G8B8_UINT] = GEN6_FORMAT_R8G8B8_UINT,
- [PIPE_FORMAT_R8G8B8A8_UINT] = GEN6_FORMAT_R8G8B8A8_UINT,
- [PIPE_FORMAT_R8_SINT] = GEN6_FORMAT_R8_SINT,
- [PIPE_FORMAT_R8G8_SINT] = GEN6_FORMAT_R8G8_SINT,
- [PIPE_FORMAT_R8G8B8_SINT] = GEN6_FORMAT_R8G8B8_SINT,
- [PIPE_FORMAT_R8G8B8A8_SINT] = GEN6_FORMAT_R8G8B8A8_SINT,
- [PIPE_FORMAT_R16_UINT] = GEN6_FORMAT_R16_UINT,
- [PIPE_FORMAT_R16G16_UINT] = GEN6_FORMAT_R16G16_UINT,
- [PIPE_FORMAT_R16G16B16_UINT] = GEN6_FORMAT_R16G16B16_UINT,
- [PIPE_FORMAT_R16G16B16A16_UINT] = GEN6_FORMAT_R16G16B16A16_UINT,
- [PIPE_FORMAT_R16_SINT] = GEN6_FORMAT_R16_SINT,
- [PIPE_FORMAT_R16G16_SINT] = GEN6_FORMAT_R16G16_SINT,
- [PIPE_FORMAT_R16G16B16_SINT] = GEN6_FORMAT_R16G16B16_SINT,
- [PIPE_FORMAT_R16G16B16A16_SINT] = GEN6_FORMAT_R16G16B16A16_SINT,
- [PIPE_FORMAT_R32_UINT] = GEN6_FORMAT_R32_UINT,
- [PIPE_FORMAT_R32G32_UINT] = GEN6_FORMAT_R32G32_UINT,
- [PIPE_FORMAT_R32G32B32_UINT] = GEN6_FORMAT_R32G32B32_UINT,
- [PIPE_FORMAT_R32G32B32A32_UINT] = GEN6_FORMAT_R32G32B32A32_UINT,
- [PIPE_FORMAT_R32_SINT] = GEN6_FORMAT_R32_SINT,
- [PIPE_FORMAT_R32G32_SINT] = GEN6_FORMAT_R32G32_SINT,
- [PIPE_FORMAT_R32G32B32_SINT] = GEN6_FORMAT_R32G32B32_SINT,
- [PIPE_FORMAT_R32G32B32A32_SINT] = GEN6_FORMAT_R32G32B32A32_SINT,
- [PIPE_FORMAT_A8_UINT] = 0,
- [PIPE_FORMAT_I8_UINT] = GEN6_FORMAT_I8_UINT,
- [PIPE_FORMAT_L8_UINT] = GEN6_FORMAT_L8_UINT,
- [PIPE_FORMAT_L8A8_UINT] = GEN6_FORMAT_L8A8_UINT,
- [PIPE_FORMAT_A8_SINT] = 0,
- [PIPE_FORMAT_I8_SINT] = GEN6_FORMAT_I8_SINT,
- [PIPE_FORMAT_L8_SINT] = GEN6_FORMAT_L8_SINT,
- [PIPE_FORMAT_L8A8_SINT] = GEN6_FORMAT_L8A8_SINT,
- [PIPE_FORMAT_A16_UINT] = 0,
- [PIPE_FORMAT_I16_UINT] = 0,
- [PIPE_FORMAT_L16_UINT] = 0,
- [PIPE_FORMAT_L16A16_UINT] = 0,
- [PIPE_FORMAT_A16_SINT] = 0,
- [PIPE_FORMAT_I16_SINT] = 0,
- [PIPE_FORMAT_L16_SINT] = 0,
- [PIPE_FORMAT_L16A16_SINT] = 0,
- [PIPE_FORMAT_A32_UINT] = 0,
- [PIPE_FORMAT_I32_UINT] = 0,
- [PIPE_FORMAT_L32_UINT] = 0,
- [PIPE_FORMAT_L32A32_UINT] = 0,
- [PIPE_FORMAT_A32_SINT] = 0,
- [PIPE_FORMAT_I32_SINT] = 0,
- [PIPE_FORMAT_L32_SINT] = 0,
- [PIPE_FORMAT_L32A32_SINT] = 0,
- [PIPE_FORMAT_B10G10R10A2_UINT] = GEN6_FORMAT_B10G10R10A2_UINT,
- [PIPE_FORMAT_ETC1_RGB8] = GEN6_FORMAT_ETC1_RGB8,
- [PIPE_FORMAT_R8G8_R8B8_UNORM] = 0,
- [PIPE_FORMAT_G8R8_B8R8_UNORM] = 0,
- [PIPE_FORMAT_R8G8B8X8_SNORM] = 0,
- [PIPE_FORMAT_R8G8B8X8_SRGB] = 0,
- [PIPE_FORMAT_R8G8B8X8_UINT] = 0,
- [PIPE_FORMAT_R8G8B8X8_SINT] = 0,
- [PIPE_FORMAT_B10G10R10X2_UNORM] = GEN6_FORMAT_B10G10R10X2_UNORM,
- [PIPE_FORMAT_R16G16B16X16_UNORM] = GEN6_FORMAT_R16G16B16X16_UNORM,
- [PIPE_FORMAT_R16G16B16X16_SNORM] = 0,
- [PIPE_FORMAT_R16G16B16X16_FLOAT] = GEN6_FORMAT_R16G16B16X16_FLOAT,
- [PIPE_FORMAT_R16G16B16X16_UINT] = 0,
- [PIPE_FORMAT_R16G16B16X16_SINT] = 0,
- [PIPE_FORMAT_R32G32B32X32_FLOAT] = GEN6_FORMAT_R32G32B32X32_FLOAT,
- [PIPE_FORMAT_R32G32B32X32_UINT] = 0,
- [PIPE_FORMAT_R32G32B32X32_SINT] = 0,
- [PIPE_FORMAT_R8A8_SNORM] = 0,
- [PIPE_FORMAT_R16A16_UNORM] = 0,
- [PIPE_FORMAT_R16A16_SNORM] = 0,
- [PIPE_FORMAT_R16A16_FLOAT] = 0,
- [PIPE_FORMAT_R32A32_FLOAT] = 0,
- [PIPE_FORMAT_R8A8_UINT] = 0,
- [PIPE_FORMAT_R8A8_SINT] = 0,
- [PIPE_FORMAT_R16A16_UINT] = 0,
- [PIPE_FORMAT_R16A16_SINT] = 0,
- [PIPE_FORMAT_R32A32_UINT] = 0,
- [PIPE_FORMAT_R32A32_SINT] = 0,
- [PIPE_FORMAT_R10G10B10A2_UINT] = GEN6_FORMAT_R10G10B10A2_UINT,
- [PIPE_FORMAT_B5G6R5_SRGB] = GEN6_FORMAT_B5G6R5_UNORM_SRGB,
- };
- int sfmt = format_mapping[format];
-
- /* GEN6_FORMAT_R32G32B32A32_FLOAT happens to be 0 */
- if (!sfmt && format != PIPE_FORMAT_R32G32B32A32_FLOAT)
- sfmt = -1;
-
- return sfmt;
-}
diff --git a/src/gallium/drivers/ilo/core/ilo_image.c b/src/gallium/drivers/ilo/core/ilo_image.c
index 22c8ef2620a..0d837d8a9d5 100644
--- a/src/gallium/drivers/ilo/core/ilo_image.c
+++ b/src/gallium/drivers/ilo/core/ilo_image.c
@@ -675,9 +675,12 @@ img_init_size_and_format(struct ilo_image *img,
enum pipe_format format = templ->format;
bool require_separate_stencil = false;
+ img->target = templ->target;
img->width0 = templ->width0;
img->height0 = templ->height0;
img->depth0 = templ->depth0;
+ img->array_size = templ->array_size;
+ img->level_count = templ->last_level + 1;
img->sample_count = (templ->nr_samples) ? templ->nr_samples : 1;
/*
@@ -794,6 +797,10 @@ img_want_hiz(const struct ilo_image *img,
if (ilo_debug & ILO_DEBUG_NOHIZ)
return false;
+ /* we want 8x4 aligned levels */
+ if (templ->target == PIPE_TEXTURE_1D)
+ return false;
+
if (!(templ->bind & PIPE_BIND_DEPTH_STENCIL))
return false;
@@ -1343,9 +1350,12 @@ img_init_for_transfer(struct ilo_image *img,
img->aux.type = ILO_IMAGE_AUX_NONE;
+ img->target = templ->target;
img->width0 = templ->width0;
img->height0 = templ->height0;
img->depth0 = templ->depth0;
+ img->array_size = templ->array_size;
+ img->level_count = 1;
img->sample_count = 1;
img->format = templ->format;
@@ -1386,6 +1396,8 @@ void ilo_image_init(struct ilo_image *img,
struct ilo_image_params params;
bool transfer_only;
+ assert(ilo_is_zeroed(img, sizeof(*img)));
+
/* use transfer layout when the texture is never bound to GPU */
transfer_only = !(templ->bind & ~(PIPE_BIND_TRANSFER_WRITE |
PIPE_BIND_TRANSFER_READ));
@@ -1411,6 +1423,8 @@ ilo_image_init_for_imported(struct ilo_image *img,
{
struct ilo_image_params params;
+ assert(ilo_is_zeroed(img, sizeof(*img)));
+
if ((tiling == GEN6_TILING_X && bo_stride % 512) ||
(tiling == GEN6_TILING_Y && bo_stride % 128) ||
(tiling == GEN8_TILING_W && bo_stride % 64))
@@ -1435,3 +1449,22 @@ ilo_image_init_for_imported(struct ilo_image *img,
return true;
}
+
+bool
+ilo_image_disable_aux(struct ilo_image *img, const struct ilo_dev *dev)
+{
+ /* HiZ is required for separate stencil on Gen6 */
+ if (ilo_dev_gen(dev) == ILO_GEN(6) &&
+ img->aux.type == ILO_IMAGE_AUX_HIZ &&
+ img->separate_stencil)
+ return false;
+
+ /* MCS is required for multisample images */
+ if (img->aux.type == ILO_IMAGE_AUX_MCS &&
+ img->sample_count > 1)
+ return false;
+
+ img->aux.enables = 0x0;
+
+ return true;
+}
diff --git a/src/gallium/drivers/ilo/core/ilo_image.h b/src/gallium/drivers/ilo/core/ilo_image.h
index 4956bdae2ee..af15e856028 100644
--- a/src/gallium/drivers/ilo/core/ilo_image.h
+++ b/src/gallium/drivers/ilo/core/ilo_image.h
@@ -88,10 +88,14 @@ struct ilo_image_lod {
* Texture layout.
*/
struct ilo_image {
+ enum pipe_texture_target target;
+
/* size, format, etc for programming hardware states */
unsigned width0;
unsigned height0;
unsigned depth0;
+ unsigned array_size;
+ unsigned level_count;
unsigned sample_count;
enum pipe_format format;
bool separate_stencil;
@@ -125,8 +129,6 @@ struct ilo_image {
bool scanout;
- struct intel_bo *bo;
-
struct {
enum ilo_image_aux_type type;
@@ -140,8 +142,12 @@ struct ilo_image {
unsigned bo_stride;
unsigned bo_height;
+ /* managed by users */
struct intel_bo *bo;
} aux;
+
+ /* managed by users */
+ struct intel_bo *bo;
};
struct pipe_resource;
@@ -158,31 +164,13 @@ ilo_image_init_for_imported(struct ilo_image *img,
enum gen_surface_tiling tiling,
unsigned bo_stride);
-static inline void
-ilo_image_cleanup(struct ilo_image *img)
-{
- intel_bo_unref(img->bo);
- intel_bo_unref(img->aux.bo);
-}
-
-static inline void
-ilo_image_set_bo(struct ilo_image *img, struct intel_bo *bo)
-{
- intel_bo_unref(img->bo);
- img->bo = intel_bo_ref(bo);
-}
-
-static inline void
-ilo_image_set_aux_bo(struct ilo_image *img, struct intel_bo *bo)
-{
- intel_bo_unref(img->aux.bo);
- img->aux.bo = intel_bo_ref(bo);
-}
+bool
+ilo_image_disable_aux(struct ilo_image *img, const struct ilo_dev *dev);
static inline bool
ilo_image_can_enable_aux(const struct ilo_image *img, unsigned level)
{
- return (img->aux.bo && (img->aux.enables & (1 << level)));
+ return (img->aux.enables & (1 << level));
}
/**
diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d.h b/src/gallium/drivers/ilo/core/ilo_state_3d.h
deleted file mode 100644
index fdce445f733..00000000000
--- a/src/gallium/drivers/ilo/core/ilo_state_3d.h
+++ /dev/null
@@ -1,427 +0,0 @@
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (C) 2012-2014 LunarG, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Chia-I Wu <[email protected]>
- */
-
-#ifndef ILO_STATE_3D_H
-#define ILO_STATE_3D_H
-
-#include "genhw/genhw.h"
-#include "pipe/p_state.h"
-
-#include "ilo_core.h"
-#include "ilo_dev.h"
-
-/**
- * \see brw_context.h
- */
-#define ILO_MAX_DRAW_BUFFERS 8
-#define ILO_MAX_CONST_BUFFERS (1 + 12)
-#define ILO_MAX_SAMPLER_VIEWS 16
-#define ILO_MAX_SAMPLERS 16
-#define ILO_MAX_SO_BINDINGS 64
-#define ILO_MAX_SO_BUFFERS 4
-#define ILO_MAX_VIEWPORTS 1
-
-#define ILO_MAX_SURFACES 256
-
-struct intel_bo;
-struct ilo_buffer;
-struct ilo_image;
-struct ilo_shader_state;
-
-struct ilo_vb_state {
- struct pipe_vertex_buffer states[PIPE_MAX_ATTRIBS];
- uint32_t enabled_mask;
-};
-
-struct ilo_ib_state {
- struct pipe_resource *buffer;
- const void *user_buffer;
- unsigned offset;
- unsigned index_size;
-
- /* these are not valid until the state is finalized */
- struct pipe_resource *hw_resource;
- unsigned hw_index_size;
- /* an offset to be added to pipe_draw_info::start */
- int64_t draw_start_offset;
-};
-
-struct ilo_ve_cso {
- /* VERTEX_ELEMENT_STATE */
- uint32_t payload[2];
-};
-
-struct ilo_ve_state {
- struct ilo_ve_cso cso[PIPE_MAX_ATTRIBS];
- unsigned count;
-
- unsigned instance_divisors[PIPE_MAX_ATTRIBS];
- unsigned vb_mapping[PIPE_MAX_ATTRIBS];
- unsigned vb_count;
-
- /* these are not valid until the state is finalized */
- struct ilo_ve_cso edgeflag_cso;
- bool last_cso_edgeflag;
-
- struct ilo_ve_cso nosrc_cso;
- bool prepend_nosrc_cso;
-};
-
-struct ilo_so_state {
- struct pipe_stream_output_target *states[ILO_MAX_SO_BUFFERS];
- unsigned count;
- unsigned append_bitmask;
-
- bool enabled;
-};
-
-struct ilo_viewport_cso {
- /* matrix form */
- float m00, m11, m22, m30, m31, m32;
-
- /* guardband in NDC space */
- float min_gbx, min_gby, max_gbx, max_gby;
-
- /* viewport in screen space */
- float min_x, min_y, min_z;
- float max_x, max_y, max_z;
-};
-
-struct ilo_viewport_state {
- struct ilo_viewport_cso cso[ILO_MAX_VIEWPORTS];
- unsigned count;
-
- struct pipe_viewport_state viewport0;
-};
-
-struct ilo_scissor_state {
- /* SCISSOR_RECT */
- uint32_t payload[ILO_MAX_VIEWPORTS * 2];
-
- struct pipe_scissor_state scissor0;
-};
-
-struct ilo_rasterizer_clip {
- /* 3DSTATE_CLIP */
- uint32_t payload[3];
-
- uint32_t can_enable_guardband;
-};
-
-struct ilo_rasterizer_sf {
- /* 3DSTATE_SF */
- uint32_t payload[3];
- uint32_t dw_msaa;
-
- /* Global Depth Offset Constant/Scale/Clamp */
- uint32_t dw_depth_offset_const;
- uint32_t dw_depth_offset_scale;
- uint32_t dw_depth_offset_clamp;
-
- /* Gen8+ 3DSTATE_RASTER */
- uint32_t dw_raster;
-};
-
-struct ilo_rasterizer_wm {
- /* 3DSTATE_WM */
- uint32_t payload[2];
- uint32_t dw_msaa_rast;
- uint32_t dw_msaa_disp;
-};
-
-struct ilo_rasterizer_state {
- struct pipe_rasterizer_state state;
-
- struct ilo_rasterizer_clip clip;
- struct ilo_rasterizer_sf sf;
- struct ilo_rasterizer_wm wm;
-};
-
-struct ilo_dsa_state {
- /* DEPTH_STENCIL_STATE or Gen8+ 3DSTATE_WM_DEPTH_STENCIL */
- uint32_t payload[3];
-
- uint32_t dw_blend_alpha;
- uint32_t dw_ps_blend_alpha;
- ubyte alpha_ref;
-};
-
-struct ilo_blend_cso {
- /* BLEND_STATE */
- uint32_t payload[2];
-
- uint32_t dw_blend;
- uint32_t dw_blend_dst_alpha_forced_one;
-};
-
-struct ilo_blend_state {
- struct ilo_blend_cso cso[ILO_MAX_DRAW_BUFFERS];
-
- bool dual_blend;
- bool alpha_to_coverage;
-
- uint32_t dw_shared;
- uint32_t dw_alpha_mod;
- uint32_t dw_logicop;
-
- /* a part of 3DSTATE_PS_BLEND */
- uint32_t dw_ps_blend;
- uint32_t dw_ps_blend_dst_alpha_forced_one;
-};
-
-struct ilo_sampler_cso {
- /* SAMPLER_STATE and SAMPLER_BORDER_COLOR_STATE */
- uint32_t payload[15];
-
- uint32_t dw_filter;
- uint32_t dw_filter_aniso;
- uint32_t dw_wrap;
- uint32_t dw_wrap_1d;
- uint32_t dw_wrap_cube;
-
- bool anisotropic;
- bool saturate_r;
- bool saturate_s;
- bool saturate_t;
-};
-
-struct ilo_sampler_state {
- const struct ilo_sampler_cso *cso[ILO_MAX_SAMPLERS];
-};
-
-struct ilo_view_surface {
- /* SURFACE_STATE */
- uint32_t payload[13];
- struct intel_bo *bo;
-
- uint32_t scanout;
-};
-
-struct ilo_view_cso {
- struct pipe_sampler_view base;
-
- struct ilo_view_surface surface;
-};
-
-struct ilo_view_state {
- struct pipe_sampler_view *states[ILO_MAX_SAMPLER_VIEWS];
- unsigned count;
-};
-
-struct ilo_cbuf_cso {
- struct pipe_resource *resource;
- struct ilo_view_surface surface;
-
- /*
- * this CSO is not so constant because user buffer needs to be uploaded in
- * finalize_constant_buffers()
- */
- const void *user_buffer;
- unsigned user_buffer_size;
-};
-
-struct ilo_cbuf_state {
- struct ilo_cbuf_cso cso[ILO_MAX_CONST_BUFFERS];
- uint32_t enabled_mask;
-};
-
-struct ilo_resource_state {
- struct pipe_surface *states[PIPE_MAX_SHADER_RESOURCES];
- unsigned count;
-};
-
-struct ilo_surface_cso {
- struct pipe_surface base;
-
- bool is_rt;
- union {
- struct ilo_view_surface rt;
- struct ilo_zs_surface {
- uint32_t payload[12];
- uint32_t dw_aligned_8x4;
-
- struct intel_bo *bo;
- struct intel_bo *hiz_bo;
- struct intel_bo *separate_s8_bo;
- } zs;
- } u;
-};
-
-struct ilo_fb_state {
- struct pipe_framebuffer_state state;
-
- struct ilo_view_surface null_rt;
- struct ilo_zs_surface null_zs;
-
- struct ilo_fb_blend_caps {
- bool can_logicop;
- bool can_blend;
- bool can_alpha_test;
- bool dst_alpha_forced_one;
- } blend_caps[PIPE_MAX_COLOR_BUFS];
-
- unsigned num_samples;
-};
-
-struct ilo_shader_cso {
- uint32_t payload[5];
-};
-
-/**
- * Translate a pipe texture target to the matching hardware surface type.
- */
-static inline int
-ilo_gpe_gen6_translate_texture(enum pipe_texture_target target)
-{
- switch (target) {
- case PIPE_BUFFER:
- return GEN6_SURFTYPE_BUFFER;
- case PIPE_TEXTURE_1D:
- case PIPE_TEXTURE_1D_ARRAY:
- return GEN6_SURFTYPE_1D;
- case PIPE_TEXTURE_2D:
- case PIPE_TEXTURE_RECT:
- case PIPE_TEXTURE_2D_ARRAY:
- return GEN6_SURFTYPE_2D;
- case PIPE_TEXTURE_3D:
- return GEN6_SURFTYPE_3D;
- case PIPE_TEXTURE_CUBE:
- case PIPE_TEXTURE_CUBE_ARRAY:
- return GEN6_SURFTYPE_CUBE;
- default:
- assert(!"unknown texture target");
- return GEN6_SURFTYPE_BUFFER;
- }
-}
-
-void
-ilo_gpe_init_ve(const struct ilo_dev *dev,
- unsigned num_states,
- const struct pipe_vertex_element *states,
- struct ilo_ve_state *ve);
-
-void
-ilo_gpe_set_ve_edgeflag(const struct ilo_dev *dev,
- struct ilo_ve_cso *cso);
-
-void
-ilo_gpe_init_ve_nosrc(const struct ilo_dev *dev,
- int comp0, int comp1, int comp2, int comp3,
- struct ilo_ve_cso *cso);
-
-void
-ilo_gpe_set_viewport_cso(const struct ilo_dev *dev,
- const struct pipe_viewport_state *state,
- struct ilo_viewport_cso *vp);
-
-void
-ilo_gpe_set_scissor(const struct ilo_dev *dev,
- unsigned start_slot,
- unsigned num_states,
- const struct pipe_scissor_state *states,
- struct ilo_scissor_state *scissor);
-
-void
-ilo_gpe_set_scissor_null(const struct ilo_dev *dev,
- struct ilo_scissor_state *scissor);
-
-void
-ilo_gpe_init_rasterizer(const struct ilo_dev *dev,
- const struct pipe_rasterizer_state *state,
- struct ilo_rasterizer_state *rasterizer);
-void
-ilo_gpe_init_dsa(const struct ilo_dev *dev,
- const struct pipe_depth_stencil_alpha_state *state,
- struct ilo_dsa_state *dsa);
-
-void
-ilo_gpe_init_blend(const struct ilo_dev *dev,
- const struct pipe_blend_state *state,
- struct ilo_blend_state *blend);
-
-void
-ilo_gpe_init_sampler_cso(const struct ilo_dev *dev,
- const struct pipe_sampler_state *state,
- struct ilo_sampler_cso *sampler);
-
-void
-ilo_gpe_init_view_surface_null(const struct ilo_dev *dev,
- unsigned width, unsigned height,
- unsigned depth, unsigned level,
- struct ilo_view_surface *surf);
-
-void
-ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev *dev,
- const struct ilo_buffer *buf,
- unsigned offset, unsigned size,
- unsigned struct_size,
- enum pipe_format elem_format,
- bool is_rt, bool render_cache_rw,
- struct ilo_view_surface *surf);
-
-void
-ilo_gpe_init_view_surface_for_image(const struct ilo_dev *dev,
- const struct ilo_image *img,
- enum pipe_texture_target target,
- enum pipe_format format,
- unsigned first_level,
- unsigned num_levels,
- unsigned first_layer,
- unsigned num_layers,
- bool is_rt,
- struct ilo_view_surface *surf);
-
-void
-ilo_gpe_init_zs_surface(const struct ilo_dev *dev,
- const struct ilo_image *img,
- const struct ilo_image *s8_img,
- enum pipe_texture_target target,
- enum pipe_format format, unsigned level,
- unsigned first_layer, unsigned num_layers,
- struct ilo_zs_surface *zs);
-
-void
-ilo_gpe_init_vs_cso(const struct ilo_dev *dev,
- const struct ilo_shader_state *vs,
- struct ilo_shader_cso *cso);
-
-void
-ilo_gpe_init_gs_cso(const struct ilo_dev *dev,
- const struct ilo_shader_state *gs,
- struct ilo_shader_cso *cso);
-
-void
-ilo_gpe_init_fs_cso(const struct ilo_dev *dev,
- const struct ilo_shader_state *fs,
- struct ilo_shader_cso *cso);
-
-void
-ilo_gpe_set_fb(const struct ilo_dev *dev,
- const struct pipe_framebuffer_state *state,
- struct ilo_fb_state *fb);
-
-#endif /* ILO_STATE_3D_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c b/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c
deleted file mode 100644
index 5a4c5dde7e7..00000000000
--- a/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c
+++ /dev/null
@@ -1,2222 +0,0 @@
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (C) 2012-2014 LunarG, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Chia-I Wu <[email protected]>
- */
-
-#include "genhw/genhw.h"
-#include "util/u_dual_blend.h"
-#include "util/u_framebuffer.h"
-#include "util/u_half.h"
-
-#include "ilo_format.h"
-#include "ilo_image.h"
-#include "ilo_state_3d.h"
-#include "../ilo_shader.h"
-
-static void
-rasterizer_init_clip(const struct ilo_dev *dev,
- const struct pipe_rasterizer_state *state,
- struct ilo_rasterizer_clip *clip)
-{
- uint32_t dw1, dw2, dw3;
-
- ILO_DEV_ASSERT(dev, 6, 8);
-
- dw1 = GEN6_CLIP_DW1_STATISTICS;
-
- if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 219:
- *
- * "Workaround : Due to Hardware issue "EarlyCull" needs to be
- * enabled only for the cases where the incoming primitive topology
- * into the clipper guaranteed to be Trilist."
- *
- * What does this mean?
- */
- dw1 |= 0 << 19 |
- GEN7_CLIP_DW1_EARLY_CULL_ENABLE;
-
- if (ilo_dev_gen(dev) < ILO_GEN(8)) {
- if (state->front_ccw)
- dw1 |= GEN7_CLIP_DW1_FRONTWINDING_CCW;
-
- switch (state->cull_face) {
- case PIPE_FACE_NONE:
- dw1 |= GEN7_CLIP_DW1_CULLMODE_NONE;
- break;
- case PIPE_FACE_FRONT:
- dw1 |= GEN7_CLIP_DW1_CULLMODE_FRONT;
- break;
- case PIPE_FACE_BACK:
- dw1 |= GEN7_CLIP_DW1_CULLMODE_BACK;
- break;
- case PIPE_FACE_FRONT_AND_BACK:
- dw1 |= GEN7_CLIP_DW1_CULLMODE_BOTH;
- break;
- }
- }
- }
-
- dw2 = GEN6_CLIP_DW2_CLIP_ENABLE |
- GEN6_CLIP_DW2_XY_TEST_ENABLE |
- state->clip_plane_enable << GEN6_CLIP_DW2_UCP_CLIP_ENABLES__SHIFT |
- GEN6_CLIP_DW2_CLIPMODE_NORMAL;
-
- if (state->clip_halfz)
- dw2 |= GEN6_CLIP_DW2_APIMODE_D3D;
- else
- dw2 |= GEN6_CLIP_DW2_APIMODE_OGL;
-
- if (ilo_dev_gen(dev) < ILO_GEN(8) && state->depth_clip)
- dw2 |= GEN6_CLIP_DW2_Z_TEST_ENABLE;
-
- if (state->flatshade_first) {
- dw2 |= 0 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT |
- 0 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT |
- 1 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT;
- }
- else {
- dw2 |= 2 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT |
- 1 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT |
- 2 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT;
- }
-
- dw3 = 0x1 << GEN6_CLIP_DW3_MIN_POINT_WIDTH__SHIFT |
- 0x7ff << GEN6_CLIP_DW3_MAX_POINT_WIDTH__SHIFT;
-
- clip->payload[0] = dw1;
- clip->payload[1] = dw2;
- clip->payload[2] = dw3;
-
- clip->can_enable_guardband = true;
-
- /*
- * There are several reasons that guard band test should be disabled
- *
- * - GL wide points (to avoid partially visibie object)
- * - GL wide or AA lines (to avoid partially visibie object)
- */
- if (state->point_size_per_vertex || state->point_size > 1.0f)
- clip->can_enable_guardband = false;
- if (state->line_smooth || state->line_width > 1.0f)
- clip->can_enable_guardband = false;
-}
-
-static void
-rasterizer_init_sf_depth_offset_gen6(const struct ilo_dev *dev,
- const struct pipe_rasterizer_state *state,
- struct ilo_rasterizer_sf *sf)
-{
- ILO_DEV_ASSERT(dev, 6, 8);
-
- /*
- * Scale the constant term. The minimum representable value used by the HW
- * is not large enouch to be the minimum resolvable difference.
- */
- sf->dw_depth_offset_const = fui(state->offset_units * 2.0f);
- sf->dw_depth_offset_scale = fui(state->offset_scale);
- sf->dw_depth_offset_clamp = fui(state->offset_clamp);
-}
-
-static void
-rasterizer_init_sf_gen6(const struct ilo_dev *dev,
- const struct pipe_rasterizer_state *state,
- struct ilo_rasterizer_sf *sf)
-{
- int line_width, point_width;
- uint32_t dw1, dw2, dw3;
-
- ILO_DEV_ASSERT(dev, 6, 7.5);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 248:
- *
- * "This bit (Statistics Enable) should be set whenever clipping is
- * enabled and the Statistics Enable bit is set in CLIP_STATE. It
- * should be cleared if clipping is disabled or Statistics Enable in
- * CLIP_STATE is clear."
- */
- dw1 = GEN7_SF_DW1_STATISTICS |
- GEN7_SF_DW1_VIEWPORT_ENABLE;
-
- /* XXX GEN6 path seems to work fine for GEN7 */
- if (false && ilo_dev_gen(dev) >= ILO_GEN(7)) {
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 258:
- *
- * "This bit (Legacy Global Depth Bias Enable, Global Depth Offset
- * Enable Solid , Global Depth Offset Enable Wireframe, and Global
- * Depth Offset Enable Point) should be set whenever non zero depth
- * bias (Slope, Bias) values are used. Setting this bit may have
- * some degradation of performance for some workloads."
- */
- if (state->offset_tri || state->offset_line || state->offset_point) {
- /* XXX need to scale offset_const according to the depth format */
- dw1 |= GEN7_SF_DW1_LEGACY_DEPTH_OFFSET;
-
- dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_SOLID |
- GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME |
- GEN7_SF_DW1_DEPTH_OFFSET_POINT;
- }
- } else {
- if (state->offset_tri)
- dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_SOLID;
- if (state->offset_line)
- dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME;
- if (state->offset_point)
- dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_POINT;
- }
-
- switch (state->fill_front) {
- case PIPE_POLYGON_MODE_FILL:
- dw1 |= GEN7_SF_DW1_FRONTFACE_SOLID;
- break;
- case PIPE_POLYGON_MODE_LINE:
- dw1 |= GEN7_SF_DW1_FRONTFACE_WIREFRAME;
- break;
- case PIPE_POLYGON_MODE_POINT:
- dw1 |= GEN7_SF_DW1_FRONTFACE_POINT;
- break;
- }
-
- switch (state->fill_back) {
- case PIPE_POLYGON_MODE_FILL:
- dw1 |= GEN7_SF_DW1_BACKFACE_SOLID;
- break;
- case PIPE_POLYGON_MODE_LINE:
- dw1 |= GEN7_SF_DW1_BACKFACE_WIREFRAME;
- break;
- case PIPE_POLYGON_MODE_POINT:
- dw1 |= GEN7_SF_DW1_BACKFACE_POINT;
- break;
- }
-
- if (state->front_ccw)
- dw1 |= GEN7_SF_DW1_FRONTWINDING_CCW;
-
- dw2 = 0;
-
- if (state->line_smooth) {
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 251:
- *
- * "This field (Anti-aliasing Enable) must be disabled if any of the
- * render targets have integer (UINT or SINT) surface format."
- *
- * From the Sandy Bridge PRM, volume 2 part 1, page 317:
- *
- * "This field (Hierarchical Depth Buffer Enable) must be disabled
- * if Anti-aliasing Enable in 3DSTATE_SF is enabled.
- *
- * TODO We do not check those yet.
- */
- dw2 |= GEN7_SF_DW2_AA_LINE_ENABLE |
- GEN7_SF_DW2_AA_LINE_CAP_1_0;
- }
-
- switch (state->cull_face) {
- case PIPE_FACE_NONE:
- dw2 |= GEN7_SF_DW2_CULLMODE_NONE;
- break;
- case PIPE_FACE_FRONT:
- dw2 |= GEN7_SF_DW2_CULLMODE_FRONT;
- break;
- case PIPE_FACE_BACK:
- dw2 |= GEN7_SF_DW2_CULLMODE_BACK;
- break;
- case PIPE_FACE_FRONT_AND_BACK:
- dw2 |= GEN7_SF_DW2_CULLMODE_BOTH;
- break;
- }
-
- /*
- * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
- * pixels in the minor direction. We have to make the lines slightly
- * thicker, 0.5 pixel on both sides, so that they intersect that many
- * pixels are considered into the lines.
- *
- * Line width is in U3.7.
- */
- line_width = (int)
- ((state->line_width + (float) state->line_smooth) * 128.0f + 0.5f);
- line_width = CLAMP(line_width, 0, 1023);
-
- /* use GIQ rules */
- if (line_width == 128 && !state->line_smooth)
- line_width = 0;
-
- dw2 |= line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT;
-
- if (ilo_dev_gen(dev) == ILO_GEN(7.5) && state->line_stipple_enable)
- dw2 |= GEN75_SF_DW2_LINE_STIPPLE_ENABLE;
-
- if (state->scissor)
- dw2 |= GEN7_SF_DW2_SCISSOR_ENABLE;
-
- dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE |
- GEN7_SF_DW3_SUBPIXEL_8BITS;
-
- if (state->line_last_pixel)
- dw3 |= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE;
-
- if (state->flatshade_first) {
- dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
- 0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
- 1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
- } else {
- dw3 |= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
- 1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
- 2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
- }
-
- if (!state->point_size_per_vertex)
- dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH;
-
- /* in U8.3 */
- point_width = (int) (state->point_size * 8.0f + 0.5f);
- point_width = CLAMP(point_width, 1, 2047);
-
- dw3 |= point_width;
-
- STATIC_ASSERT(Elements(sf->payload) >= 3);
- sf->payload[0] = dw1;
- sf->payload[1] = dw2;
- sf->payload[2] = dw3;
-
- if (state->multisample) {
- sf->dw_msaa = GEN7_SF_DW2_MSRASTMODE_ON_PATTERN;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 251:
- *
- * "Software must not program a value of 0.0 when running in
- * MSRASTMODE_ON_xxx modes - zero-width lines are not available
- * when multisampling rasterization is enabled."
- */
- if (!line_width) {
- line_width = 128; /* 1.0f */
-
- sf->dw_msaa |= line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT;
- }
- } else {
- sf->dw_msaa = 0;
- }
-
- rasterizer_init_sf_depth_offset_gen6(dev, state, sf);
- /* 3DSTATE_RASTER is Gen8+ only */
- sf->dw_raster = 0;
-}
-
-static uint32_t
-rasterizer_get_sf_raster_gen8(const struct ilo_dev *dev,
- const struct pipe_rasterizer_state *state)
-{
- uint32_t dw = 0;
-
- ILO_DEV_ASSERT(dev, 8, 8);
-
- if (state->front_ccw)
- dw |= GEN8_RASTER_DW1_FRONTWINDING_CCW;
-
- switch (state->cull_face) {
- case PIPE_FACE_NONE:
- dw |= GEN8_RASTER_DW1_CULLMODE_NONE;
- break;
- case PIPE_FACE_FRONT:
- dw |= GEN8_RASTER_DW1_CULLMODE_FRONT;
- break;
- case PIPE_FACE_BACK:
- dw |= GEN8_RASTER_DW1_CULLMODE_BACK;
- break;
- case PIPE_FACE_FRONT_AND_BACK:
- dw |= GEN8_RASTER_DW1_CULLMODE_BOTH;
- break;
- }
-
- if (state->point_smooth)
- dw |= GEN8_RASTER_DW1_SMOOTH_POINT_ENABLE;
-
- if (state->multisample)
- dw |= GEN8_RASTER_DW1_API_MULTISAMPLE_ENABLE;
-
- if (state->offset_tri)
- dw|= GEN8_RASTER_DW1_DEPTH_OFFSET_SOLID;
- if (state->offset_line)
- dw|= GEN8_RASTER_DW1_DEPTH_OFFSET_WIREFRAME;
- if (state->offset_point)
- dw|= GEN8_RASTER_DW1_DEPTH_OFFSET_POINT;
-
- switch (state->fill_front) {
- case PIPE_POLYGON_MODE_FILL:
- dw |= GEN8_RASTER_DW1_FRONTFACE_SOLID;
- break;
- case PIPE_POLYGON_MODE_LINE:
- dw |= GEN8_RASTER_DW1_FRONTFACE_WIREFRAME;
- break;
- case PIPE_POLYGON_MODE_POINT:
- dw |= GEN8_RASTER_DW1_FRONTFACE_POINT;
- break;
- }
-
- switch (state->fill_back) {
- case PIPE_POLYGON_MODE_FILL:
- dw |= GEN8_RASTER_DW1_BACKFACE_SOLID;
- break;
- case PIPE_POLYGON_MODE_LINE:
- dw |= GEN8_RASTER_DW1_BACKFACE_WIREFRAME;
- break;
- case PIPE_POLYGON_MODE_POINT:
- dw |= GEN8_RASTER_DW1_BACKFACE_POINT;
- break;
- }
-
- if (state->line_smooth)
- dw |= GEN8_RASTER_DW1_AA_LINE_ENABLE;
-
- if (state->scissor)
- dw |= GEN8_RASTER_DW1_SCISSOR_ENABLE;
-
- if (state->depth_clip)
- dw |= GEN8_RASTER_DW1_Z_TEST_ENABLE;
-
- return dw;
-}
-
-static void
-rasterizer_init_sf_gen8(const struct ilo_dev *dev,
- const struct pipe_rasterizer_state *state,
- struct ilo_rasterizer_sf *sf)
-{
- int line_width, point_width;
- uint32_t dw1, dw2, dw3;
-
- ILO_DEV_ASSERT(dev, 8, 8);
-
- /* in U3.7 */
- line_width = (int)
- ((state->line_width + (float) state->line_smooth) * 128.0f + 0.5f);
- line_width = CLAMP(line_width, 0, 1023);
-
- /* use GIQ rules */
- if (line_width == 128 && !state->line_smooth)
- line_width = 0;
-
- /* in U8.3 */
- point_width = (int) (state->point_size * 8.0f + 0.5f);
- point_width = CLAMP(point_width, 1, 2047);
-
- dw1 = GEN7_SF_DW1_STATISTICS |
- GEN7_SF_DW1_VIEWPORT_ENABLE;
-
- dw2 = line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT;
- if (state->line_smooth)
- dw2 |= GEN7_SF_DW2_AA_LINE_CAP_1_0;
-
- dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE |
- GEN7_SF_DW3_SUBPIXEL_8BITS |
- point_width;
-
- if (state->line_last_pixel)
- dw3 |= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE;
-
- if (state->flatshade_first) {
- dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
- 0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
- 1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
- } else {
- dw3 |= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
- 1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
- 2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
- }
-
- if (!state->point_size_per_vertex)
- dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH;
-
- dw3 |= point_width;
-
- STATIC_ASSERT(Elements(sf->payload) >= 3);
- sf->payload[0] = dw1;
- sf->payload[1] = dw2;
- sf->payload[2] = dw3;
-
- rasterizer_init_sf_depth_offset_gen6(dev, state, sf);
-
- sf->dw_msaa = 0;
- sf->dw_raster = rasterizer_get_sf_raster_gen8(dev, state);
-}
-
-static void
-rasterizer_init_wm_gen6(const struct ilo_dev *dev,
- const struct pipe_rasterizer_state *state,
- struct ilo_rasterizer_wm *wm)
-{
- uint32_t dw5, dw6;
-
- ILO_DEV_ASSERT(dev, 6, 6);
-
- /* only the FF unit states are set, as in GEN7 */
-
- dw5 = GEN6_WM_DW5_AA_LINE_WIDTH_2_0;
-
- /* same value as in 3DSTATE_SF */
- if (state->line_smooth)
- dw5 |= GEN6_WM_DW5_AA_LINE_CAP_1_0;
-
- if (state->poly_stipple_enable)
- dw5 |= GEN6_WM_DW5_POLY_STIPPLE_ENABLE;
- if (state->line_stipple_enable)
- dw5 |= GEN6_WM_DW5_LINE_STIPPLE_ENABLE;
-
- /*
- * assertion that makes sure
- *
- * dw6 |= wm->dw_msaa_rast | wm->dw_msaa_disp;
- *
- * is valid
- */
- STATIC_ASSERT(GEN6_WM_DW6_MSRASTMODE_OFF_PIXEL == 0 &&
- GEN6_WM_DW6_MSDISPMODE_PERSAMPLE == 0);
- dw6 = GEN6_WM_DW6_ZW_INTERP_PIXEL;
-
- if (state->bottom_edge_rule)
- dw6 |= GEN6_WM_DW6_POINT_RASTRULE_UPPER_RIGHT;
-
- wm->dw_msaa_rast =
- (state->multisample) ? GEN6_WM_DW6_MSRASTMODE_ON_PATTERN : 0;
- wm->dw_msaa_disp = GEN6_WM_DW6_MSDISPMODE_PERPIXEL;
-
- STATIC_ASSERT(Elements(wm->payload) >= 2);
- wm->payload[0] = dw5;
- wm->payload[1] = dw6;
-}
-
-static void
-rasterizer_init_wm_gen7(const struct ilo_dev *dev,
- const struct pipe_rasterizer_state *state,
- struct ilo_rasterizer_wm *wm)
-{
- uint32_t dw1, dw2;
-
- ILO_DEV_ASSERT(dev, 7, 7.5);
-
- /*
- * assertion that makes sure
- *
- * dw1 |= wm->dw_msaa_rast;
- * dw2 |= wm->dw_msaa_disp;
- *
- * is valid
- */
- STATIC_ASSERT(GEN7_WM_DW1_MSRASTMODE_OFF_PIXEL == 0 &&
- GEN7_WM_DW2_MSDISPMODE_PERSAMPLE == 0);
- dw1 = GEN7_WM_DW1_ZW_INTERP_PIXEL |
- GEN7_WM_DW1_AA_LINE_WIDTH_2_0;
- dw2 = 0;
-
- /* same value as in 3DSTATE_SF */
- if (state->line_smooth)
- dw1 |= GEN7_WM_DW1_AA_LINE_CAP_1_0;
-
- if (state->poly_stipple_enable)
- dw1 |= GEN7_WM_DW1_POLY_STIPPLE_ENABLE;
- if (state->line_stipple_enable)
- dw1 |= GEN7_WM_DW1_LINE_STIPPLE_ENABLE;
-
- if (state->bottom_edge_rule)
- dw1 |= GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT;
-
- wm->dw_msaa_rast =
- (state->multisample) ? GEN7_WM_DW1_MSRASTMODE_ON_PATTERN : 0;
- wm->dw_msaa_disp = GEN7_WM_DW2_MSDISPMODE_PERPIXEL;
-
- STATIC_ASSERT(Elements(wm->payload) >= 2);
- wm->payload[0] = dw1;
- wm->payload[1] = dw2;
-}
-
-static uint32_t
-rasterizer_get_wm_gen8(const struct ilo_dev *dev,
- const struct pipe_rasterizer_state *state)
-{
- uint32_t dw;
-
- ILO_DEV_ASSERT(dev, 8, 8);
-
- dw = GEN7_WM_DW1_ZW_INTERP_PIXEL |
- GEN7_WM_DW1_AA_LINE_WIDTH_2_0;
-
- /* same value as in 3DSTATE_SF */
- if (state->line_smooth)
- dw |= GEN7_WM_DW1_AA_LINE_CAP_1_0;
-
- if (state->poly_stipple_enable)
- dw |= GEN7_WM_DW1_POLY_STIPPLE_ENABLE;
- if (state->line_stipple_enable)
- dw |= GEN7_WM_DW1_LINE_STIPPLE_ENABLE;
-
- if (state->bottom_edge_rule)
- dw |= GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT;
-
- return dw;
-}
-
-void
-ilo_gpe_init_rasterizer(const struct ilo_dev *dev,
- const struct pipe_rasterizer_state *state,
- struct ilo_rasterizer_state *rasterizer)
-{
- rasterizer_init_clip(dev, state, &rasterizer->clip);
-
- if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
- memset(&rasterizer->wm, 0, sizeof(rasterizer->wm));
- rasterizer->wm.payload[0] = rasterizer_get_wm_gen8(dev, state);
-
- rasterizer_init_sf_gen8(dev, state, &rasterizer->sf);
- } else if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
- rasterizer_init_wm_gen7(dev, state, &rasterizer->wm);
- rasterizer_init_sf_gen6(dev, state, &rasterizer->sf);
- } else {
- rasterizer_init_wm_gen6(dev, state, &rasterizer->wm);
- rasterizer_init_sf_gen6(dev, state, &rasterizer->sf);
- }
-}
-
-static void
-fs_init_cso_gen6(const struct ilo_dev *dev,
- const struct ilo_shader_state *fs,
- struct ilo_shader_cso *cso)
-{
- int start_grf, input_count, sampler_count, interps, max_threads;
- uint32_t dw2, dw4, dw5, dw6;
-
- ILO_DEV_ASSERT(dev, 6, 6);
-
- start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
- input_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
- sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);
- interps = ilo_shader_get_kernel_param(fs,
- ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);
-
- /* see brwCreateContext() */
- max_threads = (dev->gt == 2) ? 80 : 40;
-
- dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
- dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
- dw4 = start_grf << GEN6_WM_DW4_URB_GRF_START0__SHIFT |
- 0 << GEN6_WM_DW4_URB_GRF_START1__SHIFT |
- 0 << GEN6_WM_DW4_URB_GRF_START2__SHIFT;
-
- dw5 = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 275:
- *
- * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
- * PS kernel or color calculator has the ability to kill (discard)
- * pixels or samples, other than due to depth or stencil testing.
- * This bit is required to be ENABLED in the following situations:
- *
- * The API pixel shader program contains "killpix" or "discard"
- * instructions, or other code in the pixel shader kernel that can
- * cause the final pixel mask to differ from the pixel mask received
- * on dispatch.
- *
- * A sampler with chroma key enabled with kill pixel mode is used by
- * the pixel shader.
- *
- * Any render target has Alpha Test Enable or AlphaToCoverage Enable
- * enabled.
- *
- * The pixel shader kernel generates and outputs oMask.
- *
- * Note: As ClipDistance clipping is fully supported in hardware and
- * therefore not via PS instructions, there should be no need to
- * ENABLE this bit due to ClipDistance clipping."
- */
- if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
- dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 275:
- *
- * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
- * field must be set to disabled."
- *
- * TODO This is not checked yet.
- */
- if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
- dw5 |= GEN6_WM_DW5_PS_COMPUTE_DEPTH;
-
- if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
- dw5 |= GEN6_WM_DW5_PS_USE_DEPTH;
-
- if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
- dw5 |= GEN6_WM_DW5_PS_USE_W;
-
- /*
- * TODO set this bit only when
- *
- * a) fs writes colors and color is not masked, or
- * b) fs writes depth, or
- * c) fs or cc kills
- */
- if (true)
- dw5 |= GEN6_WM_DW5_PS_DISPATCH_ENABLE;
-
- assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
- dw5 |= GEN6_PS_DISPATCH_8 << GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT;
-
- dw6 = input_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT |
- GEN6_WM_DW6_PS_POSOFFSET_NONE |
- interps << GEN6_WM_DW6_BARYCENTRIC_INTERP__SHIFT;
-
- STATIC_ASSERT(Elements(cso->payload) >= 4);
- cso->payload[0] = dw2;
- cso->payload[1] = dw4;
- cso->payload[2] = dw5;
- cso->payload[3] = dw6;
-}
-
-static uint32_t
-fs_get_wm_gen7(const struct ilo_dev *dev,
- const struct ilo_shader_state *fs)
-{
- uint32_t dw;
-
- ILO_DEV_ASSERT(dev, 7, 7.5);
-
- dw = ilo_shader_get_kernel_param(fs,
- ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) <<
- GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT;
-
- /*
- * TODO set this bit only when
- *
- * a) fs writes colors and color is not masked, or
- * b) fs writes depth, or
- * c) fs or cc kills
- */
- dw |= GEN7_WM_DW1_PS_DISPATCH_ENABLE;
-
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 278:
- *
- * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that
- * the PS kernel or color calculator has the ability to kill
- * (discard) pixels or samples, other than due to depth or stencil
- * testing. This bit is required to be ENABLED in the following
- * situations:
- *
- * - The API pixel shader program contains "killpix" or "discard"
- * instructions, or other code in the pixel shader kernel that
- * can cause the final pixel mask to differ from the pixel mask
- * received on dispatch.
- *
- * - A sampler with chroma key enabled with kill pixel mode is used
- * by the pixel shader.
- *
- * - Any render target has Alpha Test Enable or AlphaToCoverage
- * Enable enabled.
- *
- * - The pixel shader kernel generates and outputs oMask.
- *
- * Note: As ClipDistance clipping is fully supported in hardware
- * and therefore not via PS instructions, there should be no need
- * to ENABLE this bit due to ClipDistance clipping."
- */
- if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
- dw |= GEN7_WM_DW1_PS_KILL_PIXEL;
-
- if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
- dw |= GEN7_WM_DW1_PSCDEPTH_ON;
-
- if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
- dw |= GEN7_WM_DW1_PS_USE_DEPTH;
-
- if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
- dw |= GEN7_WM_DW1_PS_USE_W;
-
- return dw;
-}
-
-static void
-fs_init_cso_gen7(const struct ilo_dev *dev,
- const struct ilo_shader_state *fs,
- struct ilo_shader_cso *cso)
-{
- int start_grf, sampler_count, max_threads;
- uint32_t dw2, dw4, dw5;
-
- ILO_DEV_ASSERT(dev, 7, 7.5);
-
- start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
- sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);
-
- dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
- dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
- dw4 = GEN7_PS_DW4_POSOFFSET_NONE;
-
- /* see brwCreateContext() */
- switch (ilo_dev_gen(dev)) {
- case ILO_GEN(7.5):
- max_threads = (dev->gt == 3) ? 408 : (dev->gt == 2) ? 204 : 102;
- dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT;
- dw4 |= 1 << GEN75_PS_DW4_SAMPLE_MASK__SHIFT;
- break;
- case ILO_GEN(7):
- default:
- max_threads = (dev->gt == 2) ? 172 : 48;
- dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
- break;
- }
-
- if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE))
- dw4 |= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE;
-
- if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT))
- dw4 |= GEN7_PS_DW4_ATTR_ENABLE;
-
- assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
- dw4 |= GEN6_PS_DISPATCH_8 << GEN7_PS_DW4_DISPATCH_MODE__SHIFT;
-
- dw5 = start_grf << GEN7_PS_DW5_URB_GRF_START0__SHIFT |
- 0 << GEN7_PS_DW5_URB_GRF_START1__SHIFT |
- 0 << GEN7_PS_DW5_URB_GRF_START2__SHIFT;
-
- STATIC_ASSERT(Elements(cso->payload) >= 4);
- cso->payload[0] = dw2;
- cso->payload[1] = dw4;
- cso->payload[2] = dw5;
- cso->payload[3] = fs_get_wm_gen7(dev, fs);
-}
-
-static uint32_t
-fs_get_psx_gen8(const struct ilo_dev *dev,
- const struct ilo_shader_state *fs)
-{
- uint32_t dw;
-
- ILO_DEV_ASSERT(dev, 8, 8);
-
- dw = GEN8_PSX_DW1_DISPATCH_ENABLE;
-
- if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
- dw |= GEN8_PSX_DW1_KILL_PIXEL;
- if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
- dw |= GEN8_PSX_DW1_PSCDEPTH_ON;
- if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
- dw |= GEN8_PSX_DW1_USE_DEPTH;
- if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
- dw |= GEN8_PSX_DW1_USE_W;
- if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT))
- dw |= GEN8_PSX_DW1_ATTR_ENABLE;
-
- return dw;
-}
-
-static uint32_t
-fs_get_wm_gen8(const struct ilo_dev *dev,
- const struct ilo_shader_state *fs)
-{
- ILO_DEV_ASSERT(dev, 8, 8);
-
- return ilo_shader_get_kernel_param(fs,
- ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) <<
- GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT;
-}
-
-static void
-fs_init_cso_gen8(const struct ilo_dev *dev,
- const struct ilo_shader_state *fs,
- struct ilo_shader_cso *cso)
-{
- int start_grf, sampler_count;
- uint32_t dw3, dw6, dw7;
-
- ILO_DEV_ASSERT(dev, 8, 8);
-
- start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
- sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);
-
- dw3 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
- dw3 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
- /* always 64? */
- dw6 = (64 - 2) << GEN8_PS_DW6_MAX_THREADS__SHIFT |
- GEN8_PS_DW6_POSOFFSET_NONE;
- if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE))
- dw6 |= GEN8_PS_DW6_PUSH_CONSTANT_ENABLE;
-
- assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
- dw6 |= GEN6_PS_DISPATCH_8 << GEN8_PS_DW6_DISPATCH_MODE__SHIFT;
-
- dw7 = start_grf << GEN8_PS_DW7_URB_GRF_START0__SHIFT |
- 0 << GEN8_PS_DW7_URB_GRF_START1__SHIFT |
- 0 << GEN8_PS_DW7_URB_GRF_START2__SHIFT;
-
- STATIC_ASSERT(Elements(cso->payload) >= 5);
- cso->payload[0] = dw3;
- cso->payload[1] = dw6;
- cso->payload[2] = dw7;
- cso->payload[3] = fs_get_psx_gen8(dev, fs);
- cso->payload[4] = fs_get_wm_gen8(dev, fs);
-}
-
-void
-ilo_gpe_init_fs_cso(const struct ilo_dev *dev,
- const struct ilo_shader_state *fs,
- struct ilo_shader_cso *cso)
-{
- if (ilo_dev_gen(dev) >= ILO_GEN(8))
- fs_init_cso_gen8(dev, fs, cso);
- else if (ilo_dev_gen(dev) >= ILO_GEN(7))
- fs_init_cso_gen7(dev, fs, cso);
- else
- fs_init_cso_gen6(dev, fs, cso);
-}
-
-struct ilo_zs_surface_info {
- int surface_type;
- int format;
-
- struct {
- struct intel_bo *bo;
- unsigned stride;
- unsigned qpitch;
- enum gen_surface_tiling tiling;
- uint32_t offset;
- } zs, stencil, hiz;
-
- unsigned width, height, depth;
- unsigned lod, first_layer, num_layers;
-};
-
-static void
-zs_init_info_null(const struct ilo_dev *dev,
- struct ilo_zs_surface_info *info)
-{
- ILO_DEV_ASSERT(dev, 6, 8);
-
- memset(info, 0, sizeof(*info));
-
- info->surface_type = GEN6_SURFTYPE_NULL;
- info->format = GEN6_ZFORMAT_D32_FLOAT;
- info->width = 1;
- info->height = 1;
- info->depth = 1;
- info->num_layers = 1;
-}
-
-static void
-zs_init_info(const struct ilo_dev *dev,
- const struct ilo_image *img,
- const struct ilo_image *s8_img,
- enum pipe_texture_target target,
- enum pipe_format format, unsigned level,
- unsigned first_layer, unsigned num_layers,
- struct ilo_zs_surface_info *info)
-{
- bool separate_stencil;
-
- ILO_DEV_ASSERT(dev, 6, 8);
-
- memset(info, 0, sizeof(*info));
-
- info->surface_type = ilo_gpe_gen6_translate_texture(target);
-
- if (info->surface_type == GEN6_SURFTYPE_CUBE) {
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 325-326:
- *
- * "For Other Surfaces (Cube Surfaces):
- * This field (Minimum Array Element) is ignored."
- *
- * "For Other Surfaces (Cube Surfaces):
- * This field (Render Target View Extent) is ignored."
- *
- * As such, we cannot set first_layer and num_layers on cube surfaces.
- * To work around that, treat it as a 2D surface.
- */
- info->surface_type = GEN6_SURFTYPE_2D;
- }
-
- if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
- separate_stencil = true;
- } else {
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 317:
- *
- * "This field (Separate Stencil Buffer Enable) must be set to the
- * same value (enabled or disabled) as Hierarchical Depth Buffer
- * Enable."
- */
- separate_stencil = ilo_image_can_enable_aux(img, level);
- }
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 317:
- *
- * "If this field (Hierarchical Depth Buffer Enable) is enabled, the
- * Surface Format of the depth buffer cannot be
- * D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil
- * requires the separate stencil buffer."
- *
- * From the Ironlake PRM, volume 2 part 1, page 330:
- *
- * "If this field (Separate Stencil Buffer Enable) is disabled, the
- * Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
- *
- * There is no similar restriction for GEN6. But when D24_UNORM_X8_UINT
- * is indeed used, the depth values output by the fragment shaders will
- * be different when read back.
- *
- * As for GEN7+, separate_stencil is always true.
- */
- switch (format) {
- case PIPE_FORMAT_Z16_UNORM:
- info->format = GEN6_ZFORMAT_D16_UNORM;
- break;
- case PIPE_FORMAT_Z32_FLOAT:
- info->format = GEN6_ZFORMAT_D32_FLOAT;
- break;
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- info->format = (separate_stencil) ?
- GEN6_ZFORMAT_D24_UNORM_X8_UINT :
- GEN6_ZFORMAT_D24_UNORM_S8_UINT;
- break;
- case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- info->format = (separate_stencil) ?
- GEN6_ZFORMAT_D32_FLOAT :
- GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT;
- break;
- case PIPE_FORMAT_S8_UINT:
- if (separate_stencil) {
- info->format = GEN6_ZFORMAT_D32_FLOAT;
- break;
- }
- /* fall through */
- default:
- assert(!"unsupported depth/stencil format");
- zs_init_info_null(dev, info);
- return;
- break;
- }
-
- if (format != PIPE_FORMAT_S8_UINT) {
- info->zs.bo = img->bo;
- info->zs.stride = img->bo_stride;
-
- assert(img->walk_layer_height % 4 == 0);
- info->zs.qpitch = img->walk_layer_height / 4;
-
- info->zs.tiling = img->tiling;
- info->zs.offset = 0;
- }
-
- if (s8_img || format == PIPE_FORMAT_S8_UINT) {
- info->stencil.bo = s8_img->bo;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 329:
- *
- * "The pitch must be set to 2x the value computed based on width,
- * as the stencil buffer is stored with two rows interleaved."
- *
- * For GEN7, we still dobule the stride because we did not double the
- * slice widths when initializing the layout.
- */
- info->stencil.stride = s8_img->bo_stride * 2;
-
- assert(s8_img->walk_layer_height % 4 == 0);
- info->stencil.qpitch = s8_img->walk_layer_height / 4;
-
- info->stencil.tiling = s8_img->tiling;
-
- if (ilo_dev_gen(dev) == ILO_GEN(6)) {
- unsigned x, y;
-
- assert(s8_img->walk == ILO_IMAGE_WALK_LOD);
-
- /* offset to the level */
- ilo_image_get_slice_pos(s8_img, level, 0, &x, &y);
- ilo_image_pos_to_mem(s8_img, x, y, &x, &y);
- info->stencil.offset = ilo_image_mem_to_raw(s8_img, x, y);
- }
- }
-
- if (ilo_image_can_enable_aux(img, level)) {
- info->hiz.bo = img->aux.bo;
- info->hiz.stride = img->aux.bo_stride;
-
- assert(img->aux.walk_layer_height % 4 == 0);
- info->hiz.qpitch = img->aux.walk_layer_height / 4;
-
- info->hiz.tiling = GEN6_TILING_Y;
-
- /* offset to the level */
- if (ilo_dev_gen(dev) == ILO_GEN(6))
- info->hiz.offset = img->aux.walk_lod_offsets[level];
- }
-
- info->width = img->width0;
- info->height = img->height0;
- info->depth = (target == PIPE_TEXTURE_3D) ? img->depth0 : num_layers;
-
- info->lod = level;
- info->first_layer = first_layer;
- info->num_layers = num_layers;
-}
-
-void
-ilo_gpe_init_zs_surface(const struct ilo_dev *dev,
- const struct ilo_image *img,
- const struct ilo_image *s8_img,
- enum pipe_texture_target target,
- enum pipe_format format, unsigned level,
- unsigned first_layer, unsigned num_layers,
- struct ilo_zs_surface *zs)
-{
- const int max_2d_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 16384 : 8192;
- const int max_array_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 2048 : 512;
- struct ilo_zs_surface_info info;
- uint32_t dw1, dw2, dw3, dw4, dw5, dw6;
- int align_w = 8, align_h = 4;
-
- ILO_DEV_ASSERT(dev, 6, 8);
-
- if (img) {
- zs_init_info(dev, img, s8_img, target, format,
- level, first_layer, num_layers, &info);
-
- switch (img->sample_count) {
- case 2:
- align_w /= 2;
- break;
- case 4:
- align_w /= 2;
- align_h /= 2;
- break;
- case 8:
- align_w /= 4;
- align_h /= 2;
- break;
- case 16:
- align_w /= 4;
- align_h /= 4;
- break;
- default:
- break;
- }
- } else {
- zs_init_info_null(dev, &info);
- }
-
- switch (info.surface_type) {
- case GEN6_SURFTYPE_NULL:
- break;
- case GEN6_SURFTYPE_1D:
- assert(info.width <= max_2d_size && info.height == 1 &&
- info.depth <= max_array_size);
- assert(info.first_layer < max_array_size - 1 &&
- info.num_layers <= max_array_size);
- break;
- case GEN6_SURFTYPE_2D:
- assert(info.width <= max_2d_size && info.height <= max_2d_size &&
- info.depth <= max_array_size);
- assert(info.first_layer < max_array_size - 1 &&
- info.num_layers <= max_array_size);
- break;
- case GEN6_SURFTYPE_3D:
- assert(info.width <= 2048 && info.height <= 2048 && info.depth <= 2048);
- assert(info.first_layer < 2048 && info.num_layers <= max_array_size);
- break;
- case GEN6_SURFTYPE_CUBE:
- assert(info.width <= max_2d_size && info.height <= max_2d_size &&
- info.depth == 1);
- assert(info.first_layer == 0 && info.num_layers == 1);
- assert(info.width == info.height);
- break;
- default:
- assert(!"unexpected depth surface type");
- break;
- }
-
- dw1 = info.surface_type << GEN6_DEPTH_DW1_TYPE__SHIFT |
- info.format << GEN6_DEPTH_DW1_FORMAT__SHIFT;
-
- if (info.zs.bo) {
- /* required for GEN6+ */
- assert(info.zs.tiling == GEN6_TILING_Y);
- assert(info.zs.stride > 0 && info.zs.stride < 128 * 1024 &&
- info.zs.stride % 128 == 0);
- assert(info.width <= info.zs.stride);
-
- dw1 |= (info.zs.stride - 1);
- dw2 = info.zs.offset;
- } else {
- dw2 = 0;
- }
-
- if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
- if (info.zs.bo)
- dw1 |= GEN7_DEPTH_DW1_DEPTH_WRITE_ENABLE;
-
- if (info.stencil.bo)
- dw1 |= GEN7_DEPTH_DW1_STENCIL_WRITE_ENABLE;
-
- if (info.hiz.bo)
- dw1 |= GEN7_DEPTH_DW1_HIZ_ENABLE;
-
- dw3 = (info.height - 1) << GEN7_DEPTH_DW3_HEIGHT__SHIFT |
- (info.width - 1) << GEN7_DEPTH_DW3_WIDTH__SHIFT |
- info.lod << GEN7_DEPTH_DW3_LOD__SHIFT;
-
- zs->dw_aligned_8x4 =
- (align(info.height, align_h) - 1) << GEN7_DEPTH_DW3_HEIGHT__SHIFT |
- (align(info.width, align_w) - 1) << GEN7_DEPTH_DW3_WIDTH__SHIFT |
- info.lod << GEN7_DEPTH_DW3_LOD__SHIFT;
-
- dw4 = (info.depth - 1) << GEN7_DEPTH_DW4_DEPTH__SHIFT |
- info.first_layer << GEN7_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT;
-
- dw5 = 0;
-
- dw6 = (info.num_layers - 1) << GEN7_DEPTH_DW6_RT_VIEW_EXTENT__SHIFT;
-
- if (ilo_dev_gen(dev) >= ILO_GEN(8))
- dw6 |= info.zs.qpitch;
- } else {
- /* always Y-tiled */
- dw1 |= GEN6_TILING_Y << GEN6_DEPTH_DW1_TILING__SHIFT;
-
- if (info.hiz.bo) {
- dw1 |= GEN6_DEPTH_DW1_HIZ_ENABLE |
- GEN6_DEPTH_DW1_SEPARATE_STENCIL;
- }
-
- dw3 = (info.height - 1) << GEN6_DEPTH_DW3_HEIGHT__SHIFT |
- (info.width - 1) << GEN6_DEPTH_DW3_WIDTH__SHIFT |
- info.lod << GEN6_DEPTH_DW3_LOD__SHIFT |
- GEN6_DEPTH_DW3_MIPLAYOUT_BELOW;
-
- zs->dw_aligned_8x4 =
- (align(info.height, align_h) - 1) << GEN6_DEPTH_DW3_HEIGHT__SHIFT |
- (align(info.width, align_w) - 1) << GEN6_DEPTH_DW3_WIDTH__SHIFT |
- info.lod << GEN6_DEPTH_DW3_LOD__SHIFT |
- GEN6_DEPTH_DW3_MIPLAYOUT_BELOW;
-
- dw4 = (info.depth - 1) << GEN6_DEPTH_DW4_DEPTH__SHIFT |
- info.first_layer << GEN6_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT |
- (info.num_layers - 1) << GEN6_DEPTH_DW4_RT_VIEW_EXTENT__SHIFT;
-
- dw5 = 0;
-
- dw6 = 0;
- }
-
- STATIC_ASSERT(Elements(zs->payload) >= 12);
-
- zs->payload[0] = dw1;
- zs->payload[1] = dw2;
- zs->payload[2] = dw3;
- zs->payload[3] = dw4;
- zs->payload[4] = dw5;
- zs->payload[5] = dw6;
-
- /* do not increment reference count */
- zs->bo = info.zs.bo;
-
- /* separate stencil */
- if (info.stencil.bo) {
- assert(info.stencil.stride > 0 && info.stencil.stride < 128 * 1024 &&
- info.stencil.stride % 128 == 0);
-
- dw1 = (info.stencil.stride - 1) << GEN6_STENCIL_DW1_PITCH__SHIFT;
- if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
- dw1 |= GEN75_STENCIL_DW1_STENCIL_BUFFER_ENABLE;
-
- dw2 = info.stencil.offset;
- dw4 = info.stencil.qpitch;
- } else {
- dw1 = 0;
- dw2 = 0;
- dw4 = 0;
- }
-
- zs->payload[6] = dw1;
- zs->payload[7] = dw2;
- zs->payload[8] = dw4;
- /* do not increment reference count */
- zs->separate_s8_bo = info.stencil.bo;
-
- /* hiz */
- if (info.hiz.bo) {
- dw1 = (info.hiz.stride - 1) << GEN6_HIZ_DW1_PITCH__SHIFT;
- dw2 = info.hiz.offset;
- dw4 = info.hiz.qpitch;
- } else {
- dw1 = 0;
- dw2 = 0;
- dw4 = 0;
- }
-
- zs->payload[9] = dw1;
- zs->payload[10] = dw2;
- zs->payload[11] = dw4;
- /* do not increment reference count */
- zs->hiz_bo = info.hiz.bo;
-}
-
-static void
-viewport_get_guardband(const struct ilo_dev *dev,
- int center_x, int center_y,
- int *min_gbx, int *max_gbx,
- int *min_gby, int *max_gby)
-{
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 234:
- *
- * "Per-Device Guardband Extents
- *
- * - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1]
- * - Maximum Post-Clamp Delta (X or Y): 16K"
- *
- * "In addition, in order to be correctly rendered, objects must have a
- * screenspace bounding box not exceeding 8K in the X or Y direction.
- * This additional restriction must also be comprehended by software,
- * i.e., enforced by use of clipping."
- *
- * From the Ivy Bridge PRM, volume 2 part 1, page 248:
- *
- * "Per-Device Guardband Extents
- *
- * - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1]
- * - Maximum Post-Clamp Delta (X or Y): N/A"
- *
- * "In addition, in order to be correctly rendered, objects must have a
- * screenspace bounding box not exceeding 8K in the X or Y direction.
- * This additional restriction must also be comprehended by software,
- * i.e., enforced by use of clipping."
- *
- * Combined, the bounding box of any object can not exceed 8K in both
- * width and height.
- *
- * Below we set the guardband as a squre of length 8K, centered at where
- * the viewport is. This makes sure all objects passing the GB test are
- * valid to the renderer, and those failing the XY clipping have a
- * better chance of passing the GB test.
- */
- const int max_extent = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 32768 : 16384;
- const int half_len = 8192 / 2;
-
- /* make sure the guardband is within the valid range */
- if (center_x - half_len < -max_extent)
- center_x = -max_extent + half_len;
- else if (center_x + half_len > max_extent - 1)
- center_x = max_extent - half_len;
-
- if (center_y - half_len < -max_extent)
- center_y = -max_extent + half_len;
- else if (center_y + half_len > max_extent - 1)
- center_y = max_extent - half_len;
-
- *min_gbx = (float) (center_x - half_len);
- *max_gbx = (float) (center_x + half_len);
- *min_gby = (float) (center_y - half_len);
- *max_gby = (float) (center_y + half_len);
-}
-
-void
-ilo_gpe_set_viewport_cso(const struct ilo_dev *dev,
- const struct pipe_viewport_state *state,
- struct ilo_viewport_cso *vp)
-{
- const float scale_x = fabs(state->scale[0]);
- const float scale_y = fabs(state->scale[1]);
- const float scale_z = fabs(state->scale[2]);
- int min_gbx, max_gbx, min_gby, max_gby;
-
- ILO_DEV_ASSERT(dev, 6, 8);
-
- viewport_get_guardband(dev,
- (int) state->translate[0],
- (int) state->translate[1],
- &min_gbx, &max_gbx, &min_gby, &max_gby);
-
- /* matrix form */
- vp->m00 = state->scale[0];
- vp->m11 = state->scale[1];
- vp->m22 = state->scale[2];
- vp->m30 = state->translate[0];
- vp->m31 = state->translate[1];
- vp->m32 = state->translate[2];
-
- /* guardband in NDC space */
- vp->min_gbx = ((float) min_gbx - state->translate[0]) / scale_x;
- vp->max_gbx = ((float) max_gbx - state->translate[0]) / scale_x;
- vp->min_gby = ((float) min_gby - state->translate[1]) / scale_y;
- vp->max_gby = ((float) max_gby - state->translate[1]) / scale_y;
-
- /* viewport in screen space */
- vp->min_x = scale_x * -1.0f + state->translate[0];
- vp->max_x = scale_x * 1.0f + state->translate[0];
- vp->min_y = scale_y * -1.0f + state->translate[1];
- vp->max_y = scale_y * 1.0f + state->translate[1];
- vp->min_z = scale_z * -1.0f + state->translate[2];
- vp->max_z = scale_z * 1.0f + state->translate[2];
-}
-
-/**
- * Translate a pipe logicop to the matching hardware logicop.
- */
-static int
-gen6_translate_pipe_logicop(unsigned logicop)
-{
- switch (logicop) {
- case PIPE_LOGICOP_CLEAR: return GEN6_LOGICOP_CLEAR;
- case PIPE_LOGICOP_NOR: return GEN6_LOGICOP_NOR;
- case PIPE_LOGICOP_AND_INVERTED: return GEN6_LOGICOP_AND_INVERTED;
- case PIPE_LOGICOP_COPY_INVERTED: return GEN6_LOGICOP_COPY_INVERTED;
- case PIPE_LOGICOP_AND_REVERSE: return GEN6_LOGICOP_AND_REVERSE;
- case PIPE_LOGICOP_INVERT: return GEN6_LOGICOP_INVERT;
- case PIPE_LOGICOP_XOR: return GEN6_LOGICOP_XOR;
- case PIPE_LOGICOP_NAND: return GEN6_LOGICOP_NAND;
- case PIPE_LOGICOP_AND: return GEN6_LOGICOP_AND;
- case PIPE_LOGICOP_EQUIV: return GEN6_LOGICOP_EQUIV;
- case PIPE_LOGICOP_NOOP: return GEN6_LOGICOP_NOOP;
- case PIPE_LOGICOP_OR_INVERTED: return GEN6_LOGICOP_OR_INVERTED;
- case PIPE_LOGICOP_COPY: return GEN6_LOGICOP_COPY;
- case PIPE_LOGICOP_OR_REVERSE: return GEN6_LOGICOP_OR_REVERSE;
- case PIPE_LOGICOP_OR: return GEN6_LOGICOP_OR;
- case PIPE_LOGICOP_SET: return GEN6_LOGICOP_SET;
- default:
- assert(!"unknown logicop function");
- return GEN6_LOGICOP_CLEAR;
- }
-}
-
-/**
- * Translate a pipe blend function to the matching hardware blend function.
- */
-static int
-gen6_translate_pipe_blend(unsigned blend)
-{
- switch (blend) {
- case PIPE_BLEND_ADD: return GEN6_BLENDFUNCTION_ADD;
- case PIPE_BLEND_SUBTRACT: return GEN6_BLENDFUNCTION_SUBTRACT;
- case PIPE_BLEND_REVERSE_SUBTRACT: return GEN6_BLENDFUNCTION_REVERSE_SUBTRACT;
- case PIPE_BLEND_MIN: return GEN6_BLENDFUNCTION_MIN;
- case PIPE_BLEND_MAX: return GEN6_BLENDFUNCTION_MAX;
- default:
- assert(!"unknown blend function");
- return GEN6_BLENDFUNCTION_ADD;
- };
-}
-
-/**
- * Translate a pipe blend factor to the matching hardware blend factor.
- */
-static int
-gen6_translate_pipe_blendfactor(unsigned blendfactor)
-{
- switch (blendfactor) {
- case PIPE_BLENDFACTOR_ONE: return GEN6_BLENDFACTOR_ONE;
- case PIPE_BLENDFACTOR_SRC_COLOR: return GEN6_BLENDFACTOR_SRC_COLOR;
- case PIPE_BLENDFACTOR_SRC_ALPHA: return GEN6_BLENDFACTOR_SRC_ALPHA;
- case PIPE_BLENDFACTOR_DST_ALPHA: return GEN6_BLENDFACTOR_DST_ALPHA;
- case PIPE_BLENDFACTOR_DST_COLOR: return GEN6_BLENDFACTOR_DST_COLOR;
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE;
- case PIPE_BLENDFACTOR_CONST_COLOR: return GEN6_BLENDFACTOR_CONST_COLOR;
- case PIPE_BLENDFACTOR_CONST_ALPHA: return GEN6_BLENDFACTOR_CONST_ALPHA;
- case PIPE_BLENDFACTOR_SRC1_COLOR: return GEN6_BLENDFACTOR_SRC1_COLOR;
- case PIPE_BLENDFACTOR_SRC1_ALPHA: return GEN6_BLENDFACTOR_SRC1_ALPHA;
- case PIPE_BLENDFACTOR_ZERO: return GEN6_BLENDFACTOR_ZERO;
- case PIPE_BLENDFACTOR_INV_SRC_COLOR: return GEN6_BLENDFACTOR_INV_SRC_COLOR;
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return GEN6_BLENDFACTOR_INV_SRC_ALPHA;
- case PIPE_BLENDFACTOR_INV_DST_ALPHA: return GEN6_BLENDFACTOR_INV_DST_ALPHA;
- case PIPE_BLENDFACTOR_INV_DST_COLOR: return GEN6_BLENDFACTOR_INV_DST_COLOR;
- case PIPE_BLENDFACTOR_INV_CONST_COLOR: return GEN6_BLENDFACTOR_INV_CONST_COLOR;
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return GEN6_BLENDFACTOR_INV_CONST_ALPHA;
- case PIPE_BLENDFACTOR_INV_SRC1_COLOR: return GEN6_BLENDFACTOR_INV_SRC1_COLOR;
- case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: return GEN6_BLENDFACTOR_INV_SRC1_ALPHA;
- default:
- assert(!"unknown blend factor");
- return GEN6_BLENDFACTOR_ONE;
- };
-}
-
-/**
- * Translate a pipe stencil op to the matching hardware stencil op.
- */
-static int
-gen6_translate_pipe_stencil_op(unsigned stencil_op)
-{
- switch (stencil_op) {
- case PIPE_STENCIL_OP_KEEP: return GEN6_STENCILOP_KEEP;
- case PIPE_STENCIL_OP_ZERO: return GEN6_STENCILOP_ZERO;
- case PIPE_STENCIL_OP_REPLACE: return GEN6_STENCILOP_REPLACE;
- case PIPE_STENCIL_OP_INCR: return GEN6_STENCILOP_INCRSAT;
- case PIPE_STENCIL_OP_DECR: return GEN6_STENCILOP_DECRSAT;
- case PIPE_STENCIL_OP_INCR_WRAP: return GEN6_STENCILOP_INCR;
- case PIPE_STENCIL_OP_DECR_WRAP: return GEN6_STENCILOP_DECR;
- case PIPE_STENCIL_OP_INVERT: return GEN6_STENCILOP_INVERT;
- default:
- assert(!"unknown stencil op");
- return GEN6_STENCILOP_KEEP;
- }
-}
-
-static int
-gen6_blend_factor_dst_alpha_forced_one(int factor)
-{
- switch (factor) {
- case GEN6_BLENDFACTOR_DST_ALPHA:
- return GEN6_BLENDFACTOR_ONE;
- case GEN6_BLENDFACTOR_INV_DST_ALPHA:
- case GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE:
- return GEN6_BLENDFACTOR_ZERO;
- default:
- return factor;
- }
-}
-
-static uint32_t
-blend_get_rt_blend_enable_gen6(const struct ilo_dev *dev,
- const struct pipe_rt_blend_state *rt,
- bool dst_alpha_forced_one)
-{
- int rgb_src, rgb_dst, a_src, a_dst;
- uint32_t dw;
-
- ILO_DEV_ASSERT(dev, 6, 7.5);
-
- if (!rt->blend_enable)
- return 0;
-
- rgb_src = gen6_translate_pipe_blendfactor(rt->rgb_src_factor);
- rgb_dst = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor);
- a_src = gen6_translate_pipe_blendfactor(rt->alpha_src_factor);
- a_dst = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor);
-
- if (dst_alpha_forced_one) {
- rgb_src = gen6_blend_factor_dst_alpha_forced_one(rgb_src);
- rgb_dst = gen6_blend_factor_dst_alpha_forced_one(rgb_dst);
- a_src = gen6_blend_factor_dst_alpha_forced_one(a_src);
- a_dst = gen6_blend_factor_dst_alpha_forced_one(a_dst);
- }
-
- dw = GEN6_RT_DW0_BLEND_ENABLE |
- gen6_translate_pipe_blend(rt->alpha_func) << 26 |
- a_src << 20 |
- a_dst << 15 |
- gen6_translate_pipe_blend(rt->rgb_func) << 11 |
- rgb_src << 5 |
- rgb_dst;
-
- if (rt->rgb_func != rt->alpha_func ||
- rgb_src != a_src || rgb_dst != a_dst)
- dw |= GEN6_RT_DW0_INDEPENDENT_ALPHA_ENABLE;
-
- return dw;
-}
-
-static uint32_t
-blend_get_rt_blend_enable_gen8(const struct ilo_dev *dev,
- const struct pipe_rt_blend_state *rt,
- bool dst_alpha_forced_one,
- bool *independent_alpha)
-{
- int rgb_src, rgb_dst, a_src, a_dst;
- uint32_t dw;
-
- ILO_DEV_ASSERT(dev, 8, 8);
-
- if (!rt->blend_enable) {
- *independent_alpha = false;
- return 0;
- }
-
- rgb_src = gen6_translate_pipe_blendfactor(rt->rgb_src_factor);
- rgb_dst = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor);
- a_src = gen6_translate_pipe_blendfactor(rt->alpha_src_factor);
- a_dst = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor);
-
- if (dst_alpha_forced_one) {
- rgb_src = gen6_blend_factor_dst_alpha_forced_one(rgb_src);
- rgb_dst = gen6_blend_factor_dst_alpha_forced_one(rgb_dst);
- a_src = gen6_blend_factor_dst_alpha_forced_one(a_src);
- a_dst = gen6_blend_factor_dst_alpha_forced_one(a_dst);
- }
-
- dw = GEN8_RT_DW0_BLEND_ENABLE |
- rgb_src << 26 |
- rgb_dst << 21 |
- gen6_translate_pipe_blend(rt->rgb_func) << 18 |
- a_src << 13 |
- a_dst << 8 |
- gen6_translate_pipe_blend(rt->alpha_func) << 5;
-
- *independent_alpha = (rt->rgb_func != rt->alpha_func ||
- rgb_src != a_src ||
- rgb_dst != a_dst);
-
- return dw;
-}
-
-static void
-blend_init_cso_gen6(const struct ilo_dev *dev,
- const struct pipe_blend_state *state,
- struct ilo_blend_state *blend,
- unsigned index)
-{
- const struct pipe_rt_blend_state *rt = &state->rt[index];
- struct ilo_blend_cso *cso = &blend->cso[index];
-
- ILO_DEV_ASSERT(dev, 6, 7.5);
-
- cso->payload[0] = 0;
- cso->payload[1] = GEN6_RT_DW1_COLORCLAMP_RTFORMAT |
- GEN6_RT_DW1_PRE_BLEND_CLAMP |
- GEN6_RT_DW1_POST_BLEND_CLAMP;
-
- if (!(rt->colormask & PIPE_MASK_A))
- cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLE_A;
- if (!(rt->colormask & PIPE_MASK_R))
- cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLE_R;
- if (!(rt->colormask & PIPE_MASK_G))
- cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLE_G;
- if (!(rt->colormask & PIPE_MASK_B))
- cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLE_B;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 365:
- *
- * "Color Buffer Blending and Logic Ops must not be enabled
- * simultaneously, or behavior is UNDEFINED."
- *
- * Since state->logicop_enable takes precedence over rt->blend_enable,
- * no special care is needed.
- */
- if (state->logicop_enable) {
- cso->dw_blend = 0;
- cso->dw_blend_dst_alpha_forced_one = 0;
- } else {
- cso->dw_blend = blend_get_rt_blend_enable_gen6(dev, rt, false);
- cso->dw_blend_dst_alpha_forced_one =
- blend_get_rt_blend_enable_gen6(dev, rt, true);
- }
-}
-
-static bool
-blend_init_cso_gen8(const struct ilo_dev *dev,
- const struct pipe_blend_state *state,
- struct ilo_blend_state *blend,
- unsigned index)
-{
- const struct pipe_rt_blend_state *rt = &state->rt[index];
- struct ilo_blend_cso *cso = &blend->cso[index];
- bool independent_alpha = false;
-
- ILO_DEV_ASSERT(dev, 8, 8);
-
- cso->payload[0] = 0;
- cso->payload[1] = GEN8_RT_DW1_COLORCLAMP_RTFORMAT |
- GEN8_RT_DW1_PRE_BLEND_CLAMP |
- GEN8_RT_DW1_POST_BLEND_CLAMP;
-
- if (!(rt->colormask & PIPE_MASK_A))
- cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLE_A;
- if (!(rt->colormask & PIPE_MASK_R))
- cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLE_R;
- if (!(rt->colormask & PIPE_MASK_G))
- cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLE_G;
- if (!(rt->colormask & PIPE_MASK_B))
- cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLE_B;
-
- if (state->logicop_enable) {
- cso->dw_blend = 0;
- cso->dw_blend_dst_alpha_forced_one = 0;
- } else {
- bool tmp[2];
-
- cso->dw_blend = blend_get_rt_blend_enable_gen8(dev, rt, false, &tmp[0]);
- cso->dw_blend_dst_alpha_forced_one =
- blend_get_rt_blend_enable_gen8(dev, rt, true, &tmp[1]);
-
- if (tmp[0] || tmp[1])
- independent_alpha = true;
- }
-
- return independent_alpha;
-}
-
-static uint32_t
-blend_get_logicop_enable_gen6(const struct ilo_dev *dev,
- const struct pipe_blend_state *state)
-{
- ILO_DEV_ASSERT(dev, 6, 7.5);
-
- if (!state->logicop_enable)
- return 0;
-
- return GEN6_RT_DW1_LOGICOP_ENABLE |
- gen6_translate_pipe_logicop(state->logicop_func) << 18;
-}
-
-static uint32_t
-blend_get_logicop_enable_gen8(const struct ilo_dev *dev,
- const struct pipe_blend_state *state)
-{
- ILO_DEV_ASSERT(dev, 8, 8);
-
- if (!state->logicop_enable)
- return 0;
-
- return GEN8_RT_DW1_LOGICOP_ENABLE |
- gen6_translate_pipe_logicop(state->logicop_func) << 27;
-}
-
-static uint32_t
-blend_get_alpha_mod_gen6(const struct ilo_dev *dev,
- const struct pipe_blend_state *state,
- bool dual_blend)
-{
- uint32_t dw = 0;
-
- ILO_DEV_ASSERT(dev, 6, 7.5);
-
- if (state->alpha_to_coverage) {
- dw |= GEN6_RT_DW1_ALPHA_TO_COVERAGE;
- if (ilo_dev_gen(dev) >= ILO_GEN(7))
- dw |= GEN6_RT_DW1_ALPHA_TO_COVERAGE_DITHER;
- }
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 378:
- *
- * "If Dual Source Blending is enabled, this bit (AlphaToOne Enable)
- * must be disabled."
- */
- if (state->alpha_to_one && !dual_blend)
- dw |= GEN6_RT_DW1_ALPHA_TO_ONE;
-
- return dw;
-}
-
-static uint32_t
-blend_get_alpha_mod_gen8(const struct ilo_dev *dev,
- const struct pipe_blend_state *state,
- bool dual_blend)
-{
- uint32_t dw = 0;
-
- ILO_DEV_ASSERT(dev, 8, 8);
-
- if (state->alpha_to_coverage) {
- dw |= GEN8_BLEND_DW0_ALPHA_TO_COVERAGE |
- GEN8_BLEND_DW0_ALPHA_TO_COVERAGE_DITHER;
- }
-
- if (state->alpha_to_one && !dual_blend)
- dw |= GEN8_BLEND_DW0_ALPHA_TO_ONE;
-
- return dw;
-}
-
-static uint32_t
-blend_get_ps_blend_gen8(const struct ilo_dev *dev, uint32_t rt_dw0)
-{
- int rgb_src, rgb_dst, a_src, a_dst;
- uint32_t dw;
-
- ILO_DEV_ASSERT(dev, 8, 8);
-
- if (!(rt_dw0 & GEN8_RT_DW0_BLEND_ENABLE))
- return 0;
-
- a_src = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_SRC_ALPHA_FACTOR);
- a_dst = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_DST_ALPHA_FACTOR);
- rgb_src = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_SRC_COLOR_FACTOR);
- rgb_dst = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_DST_COLOR_FACTOR);
-
- dw = GEN8_PS_BLEND_DW1_BLEND_ENABLE;
- dw |= GEN_SHIFT32(a_src, GEN8_PS_BLEND_DW1_SRC_ALPHA_FACTOR);
- dw |= GEN_SHIFT32(a_dst, GEN8_PS_BLEND_DW1_DST_ALPHA_FACTOR);
- dw |= GEN_SHIFT32(rgb_src, GEN8_PS_BLEND_DW1_SRC_COLOR_FACTOR);
- dw |= GEN_SHIFT32(rgb_dst, GEN8_PS_BLEND_DW1_DST_COLOR_FACTOR);
-
- if (a_src != rgb_src || a_dst != rgb_dst)
- dw |= GEN8_PS_BLEND_DW1_INDEPENDENT_ALPHA_ENABLE;
-
- return dw;
-}
-
-void
-ilo_gpe_init_blend(const struct ilo_dev *dev,
- const struct pipe_blend_state *state,
- struct ilo_blend_state *blend)
-{
- unsigned i;
-
- ILO_DEV_ASSERT(dev, 6, 8);
-
- blend->dual_blend = (util_blend_state_is_dual(state, 0) &&
- state->rt[0].blend_enable &&
- !state->logicop_enable);
- blend->alpha_to_coverage = state->alpha_to_coverage;
-
- if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
- bool independent_alpha;
-
- blend->dw_alpha_mod =
- blend_get_alpha_mod_gen8(dev, state, blend->dual_blend);
- blend->dw_logicop = blend_get_logicop_enable_gen8(dev, state);
- blend->dw_shared = (state->dither) ? GEN8_BLEND_DW0_DITHER_ENABLE : 0;
-
- independent_alpha = blend_init_cso_gen8(dev, state, blend, 0);
- if (independent_alpha)
- blend->dw_shared |= GEN8_BLEND_DW0_INDEPENDENT_ALPHA_ENABLE;
-
- blend->dw_ps_blend = blend_get_ps_blend_gen8(dev,
- blend->cso[0].dw_blend);
- blend->dw_ps_blend_dst_alpha_forced_one = blend_get_ps_blend_gen8(dev,
- blend->cso[0].dw_blend_dst_alpha_forced_one);
-
- if (state->independent_blend_enable) {
- for (i = 1; i < Elements(blend->cso); i++) {
- independent_alpha = blend_init_cso_gen8(dev, state, blend, i);
- if (independent_alpha)
- blend->dw_shared |= GEN8_BLEND_DW0_INDEPENDENT_ALPHA_ENABLE;
- }
- } else {
- for (i = 1; i < Elements(blend->cso); i++)
- blend->cso[i] = blend->cso[0];
- }
- } else {
- blend->dw_alpha_mod =
- blend_get_alpha_mod_gen6(dev, state, blend->dual_blend);
- blend->dw_logicop = blend_get_logicop_enable_gen6(dev, state);
- blend->dw_shared = (state->dither) ? GEN6_RT_DW1_DITHER_ENABLE : 0;
-
- blend->dw_ps_blend = 0;
- blend->dw_ps_blend_dst_alpha_forced_one = 0;
-
- blend_init_cso_gen6(dev, state, blend, 0);
- if (state->independent_blend_enable) {
- for (i = 1; i < Elements(blend->cso); i++)
- blend_init_cso_gen6(dev, state, blend, i);
- } else {
- for (i = 1; i < Elements(blend->cso); i++)
- blend->cso[i] = blend->cso[0];
- }
- }
-}
-
-/**
- * Translate a pipe DSA test function to the matching hardware compare
- * function.
- */
-static int
-gen6_translate_dsa_func(unsigned func)
-{
- switch (func) {
- case PIPE_FUNC_NEVER: return GEN6_COMPAREFUNCTION_NEVER;
- case PIPE_FUNC_LESS: return GEN6_COMPAREFUNCTION_LESS;
- case PIPE_FUNC_EQUAL: return GEN6_COMPAREFUNCTION_EQUAL;
- case PIPE_FUNC_LEQUAL: return GEN6_COMPAREFUNCTION_LEQUAL;
- case PIPE_FUNC_GREATER: return GEN6_COMPAREFUNCTION_GREATER;
- case PIPE_FUNC_NOTEQUAL: return GEN6_COMPAREFUNCTION_NOTEQUAL;
- case PIPE_FUNC_GEQUAL: return GEN6_COMPAREFUNCTION_GEQUAL;
- case PIPE_FUNC_ALWAYS: return GEN6_COMPAREFUNCTION_ALWAYS;
- default:
- assert(!"unknown depth/stencil/alpha test function");
- return GEN6_COMPAREFUNCTION_NEVER;
- }
-}
-
-static uint32_t
-dsa_get_stencil_enable_gen6(const struct ilo_dev *dev,
- const struct pipe_stencil_state *stencil0,
- const struct pipe_stencil_state *stencil1)
-{
- uint32_t dw;
-
- ILO_DEV_ASSERT(dev, 6, 7.5);
-
- if (!stencil0->enabled)
- return 0;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 359:
- *
- * "If the Depth Buffer is either undefined or does not have a surface
- * format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate
- * stencil buffer is disabled, Stencil Test Enable must be DISABLED"
- *
- * From the Sandy Bridge PRM, volume 2 part 1, page 370:
- *
- * "This field (Stencil Test Enable) cannot be enabled if
- * Surface Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM."
- *
- * TODO We do not check these yet.
- */
- dw = GEN6_ZS_DW0_STENCIL_TEST_ENABLE |
- gen6_translate_dsa_func(stencil0->func) << 28 |
- gen6_translate_pipe_stencil_op(stencil0->fail_op) << 25 |
- gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 22 |
- gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 19;
- if (stencil0->writemask)
- dw |= GEN6_ZS_DW0_STENCIL_WRITE_ENABLE;
-
- if (stencil1->enabled) {
- dw |= GEN6_ZS_DW0_STENCIL1_ENABLE |
- gen6_translate_dsa_func(stencil1->func) << 12 |
- gen6_translate_pipe_stencil_op(stencil1->fail_op) << 9 |
- gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 6 |
- gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 3;
- if (stencil1->writemask)
- dw |= GEN6_ZS_DW0_STENCIL_WRITE_ENABLE;
- }
-
- return dw;
-}
-
-static uint32_t
-dsa_get_stencil_enable_gen8(const struct ilo_dev *dev,
- const struct pipe_stencil_state *stencil0,
- const struct pipe_stencil_state *stencil1)
-{
- uint32_t dw;
-
- ILO_DEV_ASSERT(dev, 8, 8);
-
- if (!stencil0->enabled)
- return 0;
-
- dw = gen6_translate_pipe_stencil_op(stencil0->fail_op) << 29 |
- gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 26 |
- gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 23 |
- gen6_translate_dsa_func(stencil0->func) << 8 |
- GEN8_ZS_DW1_STENCIL_TEST_ENABLE;
- if (stencil0->writemask)
- dw |= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE;
-
- if (stencil1->enabled) {
- dw |= gen6_translate_dsa_func(stencil1->func) << 20 |
- gen6_translate_pipe_stencil_op(stencil1->fail_op) << 17 |
- gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 14 |
- gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 11 |
- GEN8_ZS_DW1_STENCIL1_ENABLE;
- if (stencil1->writemask)
- dw |= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE;
- }
-
- return dw;
-}
-
-static uint32_t
-dsa_get_depth_enable_gen6(const struct ilo_dev *dev,
- const struct pipe_depth_state *state)
-{
- uint32_t dw;
-
- ILO_DEV_ASSERT(dev, 6, 7.5);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 360:
- *
- * "Enabling the Depth Test function without defining a Depth Buffer is
- * UNDEFINED."
- *
- * From the Sandy Bridge PRM, volume 2 part 1, page 375:
- *
- * "A Depth Buffer must be defined before enabling writes to it, or
- * operation is UNDEFINED."
- *
- * TODO We do not check these yet.
- */
- if (state->enabled) {
- dw = GEN6_ZS_DW2_DEPTH_TEST_ENABLE |
- gen6_translate_dsa_func(state->func) << 27;
- } else {
- dw = GEN6_COMPAREFUNCTION_ALWAYS << 27;
- }
-
- if (state->writemask)
- dw |= GEN6_ZS_DW2_DEPTH_WRITE_ENABLE;
-
- return dw;
-}
-
-static uint32_t
-dsa_get_depth_enable_gen8(const struct ilo_dev *dev,
- const struct pipe_depth_state *state)
-{
- uint32_t dw;
-
- ILO_DEV_ASSERT(dev, 8, 8);
-
- if (state->enabled) {
- dw = GEN8_ZS_DW1_DEPTH_TEST_ENABLE |
- gen6_translate_dsa_func(state->func) << 5;
- } else {
- dw = GEN6_COMPAREFUNCTION_ALWAYS << 5;
- }
-
- if (state->writemask)
- dw |= GEN8_ZS_DW1_DEPTH_WRITE_ENABLE;
-
- return dw;
-}
-
-static uint32_t
-dsa_get_alpha_enable_gen6(const struct ilo_dev *dev,
- const struct pipe_alpha_state *state)
-{
- uint32_t dw;
-
- ILO_DEV_ASSERT(dev, 6, 7.5);
-
- if (!state->enabled)
- return 0;
-
- /* this will be ORed to BLEND_STATE */
- dw = GEN6_RT_DW1_ALPHA_TEST_ENABLE |
- gen6_translate_dsa_func(state->func) << 13;
-
- return dw;
-}
-
-static uint32_t
-dsa_get_alpha_enable_gen8(const struct ilo_dev *dev,
- const struct pipe_alpha_state *state)
-{
- uint32_t dw;
-
- ILO_DEV_ASSERT(dev, 8, 8);
-
- if (!state->enabled)
- return 0;
-
- /* this will be ORed to BLEND_STATE */
- dw = GEN8_BLEND_DW0_ALPHA_TEST_ENABLE |
- gen6_translate_dsa_func(state->func) << 24;
-
- return dw;
-}
-
-void
-ilo_gpe_init_dsa(const struct ilo_dev *dev,
- const struct pipe_depth_stencil_alpha_state *state,
- struct ilo_dsa_state *dsa)
-{
- ILO_DEV_ASSERT(dev, 6, 8);
-
- STATIC_ASSERT(Elements(dsa->payload) >= 3);
-
- if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
- const uint32_t dw_stencil = dsa_get_stencil_enable_gen8(dev,
- &state->stencil[0], &state->stencil[1]);
- const uint32_t dw_depth = dsa_get_depth_enable_gen8(dev, &state->depth);
-
- assert(!(dw_stencil & dw_depth));
- dsa->payload[0] = dw_stencil | dw_depth;
-
- dsa->dw_blend_alpha = dsa_get_alpha_enable_gen8(dev, &state->alpha);
- dsa->dw_ps_blend_alpha = (state->alpha.enabled) ?
- GEN8_PS_BLEND_DW1_ALPHA_TEST_ENABLE : 0;
- } else {
- dsa->payload[0] = dsa_get_stencil_enable_gen6(dev,
- &state->stencil[0], &state->stencil[1]);
- dsa->payload[2] = dsa_get_depth_enable_gen6(dev, &state->depth);
-
- dsa->dw_blend_alpha = dsa_get_alpha_enable_gen6(dev, &state->alpha);
- dsa->dw_ps_blend_alpha = 0;
- }
-
- dsa->payload[1] = state->stencil[0].valuemask << 24 |
- state->stencil[0].writemask << 16 |
- state->stencil[1].valuemask << 8 |
- state->stencil[1].writemask;
-
- dsa->alpha_ref = float_to_ubyte(state->alpha.ref_value);
-}
-
-void
-ilo_gpe_set_scissor(const struct ilo_dev *dev,
- unsigned start_slot,
- unsigned num_states,
- const struct pipe_scissor_state *states,
- struct ilo_scissor_state *scissor)
-{
- unsigned i;
-
- ILO_DEV_ASSERT(dev, 6, 8);
-
- for (i = 0; i < num_states; i++) {
- uint16_t min_x, min_y, max_x, max_y;
-
- /* both max and min are inclusive in SCISSOR_RECT */
- if (states[i].minx < states[i].maxx &&
- states[i].miny < states[i].maxy) {
- min_x = states[i].minx;
- min_y = states[i].miny;
- max_x = states[i].maxx - 1;
- max_y = states[i].maxy - 1;
- }
- else {
- /* we have to make min greater than max */
- min_x = 1;
- min_y = 1;
- max_x = 0;
- max_y = 0;
- }
-
- scissor->payload[(start_slot + i) * 2 + 0] = min_y << 16 | min_x;
- scissor->payload[(start_slot + i) * 2 + 1] = max_y << 16 | max_x;
- }
-
- if (!start_slot && num_states)
- scissor->scissor0 = states[0];
-}
-
-void
-ilo_gpe_set_scissor_null(const struct ilo_dev *dev,
- struct ilo_scissor_state *scissor)
-{
- unsigned i;
-
- for (i = 0; i < Elements(scissor->payload); i += 2) {
- scissor->payload[i + 0] = 1 << 16 | 1;
- scissor->payload[i + 1] = 0;
- }
-}
-
-static void
-fb_set_blend_caps(const struct ilo_dev *dev,
- enum pipe_format format,
- struct ilo_fb_blend_caps *caps)
-{
- const struct util_format_description *desc =
- util_format_description(format);
- const int ch = util_format_get_first_non_void_channel(format);
-
- memset(caps, 0, sizeof(*caps));
-
- if (format == PIPE_FORMAT_NONE || desc->is_mixed)
- return;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 365:
- *
- * "Logic Ops are only supported on *_UNORM surfaces (excluding _SRGB
- * variants), otherwise Logic Ops must be DISABLED."
- *
- * According to the classic driver, this is lifted on Gen8+.
- */
- if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
- caps->can_logicop = true;
- } else {
- caps->can_logicop = (ch >= 0 && desc->channel[ch].normalized &&
- desc->channel[ch].type == UTIL_FORMAT_TYPE_UNSIGNED &&
- desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB);
- }
-
- /* no blending for pure integer formats */
- caps->can_blend = !util_format_is_pure_integer(format);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 382:
- *
- * "Alpha Test can only be enabled if Pixel Shader outputs a float
- * alpha value."
- */
- caps->can_alpha_test = !util_format_is_pure_integer(format);
-
- caps->dst_alpha_forced_one =
- (ilo_format_translate_render(dev, format) !=
- ilo_format_translate_color(dev, format));
-
- /* sanity check */
- if (caps->dst_alpha_forced_one) {
- enum pipe_format render_format;
-
- switch (format) {
- case PIPE_FORMAT_B8G8R8X8_UNORM:
- render_format = PIPE_FORMAT_B8G8R8A8_UNORM;
- break;
- default:
- render_format = PIPE_FORMAT_NONE;
- break;
- }
-
- assert(ilo_format_translate_render(dev, format) ==
- ilo_format_translate_color(dev, render_format));
- }
-}
-
-void
-ilo_gpe_set_fb(const struct ilo_dev *dev,
- const struct pipe_framebuffer_state *state,
- struct ilo_fb_state *fb)
-{
- const struct pipe_surface *first_surf = NULL;
- int i;
-
- ILO_DEV_ASSERT(dev, 6, 8);
-
- util_copy_framebuffer_state(&fb->state, state);
-
- ilo_gpe_init_view_surface_null(dev,
- (state->width) ? state->width : 1,
- (state->height) ? state->height : 1,
- 1, 0, &fb->null_rt);
-
- for (i = 0; i < state->nr_cbufs; i++) {
- if (state->cbufs[i]) {
- fb_set_blend_caps(dev, state->cbufs[i]->format, &fb->blend_caps[i]);
-
- if (!first_surf)
- first_surf = state->cbufs[i];
- } else {
- fb_set_blend_caps(dev, PIPE_FORMAT_NONE, &fb->blend_caps[i]);
- }
- }
-
- if (!first_surf && state->zsbuf)
- first_surf = state->zsbuf;
-
- fb->num_samples = (first_surf) ? first_surf->texture->nr_samples : 1;
- if (!fb->num_samples)
- fb->num_samples = 1;
-
- /*
- * The PRMs list several restrictions when the framebuffer has more than
- * one surface. It seems they are actually lifted on GEN6+.
- */
-}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d_top.c b/src/gallium/drivers/ilo/core/ilo_state_3d_top.c
deleted file mode 100644
index c17957fb704..00000000000
--- a/src/gallium/drivers/ilo/core/ilo_state_3d_top.c
+++ /dev/null
@@ -1,1716 +0,0 @@
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (C) 2012-2014 LunarG, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Chia-I Wu <[email protected]>
- */
-
-#include "genhw/genhw.h"
-#include "util/u_dual_blend.h"
-#include "util/u_framebuffer.h"
-#include "util/u_half.h"
-#include "util/u_resource.h"
-
-#include "ilo_buffer.h"
-#include "ilo_format.h"
-#include "ilo_image.h"
-#include "ilo_state_3d.h"
-#include "../ilo_shader.h"
-
-static void
-ve_init_cso(const struct ilo_dev *dev,
- const struct pipe_vertex_element *state,
- unsigned vb_index,
- struct ilo_ve_cso *cso)
-{
- int comp[4] = {
- GEN6_VFCOMP_STORE_SRC,
- GEN6_VFCOMP_STORE_SRC,
- GEN6_VFCOMP_STORE_SRC,
- GEN6_VFCOMP_STORE_SRC,
- };
- int format;
-
- ILO_DEV_ASSERT(dev, 6, 8);
-
- switch (util_format_get_nr_components(state->src_format)) {
- case 1: comp[1] = GEN6_VFCOMP_STORE_0;
- case 2: comp[2] = GEN6_VFCOMP_STORE_0;
- case 3: comp[3] = (util_format_is_pure_integer(state->src_format)) ?
- GEN6_VFCOMP_STORE_1_INT :
- GEN6_VFCOMP_STORE_1_FP;
- }
-
- format = ilo_format_translate_vertex(dev, state->src_format);
-
- STATIC_ASSERT(Elements(cso->payload) >= 2);
- cso->payload[0] =
- vb_index << GEN6_VE_DW0_VB_INDEX__SHIFT |
- GEN6_VE_DW0_VALID |
- format << GEN6_VE_DW0_FORMAT__SHIFT |
- state->src_offset << GEN6_VE_DW0_VB_OFFSET__SHIFT;
-
- cso->payload[1] =
- comp[0] << GEN6_VE_DW1_COMP0__SHIFT |
- comp[1] << GEN6_VE_DW1_COMP1__SHIFT |
- comp[2] << GEN6_VE_DW1_COMP2__SHIFT |
- comp[3] << GEN6_VE_DW1_COMP3__SHIFT;
-}
-
-void
-ilo_gpe_init_ve(const struct ilo_dev *dev,
- unsigned num_states,
- const struct pipe_vertex_element *states,
- struct ilo_ve_state *ve)
-{
- unsigned i;
-
- ILO_DEV_ASSERT(dev, 6, 8);
-
- ve->count = num_states;
- ve->vb_count = 0;
-
- for (i = 0; i < num_states; i++) {
- const unsigned pipe_idx = states[i].vertex_buffer_index;
- const unsigned instance_divisor = states[i].instance_divisor;
- unsigned hw_idx;
-
- /*
- * map the pipe vb to the hardware vb, which has a fixed instance
- * divisor
- */
- for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
- if (ve->vb_mapping[hw_idx] == pipe_idx &&
- ve->instance_divisors[hw_idx] == instance_divisor)
- break;
- }
-
- /* create one if there is no matching hardware vb */
- if (hw_idx >= ve->vb_count) {
- hw_idx = ve->vb_count++;
-
- ve->vb_mapping[hw_idx] = pipe_idx;
- ve->instance_divisors[hw_idx] = instance_divisor;
- }
-
- ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]);
- }
-}
-
-void
-ilo_gpe_set_ve_edgeflag(const struct ilo_dev *dev,
- struct ilo_ve_cso *cso)
-{
- int format;
-
- ILO_DEV_ASSERT(dev, 6, 8);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 94:
- *
- * "- This bit (Edge Flag Enable) must only be ENABLED on the last
- * valid VERTEX_ELEMENT structure.
- *
- * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
- * and Component 1-3 Control must be set to VFCOMP_NOSTORE.
- *
- * - The Source Element Format must be set to the UINT format.
- *
- * - [DevSNB]: Edge Flags are not supported for QUADLIST
- * primitives. Software may elect to convert QUADLIST primitives
- * to some set of corresponding edge-flag-supported primitive
- * types (e.g., POLYGONs) prior to submission to the 3D pipeline."
- */
- cso->payload[0] |= GEN6_VE_DW0_EDGE_FLAG_ENABLE;
-
- /*
- * Edge flags have format GEN6_FORMAT_R8_USCALED when defined via
- * glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined
- * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
- *
- * Since all the hardware cares about is whether the flags are zero or not,
- * we can treat them as the corresponding _UINT formats.
- */
- format = GEN_EXTRACT(cso->payload[0], GEN6_VE_DW0_FORMAT);
- cso->payload[0] &= ~GEN6_VE_DW0_FORMAT__MASK;
-
- switch (format) {
- case GEN6_FORMAT_R32_FLOAT:
- format = GEN6_FORMAT_R32_UINT;
- break;
- case GEN6_FORMAT_R8_USCALED:
- format = GEN6_FORMAT_R8_UINT;
- break;
- default:
- break;
- }
-
- cso->payload[0] |= GEN_SHIFT32(format, GEN6_VE_DW0_FORMAT);
-
- cso->payload[1] =
- GEN6_VFCOMP_STORE_SRC << GEN6_VE_DW1_COMP0__SHIFT |
- GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP1__SHIFT |
- GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP2__SHIFT |
- GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP3__SHIFT;
-}
-
-void
-ilo_gpe_init_ve_nosrc(const struct ilo_dev *dev,
- int comp0, int comp1, int comp2, int comp3,
- struct ilo_ve_cso *cso)
-{
- ILO_DEV_ASSERT(dev, 6, 8);
-
- STATIC_ASSERT(Elements(cso->payload) >= 2);
-
- assert(comp0 != GEN6_VFCOMP_STORE_SRC &&
- comp1 != GEN6_VFCOMP_STORE_SRC &&
- comp2 != GEN6_VFCOMP_STORE_SRC &&
- comp3 != GEN6_VFCOMP_STORE_SRC);
-
- cso->payload[0] = GEN6_VE_DW0_VALID;
- cso->payload[1] =
- comp0 << GEN6_VE_DW1_COMP0__SHIFT |
- comp1 << GEN6_VE_DW1_COMP1__SHIFT |
- comp2 << GEN6_VE_DW1_COMP2__SHIFT |
- comp3 << GEN6_VE_DW1_COMP3__SHIFT;
-}
-
-void
-ilo_gpe_init_vs_cso(const struct ilo_dev *dev,
- const struct ilo_shader_state *vs,
- struct ilo_shader_cso *cso)
-{
- int start_grf, vue_read_len, sampler_count, max_threads;
- uint32_t dw2, dw4, dw5;
-
- ILO_DEV_ASSERT(dev, 6, 8);
-
- start_grf = ilo_shader_get_kernel_param(vs, ILO_KERNEL_URB_DATA_START_REG);
- vue_read_len = ilo_shader_get_kernel_param(vs, ILO_KERNEL_INPUT_COUNT);
- sampler_count = ilo_shader_get_kernel_param(vs, ILO_KERNEL_SAMPLER_COUNT);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 135:
- *
- * "(Vertex URB Entry Read Length) Specifies the number of pairs of
- * 128-bit vertex elements to be passed into the payload for each
- * vertex."
- *
- * "It is UNDEFINED to set this field to 0 indicating no Vertex URB
- * data to be read and passed to the thread."
- */
- vue_read_len = (vue_read_len + 1) / 2;
- if (!vue_read_len)
- vue_read_len = 1;
-
- max_threads = dev->thread_count;
- if (ilo_dev_gen(dev) == ILO_GEN(7.5) && dev->gt == 2)
- max_threads *= 2;
-
- dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
- dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
- dw4 = start_grf << GEN6_VS_DW4_URB_GRF_START__SHIFT |
- vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT |
- 0 << GEN6_VS_DW4_URB_READ_OFFSET__SHIFT;
-
- dw5 = GEN6_VS_DW5_STATISTICS |
- GEN6_VS_DW5_VS_ENABLE;
-
- if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
- dw5 |= (max_threads - 1) << GEN75_VS_DW5_MAX_THREADS__SHIFT;
- else
- dw5 |= (max_threads - 1) << GEN6_VS_DW5_MAX_THREADS__SHIFT;
-
- STATIC_ASSERT(Elements(cso->payload) >= 3);
- cso->payload[0] = dw2;
- cso->payload[1] = dw4;
- cso->payload[2] = dw5;
-}
-
-static void
-gs_init_cso_gen6(const struct ilo_dev *dev,
- const struct ilo_shader_state *gs,
- struct ilo_shader_cso *cso)
-{
- int start_grf, vue_read_len, max_threads;
- uint32_t dw2, dw4, dw5, dw6;
-
- ILO_DEV_ASSERT(dev, 6, 6);
-
- if (ilo_shader_get_type(gs) == PIPE_SHADER_GEOMETRY) {
- start_grf = ilo_shader_get_kernel_param(gs,
- ILO_KERNEL_URB_DATA_START_REG);
-
- vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
- }
- else {
- start_grf = ilo_shader_get_kernel_param(gs,
- ILO_KERNEL_VS_GEN6_SO_START_REG);
-
- vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_OUTPUT_COUNT);
- }
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 153:
- *
- * "Specifies the amount of URB data read and passed in the thread
- * payload for each Vertex URB entry, in 256-bit register increments.
- *
- * It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
- * 0 indicating no Vertex URB data to be read and passed to the
- * thread."
- */
- vue_read_len = (vue_read_len + 1) / 2;
- if (!vue_read_len)
- vue_read_len = 1;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 154:
- *
- * "Maximum Number of Threads valid range is [0,27] when Rendering
- * Enabled bit is set."
- *
- * From the Sandy Bridge PRM, volume 2 part 1, page 173:
- *
- * "Programming Note: If the GS stage is enabled, software must always
- * allocate at least one GS URB Entry. This is true even if the GS
- * thread never needs to output vertices to the pipeline, e.g., when
- * only performing stream output. This is an artifact of the need to
- * pass the GS thread an initial destination URB handle."
- *
- * As such, we always enable rendering, and limit the number of threads.
- */
- if (dev->gt == 2) {
- /* maximum is 60, but limited to 28 */
- max_threads = 28;
- }
- else {
- /* maximum is 24, but limited to 21 (see brwCreateContext()) */
- max_threads = 21;
- }
-
- dw2 = GEN6_THREADDISP_SPF;
-
- dw4 = vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT |
- 0 << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT |
- start_grf << GEN6_GS_DW4_URB_GRF_START__SHIFT;
-
- dw5 = (max_threads - 1) << GEN6_GS_DW5_MAX_THREADS__SHIFT |
- GEN6_GS_DW5_STATISTICS |
- GEN6_GS_DW5_SO_STATISTICS |
- GEN6_GS_DW5_RENDER_ENABLE;
-
- /*
- * we cannot make use of GEN6_GS_REORDER because it will reorder
- * triangle strips according to D3D rules (triangle 2N+1 uses vertices
- * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
- * (2N+2, 2N+1, 2N+3)).
- */
- dw6 = GEN6_GS_DW6_GS_ENABLE;
-
- if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_DISCARD_ADJACENCY))
- dw6 |= GEN6_GS_DW6_DISCARD_ADJACENCY;
-
- if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_VS_GEN6_SO)) {
- const uint32_t svbi_post_inc =
- ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_GEN6_SVBI_POST_INC);
-
- dw6 |= GEN6_GS_DW6_SVBI_PAYLOAD_ENABLE;
- if (svbi_post_inc) {
- dw6 |= GEN6_GS_DW6_SVBI_POST_INC_ENABLE |
- svbi_post_inc << GEN6_GS_DW6_SVBI_POST_INC_VAL__SHIFT;
- }
- }
-
- STATIC_ASSERT(Elements(cso->payload) >= 4);
- cso->payload[0] = dw2;
- cso->payload[1] = dw4;
- cso->payload[2] = dw5;
- cso->payload[3] = dw6;
-}
-
-static void
-gs_init_cso_gen7(const struct ilo_dev *dev,
- const struct ilo_shader_state *gs,
- struct ilo_shader_cso *cso)
-{
- int start_grf, vue_read_len, sampler_count, max_threads;
- uint32_t dw2, dw4, dw5;
-
- ILO_DEV_ASSERT(dev, 7, 7.5);
-
- start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG);
- vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
- sampler_count = ilo_shader_get_kernel_param(gs, ILO_KERNEL_SAMPLER_COUNT);
-
- /* in pairs */
- vue_read_len = (vue_read_len + 1) / 2;
-
- switch (ilo_dev_gen(dev)) {
- case ILO_GEN(7.5):
- max_threads = (dev->gt >= 2) ? 256 : 70;
- break;
- case ILO_GEN(7):
- max_threads = (dev->gt == 2) ? 128 : 36;
- break;
- default:
- max_threads = 1;
- break;
- }
-
- dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
- dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
- dw4 = vue_read_len << GEN7_GS_DW4_URB_READ_LEN__SHIFT |
- GEN7_GS_DW4_INCLUDE_VERTEX_HANDLES |
- 0 << GEN7_GS_DW4_URB_READ_OFFSET__SHIFT |
- start_grf << GEN7_GS_DW4_URB_GRF_START__SHIFT;
-
- dw5 = (max_threads - 1) << GEN7_GS_DW5_MAX_THREADS__SHIFT |
- GEN7_GS_DW5_STATISTICS |
- GEN7_GS_DW5_GS_ENABLE;
-
- STATIC_ASSERT(Elements(cso->payload) >= 3);
- cso->payload[0] = dw2;
- cso->payload[1] = dw4;
- cso->payload[2] = dw5;
-}
-
-void
-ilo_gpe_init_gs_cso(const struct ilo_dev *dev,
- const struct ilo_shader_state *gs,
- struct ilo_shader_cso *cso)
-{
- if (ilo_dev_gen(dev) >= ILO_GEN(7))
- gs_init_cso_gen7(dev, gs, cso);
- else
- gs_init_cso_gen6(dev, gs, cso);
-}
-
-static void
-view_init_null_gen6(const struct ilo_dev *dev,
- unsigned width, unsigned height,
- unsigned depth, unsigned level,
- struct ilo_view_surface *surf)
-{
- uint32_t *dw;
-
- ILO_DEV_ASSERT(dev, 6, 6);
-
- assert(width >= 1 && height >= 1 && depth >= 1);
-
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 71:
- *
- * "A null surface will be used in instances where an actual surface is
- * not bound. When a write message is generated to a null surface, no
- * actual surface is written to. When a read message (including any
- * sampling engine message) is generated to a null surface, the result
- * is all zeros. Note that a null surface type is allowed to be used
- * with all messages, even if it is not specificially indicated as
- * supported. All of the remaining fields in surface state are ignored
- * for null surfaces, with the following exceptions:
- *
- * * [DevSNB+]: Width, Height, Depth, and LOD fields must match the
- * depth buffer's corresponding state for all render target
- * surfaces, including null.
- * * Surface Format must be R8G8B8A8_UNORM."
- *
- * From the Sandy Bridge PRM, volume 4 part 1, page 82:
- *
- * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
- * true"
- */
-
- STATIC_ASSERT(Elements(surf->payload) >= 6);
- dw = surf->payload;
-
- dw[0] = GEN6_SURFTYPE_NULL << GEN6_SURFACE_DW0_TYPE__SHIFT |
- GEN6_FORMAT_B8G8R8A8_UNORM << GEN6_SURFACE_DW0_FORMAT__SHIFT;
-
- dw[1] = 0;
-
- dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
- (width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT |
- level << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT;
-
- dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT |
- GEN6_TILING_X;
-
- dw[4] = 0;
- dw[5] = 0;
-}
-
-static void
-view_init_for_buffer_gen6(const struct ilo_dev *dev,
- const struct ilo_buffer *buf,
- unsigned offset, unsigned size,
- unsigned struct_size,
- enum pipe_format elem_format,
- bool is_rt, bool render_cache_rw,
- struct ilo_view_surface *surf)
-{
- const int elem_size = util_format_get_blocksize(elem_format);
- int width, height, depth, pitch;
- int surface_format, num_entries;
- uint32_t *dw;
-
- ILO_DEV_ASSERT(dev, 6, 6);
-
- /*
- * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a
- * structure in a buffer.
- */
-
- surface_format = ilo_format_translate_color(dev, elem_format);
-
- num_entries = size / struct_size;
- /* see if there is enough space to fit another element */
- if (size % struct_size >= elem_size)
- num_entries++;
-
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 76:
- *
- * "For SURFTYPE_BUFFER render targets, this field (Surface Base
- * Address) specifies the base address of first element of the
- * surface. The surface is interpreted as a simple array of that
- * single element type. The address must be naturally-aligned to the
- * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
- * must be 16-byte aligned).
- *
- * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
- * the base address of the first element of the surface, computed in
- * software by adding the surface base address to the byte offset of
- * the element in the buffer."
- */
- if (is_rt)
- assert(offset % elem_size == 0);
-
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 77:
- *
- * "For buffer surfaces, the number of entries in the buffer ranges
- * from 1 to 2^27."
- */
- assert(num_entries >= 1 && num_entries <= 1 << 27);
-
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 81:
- *
- * "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch)
- * indicates the size of the structure."
- */
- pitch = struct_size;
-
- pitch--;
- num_entries--;
- /* bits [6:0] */
- width = (num_entries & 0x0000007f);
- /* bits [19:7] */
- height = (num_entries & 0x000fff80) >> 7;
- /* bits [26:20] */
- depth = (num_entries & 0x07f00000) >> 20;
-
- STATIC_ASSERT(Elements(surf->payload) >= 6);
- dw = surf->payload;
-
- dw[0] = GEN6_SURFTYPE_BUFFER << GEN6_SURFACE_DW0_TYPE__SHIFT |
- surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT;
- if (render_cache_rw)
- dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW;
-
- dw[1] = offset;
-
- dw[2] = height << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
- width << GEN6_SURFACE_DW2_WIDTH__SHIFT;
-
- dw[3] = depth << GEN6_SURFACE_DW3_DEPTH__SHIFT |
- pitch << GEN6_SURFACE_DW3_PITCH__SHIFT;
-
- dw[4] = 0;
- dw[5] = 0;
-}
-
-static void
-view_init_for_image_gen6(const struct ilo_dev *dev,
- const struct ilo_image *img,
- enum pipe_texture_target target,
- enum pipe_format format,
- unsigned first_level,
- unsigned num_levels,
- unsigned first_layer,
- unsigned num_layers,
- bool is_rt,
- struct ilo_view_surface *surf)
-{
- int surface_type, surface_format;
- int width, height, depth, pitch, lod;
- uint32_t *dw;
-
- ILO_DEV_ASSERT(dev, 6, 6);
-
- surface_type = ilo_gpe_gen6_translate_texture(target);
- assert(surface_type != GEN6_SURFTYPE_BUFFER);
-
- if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && img->separate_stencil)
- format = PIPE_FORMAT_Z32_FLOAT;
-
- if (is_rt)
- surface_format = ilo_format_translate_render(dev, format);
- else
- surface_format = ilo_format_translate_texture(dev, format);
- assert(surface_format >= 0);
-
- width = img->width0;
- height = img->height0;
- depth = (target == PIPE_TEXTURE_3D) ? img->depth0 : num_layers;
- pitch = img->bo_stride;
-
- if (surface_type == GEN6_SURFTYPE_CUBE) {
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 81:
- *
- * "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the
- * range of this field (Depth) is [0,84], indicating the number of
- * cube array elements (equal to the number of underlying 2D array
- * elements divided by 6). For other surfaces, this field must be
- * zero."
- *
- * When is_rt is true, we treat the texture as a 2D one to avoid the
- * restriction.
- */
- if (is_rt) {
- surface_type = GEN6_SURFTYPE_2D;
- }
- else {
- assert(num_layers % 6 == 0);
- depth = num_layers / 6;
- }
- }
-
- /* sanity check the size */
- assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
- switch (surface_type) {
- case GEN6_SURFTYPE_1D:
- assert(width <= 8192 && height == 1 && depth <= 512);
- assert(first_layer < 512 && num_layers <= 512);
- break;
- case GEN6_SURFTYPE_2D:
- assert(width <= 8192 && height <= 8192 && depth <= 512);
- assert(first_layer < 512 && num_layers <= 512);
- break;
- case GEN6_SURFTYPE_3D:
- assert(width <= 2048 && height <= 2048 && depth <= 2048);
- assert(first_layer < 2048 && num_layers <= 512);
- if (!is_rt)
- assert(first_layer == 0);
- break;
- case GEN6_SURFTYPE_CUBE:
- assert(width <= 8192 && height <= 8192 && depth <= 85);
- assert(width == height);
- assert(first_layer < 512 && num_layers <= 512);
- if (is_rt)
- assert(first_layer == 0);
- break;
- default:
- assert(!"unexpected surface type");
- break;
- }
-
- /* non-full array spacing is supported only on GEN7+ */
- assert(img->walk != ILO_IMAGE_WALK_LOD);
- /* non-interleaved samples are supported only on GEN7+ */
- if (img->sample_count > 1)
- assert(img->interleaved_samples);
-
- if (is_rt) {
- assert(num_levels == 1);
- lod = first_level;
- }
- else {
- lod = num_levels - 1;
- }
-
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 76:
- *
- * "Linear render target surface base addresses must be element-size
- * aligned, for non-YUV surface formats, or a multiple of 2
- * element-sizes for YUV surface formats. Other linear surfaces have
- * no alignment requirements (byte alignment is sufficient.)"
- *
- * From the Sandy Bridge PRM, volume 4 part 1, page 81:
- *
- * "For linear render target surfaces, the pitch must be a multiple
- * of the element size for non-YUV surface formats. Pitch must be a
- * multiple of 2 * element size for YUV surface formats."
- *
- * From the Sandy Bridge PRM, volume 4 part 1, page 86:
- *
- * "For linear surfaces, this field (X Offset) must be zero"
- */
- if (img->tiling == GEN6_TILING_NONE) {
- if (is_rt) {
- const int elem_size = util_format_get_blocksize(format);
- assert(pitch % elem_size == 0);
- }
- }
-
- STATIC_ASSERT(Elements(surf->payload) >= 6);
- dw = surf->payload;
-
- dw[0] = surface_type << GEN6_SURFACE_DW0_TYPE__SHIFT |
- surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT |
- GEN6_SURFACE_DW0_MIPLAYOUT_BELOW;
-
- if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt) {
- dw[0] |= 1 << 9 |
- GEN6_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;
- }
-
- if (is_rt)
- dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW;
-
- dw[1] = 0;
-
- dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
- (width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT |
- lod << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT;
-
- assert(img->tiling != GEN8_TILING_W);
- dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT |
- (pitch - 1) << GEN6_SURFACE_DW3_PITCH__SHIFT |
- img->tiling;
-
- dw[4] = first_level << GEN6_SURFACE_DW4_MIN_LOD__SHIFT |
- first_layer << 17 |
- (num_layers - 1) << 8 |
- ((img->sample_count > 1) ? GEN6_SURFACE_DW4_MULTISAMPLECOUNT_4 :
- GEN6_SURFACE_DW4_MULTISAMPLECOUNT_1);
-
- dw[5] = 0;
-
- assert(img->align_j == 2 || img->align_j == 4);
- if (img->align_j == 4)
- dw[5] |= GEN6_SURFACE_DW5_VALIGN_4;
-}
-
-static void
-view_init_null_gen7(const struct ilo_dev *dev,
- unsigned width, unsigned height,
- unsigned depth, unsigned level,
- struct ilo_view_surface *surf)
-{
- uint32_t *dw;
-
- ILO_DEV_ASSERT(dev, 7, 8);
-
- assert(width >= 1 && height >= 1 && depth >= 1);
-
- /*
- * From the Ivy Bridge PRM, volume 4 part 1, page 62:
- *
- * "A null surface is used in instances where an actual surface is not
- * bound. When a write message is generated to a null surface, no
- * actual surface is written to. When a read message (including any
- * sampling engine message) is generated to a null surface, the result
- * is all zeros. Note that a null surface type is allowed to be used
- * with all messages, even if it is not specificially indicated as
- * supported. All of the remaining fields in surface state are ignored
- * for null surfaces, with the following exceptions:
- *
- * * Width, Height, Depth, LOD, and Render Target View Extent fields
- * must match the depth buffer's corresponding state for all render
- * target surfaces, including null.
- * * All sampling engine and data port messages support null surfaces
- * with the above behavior, even if not mentioned as specifically
- * supported, except for the following:
- * * Data Port Media Block Read/Write messages.
- * * The Surface Type of a surface used as a render target (accessed
- * via the Data Port's Render Target Write message) must be the same
- * as the Surface Type of all other render targets and of the depth
- * buffer (defined in 3DSTATE_DEPTH_BUFFER), unless either the depth
- * buffer or render targets are SURFTYPE_NULL."
- *
- * From the Ivy Bridge PRM, volume 4 part 1, page 65:
- *
- * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
- * true"
- */
-
- STATIC_ASSERT(Elements(surf->payload) >= 13);
- dw = surf->payload;
-
- dw[0] = GEN6_SURFTYPE_NULL << GEN7_SURFACE_DW0_TYPE__SHIFT |
- GEN6_FORMAT_B8G8R8A8_UNORM << GEN7_SURFACE_DW0_FORMAT__SHIFT;
-
- if (ilo_dev_gen(dev) >= ILO_GEN(8))
- dw[0] |= GEN6_TILING_X << GEN8_SURFACE_DW0_TILING__SHIFT;
- else
- dw[0] |= GEN6_TILING_X << GEN7_SURFACE_DW0_TILING__SHIFT;
-
- dw[1] = 0;
-
- dw[2] = GEN_SHIFT32(height - 1, GEN7_SURFACE_DW2_HEIGHT) |
- GEN_SHIFT32(width - 1, GEN7_SURFACE_DW2_WIDTH);
-
- dw[3] = GEN_SHIFT32(depth - 1, GEN7_SURFACE_DW3_DEPTH);
-
- dw[4] = 0;
- dw[5] = level;
-
- dw[6] = 0;
- dw[7] = 0;
-
- if (ilo_dev_gen(dev) >= ILO_GEN(8))
- memset(&dw[8], 0, sizeof(*dw) * (13 - 8));
-}
-
-static void
-view_init_for_buffer_gen7(const struct ilo_dev *dev,
- const struct ilo_buffer *buf,
- unsigned offset, unsigned size,
- unsigned struct_size,
- enum pipe_format elem_format,
- bool is_rt, bool render_cache_rw,
- struct ilo_view_surface *surf)
-{
- const bool typed = (elem_format != PIPE_FORMAT_NONE);
- const bool structured = (!typed && struct_size > 1);
- const int elem_size = (typed) ?
- util_format_get_blocksize(elem_format) : 1;
- int width, height, depth, pitch;
- int surface_type, surface_format, num_entries;
- uint32_t *dw;
-
- ILO_DEV_ASSERT(dev, 7, 8);
-
- surface_type = (structured) ? GEN7_SURFTYPE_STRBUF : GEN6_SURFTYPE_BUFFER;
-
- surface_format = (typed) ?
- ilo_format_translate_color(dev, elem_format) : GEN6_FORMAT_RAW;
-
- num_entries = size / struct_size;
- /* see if there is enough space to fit another element */
- if (size % struct_size >= elem_size && !structured)
- num_entries++;
-
- /*
- * From the Ivy Bridge PRM, volume 4 part 1, page 67:
- *
- * "For SURFTYPE_BUFFER render targets, this field (Surface Base
- * Address) specifies the base address of first element of the
- * surface. The surface is interpreted as a simple array of that
- * single element type. The address must be naturally-aligned to the
- * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
- * must be 16-byte aligned)
- *
- * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
- * the base address of the first element of the surface, computed in
- * software by adding the surface base address to the byte offset of
- * the element in the buffer."
- */
- if (is_rt)
- assert(offset % elem_size == 0);
-
- /*
- * From the Ivy Bridge PRM, volume 4 part 1, page 68:
- *
- * "For typed buffer and structured buffer surfaces, the number of
- * entries in the buffer ranges from 1 to 2^27. For raw buffer
- * surfaces, the number of entries in the buffer is the number of
- * bytes which can range from 1 to 2^30."
- */
- assert(num_entries >= 1 &&
- num_entries <= 1 << ((typed || structured) ? 27 : 30));
-
- /*
- * From the Ivy Bridge PRM, volume 4 part 1, page 69:
- *
- * "For SURFTYPE_BUFFER: The low two bits of this field (Width) must be
- * 11 if the Surface Format is RAW (the size of the buffer must be a
- * multiple of 4 bytes)."
- *
- * From the Ivy Bridge PRM, volume 4 part 1, page 70:
- *
- * "For surfaces of type SURFTYPE_BUFFER and SURFTYPE_STRBUF, this
- * field (Surface Pitch) indicates the size of the structure."
- *
- * "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the pitch
- * must be a multiple of 4 bytes."
- */
- if (structured)
- assert(struct_size % 4 == 0);
- else if (!typed)
- assert(num_entries % 4 == 0);
-
- pitch = struct_size;
-
- pitch--;
- num_entries--;
- /* bits [6:0] */
- width = (num_entries & 0x0000007f);
- /* bits [20:7] */
- height = (num_entries & 0x001fff80) >> 7;
- /* bits [30:21] */
- depth = (num_entries & 0x7fe00000) >> 21;
- /* limit to [26:21] */
- if (typed || structured)
- depth &= 0x3f;
-
- STATIC_ASSERT(Elements(surf->payload) >= 13);
- dw = surf->payload;
-
- dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT |
- surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT;
- if (render_cache_rw)
- dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW;
-
- if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
- dw[8] = offset;
- memset(&dw[9], 0, sizeof(*dw) * (13 - 9));
- } else {
- dw[1] = offset;
- }
-
- dw[2] = GEN_SHIFT32(height, GEN7_SURFACE_DW2_HEIGHT) |
- GEN_SHIFT32(width, GEN7_SURFACE_DW2_WIDTH);
-
- dw[3] = GEN_SHIFT32(depth, GEN7_SURFACE_DW3_DEPTH) |
- pitch;
-
- dw[4] = 0;
- dw[5] = 0;
-
- dw[6] = 0;
- dw[7] = 0;
-
- if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
- dw[7] |= GEN_SHIFT32(GEN75_SCS_RED, GEN75_SURFACE_DW7_SCS_R) |
- GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) |
- GEN_SHIFT32(GEN75_SCS_BLUE, GEN75_SURFACE_DW7_SCS_B) |
- GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A);
- }
-}
-
-static void
-view_init_for_image_gen7(const struct ilo_dev *dev,
- const struct ilo_image *img,
- enum pipe_texture_target target,
- enum pipe_format format,
- unsigned first_level,
- unsigned num_levels,
- unsigned first_layer,
- unsigned num_layers,
- bool is_rt,
- struct ilo_view_surface *surf)
-{
- int surface_type, surface_format;
- int width, height, depth, pitch, lod;
- uint32_t *dw;
-
- ILO_DEV_ASSERT(dev, 7, 8);
-
- surface_type = ilo_gpe_gen6_translate_texture(target);
- assert(surface_type != GEN6_SURFTYPE_BUFFER);
-
- if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && img->separate_stencil)
- format = PIPE_FORMAT_Z32_FLOAT;
-
- if (is_rt)
- surface_format = ilo_format_translate_render(dev, format);
- else
- surface_format = ilo_format_translate_texture(dev, format);
- assert(surface_format >= 0);
-
- width = img->width0;
- height = img->height0;
- depth = (target == PIPE_TEXTURE_3D) ? img->depth0 : num_layers;
- pitch = img->bo_stride;
-
- if (surface_type == GEN6_SURFTYPE_CUBE) {
- /*
- * From the Ivy Bridge PRM, volume 4 part 1, page 70:
- *
- * "For SURFTYPE_CUBE:For Sampling Engine Surfaces, the range of
- * this field is [0,340], indicating the number of cube array
- * elements (equal to the number of underlying 2D array elements
- * divided by 6). For other surfaces, this field must be zero."
- *
- * When is_rt is true, we treat the texture as a 2D one to avoid the
- * restriction.
- */
- if (is_rt) {
- surface_type = GEN6_SURFTYPE_2D;
- }
- else {
- assert(num_layers % 6 == 0);
- depth = num_layers / 6;
- }
- }
-
- /* sanity check the size */
- assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
- assert(first_layer < 2048 && num_layers <= 2048);
- switch (surface_type) {
- case GEN6_SURFTYPE_1D:
- assert(width <= 16384 && height == 1 && depth <= 2048);
- break;
- case GEN6_SURFTYPE_2D:
- assert(width <= 16384 && height <= 16384 && depth <= 2048);
- break;
- case GEN6_SURFTYPE_3D:
- assert(width <= 2048 && height <= 2048 && depth <= 2048);
- if (!is_rt)
- assert(first_layer == 0);
- break;
- case GEN6_SURFTYPE_CUBE:
- assert(width <= 16384 && height <= 16384 && depth <= 86);
- assert(width == height);
- if (is_rt)
- assert(first_layer == 0);
- break;
- default:
- assert(!"unexpected surface type");
- break;
- }
-
- if (is_rt) {
- assert(num_levels == 1);
- lod = first_level;
- }
- else {
- lod = num_levels - 1;
- }
-
- /*
- * From the Ivy Bridge PRM, volume 4 part 1, page 68:
- *
- * "The Base Address for linear render target surfaces and surfaces
- * accessed with the typed surface read/write data port messages must
- * be element-size aligned, for non-YUV surface formats, or a multiple
- * of 2 element-sizes for YUV surface formats. Other linear surfaces
- * have no alignment requirements (byte alignment is sufficient)."
- *
- * From the Ivy Bridge PRM, volume 4 part 1, page 70:
- *
- * "For linear render target surfaces and surfaces accessed with the
- * typed data port messages, the pitch must be a multiple of the
- * element size for non-YUV surface formats. Pitch must be a multiple
- * of 2 * element size for YUV surface formats. For linear surfaces
- * with Surface Type of SURFTYPE_STRBUF, the pitch must be a multiple
- * of 4 bytes.For other linear surfaces, the pitch can be any multiple
- * of bytes."
- *
- * From the Ivy Bridge PRM, volume 4 part 1, page 74:
- *
- * "For linear surfaces, this field (X Offset) must be zero."
- */
- if (img->tiling == GEN6_TILING_NONE) {
- if (is_rt) {
- const int elem_size = util_format_get_blocksize(format);
- assert(pitch % elem_size == 0);
- }
- }
-
- STATIC_ASSERT(Elements(surf->payload) >= 13);
- dw = surf->payload;
-
- dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT |
- surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT;
-
- /*
- * From the Ivy Bridge PRM, volume 4 part 1, page 63:
- *
- * "If this field (Surface Array) is enabled, the Surface Type must be
- * SURFTYPE_1D, SURFTYPE_2D, or SURFTYPE_CUBE. If this field is
- * disabled and Surface Type is SURFTYPE_1D, SURFTYPE_2D, or
- * SURFTYPE_CUBE, the Depth field must be set to zero."
- *
- * For non-3D sampler surfaces, resinfo (the sampler message) always
- * returns zero for the number of layers when this field is not set.
- */
- if (surface_type != GEN6_SURFTYPE_3D) {
- switch (target) {
- case PIPE_TEXTURE_1D_ARRAY:
- case PIPE_TEXTURE_2D_ARRAY:
- case PIPE_TEXTURE_CUBE_ARRAY:
- dw[0] |= GEN7_SURFACE_DW0_IS_ARRAY;
- break;
- default:
- assert(depth == 1);
- break;
- }
- }
-
- if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
- switch (img->align_j) {
- case 4:
- dw[0] |= GEN7_SURFACE_DW0_VALIGN_4;
- break;
- case 8:
- dw[0] |= GEN8_SURFACE_DW0_VALIGN_8;
- break;
- case 16:
- dw[0] |= GEN8_SURFACE_DW0_VALIGN_16;
- break;
- default:
- assert(!"unsupported valign");
- break;
- }
-
- switch (img->align_i) {
- case 4:
- dw[0] |= GEN8_SURFACE_DW0_HALIGN_4;
- break;
- case 8:
- dw[0] |= GEN8_SURFACE_DW0_HALIGN_8;
- break;
- case 16:
- dw[0] |= GEN8_SURFACE_DW0_HALIGN_16;
- break;
- default:
- assert(!"unsupported halign");
- break;
- }
-
- dw[0] |= img->tiling << GEN8_SURFACE_DW0_TILING__SHIFT;
- } else {
- assert(img->align_i == 4 || img->align_i == 8);
- assert(img->align_j == 2 || img->align_j == 4);
-
- if (img->align_j == 4)
- dw[0] |= GEN7_SURFACE_DW0_VALIGN_4;
-
- if (img->align_i == 8)
- dw[0] |= GEN7_SURFACE_DW0_HALIGN_8;
-
- assert(img->tiling != GEN8_TILING_W);
- dw[0] |= img->tiling << GEN7_SURFACE_DW0_TILING__SHIFT;
-
- if (img->walk == ILO_IMAGE_WALK_LOD)
- dw[0] |= GEN7_SURFACE_DW0_ARYSPC_LOD0;
- else
- dw[0] |= GEN7_SURFACE_DW0_ARYSPC_FULL;
- }
-
- if (is_rt)
- dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW;
-
- if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt)
- dw[0] |= GEN7_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;
-
- if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
- assert(img->walk_layer_height % 4 == 0);
- dw[1] = img->walk_layer_height / 4;
- } else {
- dw[1] = 0;
- }
-
- dw[2] = GEN_SHIFT32(height - 1, GEN7_SURFACE_DW2_HEIGHT) |
- GEN_SHIFT32(width - 1, GEN7_SURFACE_DW2_WIDTH);
-
- dw[3] = GEN_SHIFT32(depth - 1, GEN7_SURFACE_DW3_DEPTH) |
- (pitch - 1);
-
- dw[4] = first_layer << 18 |
- (num_layers - 1) << 7;
-
- /*
- * MSFMT_MSS means the samples are not interleaved and MSFMT_DEPTH_STENCIL
- * means the samples are interleaved. The layouts are the same when the
- * number of samples is 1.
- */
- if (img->interleaved_samples && img->sample_count > 1) {
- assert(!is_rt);
- dw[4] |= GEN7_SURFACE_DW4_MSFMT_DEPTH_STENCIL;
- }
- else {
- dw[4] |= GEN7_SURFACE_DW4_MSFMT_MSS;
- }
-
- switch (img->sample_count) {
- case 0:
- case 1:
- default:
- dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_1;
- break;
- case 2:
- dw[4] |= GEN8_SURFACE_DW4_MULTISAMPLECOUNT_2;
- break;
- case 4:
- dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_4;
- break;
- case 8:
- dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_8;
- break;
- case 16:
- dw[4] |= GEN8_SURFACE_DW4_MULTISAMPLECOUNT_16;
- break;
- }
-
- dw[5] = GEN_SHIFT32(first_level, GEN7_SURFACE_DW5_MIN_LOD) |
- lod;
-
- dw[6] = 0;
- dw[7] = 0;
-
- if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
- dw[7] |= GEN_SHIFT32(GEN75_SCS_RED, GEN75_SURFACE_DW7_SCS_R) |
- GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) |
- GEN_SHIFT32(GEN75_SCS_BLUE, GEN75_SURFACE_DW7_SCS_B) |
- GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A);
- }
-
- if (ilo_dev_gen(dev) >= ILO_GEN(8))
- memset(&dw[8], 0, sizeof(*dw) * (13 - 8));
-}
-
-void
-ilo_gpe_init_view_surface_null(const struct ilo_dev *dev,
- unsigned width, unsigned height,
- unsigned depth, unsigned level,
- struct ilo_view_surface *surf)
-{
- if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
- view_init_null_gen7(dev,
- width, height, depth, level, surf);
- } else {
- view_init_null_gen6(dev,
- width, height, depth, level, surf);
- }
-
- surf->bo = NULL;
- surf->scanout = false;
-}
-
-void
-ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev *dev,
- const struct ilo_buffer *buf,
- unsigned offset, unsigned size,
- unsigned struct_size,
- enum pipe_format elem_format,
- bool is_rt, bool render_cache_rw,
- struct ilo_view_surface *surf)
-{
- if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
- view_init_for_buffer_gen7(dev, buf, offset, size,
- struct_size, elem_format, is_rt, render_cache_rw, surf);
- } else {
- view_init_for_buffer_gen6(dev, buf, offset, size,
- struct_size, elem_format, is_rt, render_cache_rw, surf);
- }
-
- /* do not increment reference count */
- surf->bo = buf->bo;
- surf->scanout = false;
-}
-
-void
-ilo_gpe_init_view_surface_for_image(const struct ilo_dev *dev,
- const struct ilo_image *img,
- enum pipe_texture_target target,
- enum pipe_format format,
- unsigned first_level,
- unsigned num_levels,
- unsigned first_layer,
- unsigned num_layers,
- bool is_rt,
- struct ilo_view_surface *surf)
-{
- if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
- view_init_for_image_gen7(dev, img, target, format,
- first_level, num_levels, first_layer, num_layers,
- is_rt, surf);
- } else {
- view_init_for_image_gen6(dev, img, target, format,
- first_level, num_levels, first_layer, num_layers,
- is_rt, surf);
- }
-
- surf->scanout = img->scanout;
- /* do not increment reference count */
- surf->bo = img->bo;
-}
-
-static void
-sampler_init_border_color_gen6(const struct ilo_dev *dev,
- const union pipe_color_union *color,
- uint32_t *dw, int num_dwords)
-{
- float rgba[4] = {
- color->f[0], color->f[1], color->f[2], color->f[3],
- };
-
- ILO_DEV_ASSERT(dev, 6, 6);
-
- assert(num_dwords >= 12);
-
- /*
- * This state is not documented in the Sandy Bridge PRM, but in the
- * Ironlake PRM. SNORM8 seems to be in DW11 instead of DW1.
- */
-
- /* IEEE_FP */
- dw[1] = fui(rgba[0]);
- dw[2] = fui(rgba[1]);
- dw[3] = fui(rgba[2]);
- dw[4] = fui(rgba[3]);
-
- /* FLOAT_16 */
- dw[5] = util_float_to_half(rgba[0]) |
- util_float_to_half(rgba[1]) << 16;
- dw[6] = util_float_to_half(rgba[2]) |
- util_float_to_half(rgba[3]) << 16;
-
- /* clamp to [-1.0f, 1.0f] */
- rgba[0] = CLAMP(rgba[0], -1.0f, 1.0f);
- rgba[1] = CLAMP(rgba[1], -1.0f, 1.0f);
- rgba[2] = CLAMP(rgba[2], -1.0f, 1.0f);
- rgba[3] = CLAMP(rgba[3], -1.0f, 1.0f);
-
- /* SNORM16 */
- dw[9] = (int16_t) util_iround(rgba[0] * 32767.0f) |
- (int16_t) util_iround(rgba[1] * 32767.0f) << 16;
- dw[10] = (int16_t) util_iround(rgba[2] * 32767.0f) |
- (int16_t) util_iround(rgba[3] * 32767.0f) << 16;
-
- /* SNORM8 */
- dw[11] = (int8_t) util_iround(rgba[0] * 127.0f) |
- (int8_t) util_iround(rgba[1] * 127.0f) << 8 |
- (int8_t) util_iround(rgba[2] * 127.0f) << 16 |
- (int8_t) util_iround(rgba[3] * 127.0f) << 24;
-
- /* clamp to [0.0f, 1.0f] */
- rgba[0] = CLAMP(rgba[0], 0.0f, 1.0f);
- rgba[1] = CLAMP(rgba[1], 0.0f, 1.0f);
- rgba[2] = CLAMP(rgba[2], 0.0f, 1.0f);
- rgba[3] = CLAMP(rgba[3], 0.0f, 1.0f);
-
- /* UNORM8 */
- dw[0] = (uint8_t) util_iround(rgba[0] * 255.0f) |
- (uint8_t) util_iround(rgba[1] * 255.0f) << 8 |
- (uint8_t) util_iround(rgba[2] * 255.0f) << 16 |
- (uint8_t) util_iround(rgba[3] * 255.0f) << 24;
-
- /* UNORM16 */
- dw[7] = (uint16_t) util_iround(rgba[0] * 65535.0f) |
- (uint16_t) util_iround(rgba[1] * 65535.0f) << 16;
- dw[8] = (uint16_t) util_iround(rgba[2] * 65535.0f) |
- (uint16_t) util_iround(rgba[3] * 65535.0f) << 16;
-}
-
-/**
- * Translate a pipe texture mipfilter to the matching hardware mipfilter.
- */
-static int
-gen6_translate_tex_mipfilter(unsigned filter)
-{
- switch (filter) {
- case PIPE_TEX_MIPFILTER_NEAREST: return GEN6_MIPFILTER_NEAREST;
- case PIPE_TEX_MIPFILTER_LINEAR: return GEN6_MIPFILTER_LINEAR;
- case PIPE_TEX_MIPFILTER_NONE: return GEN6_MIPFILTER_NONE;
- default:
- assert(!"unknown mipfilter");
- return GEN6_MIPFILTER_NONE;
- }
-}
-
-/**
- * Translate a pipe texture filter to the matching hardware mapfilter.
- */
-static int
-gen6_translate_tex_filter(unsigned filter)
-{
- switch (filter) {
- case PIPE_TEX_FILTER_NEAREST: return GEN6_MAPFILTER_NEAREST;
- case PIPE_TEX_FILTER_LINEAR: return GEN6_MAPFILTER_LINEAR;
- default:
- assert(!"unknown sampler filter");
- return GEN6_MAPFILTER_NEAREST;
- }
-}
-
-/**
- * Translate a pipe texture coordinate wrapping mode to the matching hardware
- * wrapping mode.
- */
-static int
-gen6_translate_tex_wrap(unsigned wrap)
-{
- switch (wrap) {
- case PIPE_TEX_WRAP_CLAMP: return GEN8_TEXCOORDMODE_HALF_BORDER;
- case PIPE_TEX_WRAP_REPEAT: return GEN6_TEXCOORDMODE_WRAP;
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return GEN6_TEXCOORDMODE_CLAMP;
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return GEN6_TEXCOORDMODE_CLAMP_BORDER;
- case PIPE_TEX_WRAP_MIRROR_REPEAT: return GEN6_TEXCOORDMODE_MIRROR;
- case PIPE_TEX_WRAP_MIRROR_CLAMP:
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
- default:
- assert(!"unknown sampler wrap mode");
- return GEN6_TEXCOORDMODE_WRAP;
- }
-}
-
-/**
- * Translate a pipe shadow compare function to the matching hardware shadow
- * function.
- */
-static int
-gen6_translate_shadow_func(unsigned func)
-{
- /*
- * For PIPE_FUNC_x, the reference value is on the left-hand side of the
- * comparison, and 1.0 is returned when the comparison is true.
- *
- * For GEN6_COMPAREFUNCTION_x, the reference value is on the right-hand side of
- * the comparison, and 0.0 is returned when the comparison is true.
- */
- switch (func) {
- case PIPE_FUNC_NEVER: return GEN6_COMPAREFUNCTION_ALWAYS;
- case PIPE_FUNC_LESS: return GEN6_COMPAREFUNCTION_LEQUAL;
- case PIPE_FUNC_EQUAL: return GEN6_COMPAREFUNCTION_NOTEQUAL;
- case PIPE_FUNC_LEQUAL: return GEN6_COMPAREFUNCTION_LESS;
- case PIPE_FUNC_GREATER: return GEN6_COMPAREFUNCTION_GEQUAL;
- case PIPE_FUNC_NOTEQUAL: return GEN6_COMPAREFUNCTION_EQUAL;
- case PIPE_FUNC_GEQUAL: return GEN6_COMPAREFUNCTION_GREATER;
- case PIPE_FUNC_ALWAYS: return GEN6_COMPAREFUNCTION_NEVER;
- default:
- assert(!"unknown shadow compare function");
- return GEN6_COMPAREFUNCTION_NEVER;
- }
-}
-
-void
-ilo_gpe_init_sampler_cso(const struct ilo_dev *dev,
- const struct pipe_sampler_state *state,
- struct ilo_sampler_cso *sampler)
-{
- int mip_filter, min_filter, mag_filter, max_aniso;
- int lod_bias, max_lod, min_lod;
- int wrap_s, wrap_t, wrap_r, wrap_cube;
- uint32_t dw0, dw1, dw3;
-
- ILO_DEV_ASSERT(dev, 6, 8);
-
- memset(sampler, 0, sizeof(*sampler));
-
- mip_filter = gen6_translate_tex_mipfilter(state->min_mip_filter);
- min_filter = gen6_translate_tex_filter(state->min_img_filter);
- mag_filter = gen6_translate_tex_filter(state->mag_img_filter);
-
- sampler->anisotropic = state->max_anisotropy;
-
- if (state->max_anisotropy >= 2 && state->max_anisotropy <= 16)
- max_aniso = state->max_anisotropy / 2 - 1;
- else if (state->max_anisotropy > 16)
- max_aniso = GEN6_ANISORATIO_16;
- else
- max_aniso = GEN6_ANISORATIO_2;
-
- /*
- *
- * Here is how the hardware calculate per-pixel LOD, from my reading of the
- * PRMs:
- *
- * 1) LOD is set to log2(ratio of texels to pixels) if not specified in
- * other ways. The number of texels is measured using level
- * SurfMinLod.
- * 2) Bias is added to LOD.
- * 3) LOD is clamped to [MinLod, MaxLod], and the clamped value is
- * compared with Base to determine whether magnification or
- * minification is needed. (if preclamp is disabled, LOD is compared
- * with Base before clamping)
- * 4) If magnification is needed, or no mipmapping is requested, LOD is
- * set to floor(MinLod).
- * 5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD.
- *
- * With Gallium interface, Base is always zero and
- * pipe_sampler_view::u.tex.first_level specifies SurfMinLod.
- */
- if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
- const float scale = 256.0f;
-
- /* [-16.0, 16.0) in S4.8 */
- lod_bias = (int)
- (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
- lod_bias &= 0x1fff;
-
- /* [0.0, 14.0] in U4.8 */
- max_lod = (int) (CLAMP(state->max_lod, 0.0f, 14.0f) * scale);
- min_lod = (int) (CLAMP(state->min_lod, 0.0f, 14.0f) * scale);
- }
- else {
- const float scale = 64.0f;
-
- /* [-16.0, 16.0) in S4.6 */
- lod_bias = (int)
- (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
- lod_bias &= 0x7ff;
-
- /* [0.0, 13.0] in U4.6 */
- max_lod = (int) (CLAMP(state->max_lod, 0.0f, 13.0f) * scale);
- min_lod = (int) (CLAMP(state->min_lod, 0.0f, 13.0f) * scale);
- }
-
- /*
- * We want LOD to be clamped to determine magnification/minification, and
- * get set to zero when it is magnification or when mipmapping is disabled.
- * The hardware would set LOD to floor(MinLod) and that is a problem when
- * MinLod is greater than or equal to 1.0f.
- *
- * With Base being zero, it is always minification when MinLod is non-zero.
- * To achieve our goal, we just need to set MinLod to zero and set
- * MagFilter to MinFilter when mipmapping is disabled.
- */
- if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && min_lod) {
- min_lod = 0;
- mag_filter = min_filter;
- }
-
- /* determine wrap s/t/r */
- wrap_s = gen6_translate_tex_wrap(state->wrap_s);
- wrap_t = gen6_translate_tex_wrap(state->wrap_t);
- wrap_r = gen6_translate_tex_wrap(state->wrap_r);
- if (ilo_dev_gen(dev) < ILO_GEN(8)) {
- /*
- * For nearest filtering, PIPE_TEX_WRAP_CLAMP means
- * PIPE_TEX_WRAP_CLAMP_TO_EDGE; for linear filtering,
- * PIPE_TEX_WRAP_CLAMP means PIPE_TEX_WRAP_CLAMP_TO_BORDER while
- * additionally clamping the texture coordinates to [0.0, 1.0].
- *
- * PIPE_TEX_WRAP_CLAMP is not supported natively until Gen8. The
- * clamping has to be taken care of in the shaders. There are two
-       * filters here, but let the minification one have a say.
- */
- const bool clamp_is_to_edge =
- (state->min_img_filter == PIPE_TEX_FILTER_NEAREST);
-
- if (clamp_is_to_edge) {
- if (wrap_s == GEN8_TEXCOORDMODE_HALF_BORDER)
- wrap_s = GEN6_TEXCOORDMODE_CLAMP;
- if (wrap_t == GEN8_TEXCOORDMODE_HALF_BORDER)
- wrap_t = GEN6_TEXCOORDMODE_CLAMP;
- if (wrap_r == GEN8_TEXCOORDMODE_HALF_BORDER)
- wrap_r = GEN6_TEXCOORDMODE_CLAMP;
- } else {
- if (wrap_s == GEN8_TEXCOORDMODE_HALF_BORDER) {
- wrap_s = GEN6_TEXCOORDMODE_CLAMP_BORDER;
- sampler->saturate_s = true;
- }
- if (wrap_t == GEN8_TEXCOORDMODE_HALF_BORDER) {
- wrap_t = GEN6_TEXCOORDMODE_CLAMP_BORDER;
- sampler->saturate_t = true;
- }
- if (wrap_r == GEN8_TEXCOORDMODE_HALF_BORDER) {
- wrap_r = GEN6_TEXCOORDMODE_CLAMP_BORDER;
- sampler->saturate_r = true;
- }
- }
- }
-
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 107:
- *
- * "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP
- * and TEXCOORDMODE_CUBE settings are valid, and each TC component
- * must have the same Address Control mode."
- *
- * From the Ivy Bridge PRM, volume 4 part 1, page 96:
- *
- * "This field (Cube Surface Control Mode) must be set to
- * CUBECTRLMODE_PROGRAMMED"
- *
-    * Therefore, we cannot use "Cube Surface Control Mode" for seamless cube
- * map filtering.
- */
- if (state->seamless_cube_map &&
- (state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
- state->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) {
- wrap_cube = GEN6_TEXCOORDMODE_CUBE;
- }
- else {
- wrap_cube = GEN6_TEXCOORDMODE_CLAMP;
- }
-
- if (!state->normalized_coords) {
- /*
- * From the Ivy Bridge PRM, volume 4 part 1, page 98:
- *
- * "The following state must be set as indicated if this field
- * (Non-normalized Coordinate Enable) is enabled:
- *
- * - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP,
- * TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER.
- * - Surface Type must be SURFTYPE_2D or SURFTYPE_3D.
- * - Mag Mode Filter must be MAPFILTER_NEAREST or
- * MAPFILTER_LINEAR.
- * - Min Mode Filter must be MAPFILTER_NEAREST or
- * MAPFILTER_LINEAR.
- * - Mip Mode Filter must be MIPFILTER_NONE.
- * - Min LOD must be 0.
- * - Max LOD must be 0.
- * - MIP Count must be 0.
- * - Surface Min LOD must be 0.
- * - Texture LOD Bias must be 0."
- */
- assert(wrap_s == GEN6_TEXCOORDMODE_CLAMP ||
- wrap_s == GEN6_TEXCOORDMODE_CLAMP_BORDER);
- assert(wrap_t == GEN6_TEXCOORDMODE_CLAMP ||
- wrap_t == GEN6_TEXCOORDMODE_CLAMP_BORDER);
- assert(wrap_r == GEN6_TEXCOORDMODE_CLAMP ||
- wrap_r == GEN6_TEXCOORDMODE_CLAMP_BORDER);
-
- assert(mag_filter == GEN6_MAPFILTER_NEAREST ||
- mag_filter == GEN6_MAPFILTER_LINEAR);
- assert(min_filter == GEN6_MAPFILTER_NEAREST ||
- min_filter == GEN6_MAPFILTER_LINEAR);
-
- /* work around a bug in util_blitter */
- mip_filter = GEN6_MIPFILTER_NONE;
-
- assert(mip_filter == GEN6_MIPFILTER_NONE);
- }
-
- if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
- dw0 = 1 << 28 |
- mip_filter << 20 |
- lod_bias << 1;
-
- sampler->dw_filter = mag_filter << 17 |
- min_filter << 14;
-
- sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 |
- GEN6_MAPFILTER_ANISOTROPIC << 14 |
- 1;
-
- dw1 = min_lod << 20 |
- max_lod << 8;
-
- if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
- dw1 |= gen6_translate_shadow_func(state->compare_func) << 1;
-
- dw3 = max_aniso << 19;
-
- /* round the coordinates for linear filtering */
- if (min_filter != GEN6_MAPFILTER_NEAREST) {
- dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND |
- GEN6_SAMPLER_DW3_V_MIN_ROUND |
- GEN6_SAMPLER_DW3_R_MIN_ROUND);
- }
- if (mag_filter != GEN6_MAPFILTER_NEAREST) {
- dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND |
- GEN6_SAMPLER_DW3_V_MAG_ROUND |
- GEN6_SAMPLER_DW3_R_MAG_ROUND);
- }
-
- if (!state->normalized_coords)
- dw3 |= 1 << 10;
-
- sampler->dw_wrap = wrap_s << 6 |
- wrap_t << 3 |
- wrap_r;
-
- /*
- * As noted in the classic i965 driver, the HW may still reference
- * wrap_t and wrap_r for 1D textures. We need to set them to a safe
- * mode
- */
- sampler->dw_wrap_1d = wrap_s << 6 |
- GEN6_TEXCOORDMODE_WRAP << 3 |
- GEN6_TEXCOORDMODE_WRAP;
-
- sampler->dw_wrap_cube = wrap_cube << 6 |
- wrap_cube << 3 |
- wrap_cube;
-
- STATIC_ASSERT(Elements(sampler->payload) >= 7);
-
- sampler->payload[0] = dw0;
- sampler->payload[1] = dw1;
- sampler->payload[2] = dw3;
-
- memcpy(&sampler->payload[3],
- state->border_color.ui, sizeof(state->border_color.ui));
- }
- else {
- dw0 = 1 << 28 |
- mip_filter << 20 |
- lod_bias << 3;
-
- if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
- dw0 |= gen6_translate_shadow_func(state->compare_func);
-
- sampler->dw_filter = (min_filter != mag_filter) << 27 |
- mag_filter << 17 |
- min_filter << 14;
-
- sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 |
- GEN6_MAPFILTER_ANISOTROPIC << 14;
-
- dw1 = min_lod << 22 |
- max_lod << 12;
-
- sampler->dw_wrap = wrap_s << 6 |
- wrap_t << 3 |
- wrap_r;
-
- sampler->dw_wrap_1d = wrap_s << 6 |
- GEN6_TEXCOORDMODE_WRAP << 3 |
- GEN6_TEXCOORDMODE_WRAP;
-
- sampler->dw_wrap_cube = wrap_cube << 6 |
- wrap_cube << 3 |
- wrap_cube;
-
- dw3 = max_aniso << 19;
-
- /* round the coordinates for linear filtering */
- if (min_filter != GEN6_MAPFILTER_NEAREST) {
- dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND |
- GEN6_SAMPLER_DW3_V_MIN_ROUND |
- GEN6_SAMPLER_DW3_R_MIN_ROUND);
- }
- if (mag_filter != GEN6_MAPFILTER_NEAREST) {
- dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND |
- GEN6_SAMPLER_DW3_V_MAG_ROUND |
- GEN6_SAMPLER_DW3_R_MAG_ROUND);
- }
-
- if (!state->normalized_coords)
- dw3 |= 1;
-
- STATIC_ASSERT(Elements(sampler->payload) >= 15);
-
- sampler->payload[0] = dw0;
- sampler->payload[1] = dw1;
- sampler->payload[2] = dw3;
-
- sampler_init_border_color_gen6(dev,
- &state->border_color, &sampler->payload[3], 12);
- }
-}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_cc.c b/src/gallium/drivers/ilo/core/ilo_state_cc.c
new file mode 100644
index 00000000000..83ee8de979c
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_cc.c
@@ -0,0 +1,890 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#include "ilo_debug.h"
+#include "ilo_state_cc.h"
+
+static bool
+cc_validate_gen6_stencil(const struct ilo_dev *dev,
+ const struct ilo_state_cc_info *info)
+{
+ const struct ilo_state_cc_stencil_info *stencil = &info->stencil;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 359:
+ *
+ * "If the Depth Buffer is either undefined or does not have a surface
+ * format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate
+ * stencil buffer is disabled, Stencil Test Enable must be DISABLED"
+ *
+ * From the Sandy Bridge PRM, volume 2 part 1, page 370:
+ *
+ * "This field (Stencil Test Enable) cannot be enabled if Surface
+ * Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM."
+ */
+ if (stencil->test_enable)
+ assert(stencil->cv_has_buffer);
+
+ return true;
+}
+
+static bool
+cc_validate_gen6_depth(const struct ilo_dev *dev,
+ const struct ilo_state_cc_info *info)
+{
+ const struct ilo_state_cc_depth_info *depth = &info->depth;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 360:
+ *
+ * "Enabling the Depth Test function without defining a Depth Buffer is
+ * UNDEFINED."
+ *
+ * From the Sandy Bridge PRM, volume 2 part 1, page 375:
+ *
+ * "A Depth Buffer must be defined before enabling writes to it, or
+ * operation is UNDEFINED."
+ */
+ if (depth->test_enable || depth->write_enable)
+ assert(depth->cv_has_buffer);
+
+ return true;
+}
+
+static bool
+cc_set_gen6_DEPTH_STENCIL_STATE(struct ilo_state_cc *cc,
+ const struct ilo_dev *dev,
+ const struct ilo_state_cc_info *info)
+{
+ const struct ilo_state_cc_stencil_info *stencil = &info->stencil;
+ const struct ilo_state_cc_depth_info *depth = &info->depth;
+ const struct ilo_state_cc_params_info *params = &info->params;
+ uint32_t dw0, dw1, dw2;
+
+ ILO_DEV_ASSERT(dev, 6, 7.5);
+
+ if (!cc_validate_gen6_stencil(dev, info) ||
+ !cc_validate_gen6_depth(dev, info))
+ return false;
+
+ dw0 = 0;
+ dw1 = 0;
+ if (stencil->test_enable) {
+ const struct ilo_state_cc_stencil_op_info *front = &stencil->front;
+ const struct ilo_state_cc_stencil_params_info *front_p =
+ &params->stencil_front;
+ const struct ilo_state_cc_stencil_op_info *back;
+ const struct ilo_state_cc_stencil_params_info *back_p;
+
+ dw0 |= GEN6_ZS_DW0_STENCIL_TEST_ENABLE;
+
+ if (stencil->twosided_enable) {
+ dw0 |= GEN6_ZS_DW0_STENCIL1_ENABLE;
+
+ back = &stencil->back;
+ back_p = &params->stencil_back;
+ } else {
+ back = &stencil->front;
+ back_p = &params->stencil_front;
+ }
+
+ dw0 |= front->test_func << GEN6_ZS_DW0_STENCIL_FUNC__SHIFT |
+ front->fail_op << GEN6_ZS_DW0_STENCIL_FAIL_OP__SHIFT |
+ front->zfail_op << GEN6_ZS_DW0_STENCIL_ZFAIL_OP__SHIFT |
+ front->zpass_op << GEN6_ZS_DW0_STENCIL_ZPASS_OP__SHIFT |
+ back->test_func << GEN6_ZS_DW0_STENCIL1_FUNC__SHIFT |
+ back->fail_op << GEN6_ZS_DW0_STENCIL1_FAIL_OP__SHIFT |
+ back->zfail_op << GEN6_ZS_DW0_STENCIL1_ZFAIL_OP__SHIFT |
+ back->zpass_op << GEN6_ZS_DW0_STENCIL1_ZPASS_OP__SHIFT;
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 363:
+ *
+ * "If this field (Stencil Buffer Write Enable) is enabled, Stencil
+ * Test Enable must also be enabled."
+ *
+ * This is different from depth write enable, which is independent from
+ * depth test enable.
+ */
+ if (front_p->write_mask || back_p->write_mask)
+ dw0 |= GEN6_ZS_DW0_STENCIL_WRITE_ENABLE;
+
+ dw1 |= front_p->test_mask << GEN6_ZS_DW1_STENCIL_TEST_MASK__SHIFT |
+ front_p->write_mask << GEN6_ZS_DW1_STENCIL_WRITE_MASK__SHIFT |
+ back_p->test_mask << GEN6_ZS_DW1_STENCIL1_TEST_MASK__SHIFT |
+ back_p->write_mask << GEN6_ZS_DW1_STENCIL1_WRITE_MASK__SHIFT;
+ }
+
+ dw2 = 0;
+ if (depth->test_enable) {
+ dw2 |= GEN6_ZS_DW2_DEPTH_TEST_ENABLE |
+ depth->test_func << GEN6_ZS_DW2_DEPTH_FUNC__SHIFT;
+ } else {
+ dw2 |= GEN6_COMPAREFUNCTION_ALWAYS << GEN6_ZS_DW2_DEPTH_FUNC__SHIFT;
+ }
+
+ /* independent from depth->test_enable */
+ if (depth->write_enable)
+ dw2 |= GEN6_ZS_DW2_DEPTH_WRITE_ENABLE;
+
+ STATIC_ASSERT(ARRAY_SIZE(cc->ds) >= 3);
+ cc->ds[0] = dw0;
+ cc->ds[1] = dw1;
+ cc->ds[2] = dw2;
+
+ return true;
+}
+
+static bool
+cc_set_gen8_3DSTATE_WM_DEPTH_STENCIL(struct ilo_state_cc *cc,
+ const struct ilo_dev *dev,
+ const struct ilo_state_cc_info *info)
+{
+ const struct ilo_state_cc_stencil_info *stencil = &info->stencil;
+ const struct ilo_state_cc_depth_info *depth = &info->depth;
+ const struct ilo_state_cc_params_info *params = &info->params;
+ uint32_t dw1, dw2;
+
+ ILO_DEV_ASSERT(dev, 8, 8);
+
+ if (!cc_validate_gen6_stencil(dev, info) ||
+ !cc_validate_gen6_depth(dev, info))
+ return false;
+
+ dw1 = 0;
+ dw2 = 0;
+ if (stencil->test_enable) {
+ const struct ilo_state_cc_stencil_op_info *front = &stencil->front;
+ const struct ilo_state_cc_stencil_params_info *front_p =
+ &params->stencil_front;
+ const struct ilo_state_cc_stencil_op_info *back;
+ const struct ilo_state_cc_stencil_params_info *back_p;
+
+ dw1 |= GEN8_ZS_DW1_STENCIL_TEST_ENABLE;
+
+ if (stencil->twosided_enable) {
+ dw1 |= GEN8_ZS_DW1_STENCIL1_ENABLE;
+
+ back = &stencil->back;
+ back_p = &params->stencil_back;
+ } else {
+ back = &stencil->front;
+ back_p = &params->stencil_front;
+ }
+
+ dw1 |= front->fail_op << GEN8_ZS_DW1_STENCIL_FAIL_OP__SHIFT |
+ front->zfail_op << GEN8_ZS_DW1_STENCIL_ZFAIL_OP__SHIFT |
+ front->zpass_op << GEN8_ZS_DW1_STENCIL_ZPASS_OP__SHIFT |
+ back->test_func << GEN8_ZS_DW1_STENCIL1_FUNC__SHIFT |
+ back->fail_op << GEN8_ZS_DW1_STENCIL1_FAIL_OP__SHIFT |
+ back->zfail_op << GEN8_ZS_DW1_STENCIL1_ZFAIL_OP__SHIFT |
+ back->zpass_op << GEN8_ZS_DW1_STENCIL1_ZPASS_OP__SHIFT |
+ front->test_func << GEN8_ZS_DW1_STENCIL_FUNC__SHIFT;
+
+ if (front_p->write_mask || back_p->write_mask)
+ dw1 |= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE;
+
+ dw2 |= front_p->test_mask << GEN8_ZS_DW2_STENCIL_TEST_MASK__SHIFT |
+ front_p->write_mask << GEN8_ZS_DW2_STENCIL_WRITE_MASK__SHIFT |
+ back_p->test_mask << GEN8_ZS_DW2_STENCIL1_TEST_MASK__SHIFT |
+ back_p->write_mask << GEN8_ZS_DW2_STENCIL1_WRITE_MASK__SHIFT;
+ }
+
+ if (depth->test_enable) {
+ dw1 |= GEN8_ZS_DW1_DEPTH_TEST_ENABLE |
+ depth->test_func << GEN8_ZS_DW1_DEPTH_FUNC__SHIFT;
+ } else {
+ dw1 |= GEN6_COMPAREFUNCTION_ALWAYS << GEN8_ZS_DW1_DEPTH_FUNC__SHIFT;
+ }
+
+ if (depth->write_enable)
+ dw1 |= GEN8_ZS_DW1_DEPTH_WRITE_ENABLE;
+
+ STATIC_ASSERT(ARRAY_SIZE(cc->ds) >= 2);
+ cc->ds[0] = dw1;
+ cc->ds[1] = dw2;
+
+ return true;
+}
+
+static bool
+is_dual_source_blend_factor(enum gen_blend_factor factor)
+{
+ switch (factor) {
+ case GEN6_BLENDFACTOR_SRC1_COLOR:
+ case GEN6_BLENDFACTOR_SRC1_ALPHA:
+ case GEN6_BLENDFACTOR_INV_SRC1_COLOR:
+ case GEN6_BLENDFACTOR_INV_SRC1_ALPHA:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+cc_get_gen6_dual_source_blending(const struct ilo_dev *dev,
+ const struct ilo_state_cc_info *info)
+{
+ const struct ilo_state_cc_blend_info *blend = &info->blend;
+ bool dual_source_blending;
+ uint8_t i;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ dual_source_blending = (blend->rt_count &&
+ (is_dual_source_blend_factor(blend->rt[0].rgb_src) ||
+ is_dual_source_blend_factor(blend->rt[0].rgb_dst) ||
+ is_dual_source_blend_factor(blend->rt[0].a_src) ||
+ is_dual_source_blend_factor(blend->rt[0].a_dst)));
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 356:
+ *
+ * "Dual Source Blending: When using "Dual Source" Render Target
+ * Write messages, the Source1 pixel color+alpha passed in the
+ * message can be selected as a src/dst blend factor. See Color
+ * Buffer Blending. In single-source mode, those blend factor
+ * selections are invalid. If SRC1 is included in a src/dst blend
+ * factor and a DualSource RT Write message is not utilized,
+ * results are UNDEFINED. (This reflects the same restriction in DX
+ * APIs, where undefined results are produced if "o1" is not
+ * written by a PS - there are no default values defined). If SRC1
+ * is not included in a src/dst blend factor, dual source blending
+ * must be disabled."
+ *
+ * From the Ivy Bridge PRM, volume 4 part 1, page 356:
+ *
+ * "The single source message will not cause a write to the render
+ * target if Dual Source Blend Enable in 3DSTATE_WM is enabled."
+ *
+ * "The dual source message will revert to a single source message
+ * using source 0 if Dual Source Blend Enable in 3DSTATE_WM is
+ * disabled."
+ *
+ * Dual source blending must be enabled or disabled universally.
+ */
+ for (i = 1; i < blend->rt_count; i++) {
+ assert(dual_source_blending ==
+ (is_dual_source_blend_factor(blend->rt[i].rgb_src) ||
+ is_dual_source_blend_factor(blend->rt[i].rgb_dst) ||
+ is_dual_source_blend_factor(blend->rt[i].a_src) ||
+ is_dual_source_blend_factor(blend->rt[i].a_dst)));
+ }
+
+ return dual_source_blending;
+}
+
+static bool
+cc_validate_gen6_alpha(const struct ilo_dev *dev,
+ const struct ilo_state_cc_info *info)
+{
+ const struct ilo_state_cc_alpha_info *alpha = &info->alpha;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 356:
+ *
+ * "Alpha values from the pixel shader are treated as FLOAT32 format
+ * for computing the AlphaToCoverage Mask."
+ *
+ * From the Sandy Bridge PRM, volume 2 part 1, page 378:
+ *
+ * "If set (AlphaToCoverage Enable), Source0 Alpha is converted to a
+ * temporary 1/2/4-bit coverage mask and the mask bit corresponding to
+ * the sample# ANDed with the sample mask bit. If set, sample coverage
+ * is computed based on src0 alpha value. Value of 0 disables all
+ * samples and value of 1 enables all samples for that pixel. The same
+ * coverage needs to apply to all the RTs in MRT case. Further, any
+ * value of src0 alpha between 0 and 1 monotonically increases the
+ * number of enabled pixels.
+ *
+ * The same coverage needs to be applied to all the RTs in MRT case."
+ *
+ * "If set (AlphaToOne Enable), Source0 Alpha is set to 1.0f after
+ * (possibly) being used to generate the AlphaToCoverage coverage
+ * mask.
+ *
+ * The same coverage needs to be applied to all the RTs in MRT case.
+ *
+ * If Dual Source Blending is enabled, this bit must be disabled."
+ *
+ * From the Sandy Bridge PRM, volume 2 part 1, page 382:
+ *
+ * "Alpha Test can only be enabled if Pixel Shader outputs a float
+ * alpha value.
+ *
+ * Alpha Test is applied independently on each render target by
+ * comparing that render target's alpha value against the alpha
+ * reference value. If the alpha test fails, the corresponding pixel
+    *     write will be suppressed only for that render target. The
+ * depth/stencil update will occur if alpha test passes for any render
+ * target."
+ *
+ * From the Sandy Bridge PRM, volume 4 part 1, page 194:
+ *
+ * "Multiple render targets are supported with the single source and
+ * replicate data messages. Each render target is accessed with a
+ * separate Render Target Write message, each with a different surface
+ * indicated (different binding table index). The depth buffer is
+ * written only by the message(s) to the last render target, indicated
+ * by the Last Render Target Select bit set to clear the pixel
+ * scoreboard bits."
+ *
+ * When AlphaToCoverage/AlphaToOne/AlphaTest is enabled, it is
+ * required/desirable for the RT write messages to set "Source0 Alpha
+ * Present to RenderTarget" in the MRT case. It is also required/desirable
+ * for the alpha values to be FLOAT32.
+ */
+ if (alpha->alpha_to_coverage || alpha->alpha_to_one || alpha->test_enable)
+ assert(alpha->cv_float_source0_alpha);
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 356:
+ *
+ * "[DevSNB]: When NumSamples = 1, AlphaToCoverage and AlphaTo
+ * Coverage Dither both must be disabled."
+ */
+ if (ilo_dev_gen(dev) == ILO_GEN(6) && alpha->alpha_to_coverage)
+ assert(alpha->cv_sample_count_one);
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 378:
+ *
+ * "If Dual Source Blending is enabled, this bit (AlphaToOne Enable)
+ * must be disabled."
+ */
+ if (alpha->alpha_to_one)
+ assert(!cc_get_gen6_dual_source_blending(dev, info));
+
+ return true;
+}
+
+static bool
+cc_validate_gen6_blend(const struct ilo_dev *dev,
+ const struct ilo_state_cc_info *info)
+{
+ const struct ilo_state_cc_blend_info *blend = &info->blend;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ assert(blend->rt_count <= ILO_STATE_CC_BLEND_MAX_RT_COUNT);
+
+ return true;
+}
+
+static enum gen_blend_factor
+get_dst_alpha_one_blend_factor(enum gen_blend_factor factor, bool is_rgb)
+{
+ switch (factor) {
+ case GEN6_BLENDFACTOR_DST_ALPHA:
+ return GEN6_BLENDFACTOR_ONE;
+ case GEN6_BLENDFACTOR_INV_DST_ALPHA:
+ return GEN6_BLENDFACTOR_ZERO;
+ case GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ return (is_rgb) ? GEN6_BLENDFACTOR_ZERO : GEN6_BLENDFACTOR_ONE;
+ default:
+ return factor;
+ }
+}
+
+static void
+cc_get_gen6_effective_rt(const struct ilo_dev *dev,
+ const struct ilo_state_cc_info *info,
+ uint8_t rt_index,
+ struct ilo_state_cc_blend_rt_info *dst)
+{
+ const struct ilo_state_cc_blend_rt_info *rt = &info->blend.rt[rt_index];
+
+ if (rt->logicop_enable || rt->blend_enable ||
+ rt->argb_write_disables != 0xf)
+ assert(rt->cv_has_buffer);
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 365:
+ *
+ * "Logic Ops are only supported on *_UNORM surfaces (excluding _SRGB
+ * variants), otherwise Logic Ops must be DISABLED."
+ *
+ * From the Broadwell PRM, volume 7, page 671:
+ *
+ * "Logic Ops are supported on all blendable render targets and render
+ * targets with *INT formats."
+ */
+ if (ilo_dev_gen(dev) < ILO_GEN(8) && rt->logicop_enable)
+ assert(rt->cv_is_unorm);
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 361:
+ *
+ * "Only certain surface formats support Color Buffer Blending. Refer
+ * to the Surface Format tables in Sampling Engine. Blending must be
+ * disabled on a RenderTarget if blending is not supported."
+ *
+ * From the Sandy Bridge PRM, volume 2 part 1, page 365:
+ *
+ * "Color Buffer Blending and Logic Ops must not be enabled
+ * simultaneously, or behavior is UNDEFINED."
+ */
+ if (rt->blend_enable)
+ assert(!rt->cv_is_integer && !rt->logicop_enable);
+
+ *dst = *rt;
+ if (rt->blend_enable) {
+ /* 0x0 is reserved in enum gen_blend_factor */
+ assert(rt->rgb_src && rt->rgb_dst && rt->a_src && rt->a_dst);
+
+ if (rt->force_dst_alpha_one) {
+ dst->rgb_src = get_dst_alpha_one_blend_factor(rt->rgb_src, true);
+ dst->rgb_dst = get_dst_alpha_one_blend_factor(rt->rgb_dst, true);
+ dst->a_src = get_dst_alpha_one_blend_factor(rt->a_src, false);
+ dst->a_dst = get_dst_alpha_one_blend_factor(rt->a_dst, false);
+ dst->force_dst_alpha_one = false;
+ }
+ } else {
+ dst->rgb_src = GEN6_BLENDFACTOR_ONE;
+ dst->rgb_dst = GEN6_BLENDFACTOR_ZERO;
+ dst->rgb_func = GEN6_BLENDFUNCTION_ADD;
+ dst->a_src = dst->rgb_src;
+ dst->a_dst = dst->rgb_dst;
+ dst->a_func = dst->rgb_func;
+ }
+}
+
+static bool
+cc_set_gen6_BLEND_STATE(struct ilo_state_cc *cc,
+ const struct ilo_dev *dev,
+ const struct ilo_state_cc_info *info)
+{
+ const struct ilo_state_cc_alpha_info *alpha = &info->alpha;
+ const struct ilo_state_cc_blend_info *blend = &info->blend;
+ uint32_t dw_rt[2 * ILO_STATE_CC_BLEND_MAX_RT_COUNT], dw1_invariant;
+ uint32_t dw0, dw1;
+ uint8_t i;
+
+ ILO_DEV_ASSERT(dev, 6, 7.5);
+
+ if (!cc_validate_gen6_alpha(dev, info) ||
+ !cc_validate_gen6_blend(dev, info))
+ return false;
+
+ /*
+ * According to the Sandy Bridge PRM, volume 2 part 1, page 360, pre-blend
+ * and post-blend color clamps must be enabled in most cases. For the
+ * other cases, they are either desirable or ignored. We can enable them
+ * unconditionally.
+ */
+ dw1 = GEN6_RT_DW1_COLORCLAMP_RTFORMAT |
+ GEN6_RT_DW1_PRE_BLEND_CLAMP |
+ GEN6_RT_DW1_POST_BLEND_CLAMP;
+
+ if (alpha->alpha_to_coverage) {
+ dw1 |= GEN6_RT_DW1_ALPHA_TO_COVERAGE;
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 379:
+ *
+ * "[DevSNB]: This bit (AlphaToCoverage Dither Enable) must be
+ * disabled."
+ */
+ if (ilo_dev_gen(dev) >= ILO_GEN(7))
+ dw1 |= GEN6_RT_DW1_ALPHA_TO_COVERAGE_DITHER;
+ }
+
+ if (alpha->alpha_to_one)
+ dw1 |= GEN6_RT_DW1_ALPHA_TO_ONE;
+
+ if (alpha->test_enable) {
+ dw1 |= GEN6_RT_DW1_ALPHA_TEST_ENABLE |
+ alpha->test_func << GEN6_RT_DW1_ALPHA_TEST_FUNC__SHIFT;
+ } else {
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 371:
+ *
+ * "When Alpha Test is disabled, Alpha Test Function must be
+ * COMPAREFUNCTION_ALWAYS."
+ */
+ dw1 |= GEN6_COMPAREFUNCTION_ALWAYS <<
+ GEN6_RT_DW1_ALPHA_TEST_FUNC__SHIFT;
+ }
+
+ if (blend->dither_enable)
+ dw1 |= GEN6_RT_DW1_DITHER_ENABLE;
+
+ dw1_invariant = dw1;
+
+ for (i = 0; i < blend->rt_count; i++) {
+ struct ilo_state_cc_blend_rt_info rt;
+
+ cc_get_gen6_effective_rt(dev, info, i, &rt);
+
+ /* 0x0 is reserved for blend factors and we have to set them all */
+ dw0 = rt.a_func << GEN6_RT_DW0_ALPHA_FUNC__SHIFT |
+ rt.a_src << GEN6_RT_DW0_SRC_ALPHA_FACTOR__SHIFT |
+ rt.a_dst << GEN6_RT_DW0_DST_ALPHA_FACTOR__SHIFT |
+ rt.rgb_func << GEN6_RT_DW0_COLOR_FUNC__SHIFT |
+ rt.rgb_src << GEN6_RT_DW0_SRC_COLOR_FACTOR__SHIFT |
+ rt.rgb_dst << GEN6_RT_DW0_DST_COLOR_FACTOR__SHIFT;
+
+ if (rt.blend_enable) {
+ dw0 |= GEN6_RT_DW0_BLEND_ENABLE;
+
+ if (rt.a_src != rt.rgb_src ||
+ rt.a_dst != rt.rgb_dst ||
+ rt.a_func != rt.rgb_func)
+ dw0 |= GEN6_RT_DW0_INDEPENDENT_ALPHA_ENABLE;
+ }
+
+ dw1 = dw1_invariant |
+ rt.argb_write_disables << GEN6_RT_DW1_WRITE_DISABLES__SHIFT;
+
+ if (rt.logicop_enable) {
+ dw1 |= GEN6_RT_DW1_LOGICOP_ENABLE |
+ rt.logicop_func << GEN6_RT_DW1_LOGICOP_FUNC__SHIFT;
+ }
+
+ dw_rt[2 * i + 0] = dw0;
+ dw_rt[2 * i + 1] = dw1;
+ }
+
+
+ STATIC_ASSERT(ARRAY_SIZE(cc->blend) >= ARRAY_SIZE(dw_rt));
+ memcpy(&cc->blend[0], dw_rt, sizeof(uint32_t) * 2 * blend->rt_count);
+ cc->blend_state_count = info->blend.rt_count;
+
+ return true;
+}
+
+static bool
+cc_set_gen8_BLEND_STATE(struct ilo_state_cc *cc,
+ const struct ilo_dev *dev,
+ const struct ilo_state_cc_info *info)
+{
+ const struct ilo_state_cc_alpha_info *alpha = &info->alpha;
+ const struct ilo_state_cc_blend_info *blend = &info->blend;
+ uint32_t dw_rt[2 * ILO_STATE_CC_BLEND_MAX_RT_COUNT], dw0, dw1;
+ bool indep_alpha_enable;
+ uint8_t i;
+
+ ILO_DEV_ASSERT(dev, 8, 8);
+
+ if (!cc_validate_gen6_alpha(dev, info) ||
+ !cc_validate_gen6_blend(dev, info))
+ return false;
+
+ indep_alpha_enable = false;
+ for (i = 0; i < blend->rt_count; i++) {
+ struct ilo_state_cc_blend_rt_info rt;
+
+ cc_get_gen6_effective_rt(dev, info, i, &rt);
+
+ dw0 = rt.rgb_src << GEN8_RT_DW0_SRC_COLOR_FACTOR__SHIFT |
+ rt.rgb_dst << GEN8_RT_DW0_DST_COLOR_FACTOR__SHIFT |
+ rt.rgb_func << GEN8_RT_DW0_COLOR_FUNC__SHIFT |
+ rt.a_src << GEN8_RT_DW0_SRC_ALPHA_FACTOR__SHIFT |
+ rt.a_dst << GEN8_RT_DW0_DST_ALPHA_FACTOR__SHIFT |
+ rt.a_func << GEN8_RT_DW0_ALPHA_FUNC__SHIFT |
+ rt.argb_write_disables << GEN8_RT_DW0_WRITE_DISABLES__SHIFT;
+
+ if (rt.blend_enable) {
+ dw0 |= GEN8_RT_DW0_BLEND_ENABLE;
+
+ if (rt.a_src != rt.rgb_src ||
+ rt.a_dst != rt.rgb_dst ||
+ rt.a_func != rt.rgb_func)
+ indep_alpha_enable = true;
+ }
+
+ dw1 = GEN8_RT_DW1_COLORCLAMP_RTFORMAT |
+ GEN8_RT_DW1_PRE_BLEND_CLAMP |
+ GEN8_RT_DW1_POST_BLEND_CLAMP;
+
+ if (rt.logicop_enable) {
+ dw1 |= GEN8_RT_DW1_LOGICOP_ENABLE |
+ rt.logicop_func << GEN8_RT_DW1_LOGICOP_FUNC__SHIFT;
+ }
+
+ dw_rt[2 * i + 0] = dw0;
+ dw_rt[2 * i + 1] = dw1;
+ }
+
+ dw0 = 0;
+
+ if (alpha->alpha_to_coverage) {
+ dw0 |= GEN8_BLEND_DW0_ALPHA_TO_COVERAGE |
+ GEN8_BLEND_DW0_ALPHA_TO_COVERAGE_DITHER;
+ }
+
+ if (indep_alpha_enable)
+ dw0 |= GEN8_BLEND_DW0_INDEPENDENT_ALPHA_ENABLE;
+
+ if (alpha->alpha_to_one)
+ dw0 |= GEN8_BLEND_DW0_ALPHA_TO_ONE;
+
+ if (alpha->test_enable) {
+ dw0 |= GEN8_BLEND_DW0_ALPHA_TEST_ENABLE |
+ alpha->test_func << GEN8_BLEND_DW0_ALPHA_TEST_FUNC__SHIFT;
+ } else {
+ dw0 |= GEN6_COMPAREFUNCTION_ALWAYS <<
+ GEN8_BLEND_DW0_ALPHA_TEST_FUNC__SHIFT;
+ }
+
+ if (blend->dither_enable)
+ dw0 |= GEN8_BLEND_DW0_DITHER_ENABLE;
+
+ STATIC_ASSERT(ARRAY_SIZE(cc->blend) >= 2 + ARRAY_SIZE(dw_rt));
+ cc->blend[1] = dw0;
+ memcpy(&cc->blend[2], dw_rt, sizeof(uint32_t) * 2 * blend->rt_count);
+ cc->blend_state_count = info->blend.rt_count;
+
+ return true;
+}
+
+static bool
+cc_set_gen8_3DSTATE_PS_BLEND(struct ilo_state_cc *cc,
+ const struct ilo_dev *dev,
+ const struct ilo_state_cc_info *info)
+{
+ const struct ilo_state_cc_alpha_info *alpha = &info->alpha;
+ const struct ilo_state_cc_blend_info *blend = &info->blend;
+ uint32_t dw1;
+
+ ILO_DEV_ASSERT(dev, 8, 8);
+
+ dw1 = 0;
+
+ if (alpha->alpha_to_coverage)
+ dw1 |= GEN8_PS_BLEND_DW1_ALPHA_TO_COVERAGE;
+
+ if (alpha->test_enable)
+ dw1 |= GEN8_PS_BLEND_DW1_ALPHA_TEST_ENABLE;
+
+ if (blend->rt_count) {
+ struct ilo_state_cc_blend_rt_info rt0;
+ uint8_t i;
+
+ cc_get_gen6_effective_rt(dev, info, 0, &rt0);
+
+ /* 0x0 is reserved for blend factors and we have to set them all */
+ dw1 |= rt0.a_src << GEN8_PS_BLEND_DW1_SRC_ALPHA_FACTOR__SHIFT |
+ rt0.a_dst << GEN8_PS_BLEND_DW1_DST_ALPHA_FACTOR__SHIFT |
+ rt0.rgb_src << GEN8_PS_BLEND_DW1_SRC_COLOR_FACTOR__SHIFT |
+ rt0.rgb_dst << GEN8_PS_BLEND_DW1_DST_COLOR_FACTOR__SHIFT;
+
+ for (i = 0; i < blend->rt_count; i++) {
+ if (blend->rt[i].argb_write_disables != 0xf) {
+ dw1 |= GEN8_PS_BLEND_DW1_WRITABLE_RT;
+ break;
+ }
+ }
+
+ if (rt0.blend_enable) {
+ dw1 |= GEN8_PS_BLEND_DW1_BLEND_ENABLE;
+
+ if (rt0.a_src != rt0.rgb_src || rt0.a_dst != rt0.rgb_dst)
+ dw1 |= GEN8_PS_BLEND_DW1_INDEPENDENT_ALPHA_ENABLE;
+ }
+ }
+
+ STATIC_ASSERT(ARRAY_SIZE(cc->blend) >= 1);
+ cc->blend[0] = dw1;
+
+ return true;
+}
+
+static bool
+cc_params_set_gen6_COLOR_CALC_STATE(struct ilo_state_cc *cc,
+ const struct ilo_dev *dev,
+ const struct ilo_state_cc_params_info *params)
+{
+ uint32_t dw0;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ dw0 = params->stencil_front.test_ref << GEN6_CC_DW0_STENCIL_REF__SHIFT |
+ params->stencil_back.test_ref << GEN6_CC_DW0_STENCIL1_REF__SHIFT |
+ GEN6_CC_DW0_ALPHATEST_FLOAT32;
+
+ STATIC_ASSERT(ARRAY_SIZE(cc->cc) >= 6);
+ cc->cc[0] = dw0;
+ cc->cc[1] = fui(params->alpha_ref);
+ cc->cc[2] = fui(params->blend_rgba[0]);
+ cc->cc[3] = fui(params->blend_rgba[1]);
+ cc->cc[4] = fui(params->blend_rgba[2]);
+ cc->cc[5] = fui(params->blend_rgba[3]);
+
+ return true;
+}
+
+bool
+ilo_state_cc_init(struct ilo_state_cc *cc,
+ const struct ilo_dev *dev,
+ const struct ilo_state_cc_info *info)
+{
+ assert(ilo_is_zeroed(cc, sizeof(*cc)));
+ return ilo_state_cc_set_info(cc, dev, info);
+}
+
+bool
+ilo_state_cc_set_info(struct ilo_state_cc *cc,
+ const struct ilo_dev *dev,
+ const struct ilo_state_cc_info *info)
+{
+ bool ret = true;
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+ ret &= cc_set_gen8_3DSTATE_WM_DEPTH_STENCIL(cc, dev, info);
+ ret &= cc_set_gen8_BLEND_STATE(cc, dev, info);
+ ret &= cc_set_gen8_3DSTATE_PS_BLEND(cc, dev, info);
+ } else {
+ ret &= cc_set_gen6_DEPTH_STENCIL_STATE(cc, dev, info);
+ ret &= cc_set_gen6_BLEND_STATE(cc, dev, info);
+ }
+
+ ret &= cc_params_set_gen6_COLOR_CALC_STATE(cc, dev, &info->params);
+
+ assert(ret);
+
+ return ret;
+}
+
+bool
+ilo_state_cc_set_params(struct ilo_state_cc *cc,
+ const struct ilo_dev *dev,
+ const struct ilo_state_cc_params_info *params)
+{
+ /* modify stencil masks */
+ if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+ uint32_t dw1 = cc->ds[0];
+ uint32_t dw2 = cc->ds[1];
+
+ if (dw1 & GEN8_ZS_DW1_STENCIL_TEST_ENABLE) {
+ const bool twosided_enable = (dw1 & GEN8_ZS_DW1_STENCIL1_ENABLE);
+ const struct ilo_state_cc_stencil_params_info *front_p =
+ &params->stencil_front;
+ const struct ilo_state_cc_stencil_params_info *back_p =
+ (twosided_enable) ? &params->stencil_back :
+ &params->stencil_front;
+
+ if (front_p->write_mask || back_p->write_mask)
+ dw1 |= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE;
+ else
+ dw1 &= ~GEN8_ZS_DW1_STENCIL_WRITE_ENABLE;
+
+ dw2 =
+ front_p->test_mask << GEN8_ZS_DW2_STENCIL_TEST_MASK__SHIFT |
+ front_p->write_mask << GEN8_ZS_DW2_STENCIL_WRITE_MASK__SHIFT |
+ back_p->test_mask << GEN8_ZS_DW2_STENCIL1_TEST_MASK__SHIFT |
+ back_p->write_mask << GEN8_ZS_DW2_STENCIL1_WRITE_MASK__SHIFT;
+ }
+
+ cc->ds[0] = dw1;
+ cc->ds[1] = dw2;
+ } else {
+ uint32_t dw0 = cc->ds[0];
+ uint32_t dw1 = cc->ds[1];
+
+ if (dw0 & GEN6_ZS_DW0_STENCIL_TEST_ENABLE) {
+ const bool twosided_enable = (dw0 & GEN6_ZS_DW0_STENCIL1_ENABLE);
+ const struct ilo_state_cc_stencil_params_info *front_p =
+ &params->stencil_front;
+ const struct ilo_state_cc_stencil_params_info *back_p =
+ (twosided_enable) ? &params->stencil_back :
+ &params->stencil_front;
+
+ if (front_p->write_mask || back_p->write_mask)
+ dw0 |= GEN6_ZS_DW0_STENCIL_WRITE_ENABLE;
+ else
+ dw0 &= ~GEN6_ZS_DW0_STENCIL_WRITE_ENABLE;
+
+ dw1 =
+ front_p->test_mask << GEN6_ZS_DW1_STENCIL_TEST_MASK__SHIFT |
+ front_p->write_mask << GEN6_ZS_DW1_STENCIL_WRITE_MASK__SHIFT |
+ back_p->test_mask << GEN6_ZS_DW1_STENCIL1_TEST_MASK__SHIFT |
+ back_p->write_mask << GEN6_ZS_DW1_STENCIL1_WRITE_MASK__SHIFT;
+ }
+
+ cc->ds[0] = dw0;
+ cc->ds[1] = dw1;
+ }
+
+ /* modify COLOR_CALC_STATE */
+ cc_params_set_gen6_COLOR_CALC_STATE(cc, dev, params);
+
+ return true;
+}
+
+void
+ilo_state_cc_full_delta(const struct ilo_state_cc *cc,
+ const struct ilo_dev *dev,
+ struct ilo_state_cc_delta *delta)
+{
+ delta->dirty = ILO_STATE_CC_BLEND_STATE |
+ ILO_STATE_CC_COLOR_CALC_STATE;
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+ delta->dirty |= ILO_STATE_CC_3DSTATE_WM_DEPTH_STENCIL |
+ ILO_STATE_CC_3DSTATE_PS_BLEND;
+ } else {
+ delta->dirty |= ILO_STATE_CC_DEPTH_STENCIL_STATE;
+ }
+}
+
+void
+ilo_state_cc_get_delta(const struct ilo_state_cc *cc,
+ const struct ilo_dev *dev,
+ const struct ilo_state_cc *old,
+ struct ilo_state_cc_delta *delta)
+{
+ delta->dirty = 0;
+
+ if (memcmp(cc->ds, old->ds, sizeof(cc->ds))) {
+ if (ilo_dev_gen(dev) >= ILO_GEN(8))
+ delta->dirty |= ILO_STATE_CC_3DSTATE_WM_DEPTH_STENCIL;
+ else
+ delta->dirty |= ILO_STATE_CC_DEPTH_STENCIL_STATE;
+ }
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+ if (cc->blend[0] != old->blend[0])
+ delta->dirty |= ILO_STATE_CC_3DSTATE_PS_BLEND;
+
+ if (memcmp(&cc->blend[1], &old->blend[1],
+ sizeof(uint32_t) * (1 + 2 * cc->blend_state_count)))
+ delta->dirty |= ILO_STATE_CC_BLEND_STATE;
+ } else if (memcmp(cc->blend, old->blend,
+ sizeof(uint32_t) * 2 * cc->blend_state_count)) {
+ delta->dirty |= ILO_STATE_CC_BLEND_STATE;
+ }
+
+ if (memcmp(cc->cc, old->cc, sizeof(cc->cc)))
+ delta->dirty |= ILO_STATE_CC_COLOR_CALC_STATE;
+}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_cc.h b/src/gallium/drivers/ilo/core/ilo_state_cc.h
new file mode 100644
index 00000000000..5b96a60f988
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_cc.h
@@ -0,0 +1,199 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#ifndef ILO_STATE_CC_H
+#define ILO_STATE_CC_H
+
+#include "genhw/genhw.h"
+
+#include "ilo_core.h"
+#include "ilo_dev.h"
+
+/*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 38:
+ *
+ * "Render Target Index. Specifies the render target index that will be
+ * used to select blend state from BLEND_STATE.
+ * Format = U3"
+ */
+#define ILO_STATE_CC_BLEND_MAX_RT_COUNT 8
+
+/* bits reported via ilo_state_cc_delta::dirty */
+enum ilo_state_cc_dirty_bits {
+ ILO_STATE_CC_3DSTATE_WM_DEPTH_STENCIL = (1 << 0), /* Gen8+ only */
+ ILO_STATE_CC_3DSTATE_PS_BLEND = (1 << 1), /* Gen8+ only */
+ ILO_STATE_CC_DEPTH_STENCIL_STATE = (1 << 2), /* pre-Gen8 only */
+ ILO_STATE_CC_BLEND_STATE = (1 << 3),
+ ILO_STATE_CC_COLOR_CALC_STATE = (1 << 4),
+};
+
+/**
+ * AlphaCoverage and AlphaTest.
+ *
+ * NOTE(review): the cv_* fields appear to carry facts derived from other
+ * state objects for cross-validation — confirm against the setters in the
+ * corresponding .c file.
+ */
+struct ilo_state_cc_alpha_info {
+ bool cv_sample_count_one;
+ bool cv_float_source0_alpha;
+
+ bool alpha_to_coverage;
+ bool alpha_to_one;
+
+ bool test_enable;
+ enum gen_compare_function test_func;
+};
+
+/* one set of stencil test/ops, used for the front or the back face */
+struct ilo_state_cc_stencil_op_info {
+ enum gen_compare_function test_func;
+ enum gen_stencil_op fail_op;
+ enum gen_stencil_op zfail_op;
+ enum gen_stencil_op zpass_op;
+};
+
+/**
+ * StencilTest.
+ */
+struct ilo_state_cc_stencil_info {
+ bool cv_has_buffer;
+
+ bool test_enable;
+ bool twosided_enable; /* use separate ops for back faces */
+
+ struct ilo_state_cc_stencil_op_info front;
+ struct ilo_state_cc_stencil_op_info back;
+};
+
+/**
+ * DepthTest.
+ */
+struct ilo_state_cc_depth_info {
+ bool cv_has_buffer;
+
+ bool test_enable;
+ /* independent from test_enable */
+ bool write_enable;
+
+ enum gen_compare_function test_func;
+};
+
+/* per-render-target blending state */
+struct ilo_state_cc_blend_rt_info {
+ bool cv_has_buffer;
+ bool cv_is_unorm;
+ bool cv_is_integer;
+
+ uint8_t argb_write_disables; /* bitmask of disabled A/R/G/B channels */
+
+ bool logicop_enable;
+ enum gen_logic_op logicop_func;
+
+ bool blend_enable;
+ bool force_dst_alpha_one;
+ enum gen_blend_factor rgb_src;
+ enum gen_blend_factor rgb_dst;
+ enum gen_blend_function rgb_func;
+ enum gen_blend_factor a_src;
+ enum gen_blend_factor a_dst;
+ enum gen_blend_function a_func;
+};
+
+/**
+ * ColorBufferBlending, Dithering, and LogicOps.
+ */
+struct ilo_state_cc_blend_info {
+ const struct ilo_state_cc_blend_rt_info *rt; /* array of rt_count entries */
+ uint8_t rt_count;
+
+ bool dither_enable;
+};
+
+/* stencil reference/masks; split out so they can change without a re-init */
+struct ilo_state_cc_stencil_params_info {
+ uint8_t test_ref;
+ uint8_t test_mask;
+ uint8_t write_mask;
+};
+
+/**
+ * CC parameters.
+ */
+struct ilo_state_cc_params_info {
+ float alpha_ref;
+
+ struct ilo_state_cc_stencil_params_info stencil_front;
+ struct ilo_state_cc_stencil_params_info stencil_back;
+
+ float blend_rgba[4]; /* constant blend color */
+};
+
+/**
+ * Pixel processing.
+ */
+struct ilo_state_cc_info {
+ struct ilo_state_cc_alpha_info alpha;
+ struct ilo_state_cc_stencil_info stencil;
+ struct ilo_state_cc_depth_info depth;
+ struct ilo_state_cc_blend_info blend;
+
+ struct ilo_state_cc_params_info params;
+};
+
+/* pre-packed hardware DWs; see ilo_state_cc_get_delta() for the
+ * gen-specific layout of ds[] and blend[] */
+struct ilo_state_cc {
+ uint32_t ds[3];
+
+ uint8_t blend_state_count; /* number of per-RT entries packed in blend[] */
+ uint32_t blend[1 + 1 + 2 * ILO_STATE_CC_BLEND_MAX_RT_COUNT];
+
+ uint32_t cc[6]; /* COLOR_CALC_STATE */
+};
+
+/* which atoms need re-emission */
+struct ilo_state_cc_delta {
+ uint32_t dirty; /* bitmask of enum ilo_state_cc_dirty_bits */
+};
+
+bool
+ilo_state_cc_init(struct ilo_state_cc *cc,
+ const struct ilo_dev *dev,
+ const struct ilo_state_cc_info *info);
+
+bool
+ilo_state_cc_set_info(struct ilo_state_cc *cc,
+ const struct ilo_dev *dev,
+ const struct ilo_state_cc_info *info);
+
+bool
+ilo_state_cc_set_params(struct ilo_state_cc *cc,
+ const struct ilo_dev *dev,
+ const struct ilo_state_cc_params_info *params);
+
+void
+ilo_state_cc_full_delta(const struct ilo_state_cc *cc,
+ const struct ilo_dev *dev,
+ struct ilo_state_cc_delta *delta);
+
+void
+ilo_state_cc_get_delta(const struct ilo_state_cc *cc,
+ const struct ilo_dev *dev,
+ const struct ilo_state_cc *old,
+ struct ilo_state_cc_delta *delta);
+
+#endif /* ILO_STATE_CC_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_state_compute.c b/src/gallium/drivers/ilo/core/ilo_state_compute.c
new file mode 100644
index 00000000000..a5fe5e1a6b0
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_compute.c
@@ -0,0 +1,435 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#include "ilo_debug.h"
+#include "ilo_state_compute.h"
+
+/* URB space partitioning for GPGPU; all counts are in 256-bit entries */
+struct compute_urb_configuration {
+ int idrt_entry_count; /* reserved for interface descriptors */
+ int curbe_entry_count; /* reserved for CURBE data */
+
+ int urb_entry_count;
+ /* in 256-bit register increments */
+ int urb_entry_size;
+};
+
+/* Return the ROB capacity, in 256-bit entries, for the given device/GT. */
+static int
+get_gen6_rob_entry_count(const struct ilo_dev *dev)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 2, page 60:
+ *
+ * "ROB has 64KB of storage; 2048 entries."
+ *
+ * From the valid ranges of "CURBE Allocation Size", we can also conclude
+ * that interface entries and CURBE data must be in ROB. And that ROB
+ * should be 16KB, or 512 entries, on Gen7 GT1.
+ */
+ if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
+ return 2048;
+ else if (ilo_dev_gen(dev) >= ILO_GEN(7))
+ return (dev->gt == 2) ? 2048 : 512;
+ else
+ return (dev->gt == 2) ? 2048 : 1024;
+}
+
+/* Return how many 256-bit URB entries are reserved for the IDRT. */
+static int
+get_gen6_idrt_entry_count(const struct ilo_dev *dev)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 2, page 21:
+ *
+ * "The first 32 URB entries are reserved for the interface
+ * descriptor..."
+ *
+ * From the Haswell PRM, volume 7, page 836:
+ *
+ * "The first 64 URB entries are reserved for the interface
+ * description..."
+ */
+ return (ilo_dev_gen(dev) >= ILO_GEN(7.5)) ? 64 : 32;
+}
+
+/* Convert a CURBE size in bytes to 256-bit entries, rounding up, and
+ * check that IDRT + CURBE still fit in the ROB. */
+static int
+get_gen6_curbe_entry_count(const struct ilo_dev *dev, uint32_t curbe_size)
+{
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 2, page 21:
+ *
+ * "(CURBE Allocation Size) Specifies the total length allocated for
+ * CURBE, in 256-bit register increments.
+ */
+ const int entry_count = (curbe_size + 31) / 32;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ assert(get_gen6_idrt_entry_count(dev) + entry_count <=
+ get_gen6_rob_entry_count(dev));
+
+ return entry_count;
+}
+
+/* Fill \p urb with the URB partitioning implied by \p info and assert the
+ * total fits in the L3 URB allocation. */
+static bool
+compute_get_gen6_urb_configuration(const struct ilo_dev *dev,
+ const struct ilo_state_compute_info *info,
+ struct compute_urb_configuration *urb)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ urb->idrt_entry_count = get_gen6_idrt_entry_count(dev);
+ urb->curbe_entry_count =
+ get_gen6_curbe_entry_count(dev, info->curbe_alloc_size);
+
+ /*
+ * From the Broadwell PRM, volume 2b, page 451:
+ *
+ * "Please note that 0 is not allowed for this field (Number of URB
+ * Entries)."
+ */
+ urb->urb_entry_count = (ilo_dev_gen(dev) >= ILO_GEN(8)) ? 1 : 0;
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 2, page 52:
+ *
+ * "(URB Entry Allocation Size) Specifies the length of each URB entry
+ * used by the unit, in 256-bit register increments - 1."
+ */
+ urb->urb_entry_size = 1;
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 2, page 22:
+ *
+ * MEDIA_VFE_STATE specifies the amount of CURBE space, the URB handle
+ * size and the number of URB handles. The driver must ensure that
+ * ((URB_handle_size * URB_num_handle) - CURBE - 32) <=
+ * URB_allocation_in_L3."
+ */
+ assert(urb->idrt_entry_count + urb->curbe_entry_count +
+ urb->urb_entry_count * urb->urb_entry_size <=
+ info->cv_urb_alloc_size / 32);
+
+ return true;
+}
+
+/* Return the index (in 256-bit entries) one past the last CURBE entry read
+ * by this interface, or 0 when it reads no CURBE data at all. */
+static int
+compute_interface_get_gen6_read_end(const struct ilo_dev *dev,
+ const struct ilo_state_compute_interface_info *interface)
+{
+ const int per_thread_read = (interface->curbe_read_length + 31) / 32;
+ const int cross_thread_read =
+ (interface->cross_thread_curbe_read_length + 31) / 32;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ assert(interface->curbe_read_offset % 32 == 0);
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 2, page 60:
+ *
+ * "(Constant URB Entry Read Length) [0,63]"
+ */
+ assert(per_thread_read <= 63);
+
+ /* From the Haswell PRM, volume 2d, page 199:
+ *
+ * "(Cross-Thread Constant Data Read Length) [0,127]"
+ */
+ if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
+ assert(cross_thread_read <= 127);
+ else
+ assert(!cross_thread_read);
+
+ if (per_thread_read || cross_thread_read) {
+ /* each of the thread_group_size threads reads per_thread_read entries */
+ return interface->curbe_read_offset / 32 + cross_thread_read +
+ per_thread_read * interface->thread_group_size;
+ } else {
+ return 0;
+ }
+}
+
+/* Sanity-check \p info against the URB configuration: interface count must
+ * fit the IDRT and the CURBE reads must fit the CURBE allocation. */
+static bool
+compute_validate_gen6(const struct ilo_dev *dev,
+ const struct ilo_state_compute_info *info,
+ const struct compute_urb_configuration *urb)
+{
+ int min_curbe_entry_count;
+ uint8_t i;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ assert(info->interface_count <= urb->idrt_entry_count);
+
+ /* the CURBE allocation must cover the largest read range of any interface */
+ min_curbe_entry_count = 0;
+ for (i = 0; i < info->interface_count; i++) {
+ const int read_end =
+ compute_interface_get_gen6_read_end(dev, &info->interfaces[i]);
+
+ if (min_curbe_entry_count < read_end)
+ min_curbe_entry_count = read_end;
+ }
+
+ assert(min_curbe_entry_count <= urb->curbe_entry_count);
+
+ /*
+ * From the Broadwell PRM, volume 2b, page 452:
+ *
+ * "CURBE Allocation Size should be 0 for GPGPU workloads that uses
+ * indirect instead of CURBE."
+ */
+ if (!min_curbe_entry_count)
+ assert(!urb->curbe_entry_count);
+
+ return true;
+}
+
+/* Encode the largest per-thread scratch size of all interfaces into the
+ * gen-specific "Per Thread Scratch Space" field value. */
+static uint8_t
+compute_get_gen6_scratch_space(const struct ilo_dev *dev,
+ const struct ilo_state_compute_info *info)
+{
+ uint32_t scratch_size = 0;
+ uint8_t i;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ for (i = 0; i < info->interface_count; i++) {
+ if (scratch_size < info->interfaces[i].scratch_size)
+ scratch_size = info->interfaces[i].scratch_size;
+ }
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+ assert(scratch_size <= 2 * 1024 * 1024);
+
+ /* next power of two, starting from 1KB */
+ return (scratch_size > 1024) ?
+ (util_last_bit(scratch_size - 1) - 10): 0;
+ } else if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
+ assert(scratch_size <= 2 * 1024 * 1024);
+
+ /* next power of two, starting from 2KB */
+ return (scratch_size > 2048) ?
+ (util_last_bit(scratch_size - 1) - 11): 0;
+ } else {
+ assert(scratch_size <= 12 * 1024);
+
+ /* Gen6 encodes linearly in 1KB steps rather than as a power of two */
+ return (scratch_size > 1024) ?
+ (scratch_size - 1) / 1024 : 0;
+ }
+}
+
+/* Pack the variable DWs of MEDIA_VFE_STATE into compute->vfe[].
+ * NOTE(review): only dw1/dw2/dw4 are stored; the remaining dwords are
+ * presumably filled in at emit time — confirm against the emit path. */
+static bool
+compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute,
+ const struct ilo_dev *dev,
+ const struct ilo_state_compute_info *info)
+{
+ struct compute_urb_configuration urb;
+ uint8_t scratch_space;
+
+ uint32_t dw1, dw2, dw4;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ if (!compute_get_gen6_urb_configuration(dev, info, &urb) ||
+ !compute_validate_gen6(dev, info, &urb))
+ return false;
+
+ scratch_space = compute_get_gen6_scratch_space(dev, info);
+
+ dw1 = scratch_space << GEN6_VFE_DW1_SCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw2 = (dev->thread_count - 1) << GEN6_VFE_DW2_MAX_THREADS__SHIFT |
+ urb.urb_entry_count << GEN6_VFE_DW2_URB_ENTRY_COUNT__SHIFT |
+ GEN6_VFE_DW2_RESET_GATEWAY_TIMER |
+ GEN6_VFE_DW2_BYPASS_GATEWAY_CONTROL;
+
+ /* GPGPU mode bit exists only on Gen7/7.5 */
+ if (ilo_dev_gen(dev) >= ILO_GEN(7) && ilo_dev_gen(dev) <= ILO_GEN(7.5))
+ dw2 |= GEN7_VFE_DW2_GPGPU_MODE;
+
+ assert(urb.urb_entry_size);
+
+ /* hardware expects size minus one */
+ dw4 = (urb.urb_entry_size - 1) << GEN6_VFE_DW4_URB_ENTRY_SIZE__SHIFT |
+ urb.curbe_entry_count << GEN6_VFE_DW4_CURBE_SIZE__SHIFT;
+
+ STATIC_ASSERT(ARRAY_SIZE(compute->vfe) >= 3);
+ compute->vfe[0] = dw1;
+ compute->vfe[1] = dw2;
+ compute->vfe[2] = dw4;
+
+ return true;
+}
+
+/* Encode the sampler count in groups of four, clamped to the field max of 4. */
+static uint8_t
+compute_interface_get_gen6_sampler_count(const struct ilo_dev *dev,
+ const struct ilo_state_compute_interface_info *interface)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+ return (interface->sampler_count <= 12) ?
+ (interface->sampler_count + 3) / 4 : 4;
+}
+
+/* Clamp the binding table size to the field maximum of 31 entries. */
+static uint8_t
+compute_interface_get_gen6_surface_count(const struct ilo_dev *dev,
+ const struct ilo_state_compute_interface_info *interface)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+ return (interface->surface_count <= 31) ? interface->surface_count : 31;
+}
+
+/* Encode the SLM size as a power-of-two count of 4KB blocks (Gen7+). */
+static uint8_t
+compute_interface_get_gen7_slm_size(const struct ilo_dev *dev,
+ const struct ilo_state_compute_interface_info *interface)
+{
+ ILO_DEV_ASSERT(dev, 7, 8);
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 2, page 61:
+ *
+ * "The amount is specified in 4k blocks, but only powers of 2 are
+ * allowed: 0, 4k, 8k, 16k, 32k and 64k per half-slice."
+ */
+ assert(interface->slm_size <= 64 * 1024);
+
+ return util_next_power_of_two((interface->slm_size + 4095) / 4096);
+}
+
+/* Pack one INTERFACE_DESCRIPTOR_DATA entry per interface into compute->idrt.
+ * Each entry stores six dwords (dw0 and dw2..dw6); the descriptor's second
+ * dword is not kept here. */
+static bool
+compute_set_gen6_INTERFACE_DESCRIPTOR_DATA(struct ilo_state_compute *compute,
+ const struct ilo_dev *dev,
+ const struct ilo_state_compute_info *info)
+{
+ uint8_t i;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ for (i = 0; i < info->interface_count; i++) {
+ const struct ilo_state_compute_interface_info *interface =
+ &info->interfaces[i];
+ uint16_t read_offset, per_thread_read_len, cross_thread_read_len;
+ uint8_t sampler_count, surface_count;
+ uint32_t dw0, dw2, dw3, dw4, dw5, dw6;
+
+ assert(interface->kernel_offset % 64 == 0);
+ assert(interface->thread_group_size);
+
+ /* convert byte counts to 256-bit (32-byte) units, rounding up */
+ read_offset = interface->curbe_read_offset / 32;
+ per_thread_read_len = (interface->curbe_read_length + 31) / 32;
+ cross_thread_read_len =
+ (interface->cross_thread_curbe_read_length + 31) / 32;
+
+ sampler_count =
+ compute_interface_get_gen6_sampler_count(dev, interface);
+ surface_count =
+ compute_interface_get_gen6_surface_count(dev, interface);
+
+ dw0 = interface->kernel_offset;
+ dw2 = sampler_count << GEN6_IDRT_DW2_SAMPLER_COUNT__SHIFT;
+ dw3 = surface_count << GEN6_IDRT_DW3_BINDING_TABLE_SIZE__SHIFT;
+ dw4 = per_thread_read_len << GEN6_IDRT_DW4_CURBE_READ_LEN__SHIFT |
+ read_offset << GEN6_IDRT_DW4_CURBE_READ_OFFSET__SHIFT;
+
+ dw5 = 0;
+ dw6 = 0;
+ if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+ const uint8_t slm_size =
+ compute_interface_get_gen7_slm_size(dev, interface);
+
+ dw5 |= GEN7_IDRT_DW5_ROUNDING_MODE_RTNE;
+
+ /* barriers are enabled together with SLM */
+ if (slm_size) {
+ dw5 |= GEN7_IDRT_DW5_BARRIER_ENABLE |
+ slm_size << GEN7_IDRT_DW5_SLM_SIZE__SHIFT;
+ }
+
+ /*
+ * From the Haswell PRM, volume 2d, page 199:
+ *
+ * "(Number of Threads in GPGPU Thread Group) Specifies the
+ * number of threads that are in this thread group. Used to
+ * program the barrier for the number of messages to expect. The
+ * minimum value is 0 (which will disable the barrier), while
+ * the maximum value is the number of threads in a subslice for
+ * local barriers."
+ *
+ * From the Broadwell PRM, volume 2d, page 183:
+ *
+ * "(Number of Threads in GPGPU Thread Group) Specifies the
+ * number of threads that are in this thread group. The minimum
+ * value is 1, while the maximum value is the number of threads
+ * in a subslice for local barriers. See vol1b Configurations
+ * for the number of threads per subslice for different
+ * products. The maximum value for global barriers is limited
+ * by the number of threads in the system, or by 511, whichever
+ * is lower. This field should not be set to 0 even if the
+ * barrier is disabled, since an accurate value is needed for
+ * proper pre-emption."
+ */
+ if (slm_size || ilo_dev_gen(dev) >= ILO_GEN(8)) {
+ dw5 |= interface->thread_group_size <<
+ GEN7_IDRT_DW5_THREAD_GROUP_SIZE__SHIFT;
+ }
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
+ dw6 |= cross_thread_read_len <<
+ GEN75_IDRT_DW6_CROSS_THREAD_CURBE_READ_LEN__SHIFT;
+ }
+ }
+
+ STATIC_ASSERT(ARRAY_SIZE(compute->idrt[i]) >= 6);
+ compute->idrt[i][0] = dw0;
+ compute->idrt[i][1] = dw2;
+ compute->idrt[i][2] = dw3;
+ compute->idrt[i][3] = dw4;
+ compute->idrt[i][4] = dw5;
+ compute->idrt[i][5] = dw6;
+ }
+
+ return true;
+}
+
+/* Initialize \p compute from \p info.  The IDRT dwords are written into the
+ * caller-provided info->data, which must be zeroed and at least
+ * ilo_state_compute_data_size() bytes.
+ * NOTE(review): compute->idrt_count is never written here — confirm whether
+ * callers are expected to rely on it. */
+bool
+ilo_state_compute_init(struct ilo_state_compute *compute,
+ const struct ilo_dev *dev,
+ const struct ilo_state_compute_info *info)
+{
+ bool ret = true;
+
+ assert(ilo_is_zeroed(compute, sizeof(*compute)));
+ assert(ilo_is_zeroed(info->data, info->data_size));
+
+ assert(ilo_state_compute_data_size(dev, info->interface_count) <=
+ info->data_size);
+ compute->idrt = (uint32_t (*)[6]) info->data;
+
+ ret &= compute_set_gen6_MEDIA_VFE_STATE(compute, dev, info);
+ ret &= compute_set_gen6_INTERFACE_DESCRIPTOR_DATA(compute, dev, info);
+
+ assert(ret);
+
+ return ret;
+}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_compute.h b/src/gallium/drivers/ilo/core/ilo_state_compute.h
new file mode 100644
index 00000000000..346f7b617f4
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_compute.h
@@ -0,0 +1,92 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#ifndef ILO_STATE_COMPUTE_H
+#define ILO_STATE_COMPUTE_H
+
+#include "genhw/genhw.h"
+
+#include "ilo_core.h"
+#include "ilo_dev.h"
+
+/*
+ * From the Haswell PRM, volume 7, page 836:
+ *
+ * "The first 64 URB entries are reserved for the interface
+ * description..."
+ */
+#define ILO_STATE_COMPUTE_MAX_INTERFACE_COUNT 64
+
+/* one compute kernel entry point (interface descriptor) */
+struct ilo_state_compute_interface_info {
+ /* usually 0 unless there are multiple interfaces */
+ uint32_t kernel_offset;
+
+ uint32_t scratch_size; /* per-thread scratch, in bytes */
+
+ uint8_t sampler_count;
+ uint8_t surface_count;
+
+ uint16_t thread_group_size;
+ uint32_t slm_size; /* shared local memory, in bytes */
+
+ /* CURBE read window, in bytes */
+ uint16_t curbe_read_offset;
+ uint16_t curbe_read_length;
+ uint16_t cross_thread_curbe_read_length;
+};
+
+struct ilo_state_compute_info {
+ /* caller-owned storage for the packed IDRT; see
+ * ilo_state_compute_data_size() */
+ void *data;
+ size_t data_size;
+
+ const struct ilo_state_compute_interface_info *interfaces;
+ uint8_t interface_count;
+
+ uint32_t cv_urb_alloc_size;
+ uint32_t curbe_alloc_size;
+};
+
+struct ilo_state_compute {
+ uint32_t vfe[3]; /* MEDIA_VFE_STATE dw1/dw2/dw4 */
+
+ uint32_t (*idrt)[6]; /* points into ilo_state_compute_info::data */
+ uint8_t idrt_count;
+};
+
+/* Bytes of caller-provided storage needed for \p interface_count IDRT
+ * entries.  The NULL pointer is only used inside sizeof (unevaluated). */
+static inline size_t
+ilo_state_compute_data_size(const struct ilo_dev *dev,
+ uint8_t interface_count)
+{
+ const struct ilo_state_compute *compute = NULL;
+ return sizeof(compute->idrt[0]) * interface_count;
+}
+
+bool
+ilo_state_compute_init(struct ilo_state_compute *compute,
+ const struct ilo_dev *dev,
+ const struct ilo_state_compute_info *info);
+
+#endif /* ILO_STATE_COMPUTE_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_state_raster.c b/src/gallium/drivers/ilo/core/ilo_state_raster.c
new file mode 100644
index 00000000000..ed64a1f0d3c
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_raster.c
@@ -0,0 +1,1252 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#include "ilo_debug.h"
+#include "ilo_state_raster.h"
+
+/* Sanity-check the clip info before packing 3DSTATE_CLIP. */
+static bool
+raster_validate_gen6_clip(const struct ilo_dev *dev,
+ const struct ilo_state_raster_info *info)
+{
+ const struct ilo_state_raster_clip_info *clip = &info->clip;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ assert(clip->viewport_count);
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 188:
+ *
+ * ""Clip Distance Cull Test Enable Bitmask" and "Clip Distance Clip
+ * Test Enable Bitmask" should not have overlapping bits in the mask,
+ * else the results are undefined."
+ */
+ assert(!(clip->user_cull_enables & clip->user_clip_enables));
+
+ /* pre-Gen9 has a single Z test enable for both near and far */
+ if (ilo_dev_gen(dev) < ILO_GEN(9))
+ assert(clip->z_near_enable == clip->z_far_enable);
+
+ return true;
+}
+
+/* Pack the variable DWs (dw1..dw3) of 3DSTATE_CLIP into rs->clip[]. */
+static bool
+raster_set_gen6_3DSTATE_CLIP(struct ilo_state_raster *rs,
+ const struct ilo_dev *dev,
+ const struct ilo_state_raster_info *info)
+{
+ const struct ilo_state_raster_clip_info *clip = &info->clip;
+ const struct ilo_state_raster_setup_info *setup = &info->setup;
+ const struct ilo_state_raster_tri_info *tri = &info->tri;
+ const struct ilo_state_raster_scan_info *scan = &info->scan;
+ uint32_t dw1, dw2, dw3;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ if (!raster_validate_gen6_clip(dev, info))
+ return false;
+
+ dw1 = clip->user_cull_enables << GEN6_CLIP_DW1_UCP_CULL_ENABLES__SHIFT;
+
+ if (clip->stats_enable)
+ dw1 |= GEN6_CLIP_DW1_STATISTICS;
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 219:
+ *
+ * "Workaround : Due to Hardware issue "EarlyCull" needs to be
+ * enabled only for the cases where the incoming primitive topology
+ * into the clipper guaranteed to be Trilist."
+ *
+ * What does this mean?
+ */
+ dw1 |= GEN7_CLIP_DW1_SUBPIXEL_8BITS |
+ GEN7_CLIP_DW1_EARLY_CULL_ENABLE;
+
+ /* winding/cull moved out of 3DSTATE_CLIP on Gen8 */
+ if (ilo_dev_gen(dev) <= ILO_GEN(7.5)) {
+ dw1 |= tri->front_winding << GEN7_CLIP_DW1_FRONT_WINDING__SHIFT |
+ tri->cull_mode << GEN7_CLIP_DW1_CULL_MODE__SHIFT;
+ }
+ }
+
+ dw2 = clip->user_clip_enables << GEN6_CLIP_DW2_UCP_CLIP_ENABLES__SHIFT |
+ GEN6_CLIPMODE_NORMAL << GEN6_CLIP_DW2_CLIP_MODE__SHIFT;
+
+ if (clip->clip_enable)
+ dw2 |= GEN6_CLIP_DW2_CLIP_ENABLE;
+
+ /* D3D clips z in [0, 1], OpenGL in [-1, 1] */
+ if (clip->z_near_zero)
+ dw2 |= GEN6_CLIP_DW2_APIMODE_D3D;
+ else
+ dw2 |= GEN6_CLIP_DW2_APIMODE_OGL;
+
+ if (clip->xy_test_enable)
+ dw2 |= GEN6_CLIP_DW2_XY_TEST_ENABLE;
+
+ if (ilo_dev_gen(dev) < ILO_GEN(8) && clip->z_near_enable)
+ dw2 |= GEN6_CLIP_DW2_Z_TEST_ENABLE;
+
+ if (clip->gb_test_enable)
+ dw2 |= GEN6_CLIP_DW2_GB_TEST_ENABLE;
+
+ /* the clipper must produce noperspective barycentrics if the FS reads them */
+ if (scan->barycentric_interps & (GEN6_INTERP_NONPERSPECTIVE_PIXEL |
+ GEN6_INTERP_NONPERSPECTIVE_CENTROID |
+ GEN6_INTERP_NONPERSPECTIVE_SAMPLE))
+ dw2 |= GEN6_CLIP_DW2_NONPERSPECTIVE_BARYCENTRIC_ENABLE;
+
+ /* provoking vertex index per primitive type */
+ if (setup->first_vertex_provoking) {
+ dw2 |= 0 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT |
+ 0 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT |
+ 1 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT;
+ } else {
+ dw2 |= 2 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT |
+ 1 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT |
+ 2 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT;
+ }
+
+ /* point width limits in U3.7: 0x1 ~ 0.008, 0x7ff ~ 255.875 pixels */
+ dw3 = 0x1 << GEN6_CLIP_DW3_MIN_POINT_WIDTH__SHIFT |
+ 0x7ff << GEN6_CLIP_DW3_MAX_POINT_WIDTH__SHIFT |
+ (clip->viewport_count - 1) << GEN6_CLIP_DW3_MAX_VPINDEX__SHIFT;
+
+ if (clip->force_rtaindex_zero)
+ dw3 |= GEN6_CLIP_DW3_FORCE_RTAINDEX_ZERO;
+
+ STATIC_ASSERT(ARRAY_SIZE(rs->clip) >= 3);
+ rs->clip[0] = dw1;
+ rs->clip[1] = dw2;
+ rs->clip[2] = dw3;
+
+ return true;
+}
+
+/* Return whether antialiased lines are allowed given the current render
+ * targets and HiZ usage. */
+static bool
+raster_params_is_gen6_line_aa_allowed(const struct ilo_dev *dev,
+ const struct ilo_state_raster_params_info *params)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 251:
+ *
+ * "This field (Anti-aliasing Enable) must be disabled if any of the
+ * render targets have integer (UINT or SINT) surface format."
+ */
+ if (params->any_integer_rt)
+ return false;
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 321:
+ *
+ * "[DevSNB+]: This field (Hierarchical Depth Buffer Enable) must be
+ * disabled if Anti-aliasing Enable in 3DSTATE_SF is enabled.
+ */
+ if (ilo_dev_gen(dev) == ILO_GEN(6) && params->hiz_enable)
+ return false;
+
+ return true;
+}
+
+/* Copy info->line into \p line and clear the flags that are ineffective or
+ * disallowed under the current rasterization mode, per the PRM quotes below. */
+static void
+raster_get_gen6_effective_line(const struct ilo_dev *dev,
+ const struct ilo_state_raster_info *info,
+ struct ilo_state_raster_line_info *line)
+{
+ const struct ilo_state_raster_setup_info *setup = &info->setup;
+ const struct ilo_state_raster_params_info *params = &info->params;
+
+ *line = info->line;
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 251:
+ *
+ * "This field (Anti-aliasing Enable) is ignored when Multisample
+ * Rasterization Mode is MSRASTMODE_ON_xx."
+ *
+ * From the Sandy Bridge PRM, volume 2 part 1, page 251:
+ *
+ * "Setting a Line Width of 0.0 specifies the rasterization of the
+ * "thinnest" (one-pixel-wide), non-antialiased lines. Note that
+ * this effectively overrides the effect of AAEnable (though the
+ * AAEnable state variable is not modified). Lines rendered with
+ * zero Line Width are rasterized using GIQ (Grid Intersection
+ * Quantization) rules as specified by the GDI and Direct3D APIs."
+ *
+ * "Software must not program a value of 0.0 when running in
+ * MSRASTMODE_ON_xxx modes - zero-width lines are not available
+ * when multisampling rasterization is enabled."
+ *
+ * From the Sandy Bridge PRM, volume 2 part 1, page 294:
+ *
+ * "Line stipple, controlled via the Line Stipple Enable state variable
+ * in WM_STATE, discards certain pixels that are produced by non-AA
+ * line rasterization."
+ */
+ if (setup->line_msaa_enable ||
+ !raster_params_is_gen6_line_aa_allowed(dev, params))
+ line->aa_enable = false;
+ /* stipple and GIQ apply only to non-AA, non-MSAA lines */
+ if (setup->line_msaa_enable || line->aa_enable) {
+ line->stipple_enable = false;
+ line->giq_enable = false;
+ line->giq_last_pixel = false;
+ }
+}
+
+/* Sanity-check tri/setup state; despite the gen8 name this is asserted to
+ * run on Gen6 through Gen8. */
+static bool
+raster_validate_gen8_raster(const struct ilo_dev *dev,
+ const struct ilo_state_raster_info *info)
+{
+ const struct ilo_state_raster_setup_info *setup = &info->setup;
+ const struct ilo_state_raster_tri_info *tri = &info->tri;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 249:
+ *
+ * "This setting (SOLID) is required when rendering rectangle
+ * (RECTLIST) objects.
+ */
+ if (tri->fill_mode_front != GEN6_FILLMODE_SOLID ||
+ tri->fill_mode_back != GEN6_FILLMODE_SOLID)
+ assert(!setup->cv_is_rectangle);
+
+ return true;
+}
+
+/* Map the (line_msaa_enable, msaa_enable) pair to the hardware
+ * multisample rasterization mode. */
+static enum gen_msrast_mode
+raster_setup_get_gen6_msrast_mode(const struct ilo_dev *dev,
+ const struct ilo_state_raster_setup_info *setup)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ if (setup->line_msaa_enable) {
+ return (setup->msaa_enable) ? GEN6_MSRASTMODE_ON_PATTERN :
+ GEN6_MSRASTMODE_ON_PIXEL;
+ } else {
+ return (setup->msaa_enable) ? GEN6_MSRASTMODE_OFF_PATTERN :
+ GEN6_MSRASTMODE_OFF_PIXEL;
+ }
+}
+
+/* Convert a float line width to the hardware U3.7 fixed-point field,
+ * widening AA lines by a pixel and mapping 1.0 to 0 for GIQ lines. */
+static int
+get_gen6_line_width(const struct ilo_dev *dev, float fwidth,
+ bool line_aa_enable, bool line_giq_enable)
+{
+ int line_width;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /* in U3.7 */
+ line_width = (int) (fwidth * 128.0f + 0.5f);
+
+ /*
+ * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
+ * pixels in the minor direction. We have to make the lines slightly
+ * thicker, 0.5 pixel on both sides, so that they intersect that many
+ * pixels.
+ */
+ if (line_aa_enable)
+ line_width += 128; /* +1.0 in U3.7 */
+
+ line_width = CLAMP(line_width, 1, 1023);
+
+ /* a zero width selects one-pixel-wide GIQ rasterization */
+ if (line_giq_enable && line_width == 128)
+ line_width = 0;
+
+ return line_width;
+}
+
+/* Convert a float point width to the hardware U8.3 fixed-point field. */
+static int
+get_gen6_point_width(const struct ilo_dev *dev, float fwidth)
+{
+ int point_width;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /* in U8.3 */
+ point_width = (int) (fwidth * 8.0f + 0.5f);
+ point_width = CLAMP(point_width, 1, 2047);
+
+ return point_width;
+}
+
+/*
+ * Pack 3DSTATE_SF, and the depth-offset part of rs->raster[], for Gen6
+ * through Gen7.5.  Despite the gen7 name, the Gen6 layout is handled here
+ * too (note the 6..7.5 dev assert).
+ */
+static bool
+raster_set_gen7_3DSTATE_SF(struct ilo_state_raster *rs,
+                           const struct ilo_dev *dev,
+                           const struct ilo_state_raster_info *info,
+                           const struct ilo_state_raster_line_info *line)
+{
+   const struct ilo_state_raster_clip_info *clip = &info->clip;
+   const struct ilo_state_raster_setup_info *setup = &info->setup;
+   const struct ilo_state_raster_point_info *point = &info->point;
+   const struct ilo_state_raster_tri_info *tri = &info->tri;
+   const struct ilo_state_raster_params_info *params = &info->params;
+   const enum gen_msrast_mode msrast =
+      raster_setup_get_gen6_msrast_mode(dev, setup);
+   const int line_width = get_gen6_line_width(dev, params->line_width,
+                                              line->aa_enable, line->giq_enable);
+   const int point_width = get_gen6_point_width(dev, params->point_width);
+   uint32_t dw1, dw2, dw3;
+
+   ILO_DEV_ASSERT(dev, 6, 7.5);
+
+   if (!raster_validate_gen8_raster(dev, info))
+      return false;
+
+   dw1 = tri->fill_mode_front << GEN7_SF_DW1_FILL_MODE_FRONT__SHIFT |
+         tri->fill_mode_back << GEN7_SF_DW1_FILL_MODE_BACK__SHIFT |
+         tri->front_winding << GEN7_SF_DW1_FRONT_WINDING__SHIFT;
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7) && ilo_dev_gen(dev) <= ILO_GEN(7.5)) {
+      enum gen_depth_format format;
+
+      /* do it here as we want 0x0 to be valid */
+      /* map combined depth/stencil formats to their depth-only equivalents */
+      switch (tri->depth_offset_format) {
+      case GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT:
+         format = GEN6_ZFORMAT_D32_FLOAT;
+         break;
+      case GEN6_ZFORMAT_D24_UNORM_S8_UINT:
+         format = GEN6_ZFORMAT_D24_UNORM_X8_UINT;
+         break;
+      default:
+         format = tri->depth_offset_format;
+         break;
+      }
+
+      dw1 |= format << GEN7_SF_DW1_DEPTH_FORMAT__SHIFT;
+   }
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 248:
+    *
+    *   "This bit (Statistics Enable) should be set whenever clipping is
+    *    enabled and the Statistics Enable bit is set in CLIP_STATE. It
+    *    should be cleared if clipping is disabled or Statistics Enable in
+    *    CLIP_STATE is clear."
+    */
+   if (clip->stats_enable && clip->clip_enable)
+      dw1 |= GEN7_SF_DW1_STATISTICS;
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 258:
+    *
+    *   "This bit (Legacy Global Depth Bias Enable, Global Depth Offset
+    *    Enable Solid , Global Depth Offset Enable Wireframe, and Global
+    *    Depth Offset Enable Point) should be set whenever non zero depth
+    *    bias (Slope, Bias) values are used. Setting this bit may have some
+    *    degradation of performance for some workloads."
+    *
+    * But it seems fine to ignore that.
+    */
+   if (tri->depth_offset_solid)
+      dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_SOLID;
+   if (tri->depth_offset_wireframe)
+      dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME;
+   if (tri->depth_offset_point)
+      dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_POINT;
+
+   if (setup->viewport_transform)
+      dw1 |= GEN7_SF_DW1_VIEWPORT_TRANSFORM;
+
+   dw2 = tri->cull_mode << GEN7_SF_DW2_CULL_MODE__SHIFT |
+         line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT |
+         GEN7_SF_DW2_AA_LINE_CAP_1_0 |
+         msrast << GEN7_SF_DW2_MSRASTMODE__SHIFT;
+
+   if (line->aa_enable)
+      dw2 |= GEN7_SF_DW2_AA_LINE_ENABLE;
+
+   if (ilo_dev_gen(dev) == ILO_GEN(7.5) && line->stipple_enable)
+      dw2 |= GEN75_SF_DW2_LINE_STIPPLE_ENABLE;
+
+   if (setup->scissor_enable)
+      dw2 |= GEN7_SF_DW2_SCISSOR_ENABLE;
+
+   dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE |
+         GEN7_SF_DW3_SUBPIXEL_8BITS;
+
+   /* this has no effect when line_width != 0 */
+   if (line->giq_last_pixel)
+      dw3 |= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE;
+
+   /* provoking vertex indices per primitive type */
+   if (setup->first_vertex_provoking) {
+      dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
+             0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
+             1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
+   } else {
+      dw3 |= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
+             1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
+             2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
+   }
+
+   /* setup->point_aa_enable is ignored */
+   if (!point->programmable_width) {
+      dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH |
+             point_width << GEN7_SF_DW3_POINT_WIDTH__SHIFT;
+   }
+
+   STATIC_ASSERT(ARRAY_SIZE(rs->sf) >= 3);
+   rs->sf[0] = dw1;
+   rs->sf[1] = dw2;
+   rs->sf[2] = dw3;
+
+   /* pre-Gen8, depth offsets are emitted with SF; raster[0] is unused */
+   STATIC_ASSERT(ARRAY_SIZE(rs->raster) >= 4);
+   rs->raster[0] = 0;
+   rs->raster[1] = fui(params->depth_offset_const);
+   rs->raster[2] = fui(params->depth_offset_scale);
+   rs->raster[3] = fui(params->depth_offset_clamp);
+
+   rs->line_aa_enable = line->aa_enable;
+   rs->line_giq_enable = line->giq_enable;
+
+   return true;
+}
+
+/*
+ * Pack 3DSTATE_SF for Gen8.  On Gen8 only line/point/provoking-vertex state
+ * lives in SF; triangle state moved to 3DSTATE_RASTER (see
+ * raster_set_gen8_3DSTATE_RASTER).  Fills rs->sf[0..2].
+ */
+static bool
+raster_set_gen8_3DSTATE_SF(struct ilo_state_raster *rs,
+                           const struct ilo_dev *dev,
+                           const struct ilo_state_raster_info *info,
+                           const struct ilo_state_raster_line_info *line)
+{
+   const struct ilo_state_raster_clip_info *clip = &info->clip;
+   const struct ilo_state_raster_setup_info *setup = &info->setup;
+   const struct ilo_state_raster_point_info *point = &info->point;
+   const struct ilo_state_raster_params_info *params = &info->params;
+   const int line_width = get_gen6_line_width(dev, params->line_width,
+                                              line->aa_enable, line->giq_enable);
+   const int point_width = get_gen6_point_width(dev, params->point_width);
+   uint32_t dw1, dw2, dw3;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   dw1 = 0;
+
+   if (clip->stats_enable && clip->clip_enable)
+      dw1 |= GEN7_SF_DW1_STATISTICS;
+
+   if (setup->viewport_transform)
+      dw1 |= GEN7_SF_DW1_VIEWPORT_TRANSFORM;
+
+   dw2 = line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT |
+         GEN7_SF_DW2_AA_LINE_CAP_1_0;
+
+   dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE |
+         GEN7_SF_DW3_SUBPIXEL_8BITS;
+
+   /* this has no effect when line_width != 0 */
+   if (line->giq_last_pixel)
+      dw3 |= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE;
+
+   /* provoking vertex indices per primitive type */
+   if (setup->first_vertex_provoking) {
+      dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
+             0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
+             1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
+   } else {
+      dw3 |= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
+             1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
+             2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
+   }
+
+   /* fixed point width from params unless the shader writes it */
+   if (!point->programmable_width) {
+      dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH |
+             point_width << GEN7_SF_DW3_POINT_WIDTH__SHIFT;
+   }
+
+   STATIC_ASSERT(ARRAY_SIZE(rs->sf) >= 3);
+   rs->sf[0] = dw1;
+   rs->sf[1] = dw2;
+   rs->sf[2] = dw3;
+
+   return true;
+}
+
+/*
+ * Pack 3DSTATE_RASTER for Gen8: triangle winding/cull/fill, depth offset
+ * enables, MSAA and scissor enables.  Fills rs->raster[0..3] and caches the
+ * line AA/GIQ enables for later ilo_state_raster_set_params() patching.
+ */
+static bool
+raster_set_gen8_3DSTATE_RASTER(struct ilo_state_raster *rs,
+                               const struct ilo_dev *dev,
+                               const struct ilo_state_raster_info *info,
+                               const struct ilo_state_raster_line_info *line)
+{
+   const struct ilo_state_raster_clip_info *clip = &info->clip;
+   const struct ilo_state_raster_setup_info *setup = &info->setup;
+   const struct ilo_state_raster_point_info *point = &info->point;
+   const struct ilo_state_raster_tri_info *tri = &info->tri;
+   const struct ilo_state_raster_params_info *params = &info->params;
+   uint32_t dw1;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   if (!raster_validate_gen8_raster(dev, info))
+      return false;
+
+   dw1 = tri->front_winding << GEN8_RASTER_DW1_FRONT_WINDING__SHIFT |
+         tri->cull_mode << GEN8_RASTER_DW1_CULL_MODE__SHIFT |
+         tri->fill_mode_front << GEN8_RASTER_DW1_FILL_MODE_FRONT__SHIFT |
+         tri->fill_mode_back << GEN8_RASTER_DW1_FILL_MODE_BACK__SHIFT;
+
+   if (point->aa_enable)
+      dw1 |= GEN8_RASTER_DW1_SMOOTH_POINT_ENABLE;
+
+   /* where should line_msaa_enable be set? */
+   if (setup->msaa_enable)
+      dw1 |= GEN8_RASTER_DW1_API_MULTISAMPLE_ENABLE;
+
+   if (tri->depth_offset_solid)
+      dw1 |= GEN8_RASTER_DW1_DEPTH_OFFSET_SOLID;
+   if (tri->depth_offset_wireframe)
+      dw1 |= GEN8_RASTER_DW1_DEPTH_OFFSET_WIREFRAME;
+   if (tri->depth_offset_point)
+      dw1 |= GEN8_RASTER_DW1_DEPTH_OFFSET_POINT;
+
+   if (line->aa_enable)
+      dw1 |= GEN8_RASTER_DW1_AA_LINE_ENABLE;
+
+   if (setup->scissor_enable)
+      dw1 |= GEN8_RASTER_DW1_SCISSOR_ENABLE;
+
+   /* NOTE(review): the Gen9 branch is unreachable given the (8, 8) dev
+    * assert above; presumably kept for future Gen9 bring-up -- confirm
+    * before relying on it
+    */
+   if (ilo_dev_gen(dev) >= ILO_GEN(9)) {
+      if (clip->z_far_enable)
+         dw1 |= GEN9_RASTER_DW1_Z_TEST_FAR_ENABLE;
+      if (clip->z_near_enable)
+         dw1 |= GEN9_RASTER_DW1_Z_TEST_NEAR_ENABLE;
+   } else {
+      if (clip->z_near_enable)
+         dw1 |= GEN8_RASTER_DW1_Z_TEST_ENABLE;
+   }
+
+   STATIC_ASSERT(ARRAY_SIZE(rs->raster) >= 4);
+   rs->raster[0] = dw1;
+   rs->raster[1] = fui(params->depth_offset_const);
+   rs->raster[2] = fui(params->depth_offset_scale);
+   rs->raster[3] = fui(params->depth_offset_clamp);
+
+   rs->line_aa_enable = line->aa_enable;
+   rs->line_giq_enable = line->giq_enable;
+
+   return true;
+}
+
+/**
+ * Translate an MSAA sample count into the GEN*_NUMSAMPLES_* encoding,
+ * asserting that the count is supported by the device generation.
+ * Unexpected counts assert and fall back to 1x.
+ */
+static enum gen_sample_count
+get_gen6_sample_count(const struct ilo_dev *dev, uint8_t sample_count)
+{
+   enum gen_sample_count c;
+   int min_gen;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   switch (sample_count) {
+   case 1:
+      c = GEN6_NUMSAMPLES_1;
+      min_gen = ILO_GEN(6);
+      break;
+   case 2:
+      c = GEN8_NUMSAMPLES_2;
+      min_gen = ILO_GEN(8);
+      break;
+   case 4:
+      c = GEN6_NUMSAMPLES_4;
+      min_gen = ILO_GEN(6);
+      break;
+   case 8:
+      c = GEN7_NUMSAMPLES_8;
+      min_gen = ILO_GEN(7);
+      break;
+   case 16:
+      c = GEN8_NUMSAMPLES_16;
+      min_gen = ILO_GEN(8);
+      break;
+   default:
+      assert(!"unexpected sample count");
+      c = GEN6_NUMSAMPLES_1;
+      /* fix: min_gen was left uninitialized on this path, so the assert
+       * below read an indeterminate value (undefined behavior)
+       */
+      min_gen = ILO_GEN(6);
+      break;
+   }
+
+   assert(ilo_dev_gen(dev) >= min_gen);
+
+   return c;
+}
+
+/*
+ * Pack 3DSTATE_MULTISAMPLE into rs->sample[0].  Despite the gen8 name this
+ * covers Gen6 through Gen8 (see the dev assert).
+ */
+static bool
+raster_set_gen8_3DSTATE_MULTISAMPLE(struct ilo_state_raster *rs,
+                                    const struct ilo_dev *dev,
+                                    const struct ilo_state_raster_info *info)
+{
+   const struct ilo_state_raster_setup_info *setup = &info->setup;
+   const struct ilo_state_raster_scan_info *scan = &info->scan;
+   const enum gen_sample_count count =
+      get_gen6_sample_count(dev, scan->sample_count);
+   uint32_t dw1;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 307:
+    *
+    *   "Setting Multisample Rasterization Mode to MSRASTMODE_xxx_PATTERN
+    *    when Number of Multisamples == NUMSAMPLES_1 is UNDEFINED."
+    */
+   if (setup->msaa_enable)
+      assert(scan->sample_count > 1);
+
+   dw1 = scan->pixloc << GEN6_MULTISAMPLE_DW1_PIXEL_LOCATION__SHIFT |
+         count << GEN6_MULTISAMPLE_DW1_NUM_SAMPLES__SHIFT;
+
+   STATIC_ASSERT(ARRAY_SIZE(rs->sample) >= 1);
+   rs->sample[0] = dw1;
+
+   return true;
+}
+
+/*
+ * Pack 3DSTATE_SAMPLE_MASK into rs->sample[1], masking off bits above the
+ * active sample count as the PRM requires.
+ */
+static bool
+raster_set_gen6_3DSTATE_SAMPLE_MASK(struct ilo_state_raster *rs,
+                                    const struct ilo_dev *dev,
+                                    const struct ilo_state_raster_info *info)
+{
+   const struct ilo_state_raster_scan_info *scan = &info->scan;
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 294:
+    *
+    *   "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field
+    *    (Sample Mask) must be zero.
+    *
+    *    If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field
+    *    must be zero."
+    */
+   const uint32_t mask = (1 << scan->sample_count) - 1;
+   uint32_t dw1;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   dw1 = (scan->sample_mask & mask) << GEN6_SAMPLE_MASK_DW1_VAL__SHIFT;
+
+   STATIC_ASSERT(ARRAY_SIZE(rs->sample) >= 2);
+   rs->sample[1] = dw1;
+
+   return true;
+}
+
+/*
+ * Sanity-check the scan-conversion (WM) state against PRM restrictions on
+ * statistics and early-z operations.  Debug-build asserts only; always
+ * returns true in release builds.
+ */
+static bool
+raster_validate_gen6_wm(const struct ilo_dev *dev,
+                        const struct ilo_state_raster_info *info)
+{
+   const struct ilo_state_raster_scan_info *scan = &info->scan;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* early depth-stencil control is a Gen7+ field */
+   if (ilo_dev_gen(dev) == ILO_GEN(6))
+      assert(scan->earlyz_control == GEN7_EDSC_NORMAL);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 272:
+    *
+    *   "This bit (Statistics Enable) must be disabled if either of these
+    *    bits is set: Depth Buffer Clear , Hierarchical Depth Buffer Resolve
+    *    Enable or Depth Buffer Resolve Enable."
+    */
+   if (scan->earlyz_op != ILO_STATE_RASTER_EARLYZ_NORMAL)
+      assert(!scan->stats_enable);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 273:
+    *
+    *   "If this field (Depth Buffer Resolve Enable) is enabled, the Depth
+    *    Buffer Clear and Hierarchical Depth Buffer Resolve Enable fields
+    *    must both be disabled."
+    *
+    *   "If this field (Hierarchical Depth Buffer Resolve Enable) is
+    *    enabled, the Depth Buffer Clear and Depth Buffer Resolve Enable
+    *    fields must both be disabled."
+    *
+    * This is guaranteed.
+    */
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 314-315:
+    *
+    *   "Stencil buffer clear can be performed at the same time by enabling
+    *    Stencil Buffer Write Enable."
+    *
+    *   "Note also that stencil buffer clear can be performed without depth
+    *    buffer clear."
+    */
+   if (scan->earlyz_stencil_clear) {
+      assert(scan->earlyz_op == ILO_STATE_RASTER_EARLYZ_NORMAL ||
+             scan->earlyz_op == ILO_STATE_RASTER_EARLYZ_DEPTH_CLEAR);
+   }
+
+   return true;
+}
+
+/*
+ * Pack the scan-conversion portion of Gen6 3DSTATE_WM (dw4-dw6) into
+ * rs->wm[0..2].  Shader-related WM fields are owned elsewhere.
+ */
+static bool
+raster_set_gen6_3dstate_wm(struct ilo_state_raster *rs,
+                           const struct ilo_dev *dev,
+                           const struct ilo_state_raster_info *info,
+                           const struct ilo_state_raster_line_info *line)
+{
+   const struct ilo_state_raster_tri_info *tri = &info->tri;
+   const struct ilo_state_raster_setup_info *setup = &info->setup;
+   const struct ilo_state_raster_scan_info *scan = &info->scan;
+   const enum gen_msrast_mode msrast =
+      raster_setup_get_gen6_msrast_mode(dev, setup);
+   /* only scan conversion states are set, as in Gen8+ */
+   uint32_t dw4, dw5, dw6;
+
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   if (!raster_validate_gen6_wm(dev, info))
+      return false;
+
+   dw4 = 0;
+
+   if (scan->stats_enable)
+      dw4 |= GEN6_WM_DW4_STATISTICS;
+
+   switch (scan->earlyz_op) {
+   case ILO_STATE_RASTER_EARLYZ_DEPTH_CLEAR:
+      dw4 |= GEN6_WM_DW4_DEPTH_CLEAR;
+      break;
+   case ILO_STATE_RASTER_EARLYZ_DEPTH_RESOLVE:
+      dw4 |= GEN6_WM_DW4_DEPTH_RESOLVE;
+      break;
+   case ILO_STATE_RASTER_EARLYZ_HIZ_RESOLVE:
+      dw4 |= GEN6_WM_DW4_HIZ_RESOLVE;
+      break;
+   default:
+      /* stencil-only clear still rides the depth-clear bit */
+      if (scan->earlyz_stencil_clear)
+         dw4 |= GEN6_WM_DW4_DEPTH_CLEAR;
+      break;
+   }
+
+   dw5 = GEN6_WM_DW5_AA_LINE_CAP_1_0 | /* same as in 3DSTATE_SF */
+         GEN6_WM_DW5_AA_LINE_WIDTH_2_0;
+
+   if (tri->poly_stipple_enable)
+      dw5 |= GEN6_WM_DW5_POLY_STIPPLE_ENABLE;
+   if (line->stipple_enable)
+      dw5 |= GEN6_WM_DW5_LINE_STIPPLE_ENABLE;
+
+   dw6 = scan->zw_interp << GEN6_WM_DW6_ZW_INTERP__SHIFT |
+         scan->barycentric_interps << GEN6_WM_DW6_BARYCENTRIC_INTERP__SHIFT |
+         GEN6_WM_DW6_POINT_RASTRULE_UPPER_RIGHT |
+         msrast << GEN6_WM_DW6_MSRASTMODE__SHIFT;
+
+   STATIC_ASSERT(ARRAY_SIZE(rs->wm) >= 3);
+   rs->wm[0] = dw4;
+   rs->wm[1] = dw5;
+   rs->wm[2] = dw6;
+
+   return true;
+}
+
+/*
+ * Pack the scan-conversion portion of 3DSTATE_WM (dw1) for Gen7-8 into
+ * rs->wm[0].  The early-z op bits and MSRASTMODE only exist before Gen8.
+ */
+static bool
+raster_set_gen8_3DSTATE_WM(struct ilo_state_raster *rs,
+                           const struct ilo_dev *dev,
+                           const struct ilo_state_raster_info *info,
+                           const struct ilo_state_raster_line_info *line)
+{
+   const struct ilo_state_raster_tri_info *tri = &info->tri;
+   const struct ilo_state_raster_setup_info *setup = &info->setup;
+   const struct ilo_state_raster_scan_info *scan = &info->scan;
+   const enum gen_msrast_mode msrast =
+      raster_setup_get_gen6_msrast_mode(dev, setup);
+   uint32_t dw1;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   if (!raster_validate_gen6_wm(dev, info))
+      return false;
+
+   dw1 = scan->earlyz_control << GEN7_WM_DW1_EDSC__SHIFT |
+         scan->zw_interp << GEN7_WM_DW1_ZW_INTERP__SHIFT |
+         scan->barycentric_interps << GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT |
+         GEN7_WM_DW1_AA_LINE_CAP_1_0 | /* same as in 3DSTATE_SF */
+         GEN7_WM_DW1_AA_LINE_WIDTH_2_0 |
+         GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT;
+
+   if (scan->stats_enable)
+      dw1 |= GEN7_WM_DW1_STATISTICS;
+
+   /* on Gen8+ the early-z ops moved to 3DSTATE_WM_HZ_OP */
+   if (ilo_dev_gen(dev) < ILO_GEN(8)) {
+      switch (scan->earlyz_op) {
+      case ILO_STATE_RASTER_EARLYZ_DEPTH_CLEAR:
+         dw1 |= GEN7_WM_DW1_DEPTH_CLEAR;
+         break;
+      case ILO_STATE_RASTER_EARLYZ_DEPTH_RESOLVE:
+         dw1 |= GEN7_WM_DW1_DEPTH_RESOLVE;
+         break;
+      case ILO_STATE_RASTER_EARLYZ_HIZ_RESOLVE:
+         dw1 |= GEN7_WM_DW1_HIZ_RESOLVE;
+         break;
+      default:
+         if (scan->earlyz_stencil_clear)
+            dw1 |= GEN7_WM_DW1_DEPTH_CLEAR;
+         break;
+      }
+   }
+
+   if (tri->poly_stipple_enable)
+      dw1 |= GEN7_WM_DW1_POLY_STIPPLE_ENABLE;
+   if (line->stipple_enable)
+      dw1 |= GEN7_WM_DW1_LINE_STIPPLE_ENABLE;
+
+   if (ilo_dev_gen(dev) < ILO_GEN(8))
+      dw1 |= msrast << GEN7_WM_DW1_MSRASTMODE__SHIFT;
+
+   STATIC_ASSERT(ARRAY_SIZE(rs->wm) >= 1);
+   rs->wm[0] = dw1;
+
+   return true;
+}
+
+/*
+ * Pack Gen8 3DSTATE_WM_HZ_OP (early-z clears/resolves).  Its dw1 and dw4
+ * are stored in rs->wm[1] and rs->wm[2] respectively, after the Gen8
+ * 3DSTATE_WM dword in rs->wm[0].
+ */
+static bool
+raster_set_gen8_3dstate_wm_hz_op(struct ilo_state_raster *rs,
+                                 const struct ilo_dev *dev,
+                                 const struct ilo_state_raster_info *info)
+{
+   const struct ilo_state_raster_scan_info *scan = &info->scan;
+   const enum gen_sample_count count =
+      get_gen6_sample_count(dev, scan->sample_count);
+   const uint32_t mask = (1 << scan->sample_count) - 1;
+   uint32_t dw1, dw4;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   dw1 = count << GEN8_WM_HZ_DW1_NUM_SAMPLES__SHIFT;
+
+   if (scan->earlyz_stencil_clear)
+      dw1 |= GEN8_WM_HZ_DW1_STENCIL_CLEAR;
+
+   switch (scan->earlyz_op) {
+   case ILO_STATE_RASTER_EARLYZ_DEPTH_CLEAR:
+      dw1 |= GEN8_WM_HZ_DW1_DEPTH_CLEAR;
+      break;
+   case ILO_STATE_RASTER_EARLYZ_DEPTH_RESOLVE:
+      dw1 |= GEN8_WM_HZ_DW1_DEPTH_RESOLVE;
+      break;
+   case ILO_STATE_RASTER_EARLYZ_HIZ_RESOLVE:
+      dw1 |= GEN8_WM_HZ_DW1_HIZ_RESOLVE;
+      break;
+   default:
+      break;
+   }
+
+   dw4 = (scan->sample_mask & mask) << GEN8_WM_HZ_DW4_SAMPLE_MASK__SHIFT;
+
+   STATIC_ASSERT(ARRAY_SIZE(rs->wm) >= 3);
+   rs->wm[1] = dw1;
+   rs->wm[2] = dw4;
+
+   return true;
+}
+
+/*
+ * Pack an array of U0.4 (x, y) sample offsets into one byte each
+ * (x in the high nibble, y in the low nibble), asserting that samples are
+ * ordered by increasing distance from the pixel center as the PRM requires.
+ */
+static bool
+sample_pattern_get_gen6_packed_offsets(const struct ilo_dev *dev,
+                                       uint8_t sample_count,
+                                       const struct ilo_state_sample_pattern_offset_info *in,
+                                       uint8_t *out)
+{
+   uint8_t max_dist, i;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   max_dist = 0;
+   for (i = 0; i < sample_count; i++) {
+      /* offsets are in [0, 15]; recenter so dist components are in [-8, 7],
+       * making the squared distance at most 128 -- fits in uint8_t
+       */
+      const int8_t dist_x = (int8_t) in[i].x - 8;
+      const int8_t dist_y = (int8_t) in[i].y - 8;
+      const uint8_t dist = dist_x * dist_x + dist_y * dist_y;
+
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 305:
+       *
+       *   "Programming Note: When programming the sample offsets (for
+       *    NUMSAMPLES_4 or _8 and MSRASTMODE_xxx_PATTERN), the order of the
+       *    samples 0 to 3 (or 7 for 8X) must have monotonically increasing
+       *    distance from the pixel center. This is required to get the
+       *    correct centroid computation in the device."
+       */
+      assert(dist >= max_dist);
+      max_dist = dist;
+
+      assert(in[i].x < 16);
+      assert(in[i].y < 16);
+
+      out[i] = in[i].x << 4 | in[i].y;
+   }
+
+   return true;
+}
+
+/*
+ * Pack 3DSTATE_LINE_STIPPLE.  The inverse repeat count is a fixed-point
+ * value whose format changed between generations: U1.16 on Gen7+, U1.13 on
+ * Gen6.
+ */
+static bool
+line_stipple_set_gen6_3DSTATE_LINE_STIPPLE(struct ilo_state_line_stipple *stipple,
+                                           const struct ilo_dev *dev,
+                                           const struct ilo_state_line_stipple_info *info)
+{
+   uint32_t dw1, dw2;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   assert(info->repeat_count >= 1 && info->repeat_count <= 256);
+
+   dw1 = info->pattern;
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      /* in U1.16 */
+      const uint32_t inverse = 65536 / info->repeat_count;
+      dw2 = inverse << GEN7_LINE_STIPPLE_DW2_INVERSE_REPEAT_COUNT__SHIFT |
+            info->repeat_count << GEN6_LINE_STIPPLE_DW2_REPEAT_COUNT__SHIFT;
+   } else {
+      /* in U1.13 */
+      const uint16_t inverse = 8192 / info->repeat_count;
+      dw2 = inverse << GEN6_LINE_STIPPLE_DW2_INVERSE_REPEAT_COUNT__SHIFT |
+            info->repeat_count << GEN6_LINE_STIPPLE_DW2_REPEAT_COUNT__SHIFT;
+   }
+
+   STATIC_ASSERT(ARRAY_SIZE(stipple->stipple) >= 2);
+   stipple->stipple[0] = dw1;
+   stipple->stipple[1] = dw2;
+
+   return true;
+}
+
+/*
+ * Pack all five sample patterns (1x through 16x) for 3DSTATE_SAMPLE_PATTERN.
+ * Returns false if any pattern fails validation/packing.
+ */
+static bool
+sample_pattern_set_gen8_3DSTATE_SAMPLE_PATTERN(struct ilo_state_sample_pattern *pattern,
+                                               const struct ilo_dev *dev,
+                                               const struct ilo_state_sample_pattern_info *info)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   STATIC_ASSERT(ARRAY_SIZE(pattern->pattern_1x) >= 1);
+   STATIC_ASSERT(ARRAY_SIZE(pattern->pattern_2x) >= 2);
+   STATIC_ASSERT(ARRAY_SIZE(pattern->pattern_4x) >= 4);
+   STATIC_ASSERT(ARRAY_SIZE(pattern->pattern_8x) >= 8);
+   STATIC_ASSERT(ARRAY_SIZE(pattern->pattern_16x) >= 16);
+
+   return (sample_pattern_get_gen6_packed_offsets(dev, 1,
+              info->pattern_1x, pattern->pattern_1x) &&
+           sample_pattern_get_gen6_packed_offsets(dev, 2,
+              info->pattern_2x, pattern->pattern_2x) &&
+           sample_pattern_get_gen6_packed_offsets(dev, 4,
+              info->pattern_4x, pattern->pattern_4x) &&
+           sample_pattern_get_gen6_packed_offsets(dev, 8,
+              info->pattern_8x, pattern->pattern_8x) &&
+           sample_pattern_get_gen6_packed_offsets(dev, 16,
+              info->pattern_16x, pattern->pattern_16x));
+
+}
+
+/*
+ * Copy the 32x32 polygon stipple pattern (one uint32_t per row) into the
+ * 3DSTATE_POLY_STIPPLE_PATTERN payload.
+ */
+static bool
+poly_stipple_set_gen6_3DSTATE_POLY_STIPPLE_PATTERN(struct ilo_state_poly_stipple *stipple,
+                                                   const struct ilo_dev *dev,
+                                                   const struct ilo_state_poly_stipple_info *info)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   STATIC_ASSERT(ARRAY_SIZE(stipple->stipple) >= 32);
+   memcpy(stipple->stipple, info->pattern, sizeof(info->pattern));
+
+   return true;
+}
+
+/*
+ * Initialize a zeroed ilo_state_raster from \p info.  \p rs must be
+ * zero-initialized by the caller (asserted in debug builds).
+ */
+bool
+ilo_state_raster_init(struct ilo_state_raster *rs,
+                      const struct ilo_dev *dev,
+                      const struct ilo_state_raster_info *info)
+{
+   assert(ilo_is_zeroed(rs, sizeof(*rs)));
+   return ilo_state_raster_set_info(rs, dev, info);
+}
+
+/*
+ * Initialize raster state for internal RECTLIST draws (early-z clears and
+ * resolves): single viewport, no clipping/culling, full sample mask.
+ */
+bool
+ilo_state_raster_init_for_rectlist(struct ilo_state_raster *rs,
+                                   const struct ilo_dev *dev,
+                                   uint8_t sample_count,
+                                   enum ilo_state_raster_earlyz_op earlyz_op,
+                                   bool earlyz_stencil_clear)
+{
+   struct ilo_state_raster_info info;
+
+   memset(&info, 0, sizeof(info));
+
+   info.clip.viewport_count = 1;
+   info.setup.cv_is_rectangle = true;
+   info.setup.msaa_enable = (sample_count > 1);
+   info.scan.sample_count = sample_count;
+   info.scan.sample_mask = ~0u;
+   info.scan.earlyz_op = earlyz_op;
+   info.scan.earlyz_stencil_clear = earlyz_stencil_clear;
+
+   return ilo_state_raster_init(rs, dev, &info);
+}
+
+/*
+ * (Re)pack all raster-related hardware state from \p info, dispatching to
+ * the per-generation packers.  Returns false (after asserting) if any
+ * packer rejects the state.
+ */
+bool
+ilo_state_raster_set_info(struct ilo_state_raster *rs,
+                          const struct ilo_dev *dev,
+                          const struct ilo_state_raster_info *info)
+{
+   struct ilo_state_raster_line_info line;
+   bool ret = true;
+
+   ret &= raster_set_gen6_3DSTATE_CLIP(rs, dev, info);
+
+   /* resolve the effective line state (AA/GIQ interactions) first */
+   raster_get_gen6_effective_line(dev, info, &line);
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+      ret &= raster_set_gen8_3DSTATE_SF(rs, dev, info, &line);
+      ret &= raster_set_gen8_3DSTATE_RASTER(rs, dev, info, &line);
+   } else {
+      ret &= raster_set_gen7_3DSTATE_SF(rs, dev, info, &line);
+   }
+
+   ret &= raster_set_gen8_3DSTATE_MULTISAMPLE(rs, dev, info);
+   ret &= raster_set_gen6_3DSTATE_SAMPLE_MASK(rs, dev, info);
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      ret &= raster_set_gen8_3DSTATE_WM(rs, dev, info, &line);
+
+      if (ilo_dev_gen(dev) >= ILO_GEN(8))
+         ret &= raster_set_gen8_3dstate_wm_hz_op(rs, dev, info);
+   } else {
+      ret &= raster_set_gen6_3dstate_wm(rs, dev, info, &line);
+   }
+
+   assert(ret);
+
+   return ret;
+}
+
+/*
+ * Patch the already-packed dwords in place for a params-only change (line
+ * width, point width, depth offsets), avoiding a full repack.  Relies on the
+ * line_aa_enable/line_giq_enable values cached at set_info time.
+ */
+bool
+ilo_state_raster_set_params(struct ilo_state_raster *rs,
+                            const struct ilo_dev *dev,
+                            const struct ilo_state_raster_params_info *params)
+{
+   const bool line_aa_enable = (rs->line_aa_enable &&
+         raster_params_is_gen6_line_aa_allowed(dev, params));
+   const int line_width = get_gen6_line_width(dev, params->line_width,
+                                              line_aa_enable, rs->line_giq_enable);
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* modify line AA enable */
+   if (rs->line_aa_enable) {
+      if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+         if (line_aa_enable)
+            rs->raster[0] |= GEN8_RASTER_DW1_AA_LINE_ENABLE;
+         else
+            rs->raster[0] &= ~GEN8_RASTER_DW1_AA_LINE_ENABLE;
+      } else {
+         if (line_aa_enable)
+            rs->sf[1] |= GEN7_SF_DW2_AA_LINE_ENABLE;
+         else
+            rs->sf[1] &= ~GEN7_SF_DW2_AA_LINE_ENABLE;
+      }
+   }
+
+   /* modify line width */
+   rs->sf[1] = (rs->sf[1] & ~GEN7_SF_DW2_LINE_WIDTH__MASK) |
+               line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT;
+
+   /* modify point width, only if SF sources it (not shader-written) */
+   if (rs->sf[2] & GEN7_SF_DW3_USE_POINT_WIDTH) {
+      const int point_width = get_gen6_point_width(dev, params->point_width);
+
+      rs->sf[2] = (rs->sf[2] & ~GEN7_SF_DW3_POINT_WIDTH__MASK) |
+                  point_width << GEN7_SF_DW3_POINT_WIDTH__SHIFT;
+   }
+
+   /* modify depth offset */
+   rs->raster[1] = fui(params->depth_offset_const);
+   rs->raster[2] = fui(params->depth_offset_scale);
+   rs->raster[3] = fui(params->depth_offset_clamp);
+
+   return true;
+}
+
+/*
+ * Mark every raster-related command dirty, forcing a full re-emit.
+ */
+void
+ilo_state_raster_full_delta(const struct ilo_state_raster *rs,
+                            const struct ilo_dev *dev,
+                            struct ilo_state_raster_delta *delta)
+{
+   delta->dirty = ILO_STATE_RASTER_3DSTATE_CLIP |
+                  ILO_STATE_RASTER_3DSTATE_SF |
+                  ILO_STATE_RASTER_3DSTATE_MULTISAMPLE |
+                  ILO_STATE_RASTER_3DSTATE_SAMPLE_MASK |
+                  ILO_STATE_RASTER_3DSTATE_WM |
+                  ILO_STATE_RASTER_3DSTATE_AA_LINE_PARAMETERS;
+
+   /* these commands only exist on Gen8+ */
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+      delta->dirty |= ILO_STATE_RASTER_3DSTATE_RASTER |
+                      ILO_STATE_RASTER_3DSTATE_WM_HZ_OP;
+   }
+}
+
+/*
+ * Compare \p rs against \p old and mark only the commands whose packed
+ * dwords actually changed.
+ */
+void
+ilo_state_raster_get_delta(const struct ilo_state_raster *rs,
+                           const struct ilo_dev *dev,
+                           const struct ilo_state_raster *old,
+                           struct ilo_state_raster_delta *delta)
+{
+   delta->dirty = 0;
+
+   if (memcmp(rs->clip, old->clip, sizeof(rs->clip)))
+      delta->dirty |= ILO_STATE_RASTER_3DSTATE_CLIP;
+
+   if (memcmp(rs->sf, old->sf, sizeof(rs->sf)))
+      delta->dirty |= ILO_STATE_RASTER_3DSTATE_SF;
+
+   /* before Gen8, raster[] (depth offsets) is emitted as part of SF */
+   if (memcmp(rs->raster, old->raster, sizeof(rs->raster))) {
+      if (ilo_dev_gen(dev) >= ILO_GEN(8))
+         delta->dirty |= ILO_STATE_RASTER_3DSTATE_RASTER;
+      else
+         delta->dirty |= ILO_STATE_RASTER_3DSTATE_SF;
+   }
+
+   if (memcmp(rs->sample, old->sample, sizeof(rs->sample))) {
+      delta->dirty |= ILO_STATE_RASTER_3DSTATE_MULTISAMPLE |
+                      ILO_STATE_RASTER_3DSTATE_SAMPLE_MASK;
+   }
+
+   /* wm[] holds WM_HZ_OP dwords too on Gen8+ (see wm_hz_op packer) */
+   if (memcmp(rs->wm, old->wm, sizeof(rs->wm))) {
+      delta->dirty |= ILO_STATE_RASTER_3DSTATE_WM;
+
+      if (ilo_dev_gen(dev) >= ILO_GEN(8))
+         delta->dirty |= ILO_STATE_RASTER_3DSTATE_WM_HZ_OP;
+   }
+}
+
+/*
+ * Initialize an ilo_state_sample_pattern from \p info by packing all
+ * per-count patterns.
+ */
+bool
+ilo_state_sample_pattern_init(struct ilo_state_sample_pattern *pattern,
+                              const struct ilo_dev *dev,
+                              const struct ilo_state_sample_pattern_info *info)
+{
+   bool ret = true;
+
+   ret &= sample_pattern_set_gen8_3DSTATE_SAMPLE_PATTERN(pattern, dev, info);
+
+   assert(ret);
+
+   return ret;
+}
+
+/*
+ * Initialize with the driver's default sample positions (U0.4 coordinates,
+ * (8, 8) being the pixel center).
+ */
+bool
+ilo_state_sample_pattern_init_default(struct ilo_state_sample_pattern *pattern,
+                                      const struct ilo_dev *dev)
+{
+   static const struct ilo_state_sample_pattern_info default_info = {
+      .pattern_1x = {
+         {  8,  8 },
+      },
+
+      .pattern_2x = {
+         {  4,  4 }, { 12, 12 },
+      },
+
+      .pattern_4x = {
+         {  6,  2 }, { 14,  6 }, {  2, 10 }, { 10, 14 },
+      },
+
+      /* \see brw_multisample_positions_8x */
+      .pattern_8x = {
+         {  7,  9 }, {  9, 13 }, { 11,  3 }, { 13, 11 },
+         {  1,  7 }, {  5,  1 }, { 15,  5 }, {  3, 15 },
+      },
+
+      .pattern_16x = {
+         {  8, 10 }, { 11,  8 }, {  5,  6 }, {  6,  4 },
+         { 12, 11 }, { 13,  9 }, { 14,  7 }, { 10,  2 },
+         {  4, 13 }, {  3,  3 }, {  7,  1 }, { 15,  5 },
+         {  1, 12 }, {  9,  0 }, {  2, 14 }, {  0, 15 },
+      },
+   };
+
+   return ilo_state_sample_pattern_init(pattern, dev, &default_info);
+}
+
+/*
+ * Return the packed offset array for \p sample_count, or NULL (after
+ * asserting) for an unsupported count.
+ */
+const uint8_t *
+ilo_state_sample_pattern_get_packed_offsets(const struct ilo_state_sample_pattern *pattern,
+                                            const struct ilo_dev *dev,
+                                            uint8_t sample_count)
+{
+   switch (sample_count) {
+   case 1:  return pattern->pattern_1x;
+   case 2:  return pattern->pattern_2x;
+   case 4:  return pattern->pattern_4x;
+   case 8:  return pattern->pattern_8x;
+   case 16: return pattern->pattern_16x;
+   default:
+      assert(!"unknown sample count");
+      return NULL;
+   }
+}
+
+/**
+ * Decode the packed sample offset for (sample_count, sample_index) into its
+ * x and y components, each in U0.4 (x is the high nibble, y the low nibble;
+ * see sample_pattern_get_gen6_packed_offsets()).
+ */
+void
+ilo_state_sample_pattern_get_offset(const struct ilo_state_sample_pattern *pattern,
+                                    const struct ilo_dev *dev,
+                                    uint8_t sample_count, uint8_t sample_index,
+                                    uint8_t *x, uint8_t *y)
+{
+   /* fix: drop duplicated "const" qualifier */
+   const uint8_t *packed =
+      ilo_state_sample_pattern_get_packed_offsets(pattern, dev, sample_count);
+
+   assert(sample_index < sample_count);
+
+   *x = (packed[sample_index] >> 4) & 0xf;
+   *y = packed[sample_index] & 0xf;
+}
+
+/**
+ * Pack line stipple state from \p info.  No need to initialize \p stipple
+ * first; it is fully overwritten.
+ */
+bool
+ilo_state_line_stipple_set_info(struct ilo_state_line_stipple *stipple,
+                                const struct ilo_dev *dev,
+                                const struct ilo_state_line_stipple_info *info)
+{
+   bool ret = true;
+
+   ret &= line_stipple_set_gen6_3DSTATE_LINE_STIPPLE(stipple,
+         dev, info);
+
+   assert(ret);
+
+   return ret;
+}
+
+/**
+ * Pack polygon stipple state from \p info.  No need to initialize
+ * \p stipple first; it is fully overwritten.
+ */
+bool
+ilo_state_poly_stipple_set_info(struct ilo_state_poly_stipple *stipple,
+                                const struct ilo_dev *dev,
+                                const struct ilo_state_poly_stipple_info *info)
+{
+   bool ret = true;
+
+   ret &= poly_stipple_set_gen6_3DSTATE_POLY_STIPPLE_PATTERN(stipple,
+         dev, info);
+
+   assert(ret);
+
+   return ret;
+}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_raster.h b/src/gallium/drivers/ilo/core/ilo_state_raster.h
new file mode 100644
index 00000000000..fc90b49cfc3
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_raster.h
@@ -0,0 +1,301 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#ifndef ILO_STATE_RASTER_H
+#define ILO_STATE_RASTER_H
+
+#include "genhw/genhw.h"
+
+#include "ilo_core.h"
+#include "ilo_dev.h"
+
+enum ilo_state_raster_dirty_bits {
+ ILO_STATE_RASTER_3DSTATE_CLIP = (1 << 0),
+ ILO_STATE_RASTER_3DSTATE_SF = (1 << 1),
+ ILO_STATE_RASTER_3DSTATE_RASTER = (1 << 2),
+ ILO_STATE_RASTER_3DSTATE_MULTISAMPLE = (1 << 3),
+ ILO_STATE_RASTER_3DSTATE_SAMPLE_MASK = (1 << 4),
+ ILO_STATE_RASTER_3DSTATE_WM = (1 << 5),
+ ILO_STATE_RASTER_3DSTATE_WM_HZ_OP = (1 << 6),
+ ILO_STATE_RASTER_3DSTATE_AA_LINE_PARAMETERS = (1 << 7),
+};
+
+enum ilo_state_raster_earlyz_op {
+ ILO_STATE_RASTER_EARLYZ_NORMAL,
+ ILO_STATE_RASTER_EARLYZ_DEPTH_CLEAR,
+ ILO_STATE_RASTER_EARLYZ_DEPTH_RESOLVE,
+ ILO_STATE_RASTER_EARLYZ_HIZ_RESOLVE,
+};
+
+/**
+ * VUE readback, VertexClipTest, ClipDetermination, and primitive output.
+ */
+struct ilo_state_raster_clip_info {
+ bool clip_enable;
+ /* CL_INVOCATION_COUNT and CL_PRIMITIVES_COUNT */
+ bool stats_enable;
+
+ uint8_t viewport_count;
+ bool force_rtaindex_zero;
+
+ /* these should be mutually exclusive */
+ uint8_t user_cull_enables;
+ uint8_t user_clip_enables;
+
+ bool gb_test_enable;
+ bool xy_test_enable;
+
+ /* far/near must be enabled together prior to Gen9 */
+ bool z_far_enable;
+ bool z_near_enable;
+ bool z_near_zero;
+};
+
+/**
+ * Primitive assembly, viewport transformation, scissoring, MSAA, etc.
+ */
+struct ilo_state_raster_setup_info {
+ bool cv_is_rectangle;
+
+ bool first_vertex_provoking;
+ bool viewport_transform;
+
+ bool scissor_enable;
+
+ /* MSAA enables for lines and non-lines */
+ bool msaa_enable;
+ bool line_msaa_enable;
+};
+
+/**
+ * 3DOBJ_POINT rasterization rules.
+ */
+struct ilo_state_raster_point_info {
+ /* ignored when msaa_enable is set */
+ bool aa_enable;
+
+ bool programmable_width;
+};
+
+/**
+ * 3DOBJ_LINE rasterization rules.
+ */
+struct ilo_state_raster_line_info {
+ /* ignored when line_msaa_enable is set */
+ bool aa_enable;
+
+ /* ignored when line_msaa_enable or aa_enable is set */
+ bool stipple_enable;
+ bool giq_enable;
+ bool giq_last_pixel;
+};
+
+/**
+ * 3DOBJ_TRIANGLE rasterization rules.
+ */
+struct ilo_state_raster_tri_info {
+ enum gen_front_winding front_winding;
+ enum gen_cull_mode cull_mode;
+ enum gen_fill_mode fill_mode_front;
+ enum gen_fill_mode fill_mode_back;
+
+ enum gen_depth_format depth_offset_format;
+ bool depth_offset_solid;
+ bool depth_offset_wireframe;
+ bool depth_offset_point;
+
+ bool poly_stipple_enable;
+};
+
+/**
+ * Scan conversion.
+ */
+struct ilo_state_raster_scan_info {
+ /* PS_DEPTH_COUNT and PS_INVOCATION_COUNT */
+ bool stats_enable;
+
+ uint8_t sample_count;
+
+ /* pixel location for non-MSAA or 1x-MSAA */
+ enum gen_pixel_location pixloc;
+
+ uint32_t sample_mask;
+
+ /* interpolations */
+ enum gen_zw_interp zw_interp;
+ uint8_t barycentric_interps;
+
+ /* Gen7+ only */
+ enum gen_edsc_mode earlyz_control;
+ enum ilo_state_raster_earlyz_op earlyz_op;
+ bool earlyz_stencil_clear;
+};
+
+/**
+ * Raster parameters.
+ */
+struct ilo_state_raster_params_info {
+ bool any_integer_rt;
+ bool hiz_enable;
+
+ float point_width;
+ float line_width;
+
+ /* const term will be scaled by 'r' */
+ float depth_offset_const;
+ float depth_offset_scale;
+ float depth_offset_clamp;
+};
+
+struct ilo_state_raster_info {
+ struct ilo_state_raster_clip_info clip;
+ struct ilo_state_raster_setup_info setup;
+ struct ilo_state_raster_point_info point;
+ struct ilo_state_raster_line_info line;
+ struct ilo_state_raster_tri_info tri;
+ struct ilo_state_raster_scan_info scan;
+
+ struct ilo_state_raster_params_info params;
+};
+
+struct ilo_state_raster {
+   uint32_t clip[3];     /* 3DSTATE_CLIP payload */
+   uint32_t sf[3];       /* 3DSTATE_SF dw1-dw3 */
+   /* 3DSTATE_RASTER dw1 (Gen8) or 0 (pre-Gen8), then the three depth
+    * offset floats (const, scale, clamp) as raw bits
+    */
+   uint32_t raster[4];
+   uint32_t sample[2];   /* 3DSTATE_MULTISAMPLE dw1, 3DSTATE_SAMPLE_MASK dw1 */
+   /* Gen6: WM dw4-dw6; Gen7+: WM dw1, then WM_HZ_OP dw1/dw4 on Gen8 */
+   uint32_t wm[3];
+
+   /* cached so ilo_state_raster_set_params() can repatch without full info */
+   bool line_aa_enable;
+   bool line_giq_enable;
+};
+
+struct ilo_state_raster_delta {
+   /* bitmask of ILO_STATE_RASTER_3DSTATE_* dirty bits */
+   uint32_t dirty;
+};
+
+struct ilo_state_sample_pattern_offset_info {
+ /* in U0.4 */
+ uint8_t x;
+ uint8_t y;
+};
+
+struct ilo_state_sample_pattern_info {
+ struct ilo_state_sample_pattern_offset_info pattern_1x[1];
+ struct ilo_state_sample_pattern_offset_info pattern_2x[2];
+ struct ilo_state_sample_pattern_offset_info pattern_4x[4];
+ struct ilo_state_sample_pattern_offset_info pattern_8x[8];
+ struct ilo_state_sample_pattern_offset_info pattern_16x[16];
+};
+
+struct ilo_state_sample_pattern {
+ uint8_t pattern_1x[1];
+ uint8_t pattern_2x[2];
+ uint8_t pattern_4x[4];
+ uint8_t pattern_8x[8];
+ uint8_t pattern_16x[16];
+};
+
+struct ilo_state_line_stipple_info {
+ uint16_t pattern;
+ uint16_t repeat_count;
+};
+
+struct ilo_state_line_stipple {
+ uint32_t stipple[2];
+};
+
+struct ilo_state_poly_stipple_info {
+ uint32_t pattern[32];
+};
+
+struct ilo_state_poly_stipple {
+ uint32_t stipple[32];
+};
+
+bool
+ilo_state_raster_init(struct ilo_state_raster *rs,
+ const struct ilo_dev *dev,
+ const struct ilo_state_raster_info *info);
+
+bool
+ilo_state_raster_init_for_rectlist(struct ilo_state_raster *rs,
+ const struct ilo_dev *dev,
+ uint8_t sample_count,
+ enum ilo_state_raster_earlyz_op earlyz_op,
+ bool earlyz_stencil_clear);
+
+bool
+ilo_state_raster_set_info(struct ilo_state_raster *rs,
+ const struct ilo_dev *dev,
+ const struct ilo_state_raster_info *info);
+
+bool
+ilo_state_raster_set_params(struct ilo_state_raster *rs,
+ const struct ilo_dev *dev,
+ const struct ilo_state_raster_params_info *params);
+
+void
+ilo_state_raster_full_delta(const struct ilo_state_raster *rs,
+ const struct ilo_dev *dev,
+ struct ilo_state_raster_delta *delta);
+
+void
+ilo_state_raster_get_delta(const struct ilo_state_raster *rs,
+ const struct ilo_dev *dev,
+ const struct ilo_state_raster *old,
+ struct ilo_state_raster_delta *delta);
+
+bool
+ilo_state_sample_pattern_init(struct ilo_state_sample_pattern *pattern,
+ const struct ilo_dev *dev,
+ const struct ilo_state_sample_pattern_info *info);
+
+bool
+ilo_state_sample_pattern_init_default(struct ilo_state_sample_pattern *pattern,
+ const struct ilo_dev *dev);
+
+const uint8_t *
+ilo_state_sample_pattern_get_packed_offsets(const struct ilo_state_sample_pattern *pattern,
+ const struct ilo_dev *dev,
+ uint8_t sample_count);
+
+void
+ilo_state_sample_pattern_get_offset(const struct ilo_state_sample_pattern *pattern,
+ const struct ilo_dev *dev,
+ uint8_t sample_count, uint8_t sample_index,
+ uint8_t *x, uint8_t *y);
+bool
+ilo_state_line_stipple_set_info(struct ilo_state_line_stipple *stipple,
+ const struct ilo_dev *dev,
+ const struct ilo_state_line_stipple_info *info);
+
+bool
+ilo_state_poly_stipple_set_info(struct ilo_state_poly_stipple *stipple,
+ const struct ilo_dev *dev,
+ const struct ilo_state_poly_stipple_info *info);
+
+#endif /* ILO_STATE_RASTER_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_state_sampler.c b/src/gallium/drivers/ilo/core/ilo_state_sampler.c
new file mode 100644
index 00000000000..3787f684fe8
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_sampler.c
@@ -0,0 +1,742 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#include "util/u_half.h"
+
+#include "ilo_debug.h"
+#include "ilo_state_surface.h"
+#include "ilo_state_sampler.h"
+
+/* Validate the restrictions that apply when non-normalized (texel-space)
+ * texture coordinates are enabled.  All checks are assert-only, so the
+ * function always returns true in release builds.
+ */
+static bool
+sampler_validate_gen6_non_normalized(const struct ilo_dev *dev,
+                                     const struct ilo_state_sampler_info *info)
+{
+   const enum gen_texcoord_mode addr_ctrls[3] = {
+      info->tcx_ctrl, info->tcy_ctrl, info->tcz_ctrl,
+   };
+   int i;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * From the Ivy Bridge PRM, volume 4 part 1, page 98:
+    *
+    *     "The following state must be set as indicated if this field
+    *      (Non-normalized Coordinate Enable) is enabled:
+    *
+    *      - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP,
+    *        TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER.
+    *      - Surface Type must be SURFTYPE_2D or SURFTYPE_3D.
+    *      - Mag Mode Filter must be MAPFILTER_NEAREST or
+    *        MAPFILTER_LINEAR.
+    *      - Min Mode Filter must be MAPFILTER_NEAREST or
+    *        MAPFILTER_LINEAR.
+    *      - Mip Mode Filter must be MIPFILTER_NONE.
+    *      - Min LOD must be 0.
+    *      - Max LOD must be 0.
+    *      - MIP Count must be 0.
+    *      - Surface Min LOD must be 0.
+    *      - Texture LOD Bias must be 0."
+    */
+   for (i = 0; i < 3; i++) {
+      switch (addr_ctrls[i]) {
+      case GEN6_TEXCOORDMODE_CLAMP:
+      case GEN6_TEXCOORDMODE_CLAMP_BORDER:
+      case GEN8_TEXCOORDMODE_HALF_BORDER:
+         break;
+      default:
+         assert(!"bad non-normalized coordinate wrap mode");
+         break;
+      }
+   }
+
+   assert(info->mip_filter == GEN6_MIPFILTER_NONE);
+
+   assert((info->min_filter == GEN6_MAPFILTER_NEAREST ||
+           info->min_filter == GEN6_MAPFILTER_LINEAR) &&
+          (info->mag_filter == GEN6_MAPFILTER_NEAREST ||
+           info->mag_filter == GEN6_MAPFILTER_LINEAR));
+
+   /* the surface-type and SurfMinLod restrictions are checked later, in
+    * ilo_state_sampler_set_surface()
+    */
+   assert(info->min_lod == 0.0f &&
+          info->max_lod == 0.0f &&
+          info->lod_bias == 0.0f);
+
+   return true;
+}
+
+/* Validate a sampler info.  Returns false only when the non-normalized
+ * coordinate checks fail; everything else is assert-only.
+ */
+static bool
+sampler_validate_gen6_sampler(const struct ilo_dev *dev,
+                              const struct ilo_state_sampler_info *info)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (info->non_normalized &&
+       !sampler_validate_gen6_non_normalized(dev, info))
+      return false;
+
+   if (ilo_dev_gen(dev) < ILO_GEN(8)) {
+      /* TEXCOORDMODE_HALF_BORDER is a Gen8 addition */
+      assert(info->tcx_ctrl != GEN8_TEXCOORDMODE_HALF_BORDER &&
+             info->tcy_ctrl != GEN8_TEXCOORDMODE_HALF_BORDER &&
+             info->tcz_ctrl != GEN8_TEXCOORDMODE_HALF_BORDER);
+   }
+
+   return true;
+}
+
+/* Return replacement DW0 filter bits to be used when sampling from integer
+ * surface formats, which do not support sampling engine filtering.
+ */
+static uint32_t
+sampler_get_gen6_integer_filters(const struct ilo_dev *dev,
+                                 const struct ilo_state_sampler_info *info)
+{
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 103:
+    *
+    *     "MIPFILTER_LINEAR is not supported for surface formats that do not
+    *      support "Sampling Engine Filtering" as indicated in the Surface
+    *      Formats table unless using the sample_c message type."
+    *
+    *     "Only MAPFILTER_NEAREST is supported for surface formats that do not
+    *      support "Sampling Engine Filtering" as indicated in the Surface
+    *      Formats table unless using the sample_c message type."
+    */
+   const enum gen_mip_filter mip_filter =
+      (info->mip_filter == GEN6_MIPFILTER_LINEAR) ?
+      GEN6_MIPFILTER_NEAREST : info->mip_filter;
+   const enum gen_map_filter min_filter = GEN6_MAPFILTER_NEAREST;
+   const enum gen_map_filter mag_filter = GEN6_MAPFILTER_NEAREST;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   return mip_filter << GEN6_SAMPLER_DW0_MIP_FILTER__SHIFT |
+          mag_filter << GEN6_SAMPLER_DW0_MAG_FILTER__SHIFT |
+          min_filter << GEN6_SAMPLER_DW0_MIN_FILTER__SHIFT;
+}
+
+/* Return replacement DW0 filter bits to be used when sampling from
+ * SURFTYPE_3D surfaces, which only support NEAREST and LINEAR filtering.
+ */
+static uint32_t
+sampler_get_gen6_3d_filters(const struct ilo_dev *dev,
+                            const struct ilo_state_sampler_info *info)
+{
+   const enum gen_mip_filter mip_filter = info->mip_filter;
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 103:
+    *
+    *     "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
+    *      surfaces of type SURFTYPE_3D."
+    */
+   const enum gen_map_filter min_filter =
+      (info->min_filter == GEN6_MAPFILTER_NEAREST ||
+       info->min_filter == GEN6_MAPFILTER_LINEAR) ?
+      info->min_filter : GEN6_MAPFILTER_LINEAR;
+   const enum gen_map_filter mag_filter =
+      (info->mag_filter == GEN6_MAPFILTER_NEAREST ||
+       info->mag_filter == GEN6_MAPFILTER_LINEAR) ?
+      info->mag_filter : GEN6_MAPFILTER_LINEAR;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   return mip_filter << GEN6_SAMPLER_DW0_MIP_FILTER__SHIFT |
+          mag_filter << GEN6_SAMPLER_DW0_MAG_FILTER__SHIFT |
+          min_filter << GEN6_SAMPLER_DW0_MIN_FILTER__SHIFT;
+}
+
+/* Pack the three texcoord address control (wrap) modes into the wrap-mode
+ * fields of the appropriate SAMPLER_STATE dword: DW3 on Gen7+, DW1 on Gen6.
+ */
+static uint32_t
+get_gen6_addr_controls(const struct ilo_dev *dev,
+                       enum gen_texcoord_mode tcx_ctrl,
+                       enum gen_texcoord_mode tcy_ctrl,
+                       enum gen_texcoord_mode tcz_ctrl)
+{
+   uint32_t dw;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      dw = tcx_ctrl << GEN7_SAMPLER_DW3_U_WRAP__SHIFT;
+      dw |= tcy_ctrl << GEN7_SAMPLER_DW3_V_WRAP__SHIFT;
+      dw |= tcz_ctrl << GEN7_SAMPLER_DW3_R_WRAP__SHIFT;
+   } else {
+      dw = tcx_ctrl << GEN6_SAMPLER_DW1_U_WRAP__SHIFT;
+      dw |= tcy_ctrl << GEN6_SAMPLER_DW1_V_WRAP__SHIFT;
+      dw |= tcz_ctrl << GEN6_SAMPLER_DW1_R_WRAP__SHIFT;
+   }
+
+   return dw;
+}
+
+/* Return wrap-mode bits to be used when sampling from SURFTYPE_1D
+ * surfaces.
+ */
+static uint32_t
+sampler_get_gen6_1d_addr_controls(const struct ilo_dev *dev,
+                                  const struct ilo_state_sampler_info *info)
+{
+   /* CUBE mode is meaningless for a 1D surface; fall back to CLAMP */
+   const enum gen_texcoord_mode tcx_ctrl =
+      (info->tcx_ctrl == GEN6_TEXCOORDMODE_CUBE) ?
+      GEN6_TEXCOORDMODE_CLAMP : info->tcx_ctrl;
+   /*
+    * From the Ivy Bridge PRM, volume 4 part 1, page 100:
+    *
+    *     "If this field (TCY Address Control Mode) is set to
+    *      TEXCOORDMODE_CLAMP_BORDER or TEXCOORDMODE_HALF_BORDER and a 1D
+    *      surface is sampled, incorrect blending with the border color in the
+    *      vertical direction may occur."
+    */
+   const enum gen_texcoord_mode tcy_ctrl = GEN6_TEXCOORDMODE_CLAMP;
+   const enum gen_texcoord_mode tcz_ctrl = GEN6_TEXCOORDMODE_CLAMP;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   return get_gen6_addr_controls(dev, tcx_ctrl, tcy_ctrl, tcz_ctrl);
+}
+
+/* Return wrap-mode bits to be used when sampling from SURFTYPE_2D or
+ * SURFTYPE_3D surfaces.  CUBE mode is replaced by CLAMP on all three axes.
+ */
+static uint32_t
+sampler_get_gen6_2d_3d_addr_controls(const struct ilo_dev *dev,
+                                     const struct ilo_state_sampler_info *info)
+{
+   const enum gen_texcoord_mode tcx_ctrl =
+      (info->tcx_ctrl == GEN6_TEXCOORDMODE_CUBE) ?
+      GEN6_TEXCOORDMODE_CLAMP : info->tcx_ctrl;
+   const enum gen_texcoord_mode tcy_ctrl =
+      (info->tcy_ctrl == GEN6_TEXCOORDMODE_CUBE) ?
+      GEN6_TEXCOORDMODE_CLAMP : info->tcy_ctrl;
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 108:
+    *
+    *     "[DevSNB]: if this field (TCZ Address Control Mode) is set to
+    *      TEXCOORDMODE_CLAMP_BORDER samples outside the map will clamp to 0
+    *      instead of boarder color"
+    *
+    * From the Ivy Bridge PRM, volume 4 part 1, page 100:
+    *
+    *     "If this field is set to TEXCOORDMODE_CLAMP_BORDER for 3D maps on
+    *      formats without an alpha channel, samples straddling the map in the
+    *      Z direction may have their alpha channels off by 1."
+    *
+    * Do we want to do something here?
+    */
+   const enum gen_texcoord_mode tcz_ctrl =
+      (info->tcz_ctrl == GEN6_TEXCOORDMODE_CUBE) ?
+      GEN6_TEXCOORDMODE_CLAMP : info->tcz_ctrl;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   return get_gen6_addr_controls(dev, tcx_ctrl, tcy_ctrl, tcz_ctrl);
+}
+
+/* Return wrap-mode bits to be used when sampling from SURFTYPE_CUBE
+ * surfaces.  Before Gen7.5, only CLAMP and CUBE are valid, and all three
+ * axes must use the same mode; the TCX mode is propagated to TCY and TCZ.
+ */
+static uint32_t
+sampler_get_gen6_cube_addr_controls(const struct ilo_dev *dev,
+                                    const struct ilo_state_sampler_info *info)
+{
+   /*
+    * From the Ivy Bridge PRM, volume 4 part 1, page 99:
+    *
+    *     "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP
+    *      and TEXCOORDMODE_CUBE settings are valid, and each TC component
+    *      must have the same Address Control mode.
+    *
+    *      When TEXCOORDMODE_CUBE is not used accessing a cube map, the map's
+    *      Cube Face Enable field must be programmed to 111111b (all faces
+    *      enabled)."
+    *
+    * From the Haswell PRM, volume 2d, page 278:
+    *
+    *     "When using cube map texture coordinates, each TC component must
+    *      have the same Address Control Mode.
+    *
+    *      When TEXCOORDMODE_CUBE is not used accessing a cube map, the map's
+    *      Cube Face Enable field must be programmed to 111111b (all faces
+    *      enabled)."
+    *
+    * We always enable all cube faces and only need to make sure all address
+    * control modes are the same.
+    */
+   const enum gen_texcoord_mode tcx_ctrl =
+      (ilo_dev_gen(dev) >= ILO_GEN(7.5) ||
+       info->tcx_ctrl == GEN6_TEXCOORDMODE_CUBE ||
+       info->tcx_ctrl == GEN6_TEXCOORDMODE_CLAMP) ?
+      info->tcx_ctrl : GEN6_TEXCOORDMODE_CLAMP;
+   const enum gen_texcoord_mode tcy_ctrl = tcx_ctrl;
+   const enum gen_texcoord_mode tcz_ctrl = tcx_ctrl;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   return get_gen6_addr_controls(dev, tcx_ctrl, tcy_ctrl, tcz_ctrl);
+}
+
+/* Convert a LOD bias to the signed fixed-point encoding of SAMPLER_STATE:
+ * S4.6 on Gen6, S4.8 on Gen7+, clamped to [-16.0, 16.0).
+ */
+static uint16_t
+get_gen6_lod_bias(const struct ilo_dev *dev, float bias)
+{
+   /* [-16.0, 16.0) in S4.6 or S4.8 */
+   const int fbits = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 8 : 6;
+   const float max = 16.0f;
+   const float scale = (float) (1 << fbits);
+   /* sign bit + 4 integer bits + fbits fraction bits */
+   const int mask = (1 << (1 + 4 + fbits)) - 1;
+   /* +16.0 itself is not representable; the largest value is one ULP below */
+   const int scaled_max = (16 << fbits) - 1;
+   int scaled;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (bias > max)
+      bias = max;
+   else if (bias < -max)
+      bias = -max;
+
+   scaled = (int) (bias * scale);
+   if (scaled > scaled_max)
+      scaled = scaled_max;
+
+   /* keep the two's-complement bits of the field width only */
+   return (scaled & mask);
+}
+
+/* Convert a LOD clamp value to the unsigned fixed-point encoding of
+ * SAMPLER_STATE: U4.6 in [0.0, 13.0] on Gen6, U4.8 in [0.0, 14.0] on Gen7+.
+ */
+static uint16_t
+get_gen6_lod_clamp(const struct ilo_dev *dev, float clamp)
+{
+   /* [0.0, 13.0] in U4.6 or [0.0, 14.0] in U4.8 */
+   const int fbits = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 8 : 6;
+   const float max = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 14.0f : 13.0f;
+   const float scale = (float) (1 << fbits);
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (clamp > max)
+      clamp = max;
+   else if (clamp < 0.0f)
+      clamp = 0.0f;
+
+   return (int) (clamp * scale);
+}
+
+/* Fill in sampler->sampler[] with SAMPLER_STATE DW0, DW1, and DW3 (DW2, the
+ * border color pointer, is emitted elsewhere), and precompute the
+ * surface-dependent replacement bits consumed by
+ * ilo_state_sampler_set_surface().
+ */
+static bool
+sampler_set_gen6_SAMPLER_STATE(struct ilo_state_sampler *sampler,
+                               const struct ilo_dev *dev,
+                               const struct ilo_state_sampler_info *info)
+{
+   uint16_t lod_bias, max_lod, min_lod;
+   uint32_t dw0, dw1, dw3;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (!sampler_validate_gen6_sampler(dev, info))
+      return false;
+
+   /*
+    * From the Ivy Bridge PRM, volume 4 part 1, page 15:
+    *
+    *     "The per-pixel LOD is computed in an implementation-dependent manner
+    *      and approximates the log2 of the texel/pixel ratio at the given
+    *      pixel.  The computation is typically based on the differential
+    *      texel-space distances associated with a one-pixel differential
+    *      distance along the screen x- and y-axes.  These texel-space
+    *      distances are computed by evaluating neighboring pixel texture
+    *      coordinates, these coordinates being in units of texels on the base
+    *      MIP level (multiplied by the corresponding surface size in
+    *      texels)."
+    *
+    * Judging from the LOD computation pseudocode on page 16-18, the "base MIP
+    * level" should be given by SurfMinLod.  To summarize, for the "sample"
+    * message,
+    *
+    *  1) LOD is set to log2(texel/pixel ratio).  The number of texels is
+    *     measured against level SurfMinLod.
+    *  2) Bias is added to LOD.
+    *  3) if pre-clamp is enabled, LOD is clamped to [MinLod, MaxLod] first
+    *  4) LOD is compared with Base to determine whether magnification or
+    *     minification is needed.
+    *  5) If magnification is needed, or no mipmapping is requested, LOD is
+    *     set to floor(MinLod).
+    *  6) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD.
+    *
+    * As an example, we could set SurfMinLod to GL_TEXTURE_BASE_LEVEL and Base
+    * to 0 to match GL.  But GL expects LOD to be set to 0, instead of
+    * floor(MinLod), in 5).  Since this is only an issue when MinLod is
+    * greater than or equal to one, and, with Base being 0, a non-zero MinLod
+    * implies minification, we only need to deal with the case when mipmapping
+    * is disabled.  We can thus do:
+    *
+    *   if (MipFilter == MIPFILTER_NONE && MinLod) {
+    *     MinLod = 0;
+    *     MagFilter = MinFilter;
+    *   }
+    */
+
+   lod_bias = get_gen6_lod_bias(dev, info->lod_bias);
+   min_lod = get_gen6_lod_clamp(dev, info->min_lod);
+   max_lod = get_gen6_lod_clamp(dev, info->max_lod);
+
+   dw0 = GEN6_SAMPLER_DW0_LOD_PRECLAMP_ENABLE |
+         0 << GEN6_SAMPLER_DW0_BASE_LOD__SHIFT |
+         info->mip_filter << GEN6_SAMPLER_DW0_MIP_FILTER__SHIFT |
+         info->mag_filter << GEN6_SAMPLER_DW0_MAG_FILTER__SHIFT |
+         info->min_filter << GEN6_SAMPLER_DW0_MIN_FILTER__SHIFT;
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      dw0 |= GEN7_SAMPLER_DW0_BORDER_COLOR_MODE_DX10_OGL |
+             lod_bias << GEN7_SAMPLER_DW0_LOD_BIAS__SHIFT;
+
+      /* EWA is the higher-quality anisotropic filtering algorithm */
+      if (info->min_filter == GEN6_MAPFILTER_ANISOTROPIC ||
+          info->mag_filter == GEN6_MAPFILTER_ANISOTROPIC)
+         dw0 |= GEN7_SAMPLER_DW0_ANISO_ALGO_EWA;
+   } else {
+      dw0 |= lod_bias << GEN6_SAMPLER_DW0_LOD_BIAS__SHIFT |
+             info->shadow_func << GEN6_SAMPLER_DW0_SHADOW_FUNC__SHIFT;
+
+      /*
+       * From the Sandy Bridge PRM, volume 4 part 1, page 102:
+       *
+       *     "(Min and Mag State Not Equal) Must be set to 1 if any of the
+       *      following are true:
+       *
+       *      - Mag Mode Filter and Min Mode Filter are not the same
+       *      - Address Rounding Enable: U address mag filter and U address
+       *        min filter are not the same
+       *      - Address Rounding Enable: V address mag filter and V address
+       *        min filter are not the same
+       *      - Address Rounding Enable: R address mag filter and R address
+       *        min filter are not the same"
+       *
+       * We set address rounding for U, V, and R uniformly.  Only need to
+       * check the filters.
+       */
+      if (info->min_filter != info->mag_filter)
+         dw0 |= GEN6_SAMPLER_DW0_MIN_MAG_NOT_EQUAL;
+   }
+
+   dw1 = 0;
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      /*
+       * From the Ivy Bridge PRM, volume 4 part 1, page 96:
+       *
+       *     "This field (Cube Surface Control Mode) must be set to
+       *      CUBECTRLMODE_PROGRAMMED"
+       */
+      dw1 |= min_lod << GEN7_SAMPLER_DW1_MIN_LOD__SHIFT |
+             max_lod << GEN7_SAMPLER_DW1_MAX_LOD__SHIFT |
+             info->shadow_func << GEN7_SAMPLER_DW1_SHADOW_FUNC__SHIFT |
+             GEN7_SAMPLER_DW1_CUBECTRLMODE_PROGRAMMED;
+   } else {
+      dw1 |= min_lod << GEN6_SAMPLER_DW1_MIN_LOD__SHIFT |
+             max_lod << GEN6_SAMPLER_DW1_MAX_LOD__SHIFT |
+             GEN6_SAMPLER_DW1_CUBECTRLMODE_PROGRAMMED |
+             info->tcx_ctrl << GEN6_SAMPLER_DW1_U_WRAP__SHIFT |
+             info->tcy_ctrl << GEN6_SAMPLER_DW1_V_WRAP__SHIFT |
+             info->tcz_ctrl << GEN6_SAMPLER_DW1_R_WRAP__SHIFT;
+   }
+
+   dw3 = info->max_anisotropy << GEN6_SAMPLER_DW3_MAX_ANISO__SHIFT;
+
+   /* round the coordinates for linear filtering */
+   if (info->min_filter != GEN6_MAPFILTER_NEAREST) {
+      dw3 |= GEN6_SAMPLER_DW3_U_MIN_ROUND |
+             GEN6_SAMPLER_DW3_V_MIN_ROUND |
+             GEN6_SAMPLER_DW3_R_MIN_ROUND;
+   }
+   if (info->mag_filter != GEN6_MAPFILTER_NEAREST) {
+      dw3 |= GEN6_SAMPLER_DW3_U_MAG_ROUND |
+             GEN6_SAMPLER_DW3_V_MAG_ROUND |
+             GEN6_SAMPLER_DW3_R_MAG_ROUND;
+   }
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      dw3 |= GEN7_SAMPLER_DW3_TRIQUAL_FULL |
+             info->tcx_ctrl << GEN7_SAMPLER_DW3_U_WRAP__SHIFT |
+             info->tcy_ctrl << GEN7_SAMPLER_DW3_V_WRAP__SHIFT |
+             info->tcz_ctrl << GEN7_SAMPLER_DW3_R_WRAP__SHIFT;
+
+      if (info->non_normalized)
+         dw3 |= GEN7_SAMPLER_DW3_NON_NORMALIZED_COORD;
+   } else {
+      if (info->non_normalized)
+         dw3 |= GEN6_SAMPLER_DW3_NON_NORMALIZED_COORD;
+   }
+
+   STATIC_ASSERT(ARRAY_SIZE(sampler->sampler) >= 3);
+   sampler->sampler[0] = dw0;
+   sampler->sampler[1] = dw1;
+   sampler->sampler[2] = dw3;
+
+   /* surface-dependent variants, applied in ilo_state_sampler_set_surface() */
+   sampler->filter_integer = sampler_get_gen6_integer_filters(dev, info);
+   sampler->filter_3d = sampler_get_gen6_3d_filters(dev, info);
+   sampler->addr_ctrl_1d = sampler_get_gen6_1d_addr_controls(dev, info);
+   sampler->addr_ctrl_2d_3d = sampler_get_gen6_2d_3d_addr_controls(dev, info);
+   sampler->addr_ctrl_cube = sampler_get_gen6_cube_addr_controls(dev, info);
+
+   sampler->non_normalized = info->non_normalized;
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 21:
+    *
+    *     "[DevSNB] Errata: Incorrect behavior is observed in cases where the
+    *      min and mag mode filters are different and SurfMinLOD is nonzero.
+    *      The determination of MagMode uses the following equation instead of
+    *      the one in the above pseudocode:
+    *
+    *      MagMode = (LOD + SurfMinLOD - Base <= 0)"
+    *
+    * As a way to work around that, request Base to be set to SurfMinLod.
+    */
+   if (ilo_dev_gen(dev) == ILO_GEN(6) &&
+       info->min_filter != info->mag_filter)
+      sampler->base_to_surf_min_lod = true;
+
+   return true;
+}
+
+/* Pack the border color in every format Gen6 hardware may pick.
+ *
+ * All packing is done in uint32_t with explicit masking.  The previous
+ * approach of casting to a narrow signed type and OR'ing shifted values
+ * was broken: the narrow value is promoted back to int before the shift,
+ * so a negative SNORM channel sign-extends and corrupts the neighboring
+ * packed fields, and left-shifting a promoted value into the sign bit of
+ * int is undefined behavior.
+ */
+static bool
+sampler_border_set_gen6_SAMPLER_BORDER_COLOR_STATE(struct ilo_state_sampler_border *border,
+                                                   const struct ilo_dev *dev,
+                                                   const struct ilo_state_sampler_border_info *info)
+{
+   uint32_t dw[12];
+   float rgba[4];
+
+   /*
+    * From the Ivy Bridge PRM, volume 4 part 1, page 117:
+    *
+    *     "For ([DevSNB]), if border color is used, all formats must be
+    *      provided.  Hardware will choose the appropriate format based on
+    *      Surface Format and Texture Border Color Mode.  The values
+    *      represented by each format should be the same (other than being
+    *      subject to range-based clamping and precision) to avoid unexpected
+    *      behavior."
+    *
+    * XXX We do not honor info->is_integer yet.
+    */
+
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   /* make a copy so that we can clamp for SNORM and UNORM */
+   memcpy(rgba, info->rgba.f, sizeof(rgba));
+
+   /* IEEE_FP */
+   dw[1] = fui(rgba[0]);
+   dw[2] = fui(rgba[1]);
+   dw[3] = fui(rgba[2]);
+   dw[4] = fui(rgba[3]);
+
+   /* FLOAT_16 */
+   dw[5] = util_float_to_half(rgba[0]) |
+           (uint32_t) util_float_to_half(rgba[1]) << 16;
+   dw[6] = util_float_to_half(rgba[2]) |
+           (uint32_t) util_float_to_half(rgba[3]) << 16;
+
+   /* clamp to [-1.0f, 1.0f] */
+   rgba[0] = CLAMP(rgba[0], -1.0f, 1.0f);
+   rgba[1] = CLAMP(rgba[1], -1.0f, 1.0f);
+   rgba[2] = CLAMP(rgba[2], -1.0f, 1.0f);
+   rgba[3] = CLAMP(rgba[3], -1.0f, 1.0f);
+
+   /* SNORM16; mask keeps the low 16 two's-complement bits of each channel */
+   dw[9]  = ((uint32_t) util_iround(rgba[0] * 32767.0f) & 0xffff) |
+            ((uint32_t) util_iround(rgba[1] * 32767.0f) & 0xffff) << 16;
+   dw[10] = ((uint32_t) util_iround(rgba[2] * 32767.0f) & 0xffff) |
+            ((uint32_t) util_iround(rgba[3] * 32767.0f) & 0xffff) << 16;
+
+   /* SNORM8 */
+   dw[11] = ((uint32_t) util_iround(rgba[0] * 127.0f) & 0xff) |
+            ((uint32_t) util_iround(rgba[1] * 127.0f) & 0xff) << 8 |
+            ((uint32_t) util_iround(rgba[2] * 127.0f) & 0xff) << 16 |
+            ((uint32_t) util_iround(rgba[3] * 127.0f) & 0xff) << 24;
+
+   /* clamp to [0.0f, 1.0f] */
+   rgba[0] = CLAMP(rgba[0], 0.0f, 1.0f);
+   rgba[1] = CLAMP(rgba[1], 0.0f, 1.0f);
+   rgba[2] = CLAMP(rgba[2], 0.0f, 1.0f);
+   rgba[3] = CLAMP(rgba[3], 0.0f, 1.0f);
+
+   /* UNORM8; the clamped values are non-negative, no masking needed */
+   dw[0] = (uint32_t) util_iround(rgba[0] * 255.0f) |
+           (uint32_t) util_iround(rgba[1] * 255.0f) << 8 |
+           (uint32_t) util_iround(rgba[2] * 255.0f) << 16 |
+           (uint32_t) util_iround(rgba[3] * 255.0f) << 24;
+
+   /* UNORM16 */
+   dw[7] = (uint32_t) util_iround(rgba[0] * 65535.0f) |
+           (uint32_t) util_iround(rgba[1] * 65535.0f) << 16;
+   dw[8] = (uint32_t) util_iround(rgba[2] * 65535.0f) |
+           (uint32_t) util_iround(rgba[3] * 65535.0f) << 16;
+
+   STATIC_ASSERT(ARRAY_SIZE(border->color) >= 12);
+   memcpy(border->color, dw, sizeof(dw));
+
+   return true;
+}
+
+/* Store the border color for Gen7+, which in DX10/OGL border color mode
+ * only needs the R32G32B32A32 form; the four floats are copied verbatim.
+ */
+static bool
+sampler_border_set_gen7_SAMPLER_BORDER_COLOR_STATE(struct ilo_state_sampler_border *border,
+                                                   const struct ilo_dev *dev,
+                                                   const struct ilo_state_sampler_border_info *info)
+{
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   /*
+    * From the Ivy Bridge PRM, volume 4 part 1, page 116:
+    *
+    *     "In DX10/OGL mode, the format of the border color is
+    *      R32G32B32A32_FLOAT, regardless of the surface format chosen."
+    *
+    * From the Haswell PRM, volume 2d, page 240:
+    *
+    *     "So, SW will have to program the table in SAMPLER_BORDER_COLOR_STATE
+    *      at offsets DWORD16 to 19, as per the integer surface format type."
+    *
+    * From the Broadwell PRM, volume 2d, page 297:
+    *
+    *     "DX10/OGL mode: the format of the border color depends on the format
+    *      of the surface being sampled.  If the map format is UINT, then the
+    *      border color format is R32G32B32A32_UINT.  If the map format is
+    *      SINT, then the border color format is R32G32B32A32_SINT.  Otherwise,
+    *      the border color format is R32G32B32A32_FLOAT."
+    *
+    * XXX every Gen is different
+    */
+
+   STATIC_ASSERT(ARRAY_SIZE(border->color) >= 4);
+   memcpy(border->color, info->rgba.f, sizeof(info->rgba.f));
+
+   return true;
+}
+
+/* Initialize a sampler from \p info.  \p sampler must be zeroed first. */
+bool
+ilo_state_sampler_init(struct ilo_state_sampler *sampler,
+                       const struct ilo_dev *dev,
+                       const struct ilo_state_sampler_info *info)
+{
+   bool ok;
+
+   assert(ilo_is_zeroed(sampler, sizeof(*sampler)));
+
+   ok = sampler_set_gen6_SAMPLER_STATE(sampler, dev, info);
+   assert(ok);
+
+   return ok;
+}
+
+/* Initialize a disabled sampler.  \p sampler must be zeroed first. */
+bool
+ilo_state_sampler_init_disabled(struct ilo_state_sampler *sampler,
+                                const struct ilo_dev *dev)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   assert(ilo_is_zeroed(sampler, sizeof(*sampler)));
+
+   /* only the disable bit in DW0 matters */
+   sampler->sampler[0] = GEN6_SAMPLER_DW0_DISABLE;
+   sampler->sampler[1] = 0;
+   sampler->sampler[2] = 0;
+
+   return true;
+}
+
+/**
+ * Modify \p sampler to work with \p surf.  There will be loss of information.
+ * Callers should make a copy of the original sampler first.
+ */
+bool
+ilo_state_sampler_set_surface(struct ilo_state_sampler *sampler,
+                              const struct ilo_dev *dev,
+                              const struct ilo_state_surface *surf)
+{
+   uint32_t addr_ctrl;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (sampler->non_normalized) {
+      /* see sampler_validate_gen6_non_normalized() */
+      assert(surf->type == GEN6_SURFTYPE_2D ||
+             surf->type == GEN6_SURFTYPE_3D);
+      assert(!surf->min_lod && !surf->mip_count);
+   }
+
+   /* Gen6 errata workaround: point Base at SurfMinLod */
+   if (sampler->base_to_surf_min_lod) {
+      const uint8_t base = surf->min_lod << GEN6_SAMPLER_DW0_BASE_LOD__RADIX;
+
+      sampler->sampler[0] =
+         (sampler->sampler[0] & ~GEN6_SAMPLER_DW0_BASE_LOD__MASK) |
+         base << GEN6_SAMPLER_DW0_BASE_LOD__SHIFT;
+   }
+
+   /* swap in the precomputed filters for integer or 3D surfaces */
+   if (surf->is_integer || surf->type == GEN6_SURFTYPE_3D) {
+      const uint32_t mask = (GEN6_SAMPLER_DW0_MIP_FILTER__MASK |
+                             GEN6_SAMPLER_DW0_MIN_FILTER__MASK |
+                             GEN6_SAMPLER_DW0_MAG_FILTER__MASK);
+      const uint32_t filter = (surf->is_integer) ?
+         sampler->filter_integer : sampler->filter_3d;
+
+      assert((filter & mask) == filter);
+      sampler->sampler[0] = (sampler->sampler[0] & ~mask) |
+                            filter;
+   }
+
+   /* pick the wrap modes precomputed for this surface type */
+   switch (surf->type) {
+   case GEN6_SURFTYPE_1D:
+      addr_ctrl = sampler->addr_ctrl_1d;
+      break;
+   case GEN6_SURFTYPE_2D:
+   case GEN6_SURFTYPE_3D:
+      addr_ctrl = sampler->addr_ctrl_2d_3d;
+      break;
+   case GEN6_SURFTYPE_CUBE:
+      addr_ctrl = sampler->addr_ctrl_cube;
+      break;
+   default:
+      assert(!"unexpected surface type");
+      addr_ctrl = 0;
+      break;
+   }
+
+   /* the wrap-mode fields live in DW3 on Gen7+ and DW1 on Gen6 */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      const uint32_t mask = (GEN7_SAMPLER_DW3_U_WRAP__MASK |
+                             GEN7_SAMPLER_DW3_V_WRAP__MASK |
+                             GEN7_SAMPLER_DW3_R_WRAP__MASK);
+
+      assert((addr_ctrl & mask) == addr_ctrl);
+      sampler->sampler[2] = (sampler->sampler[2] & ~mask) |
+                            addr_ctrl;
+   } else {
+      const uint32_t mask = (GEN6_SAMPLER_DW1_U_WRAP__MASK |
+                             GEN6_SAMPLER_DW1_V_WRAP__MASK |
+                             GEN6_SAMPLER_DW1_R_WRAP__MASK);
+
+      assert((addr_ctrl & mask) == addr_ctrl);
+      sampler->sampler[1] = (sampler->sampler[1] & ~mask) |
+                            addr_ctrl;
+   }
+
+   return true;
+}
+
+/* Initialize the packed border color for the device's generation. */
+bool
+ilo_state_sampler_border_init(struct ilo_state_sampler_border *border,
+                              const struct ilo_dev *dev,
+                              const struct ilo_state_sampler_border_info *info)
+{
+   bool ok;
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      ok = sampler_border_set_gen7_SAMPLER_BORDER_COLOR_STATE(border,
+            dev, info);
+   } else {
+      ok = sampler_border_set_gen6_SAMPLER_BORDER_COLOR_STATE(border,
+            dev, info);
+   }
+
+   assert(ok);
+
+   return ok;
+}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_sampler.h b/src/gallium/drivers/ilo/core/ilo_state_sampler.h
new file mode 100644
index 00000000000..75c7620a678
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_sampler.h
@@ -0,0 +1,103 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#ifndef ILO_STATE_SAMPLER_H
+#define ILO_STATE_SAMPLER_H
+
+#include "genhw/genhw.h"
+
+#include "ilo_core.h"
+#include "ilo_dev.h"
+
+struct ilo_state_surface;
+
+struct ilo_state_sampler_info {
+   /* use non-normalized (texel-space) texture coordinates */
+   bool non_normalized;
+
+   /* LOD bias and clamp range, in mip levels */
+   float lod_bias;
+   float min_lod;
+   float max_lod;
+
+   enum gen_mip_filter mip_filter;
+   enum gen_map_filter min_filter;
+   enum gen_map_filter mag_filter;
+   enum gen_aniso_ratio max_anisotropy;
+
+   /* per-axis texture coordinate address control (wrap) modes */
+   enum gen_texcoord_mode tcx_ctrl;
+   enum gen_texcoord_mode tcy_ctrl;
+   enum gen_texcoord_mode tcz_ctrl;
+
+   /* comparison function for shadow (sample_c) sampling */
+   enum gen_prefilter_op shadow_func;
+};
+
+struct ilo_state_sampler_border_info {
+   /* border color; f[] and ui[] alias the same 128 bits */
+   union {
+      float f[4];
+      uint32_t ui[4];
+   } rgba;
+
+   /* true when the color is meant for an integer surface format */
+   bool is_integer;
+};
+
+struct ilo_state_sampler {
+   /* SAMPLER_STATE DW0, DW1, and DW3 */
+   uint32_t sampler[3];
+
+   /* replacement filter bits for integer-format and 3D surfaces */
+   uint32_t filter_integer;
+   uint32_t filter_3d;
+
+   /* replacement wrap-mode bits, selected by surface type */
+   uint32_t addr_ctrl_1d;
+   uint32_t addr_ctrl_2d_3d;
+   uint32_t addr_ctrl_cube;
+
+   bool non_normalized;
+   /* Gen6 min!=mag errata: Base LOD must track SurfMinLod */
+   bool base_to_surf_min_lod;
+};
+
+struct ilo_state_sampler_border {
+   /* SAMPLER_BORDER_COLOR_STATE dwords (all 12 used on Gen6) */
+   uint32_t color[12];
+};
+
+bool
+ilo_state_sampler_init(struct ilo_state_sampler *sampler,
+ const struct ilo_dev *dev,
+ const struct ilo_state_sampler_info *info);
+
+bool
+ilo_state_sampler_init_disabled(struct ilo_state_sampler *sampler,
+ const struct ilo_dev *dev);
+
+bool
+ilo_state_sampler_set_surface(struct ilo_state_sampler *sampler,
+ const struct ilo_dev *dev,
+ const struct ilo_state_surface *surf);
+
+bool
+ilo_state_sampler_border_init(struct ilo_state_sampler_border *border,
+ const struct ilo_dev *dev,
+ const struct ilo_state_sampler_border_info *info);
+
+#endif /* ILO_STATE_SAMPLER_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_state_sbe.c b/src/gallium/drivers/ilo/core/ilo_state_sbe.c
new file mode 100644
index 00000000000..5d1d400acdd
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_sbe.c
@@ -0,0 +1,350 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#include "ilo_debug.h"
+#include "ilo_state_sbe.h"
+
+/*
+ * Validate an ilo_state_sbe_info against the hardware limits quoted in the
+ * PRM excerpts below.  Always returns true; violations are caught by the
+ * asserts in debug builds only.
+ */
+static bool
+sbe_validate_gen8(const struct ilo_dev *dev,
+ const struct ilo_state_sbe_info *info)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ assert(info->attr_count <= ILO_STATE_SBE_MAX_ATTR_COUNT);
+
+ /* the reads must stay within the VUE attributes actually written */
+ assert(info->vue_read_base + info->vue_read_count <=
+ info->cv_vue_attr_count);
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 248:
+ *
+ * "(Vertex URB Entry Read Length)
+ * Format: U5
+ * Range [1,16]
+ *
+ * Specifies the amount of URB data read for each Vertex URB entry, in
+ * 256-bit register increments.
+ *
+ * Programming Notes
+ * It is UNDEFINED to set this field to 0 indicating no Vertex URB
+ * data to be read."
+ *
+ * "(Vertex URB Entry Read Offset)
+ * Format: U6
+ * Range [0,63]
+ *
+ * Specifies the offset (in 256-bit units) at which Vertex URB data is
+ * to be read from the URB."
+ */
+ /* read base/offset are in attribute pairs: U6 * 2 == 126 max */
+ assert(info->vue_read_base % 2 == 0 && info->vue_read_base <= 126);
+ assert(info->vue_read_count <= 32);
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 268:
+ *
+ * "This field (Point Sprite Texture Coordinate Enable) must be
+ * programmed to 0 when non-point primitives are rendered."
+ */
+ if (ilo_dev_gen(dev) < ILO_GEN(7.5) && info->point_sprite_enables)
+ assert(info->cv_is_point);
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 246:
+ *
+ * "(Number of SF Output Attributes) 33-48: Specifies 17-32 attributes
+ * (# attributes = field value - 16). Swizzling performed on
+ * Attributes 16-31 (as required) only. Attributes 0-15 passed through
+ * unmodified.
+ *
+ * Note :
+ *
+ * Attribute n Component Override and Constant Source states apply to
+ * Attributes 16-31 (as required) instead of Attributes 0-15. E.g.,
+ * this allows an Attribute 16-31 component to be overridden with the
+ * PrimitiveID value.
+ *
+ * Attribute n WrapShortest Enables still apply to Attributes 0-15.
+ *
+ * Attribute n Swizzle Select and Attribute n Source Attribute states
+ * are ignored and none of the swizzling functions available through
+ * these controls are performed."
+ *
+ * From the Sandy Bridge PRM, volume 2 part 1, page 247:
+ *
+ * "This bit (Attribute Swizzle Enable) controls the use of the
+ * Attribute n Swizzle Select and Attribute n Source Attribute fields
+ * only. If ENABLED, those fields are used as described below. If
+ * DISABLED, attributes are copied from their corresponding source
+ * attributes, for the purposes of Swizzle Select only.
+ *
+ * Note that the following fields are unaffected by this bit, and are
+ * therefore always used to control their respective fields:
+ * Attribute n Component Override X/Y/Z/W
+ * Attribute n Constant Source
+ * Attribute n WrapShortest Enables"
+ *
+ * From the Ivy Bridge PRM, volume 2 part 1, page 264:
+ *
+ * "When Attribute Swizzle Enable is ENABLED, this bit (Attribute
+ * Swizzle Control Mode) controls whether attributes 0-15 or 16-31 are
+ * subject to the following swizzle controls:
+ *
+ * - Attribute n Component Override X/Y/Z/W
+ * - Attribute n Constant Source
+ * - Attribute n Swizzle Select
+ * - Attribute n Source Attribute
+ * - Attribute n Wrap Shortest Enables"
+ *
+ * "SWIZ_16_31... Only valid when 16 or more attributes are output."
+ */
+ assert(info->swizzle_count <= ILO_STATE_SBE_MAX_SWIZZLE_COUNT);
+ if (info->swizzle_16_31) {
+ assert(ilo_dev_gen(dev) >= ILO_GEN(7) &&
+ info->swizzle_enable &&
+ info->attr_count > 16);
+ }
+
+ return true;
+}
+
+/*
+ * Return the minimum number of VUE attributes that must be read so that
+ * every SBE output attribute (pass-through or swizzled) has a source.
+ */
+static uint8_t
+sbe_get_gen8_min_read_count(const struct ilo_dev *dev,
+                            const struct ilo_state_sbe_info *info)
+{
+   uint8_t needed = 0;
+   uint8_t i;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* attributes not covered by the swizzle table are passed through */
+   if (!info->swizzle_enable || info->swizzle_count < info->attr_count) {
+      needed = (info->swizzle_16_31 &&
+                info->swizzle_count + 16 == info->attr_count) ?
+         16 : info->attr_count;
+   }
+
+   if (!info->swizzle_enable)
+      return needed;
+
+   /* swizzled attributes can source from anywhere among the reads */
+   for (i = 0; i < info->swizzle_count; i++) {
+      const struct ilo_state_sbe_swizzle_info *swz = &info->swizzles[i];
+      uint8_t last;
+
+      switch (swz->attr_select) {
+      case GEN6_INPUTATTR_FACING:
+      case GEN6_INPUTATTR_FACING_W:
+         /* facing selects also consume the attribute following swz->attr */
+         last = swz->attr + 2;
+         break;
+      default:
+         last = swz->attr + 1;
+         break;
+      }
+
+      if (needed < last)
+         needed = last;
+   }
+
+   return needed;
+}
+
+/*
+ * Return "Vertex URB Entry Read Length" in 256-bit units (attribute
+ * pairs), honoring the PRM erratum quoted below that forbids programming
+ * a length larger than what the maximum source attribute requires.
+ */
+static uint8_t
+sbe_get_gen8_read_length(const struct ilo_dev *dev,
+ const struct ilo_state_sbe_info *info)
+{
+ uint8_t read_len;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 248:
+ *
+ * "(Vertex URB Entry Read Length)
+ * This field should be set to the minimum length required to read the
+ * maximum source attribute. The maximum source attribute is indicated
+ * by the maximum value of the enabled Attribute # Source Attribute if
+ * Attribute Swizzle Enable is set, Number of Output Attributes -1 if
+ * enable is not set.
+ * read_length = ceiling((max_source_attr+1)/2)
+ *
+ * [errata] Corruption/Hang possible if length programmed larger than
+ * recommended"
+ */
+ if (info->has_min_read_count) {
+ /* the caller promises vue_read_count is already the minimum */
+ read_len = info->vue_read_count;
+ assert(read_len == sbe_get_gen8_min_read_count(dev, info));
+ } else {
+ read_len = sbe_get_gen8_min_read_count(dev, info);
+ assert(read_len <= info->vue_read_count);
+ }
+
+ /*
+ * In pairs. URB entries are aligned to 1024-bits or 512-bits. There is
+ * no need to worry about reading past entries.
+ */
+ read_len = (read_len + 1) / 2;
+ if (!read_len)
+ read_len = 1;
+
+ return read_len;
+}
+
+/*
+ * Pack the device-independent dwords of 3DSTATE_SBE (DW1-DW3 on Gen7+;
+ * the corresponding 3DSTATE_SF fields on Gen6).
+ */
+static bool
+sbe_set_gen8_3DSTATE_SBE(struct ilo_state_sbe *sbe,
+ const struct ilo_dev *dev,
+ const struct ilo_state_sbe_info *info)
+{
+ uint8_t vue_read_offset, vue_read_len;
+ uint8_t attr_count;
+ uint32_t dw1, dw2, dw3;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ if (!sbe_validate_gen8(dev, info))
+ return false;
+
+ /* hardware fields are in attribute pairs (256-bit units) */
+ vue_read_offset = info->vue_read_base / 2;
+ vue_read_len = sbe_get_gen8_read_length(dev, info);
+
+ attr_count = info->attr_count;
+ /*
+ * NOTE(review): sbe_validate_gen8() asserts swizzle_16_31 implies Gen7+,
+ * which makes this Gen6 adjustment look unreachable -- confirm intent.
+ */
+ if (ilo_dev_gen(dev) == ILO_GEN(6) && info->swizzle_16_31)
+ attr_count += 16;
+
+ dw1 = attr_count << GEN7_SBE_DW1_ATTR_COUNT__SHIFT |
+ vue_read_len << GEN7_SBE_DW1_URB_READ_LEN__SHIFT;
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+ dw1 |= GEN8_SBE_DW1_USE_URB_READ_LEN |
+ GEN8_SBE_DW1_USE_URB_READ_OFFSET |
+ vue_read_offset << GEN8_SBE_DW1_URB_READ_OFFSET__SHIFT;
+ } else {
+ dw1 |= vue_read_offset << GEN7_SBE_DW1_URB_READ_OFFSET__SHIFT;
+ }
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(7) && info->swizzle_16_31)
+ dw1 |= GEN7_SBE_DW1_ATTR_SWIZZLE_16_31;
+
+ if (info->swizzle_enable)
+ dw1 |= GEN7_SBE_DW1_ATTR_SWIZZLE_ENABLE;
+
+ dw1 |= (info->point_sprite_origin_lower_left) ?
+ GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_LOWERLEFT :
+ GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_UPPERLEFT;
+
+ /* one enable bit per attribute */
+ dw2 = info->point_sprite_enables;
+ dw3 = info->const_interp_enables;
+
+ STATIC_ASSERT(ARRAY_SIZE(sbe->sbe) >= 3);
+ sbe->sbe[0] = dw1;
+ sbe->sbe[1] = dw2;
+ sbe->sbe[2] = dw3;
+
+ return true;
+}
+
+/*
+ * Pack the 3DSTATE_SBE_SWIZ swizzle table.  Entries beyond swizzle_count
+ * are programmed as pass-through (attribute i copied unmodified).
+ */
+static bool
+sbe_set_gen8_3DSTATE_SBE_SWIZ(struct ilo_state_sbe *sbe,
+ const struct ilo_dev *dev,
+ const struct ilo_state_sbe_info *info)
+{
+ uint16_t swiz[ILO_STATE_SBE_MAX_SWIZZLE_COUNT];
+ uint8_t i;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ for (i = 0; i < info->swizzle_count; i++) {
+ const struct ilo_state_sbe_swizzle_info *swizzle = &info->swizzles[i];
+
+ /* U5 */
+ assert(swizzle->attr < 32);
+ swiz[i] = swizzle->attr_select << GEN8_SBE_SWIZ_SWIZZLE_SELECT__SHIFT |
+ swizzle->attr << GEN8_SBE_SWIZ_SRC_ATTR__SHIFT;
+
+ /* override all four components with the constant 0.0 source */
+ if (swizzle->force_zeros) {
+ swiz[i] |= GEN8_SBE_SWIZ_OVERRIDE_W |
+ GEN8_SBE_SWIZ_OVERRIDE_Z |
+ GEN8_SBE_SWIZ_OVERRIDE_Y |
+ GEN8_SBE_SWIZ_OVERRIDE_X |
+ GEN8_SBE_SWIZ_CONST_0000;
+ }
+ }
+
+ /* remaining entries: identity mapping */
+ for (; i < ARRAY_SIZE(swiz); i++) {
+ swiz[i] = GEN6_INPUTATTR_NORMAL << GEN8_SBE_SWIZ_SWIZZLE_SELECT__SHIFT |
+ i << GEN8_SBE_SWIZ_SRC_ATTR__SHIFT;
+ }
+
+ /* 16 uint16_t entries packed into 8 uint32_t dwords */
+ STATIC_ASSERT(sizeof(sbe->swiz) == sizeof(swiz));
+ memcpy(sbe->swiz, swiz, sizeof(swiz));
+
+ return true;
+}
+
+/* initialize an SBE state object; the struct must be zero-filled first */
+bool
+ilo_state_sbe_init(struct ilo_state_sbe *sbe,
+                   const struct ilo_dev *dev,
+                   const struct ilo_state_sbe_info *info)
+{
+   /* a fresh object must start from all-zero */
+   assert(ilo_is_zeroed(sbe, sizeof(*sbe)));
+
+   return ilo_state_sbe_set_info(sbe, dev, info);
+}
+
+/*
+ * Initialize an SBE state for RECTLIST blitter use: attributes map 1:1 to
+ * the VUE reads starting at read_base, with no swizzling or sprite coords.
+ */
+bool
+ilo_state_sbe_init_for_rectlist(struct ilo_state_sbe *sbe,
+                                const struct ilo_dev *dev,
+                                uint8_t read_base,
+                                uint8_t read_count)
+{
+   /* designated initializer zeroes every field we do not name */
+   const struct ilo_state_sbe_info info = {
+      .attr_count = read_count,
+      .cv_vue_attr_count = read_base + read_count,
+      .vue_read_base = read_base,
+      .vue_read_count = read_count,
+      .has_min_read_count = true,
+   };
+
+   return ilo_state_sbe_set_info(sbe, dev, &info);
+}
+
+/*
+ * Re-pack the SBE state from info.  Returns false when the info is invalid
+ * and the packed state must not be used.
+ */
+bool
+ilo_state_sbe_set_info(struct ilo_state_sbe *sbe,
+                       const struct ilo_dev *dev,
+                       const struct ilo_state_sbe_info *info)
+{
+   bool ret = true;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   ret &= sbe_set_gen8_3DSTATE_SBE(sbe, dev, info);
+   ret &= sbe_set_gen8_3DSTATE_SBE_SWIZ(sbe, dev, info);
+
+   assert(ret);
+
+   /*
+    * bug fix: propagate the failure instead of returning true
+    * unconditionally, matching ilo_state_vs_init() and the other packers;
+    * in NDEBUG builds the assert above is compiled out and callers would
+    * otherwise never learn that packing failed
+    */
+   return ret;
+}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_sbe.h b/src/gallium/drivers/ilo/core/ilo_state_sbe.h
new file mode 100644
index 00000000000..122999a9e94
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_sbe.h
@@ -0,0 +1,103 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#ifndef ILO_STATE_SBE_H
+#define ILO_STATE_SBE_H
+
+#include "genhw/genhw.h"
+
+#include "ilo_core.h"
+#include "ilo_dev.h"
+
+/*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 264:
+ *
+ * "Number of SF Output Attributes sets the number of attributes that will
+ * be output from the SF stage, not including position. This can be used
+ * to specify up to 32, and may differ from the number of input
+ * attributes."
+ *
+ * "The first or last set of 16 attributes can be swizzled according to
+ * certain state fields."
+ */
+#define ILO_STATE_SBE_MAX_ATTR_COUNT 32
+#define ILO_STATE_SBE_MAX_SWIZZLE_COUNT 16
+
+/* one entry of the 3DSTATE_SBE_SWIZ table */
+struct ilo_state_sbe_swizzle_info {
+ /* select an attribute from read ones */
+ enum gen_inputattr_select attr_select;
+ /* source attribute index, U5 ([0, 31]) */
+ uint8_t attr;
+
+ /* override all four components with constant 0000 */
+ bool force_zeros;
+};
+
+/* CPU-side description used to pack 3DSTATE_SBE; see ilo_state_sbe_set_info() */
+struct ilo_state_sbe_info {
+ /* number of FS input attributes, [0, ILO_STATE_SBE_MAX_ATTR_COUNT] */
+ uint8_t attr_count;
+
+ /* which VUE attributes to read */
+ uint8_t cv_vue_attr_count;
+ uint8_t vue_read_base;
+ uint8_t vue_read_count;
+ /* when set, vue_read_count is exactly the minimum and is used as-is */
+ bool has_min_read_count;
+
+ bool cv_is_point;
+ bool point_sprite_origin_lower_left;
+ /* force sprite coordinates to the four corner vertices of the point */
+ uint32_t point_sprite_enables;
+
+ /* force attr at the provoking vertex to a0 and zero to a1/a2 */
+ uint32_t const_interp_enables;
+
+ bool swizzle_enable;
+ /* swizzle attribute 16 to 31 instead; Gen7+ only */
+ bool swizzle_16_31;
+ uint8_t swizzle_count;
+ const struct ilo_state_sbe_swizzle_info *swizzles;
+};
+
+/* packed 3DSTATE_SBE dwords and the 3DSTATE_SBE_SWIZ table */
+struct ilo_state_sbe {
+ uint32_t sbe[3];
+ /* 16 uint16_t swizzle entries packed two per dword */
+ uint32_t swiz[8];
+};
+
+/* initialize a zero-filled SBE state object from info */
+bool
+ilo_state_sbe_init(struct ilo_state_sbe *sbe,
+ const struct ilo_dev *dev,
+ const struct ilo_state_sbe_info *info);
+
+/* initialize an SBE state for RECTLIST (blitter) use */
+bool
+ilo_state_sbe_init_for_rectlist(struct ilo_state_sbe *sbe,
+ const struct ilo_dev *dev,
+ uint8_t read_base,
+ uint8_t read_count);
+
+/* re-pack an SBE state object from info */
+bool
+ilo_state_sbe_set_info(struct ilo_state_sbe *sbe,
+ const struct ilo_dev *dev,
+ const struct ilo_state_sbe_info *info);
+
+#endif /* ILO_STATE_SBE_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader.c b/src/gallium/drivers/ilo/core/ilo_state_shader.c
new file mode 100644
index 00000000000..f67326c7f10
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_shader.c
@@ -0,0 +1,737 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#include "ilo_debug.h"
+#include "ilo_state_shader.h"
+
+/* vertex pipeline stages that share the Gen6 thread-dispatch layout */
+enum vertex_stage {
+ STAGE_VS,
+ STAGE_HS,
+ STAGE_DS,
+ STAGE_GS,
+};
+
+/* fixed-function fields shared by the VS/HS/DS/GS packers, derived from
+ * the shader info by vertex_get_gen6_ff() */
+struct vertex_ff {
+ uint8_t grf_start;
+ /* log2 of per-thread scratch size, in 1KB units (hardware encoding) */
+ uint8_t scratch_space;
+
+ /* prefetch hints, in hardware encodings */
+ uint8_t sampler_count;
+ uint8_t surface_count;
+ bool has_uav;
+
+ /* VUE reads, in attribute pairs (256-bit units) */
+ uint8_t vue_read_offset;
+ uint8_t vue_read_len;
+
+ uint8_t user_clip_enables;
+};
+
+/*
+ * Validate the kernel info against the per-stage GRF-start and scratch
+ * limits.  Always returns true; violations trip debug-build asserts.
+ */
+static bool
+vertex_validate_gen6_kernel(const struct ilo_dev *dev,
+ enum vertex_stage stage,
+ const struct ilo_state_shader_kernel_info *kernel)
+{
+ /*
+ * "Dispatch GRF Start Register for URB Data" is U4 for GS and U5 for
+ * others.
+ */
+ const uint8_t max_grf_start = (stage == STAGE_GS) ? 16 : 32;
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 134:
+ *
+ * "(Per-Thread Scratch Space)
+ * Range [0,11] indicating [1K Bytes, 2M Bytes]"
+ */
+ const uint32_t max_scratch_size = 2 * 1024 * 1024;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /* we do not want to save it */
+ assert(!kernel->offset);
+
+ assert(kernel->grf_start < max_grf_start);
+ assert(kernel->scratch_size <= max_scratch_size);
+
+ return true;
+}
+
+/*
+ * Validate the URB read range against the per-stage field widths.
+ * Always returns true; violations trip debug-build asserts.
+ */
+static bool
+vertex_validate_gen6_urb(const struct ilo_dev *dev,
+ enum vertex_stage stage,
+ const struct ilo_state_shader_urb_info *urb)
+{
+ /* "Vertex/Patch URB Entry Read Offset" is U6, in pairs */
+ const uint8_t max_read_base = 63 * 2;
+ /*
+ * "Vertex/Patch URB Entry Read Length" is limited to 64 for DS and U6 for
+ * others, in pairs
+ */
+ const uint8_t max_read_count = ((stage == STAGE_DS) ? 64 : 63) * 2;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /* the reads must stay within the attributes the previous stage wrote */
+ assert(urb->read_base + urb->read_count <= urb->cv_input_attr_count);
+
+ assert(urb->read_base % 2 == 0 && urb->read_base <= max_read_base);
+
+ /*
+ * There is no need to worry about reading past entries, as URB entries are
+ * aligned to 1024-bits (Gen6) or 512-bits (Gen7+).
+ */
+ assert(urb->read_count <= max_read_count);
+
+ return true;
+}
+
+/*
+ * Derive the shared fixed-function fields (GRF start, scratch encoding,
+ * prefetch counts, VUE read range) from the shader info.  Returns false
+ * when validation fails.
+ */
+static bool
+vertex_get_gen6_ff(const struct ilo_dev *dev,
+ enum vertex_stage stage,
+ const struct ilo_state_shader_kernel_info *kernel,
+ const struct ilo_state_shader_resource_info *resource,
+ const struct ilo_state_shader_urb_info *urb,
+ struct vertex_ff *ff)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ if (!vertex_validate_gen6_kernel(dev, stage, kernel) ||
+ !vertex_validate_gen6_urb(dev, stage, urb))
+ return false;
+
+ ff->grf_start = kernel->grf_start;
+ /* next power of two, starting from 1KB */
+ ff->scratch_space = (kernel->scratch_size > 1024) ?
+ (util_last_bit(kernel->scratch_size - 1) - 10): 0;
+
+ /* hardware field is in units of 4 samplers, saturating at encoding 4 */
+ ff->sampler_count = (resource->sampler_count <= 12) ?
+ (resource->sampler_count + 3) / 4 : 4;
+ ff->surface_count = resource->surface_count;
+ ff->has_uav = resource->has_uav;
+
+ /* convert attributes to attribute pairs (256-bit units), rounding up */
+ ff->vue_read_offset = urb->read_base / 2;
+ ff->vue_read_len = (urb->read_count + 1) / 2;
+
+ /* need to read something unless VUE handles are included */
+ switch (stage) {
+ case STAGE_VS:
+ if (!ff->vue_read_len)
+ ff->vue_read_len = 1;
+
+ /* one GRF per attribute */
+ assert(kernel->grf_start + urb->read_count * 2 <= 128);
+ break;
+ case STAGE_GS:
+ if (ilo_dev_gen(dev) == ILO_GEN(6) && !ff->vue_read_len)
+ ff->vue_read_len = 1;
+ break;
+ default:
+ break;
+ }
+
+ ff->user_clip_enables = urb->user_clip_enables;
+
+ return true;
+}
+
+/* return the encoded "Maximum Number of Threads" of 3DSTATE_VS */
+static uint16_t
+vs_get_gen6_thread_count(const struct ilo_dev *dev,
+                         const struct ilo_state_vs_info *info)
+{
+   uint16_t max_threads;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (ilo_dev_gen(dev) == ILO_GEN(8))
+      max_threads = 504;
+   else if (ilo_dev_gen(dev) == ILO_GEN(7.5))
+      max_threads = (dev->gt >= 2) ? 280 : 70;
+   else
+      max_threads = dev->thread_count;
+
+   /* the hardware field holds the count minus one */
+   return max_threads - 1;
+}
+
+/*
+ * Pack the device-independent dwords of 3DSTATE_VS (DW2-DW5, plus the
+ * Gen8 clip-enable dword).  Returns false when the info is invalid.
+ */
+static bool
+vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs,
+ const struct ilo_dev *dev,
+ const struct ilo_state_vs_info *info)
+{
+ struct vertex_ff ff;
+ uint16_t thread_count;
+ uint32_t dw2, dw3, dw4, dw5;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ if (!vertex_get_gen6_ff(dev, STAGE_VS, &info->kernel,
+ &info->resource, &info->urb, &ff))
+ return false;
+
+ thread_count = vs_get_gen6_thread_count(dev, info);
+
+ dw2 = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
+ ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
+
+ /* NOTE(review): permanently-disabled placeholder for ALT FP mode */
+ if (false)
+ dw2 |= GEN6_THREADDISP_FP_MODE_ALT;
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
+ dw2 |= GEN75_THREADDISP_ACCESS_UAV;
+
+ dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+
+ dw4 = ff.grf_start << GEN6_VS_DW4_URB_GRF_START__SHIFT |
+ ff.vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT |
+ ff.vue_read_offset << GEN6_VS_DW4_URB_READ_OFFSET__SHIFT;
+
+ dw5 = 0;
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
+ dw5 |= thread_count << GEN75_VS_DW5_MAX_THREADS__SHIFT;
+ else
+ dw5 |= thread_count << GEN6_VS_DW5_MAX_THREADS__SHIFT;
+
+ if (info->stats_enable)
+ dw5 |= GEN6_VS_DW5_STATISTICS;
+ if (info->dispatch_enable)
+ dw5 |= GEN6_VS_DW5_VS_ENABLE;
+
+ STATIC_ASSERT(ARRAY_SIZE(vs->vs) >= 5);
+ vs->vs[0] = dw2;
+ vs->vs[1] = dw3;
+ vs->vs[2] = dw4;
+ vs->vs[3] = dw5;
+
+ /* Gen8 moved the UCP clip enables into 3DSTATE_VS */
+ if (ilo_dev_gen(dev) >= ILO_GEN(8))
+ vs->vs[4] = ff.user_clip_enables << GEN8_VS_DW8_UCP_CLIP_ENABLES__SHIFT;
+
+ return true;
+}
+
+/* return the encoded "Maximum Number of Threads" of 3DSTATE_HS */
+static uint16_t
+hs_get_gen7_thread_count(const struct ilo_dev *dev,
+                         const struct ilo_state_hs_info *info)
+{
+   uint16_t max_threads;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   if (ilo_dev_gen(dev) == ILO_GEN(8))
+      max_threads = 504;
+   else if (ilo_dev_gen(dev) == ILO_GEN(7.5))
+      max_threads = (dev->gt >= 2) ? 256 : 70;
+   else
+      max_threads = dev->thread_count;
+
+   /* the hardware field holds the count minus one */
+   return max_threads - 1;
+}
+
+/*
+ * Pack the device-independent dwords of 3DSTATE_HS (DW1, DW2, DW4, DW5).
+ * Returns false when the info is invalid.
+ */
+static bool
+hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs,
+ const struct ilo_dev *dev,
+ const struct ilo_state_hs_info *info)
+{
+ struct vertex_ff ff;
+ uint16_t thread_count;
+ uint32_t dw1, dw2, dw4, dw5;
+
+ ILO_DEV_ASSERT(dev, 7, 8);
+
+ if (!vertex_get_gen6_ff(dev, STAGE_HS, &info->kernel,
+ &info->resource, &info->urb, &ff))
+ return false;
+
+ thread_count = hs_get_gen7_thread_count(dev, info);
+
+ dw1 = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
+ ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
+
+ /* the max-threads field moved within DW1 on Gen7.5 */
+ if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
+ dw1 |= thread_count << GEN75_HS_DW1_DISPATCH_MAX_THREADS__SHIFT;
+ else
+ dw1 |= thread_count << GEN7_HS_DW1_DISPATCH_MAX_THREADS__SHIFT;
+
+ /* single-instance dispatch */
+ dw2 = 0 << GEN7_HS_DW2_INSTANCE_COUNT__SHIFT;
+
+ if (info->dispatch_enable)
+ dw2 |= GEN7_HS_DW2_HS_ENABLE;
+ if (info->stats_enable)
+ dw2 |= GEN7_HS_DW2_STATISTICS;
+
+ dw4 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+
+ dw5 = GEN7_HS_DW5_INCLUDE_VERTEX_HANDLES |
+ ff.grf_start << GEN7_HS_DW5_URB_GRF_START__SHIFT |
+ ff.vue_read_len << GEN7_HS_DW5_URB_READ_LEN__SHIFT |
+ ff.vue_read_offset << GEN7_HS_DW5_URB_READ_OFFSET__SHIFT;
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
+ dw5 |= GEN75_HS_DW5_ACCESS_UAV;
+
+ STATIC_ASSERT(ARRAY_SIZE(hs->hs) >= 4);
+ hs->hs[0] = dw1;
+ hs->hs[1] = dw2;
+ hs->hs[2] = dw4;
+ hs->hs[3] = dw5;
+
+ return true;
+}
+
+/* pack the device-independent dwords of 3DSTATE_TE */
+static bool
+ds_set_gen7_3DSTATE_TE(struct ilo_state_ds *ds,
+                       const struct ilo_dev *dev,
+                       const struct ilo_state_ds_info *info)
+{
+   uint32_t dw1 = 0;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   if (info->dispatch_enable)
+      dw1 |= GEN7_TE_DW1_MODE_HW | GEN7_TE_DW1_TE_ENABLE;
+
+   STATIC_ASSERT(ARRAY_SIZE(ds->te) >= 3);
+   ds->te[0] = dw1;
+   /* maximum tessellation factors, stored as raw float bit patterns */
+   ds->te[1] = fui(63.0f);
+   ds->te[2] = fui(64.0f);
+
+   return true;
+}
+
+/* return the encoded "Maximum Number of Threads" of 3DSTATE_DS */
+static uint16_t
+ds_get_gen7_thread_count(const struct ilo_dev *dev,
+                         const struct ilo_state_ds_info *info)
+{
+   uint16_t max_threads;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   if (ilo_dev_gen(dev) == ILO_GEN(8))
+      max_threads = 504;
+   else if (ilo_dev_gen(dev) == ILO_GEN(7.5))
+      max_threads = (dev->gt >= 2) ? 280 : 70;
+   else
+      max_threads = dev->thread_count;
+
+   /* the hardware field holds the count minus one */
+   return max_threads - 1;
+}
+
+/*
+ * Pack the device-independent dwords of 3DSTATE_DS (DW2-DW5, plus the
+ * Gen8 clip-enable dword).  Returns false when the info is invalid.
+ */
+static bool
+ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds,
+ const struct ilo_dev *dev,
+ const struct ilo_state_ds_info *info)
+{
+ struct vertex_ff ff;
+ uint16_t thread_count;
+ uint32_t dw2, dw3, dw4, dw5;
+
+ ILO_DEV_ASSERT(dev, 7, 8);
+
+ if (!vertex_get_gen6_ff(dev, STAGE_DS, &info->kernel,
+ &info->resource, &info->urb, &ff))
+ return false;
+
+ thread_count = ds_get_gen7_thread_count(dev, info);
+
+ dw2 = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
+ ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
+ dw2 |= GEN75_THREADDISP_ACCESS_UAV;
+
+ dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+
+ dw4 = ff.grf_start << GEN7_DS_DW4_URB_GRF_START__SHIFT |
+ ff.vue_read_len << GEN7_DS_DW4_URB_READ_LEN__SHIFT |
+ ff.vue_read_offset << GEN7_DS_DW4_URB_READ_OFFSET__SHIFT;
+
+ dw5 = 0;
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
+ dw5 |= thread_count << GEN75_DS_DW5_MAX_THREADS__SHIFT;
+ else
+ dw5 |= thread_count << GEN7_DS_DW5_MAX_THREADS__SHIFT;
+
+ if (info->stats_enable)
+ dw5 |= GEN7_DS_DW5_STATISTICS;
+ if (info->dispatch_enable)
+ dw5 |= GEN7_DS_DW5_DS_ENABLE;
+
+ STATIC_ASSERT(ARRAY_SIZE(ds->ds) >= 5);
+ ds->ds[0] = dw2;
+ ds->ds[1] = dw3;
+ ds->ds[2] = dw4;
+ ds->ds[3] = dw5;
+
+ /* Gen8 moved the UCP clip enables into 3DSTATE_DS */
+ if (ilo_dev_gen(dev) >= ILO_GEN(8))
+ ds->ds[4] = ff.user_clip_enables << GEN8_DS_DW8_UCP_CLIP_ENABLES__SHIFT;
+
+ return true;
+}
+
+/*
+ * Derive the GS fixed-function fields, adding the GS-specific vertex-size
+ * checks quoted below on top of the common vertex_get_gen6_ff() ones.
+ */
+static bool
+gs_get_gen6_ff(const struct ilo_dev *dev,
+ const struct ilo_state_gs_info *info,
+ struct vertex_ff *ff)
+{
+ const struct ilo_state_shader_urb_info *urb = &info->urb;
+ const struct ilo_state_gs_sol_info *sol = &info->sol;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ if (!vertex_get_gen6_ff(dev, STAGE_GS, &info->kernel,
+ &info->resource, &info->urb, ff))
+ return false;
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 168-169:
+ *
+ * "[0,62] indicating [1,63] 16B units"
+ *
+ * "Programming Restrictions: The vertex size must be programmed as a
+ * multiple of 32B units with the following exception: Rendering is
+ * disabled (as per SOL stage state) and the vertex size output by the
+ * GS thread is 16B.
+ *
+ * If rendering is enabled (as per SOL state) the vertex size must be
+ * programmed as a multiple of 32B units. In other words, the only
+ * time software can program a vertex size with an odd number of 16B
+ * units is when rendering is disabled."
+ */
+ assert(urb->output_attr_count <= 63);
+ if (!sol->render_disable)
+ assert(urb->output_attr_count % 2 == 0);
+
+ return true;
+}
+
+/*
+ * Return the encoded "Maximum Number of Threads" of 3DSTATE_GS, which on
+ * Gen6/7 is further limited when SOL rendering is enabled.
+ */
+static uint16_t
+gs_get_gen6_thread_count(const struct ilo_dev *dev,
+ const struct ilo_state_gs_info *info)
+{
+ const struct ilo_state_gs_sol_info *sol = &info->sol;
+ uint16_t thread_count;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /* Maximum Number of Threads of 3DSTATE_GS */
+ switch (ilo_dev_gen(dev)) {
+ case ILO_GEN(8):
+ thread_count = 504;
+ break;
+ case ILO_GEN(7.5):
+ thread_count = (dev->gt >= 2) ? 256 : 70;
+ break;
+ case ILO_GEN(7):
+ case ILO_GEN(6):
+ default:
+ thread_count = dev->thread_count;
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 154:
+ *
+ * "Maximum Number of Threads valid range is [0,27] when Rendering
+ * Enabled bit is set."
+ *
+ * According to the classic driver, [0, 20] for GT1.
+ */
+ if (!sol->render_disable)
+ thread_count = (dev->gt == 2) ? 27 : 20;
+ break;
+ }
+
+ /* the hardware field holds the count minus one */
+ return thread_count - 1;
+}
+
+/*
+ * Pack the device-independent dwords of the Gen6 3DSTATE_GS (DW2-DW6),
+ * including the SVBI / stream-output controls that only exist on Gen6.
+ */
+static bool
+gs_set_gen6_3DSTATE_GS(struct ilo_state_gs *gs,
+ const struct ilo_dev *dev,
+ const struct ilo_state_gs_info *info)
+{
+ const struct ilo_state_gs_sol_info *sol = &info->sol;
+ struct vertex_ff ff;
+ uint16_t thread_count;
+ uint32_t dw2, dw3, dw4, dw5, dw6;
+
+ ILO_DEV_ASSERT(dev, 6, 6);
+
+ if (!gs_get_gen6_ff(dev, info, &ff))
+ return false;
+
+ thread_count = gs_get_gen6_thread_count(dev, info);
+
+ /* Gen6 GS kernels run in single-program-flow mode */
+ dw2 = GEN6_THREADDISP_SPF |
+ ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
+ ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
+
+ dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+
+ dw4 = ff.vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT |
+ ff.vue_read_offset << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT |
+ ff.grf_start << GEN6_GS_DW4_URB_GRF_START__SHIFT;
+
+ dw5 = thread_count << GEN6_GS_DW5_MAX_THREADS__SHIFT;
+
+ if (info->stats_enable)
+ dw5 |= GEN6_GS_DW5_STATISTICS;
+ if (sol->stats_enable)
+ dw5 |= GEN6_GS_DW5_SO_STATISTICS;
+ if (!sol->render_disable)
+ dw5 |= GEN6_GS_DW5_RENDER_ENABLE;
+
+ dw6 = 0;
+
+ /* GEN7_REORDER_TRAILING is handled by the kernel */
+ if (sol->tristrip_reorder == GEN7_REORDER_LEADING)
+ dw6 |= GEN6_GS_DW6_REORDER_LEADING_ENABLE;
+
+ if (sol->sol_enable) {
+ dw6 |= GEN6_GS_DW6_SVBI_PAYLOAD_ENABLE;
+
+ if (sol->svbi_post_inc) {
+ dw6 |= GEN6_GS_DW6_SVBI_POST_INC_ENABLE |
+ sol->svbi_post_inc << GEN6_GS_DW6_SVBI_POST_INC_VAL__SHIFT;
+ }
+ }
+
+ if (info->dispatch_enable)
+ dw6 |= GEN6_GS_DW6_GS_ENABLE;
+
+ STATIC_ASSERT(ARRAY_SIZE(gs->gs) >= 5);
+ gs->gs[0] = dw2;
+ gs->gs[1] = dw3;
+ gs->gs[2] = dw4;
+ gs->gs[3] = dw5;
+ gs->gs[4] = dw6;
+
+ return true;
+}
+
+/* return the "Output Vertex Size" field value: size in 16B units, minus one */
+static uint8_t
+gs_get_gen7_vertex_size(const struct ilo_dev *dev,
+                        const struct ilo_state_gs_info *info)
+{
+   const uint8_t attr_count = info->urb.output_attr_count;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   /* clamp the zero-attribute case to the minimum encoding */
+   if (attr_count)
+      return attr_count - 1;
+
+   return 0;
+}
+
+/*
+ * Pack the device-independent dwords of the Gen7+ 3DSTATE_GS (DW2-DW5,
+ * plus the Gen8 clip-enable dword).  Returns false when invalid.
+ */
+static bool
+gs_set_gen7_3DSTATE_GS(struct ilo_state_gs *gs,
+                       const struct ilo_dev *dev,
+                       const struct ilo_state_gs_info *info)
+{
+   struct vertex_ff ff;
+   uint16_t thread_count;
+   uint8_t vertex_size;
+   uint32_t dw2, dw3, dw4, dw5;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   if (!gs_get_gen6_ff(dev, info, &ff))
+      return false;
+
+   thread_count = gs_get_gen6_thread_count(dev, info);
+   vertex_size = gs_get_gen7_vertex_size(dev, info);
+
+   dw2 = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
+         ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
+      dw2 |= GEN75_THREADDISP_ACCESS_UAV;
+
+   dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+
+   dw4 = vertex_size << GEN7_GS_DW4_OUTPUT_SIZE__SHIFT |
+         0 << GEN7_GS_DW4_OUTPUT_TOPO__SHIFT |
+         ff.vue_read_len << GEN7_GS_DW4_URB_READ_LEN__SHIFT |
+         GEN7_GS_DW4_INCLUDE_VERTEX_HANDLES |
+         ff.vue_read_offset << GEN7_GS_DW4_URB_READ_OFFSET__SHIFT |
+         ff.grf_start << GEN7_GS_DW4_URB_GRF_START__SHIFT;
+
+   dw5 = 0;
+
+   /*
+    * fix: OR the thread count into dw5 instead of overwriting it, matching
+    * vs_set_gen6_3DSTATE_VS() and ds_set_gen7_3DSTATE_DS(); the original
+    * "dw5 =" made the "dw5 = 0" above a dead store and would silently drop
+    * any bit set between the two statements
+    */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
+      dw5 |= thread_count << GEN75_GS_DW5_MAX_THREADS__SHIFT;
+   else
+      dw5 |= thread_count << GEN7_GS_DW5_MAX_THREADS__SHIFT;
+
+   if (info->stats_enable)
+      dw5 |= GEN7_GS_DW5_STATISTICS;
+   if (info->dispatch_enable)
+      dw5 |= GEN7_GS_DW5_GS_ENABLE;
+
+   STATIC_ASSERT(ARRAY_SIZE(gs->gs) >= 5);
+   gs->gs[0] = dw2;
+   gs->gs[1] = dw3;
+   gs->gs[2] = dw4;
+   gs->gs[3] = dw5;
+
+   /* Gen8 moved the UCP clip enables into 3DSTATE_GS */
+   if (ilo_dev_gen(dev) >= ILO_GEN(8))
+      gs->gs[4] = ff.user_clip_enables << GEN8_GS_DW9_UCP_CLIP_ENABLES__SHIFT;
+
+   return true;
+}
+
+/* initialize a VS state object; the struct must be zero-filled first */
+bool
+ilo_state_vs_init(struct ilo_state_vs *vs,
+                  const struct ilo_dev *dev,
+                  const struct ilo_state_vs_info *info)
+{
+   bool ok;
+
+   assert(ilo_is_zeroed(vs, sizeof(*vs)));
+
+   ok = vs_set_gen6_3DSTATE_VS(vs, dev, info);
+   assert(ok);
+
+   return ok;
+}
+
+/* initialize a VS state object with the stage disabled */
+bool
+ilo_state_vs_init_disabled(struct ilo_state_vs *vs,
+                           const struct ilo_dev *dev)
+{
+   /* an all-zero info packs a disabled stage */
+   const struct ilo_state_vs_info info = { 0 };
+
+   return ilo_state_vs_init(vs, dev, &info);
+}
+
+/* initialize an HS state object; the struct must be zero-filled first */
+bool
+ilo_state_hs_init(struct ilo_state_hs *hs,
+                  const struct ilo_dev *dev,
+                  const struct ilo_state_hs_info *info)
+{
+   bool ok = true;
+
+   assert(ilo_is_zeroed(hs, sizeof(*hs)));
+
+   /* the HS stage exists only on Gen7+ */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7))
+      ok = hs_set_gen7_3DSTATE_HS(hs, dev, info);
+
+   assert(ok);
+
+   return ok;
+}
+
+/* initialize an HS state object with the stage disabled */
+bool
+ilo_state_hs_init_disabled(struct ilo_state_hs *hs,
+                           const struct ilo_dev *dev)
+{
+   /* an all-zero info packs a disabled stage */
+   const struct ilo_state_hs_info info = { 0 };
+
+   return ilo_state_hs_init(hs, dev, &info);
+}
+
+/* initialize a DS state object; the struct must be zero-filled first */
+bool
+ilo_state_ds_init(struct ilo_state_ds *ds,
+                  const struct ilo_dev *dev,
+                  const struct ilo_state_ds_info *info)
+{
+   bool ok = true;
+
+   assert(ilo_is_zeroed(ds, sizeof(*ds)));
+
+   /* the DS/TE stages exist only on Gen7+; pack both commands */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      ok = ds_set_gen7_3DSTATE_TE(ds, dev, info);
+      ok = ds_set_gen7_3DSTATE_DS(ds, dev, info) && ok;
+   }
+
+   assert(ok);
+
+   return ok;
+}
+
+/* initialize a DS state object with the stage disabled */
+bool
+ilo_state_ds_init_disabled(struct ilo_state_ds *ds,
+                           const struct ilo_dev *dev)
+{
+   /* an all-zero info packs a disabled stage */
+   const struct ilo_state_ds_info info = { 0 };
+
+   return ilo_state_ds_init(ds, dev, &info);
+}
+
+/* initialize a GS state object; the struct must be zero-filled first */
+bool
+ilo_state_gs_init(struct ilo_state_gs *gs,
+                  const struct ilo_dev *dev,
+                  const struct ilo_state_gs_info *info)
+{
+   bool ok;
+
+   assert(ilo_is_zeroed(gs, sizeof(*gs)));
+
+   /* Gen6 uses a very different 3DSTATE_GS layout */
+   ok = (ilo_dev_gen(dev) >= ILO_GEN(7)) ?
+      gs_set_gen7_3DSTATE_GS(gs, dev, info) :
+      gs_set_gen6_3DSTATE_GS(gs, dev, info);
+
+   assert(ok);
+
+   return ok;
+}
+
+/* initialize a GS state object with the stage disabled */
+bool
+ilo_state_gs_init_disabled(struct ilo_state_gs *gs,
+                           const struct ilo_dev *dev)
+{
+   /* an all-zero info packs a disabled stage */
+   const struct ilo_state_gs_info info = { 0 };
+
+   return ilo_state_gs_init(gs, dev, &info);
+}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader.h b/src/gallium/drivers/ilo/core/ilo_state_shader.h
new file mode 100644
index 00000000000..44690c5b0bb
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_shader.h
@@ -0,0 +1,256 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#ifndef ILO_STATE_SHADER_H
+#define ILO_STATE_SHADER_H
+
+#include "genhw/genhw.h"
+
+#include "ilo_core.h"
+#include "ilo_dev.h"
+
+/**
+ * Kernel information.
+ */
+struct ilo_state_shader_kernel_info {
+ /* usually 0 unless the shader has multiple kernels */
+ uint32_t offset;
+
+ uint8_t grf_start;
+ uint8_t pcb_attr_count;
+
+ uint32_t scratch_size;
+};
+
+/**
+ * Shader resources.
+ */
+struct ilo_state_shader_resource_info {
+ /* for prefetches */
+ uint8_t sampler_count;
+ uint8_t surface_count;
+
+ bool has_uav;
+};
+
+/**
+ * URB inputs/outputs.
+ */
+struct ilo_state_shader_urb_info {
+ uint8_t cv_input_attr_count;
+
+ uint8_t read_base;
+ uint8_t read_count;
+
+ uint8_t output_attr_count;
+
+ uint8_t user_cull_enables;
+ uint8_t user_clip_enables;
+};
+
+struct ilo_state_vs_info {
+ struct ilo_state_shader_kernel_info kernel;
+ struct ilo_state_shader_resource_info resource;
+ struct ilo_state_shader_urb_info urb;
+
+ bool dispatch_enable;
+ bool stats_enable;
+};
+
+struct ilo_state_hs_info {
+ struct ilo_state_shader_kernel_info kernel;
+ struct ilo_state_shader_resource_info resource;
+ struct ilo_state_shader_urb_info urb;
+
+ bool dispatch_enable;
+ bool stats_enable;
+};
+
+struct ilo_state_ds_info {
+ struct ilo_state_shader_kernel_info kernel;
+ struct ilo_state_shader_resource_info resource;
+ struct ilo_state_shader_urb_info urb;
+
+ bool dispatch_enable;
+ bool stats_enable;
+};
+
+/**
+ * Stream output. Must be consistent with ilo_state_sol_info.
+ */
+struct ilo_state_gs_sol_info {
+ bool sol_enable;
+ bool stats_enable;
+ bool render_disable;
+
+ uint16_t svbi_post_inc;
+
+ enum gen_reorder_mode tristrip_reorder;
+};
+
+struct ilo_state_gs_info {
+ struct ilo_state_shader_kernel_info kernel;
+ struct ilo_state_shader_resource_info resource;
+ struct ilo_state_shader_urb_info urb;
+
+ struct ilo_state_gs_sol_info sol;
+
+ bool dispatch_enable;
+ bool stats_enable;
+};
+
+struct ilo_state_ps_io_info {
+ /* inputs */
+ enum gen_position_offset posoffset;
+ uint8_t attr_count;
+ bool use_z;
+ bool use_w;
+ bool use_coverage_mask;
+
+ /* outputs */
+ enum gen_pscdepth_mode pscdepth;
+ bool has_rt_write;
+ bool write_pixel_mask;
+ bool write_omask;
+};
+
+struct ilo_state_ps_params_info {
+ /* compatibility with raster states */
+ uint32_t sample_mask;
+ bool earlyz_control_psexec;
+
+ /* compatibility with cc states */
+ bool alpha_may_kill;
+ bool dual_source_blending;
+ bool has_writeable_rt;
+};
+
+struct ilo_state_ps_info {
+ struct ilo_state_shader_kernel_info kernel_8;
+ struct ilo_state_shader_kernel_info kernel_16;
+ struct ilo_state_shader_kernel_info kernel_32;
+ struct ilo_state_shader_resource_info resource;
+
+ struct ilo_state_ps_io_info io;
+ struct ilo_state_ps_params_info params;
+
+ /* bitmask of GEN6_PS_DISPATCH_x */
+ uint8_t valid_kernels;
+ bool per_sample_dispatch;
+ bool sample_count_one;
+ bool cv_per_sample_interp;
+ bool cv_has_earlyz_op;
+
+ bool rt_clear_enable;
+ bool rt_resolve_enable;
+
+ bool cv_has_depth_buffer;
+};
+
+struct ilo_state_vs {
+ uint32_t vs[5];
+};
+
+struct ilo_state_hs {
+ uint32_t hs[4];
+};
+
+struct ilo_state_ds {
+ uint32_t te[3];
+ uint32_t ds[5];
+};
+
+struct ilo_state_gs {
+ uint32_t gs[5];
+};
+
+struct ilo_state_ps {
+ uint32_t ps[8];
+
+ struct ilo_state_ps_dispatch_conds {
+ bool ps_valid;
+
+ bool has_rt_write;
+ bool write_odepth;
+ bool write_ostencil;
+ bool has_uav_write;
+ bool ps_may_kill;
+ } conds;
+};
+
+bool
+ilo_state_vs_init(struct ilo_state_vs *vs,
+ const struct ilo_dev *dev,
+ const struct ilo_state_vs_info *info);
+
+bool
+ilo_state_vs_init_disabled(struct ilo_state_vs *vs,
+ const struct ilo_dev *dev);
+
+bool
+ilo_state_hs_init(struct ilo_state_hs *hs,
+ const struct ilo_dev *dev,
+ const struct ilo_state_hs_info *info);
+
+bool
+ilo_state_hs_init_disabled(struct ilo_state_hs *hs,
+ const struct ilo_dev *dev);
+
+
+bool
+ilo_state_ds_init(struct ilo_state_ds *ds,
+ const struct ilo_dev *dev,
+ const struct ilo_state_ds_info *info);
+
+bool
+ilo_state_ds_init_disabled(struct ilo_state_ds *ds,
+ const struct ilo_dev *dev);
+
+bool
+ilo_state_gs_init(struct ilo_state_gs *gs,
+ const struct ilo_dev *dev,
+ const struct ilo_state_gs_info *info);
+
+bool
+ilo_state_gs_init_disabled(struct ilo_state_gs *gs,
+ const struct ilo_dev *dev);
+
+bool
+ilo_state_ps_init(struct ilo_state_ps *ps,
+ const struct ilo_dev *dev,
+ const struct ilo_state_ps_info *info);
+
+bool
+ilo_state_ps_init_disabled(struct ilo_state_ps *ps,
+ const struct ilo_dev *dev);
+
+bool
+ilo_state_ps_set_params(struct ilo_state_ps *ps,
+ const struct ilo_dev *dev,
+ const struct ilo_state_ps_params_info *params);
+
+#endif /* ILO_STATE_SHADER_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c b/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c
new file mode 100644
index 00000000000..f4d801e9b56
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c
@@ -0,0 +1,771 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#include "ilo_debug.h"
+#include "ilo_state_shader.h"
+
+struct pixel_ff {
+ uint8_t dispatch_modes;
+
+ uint32_t kernel_offsets[3];
+ uint8_t grf_starts[3];
+ bool pcb_enable;
+ uint8_t scratch_space;
+
+ uint8_t sampler_count;
+ uint8_t surface_count;
+ bool has_uav;
+
+ uint16_t thread_count;
+
+ struct ilo_state_ps_dispatch_conds conds;
+
+ bool kill_pixel;
+ bool dispatch_enable;
+ bool dual_source_blending;
+ uint32_t sample_mask;
+};
+
+static bool
+ps_kernel_validate_gen6(const struct ilo_dev *dev,
+ const struct ilo_state_shader_kernel_info *kernel)
+{
+ /* "Dispatch GRF Start Register for Constant/Setup Data" is U7 */
+ const uint8_t max_grf_start = 128;
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 271:
+ *
+ * "(Per-Thread Scratch Space)
+ * Range [0,11] indicating [1k bytes, 2M bytes] in powers of two"
+ */
+ const uint32_t max_scratch_size = 2 * 1024 * 1024;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /* "Kernel Start Pointer" is 64-byte aligned */
+ assert(kernel->offset % 64 == 0);
+
+ assert(kernel->grf_start < max_grf_start);
+ assert(kernel->scratch_size <= max_scratch_size);
+
+ return true;
+}
+
+static bool
+ps_validate_gen6(const struct ilo_dev *dev,
+ const struct ilo_state_ps_info *info)
+{
+ const struct ilo_state_shader_kernel_info *kernel_8 = &info->kernel_8;
+ const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16;
+ const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32;
+ const struct ilo_state_ps_io_info *io = &info->io;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ if (!ps_kernel_validate_gen6(dev, kernel_8) ||
+ !ps_kernel_validate_gen6(dev, kernel_16) ||
+ !ps_kernel_validate_gen6(dev, kernel_32))
+ return false;
+
+ /* unsupported on Gen6 */
+ if (ilo_dev_gen(dev) == ILO_GEN(6))
+ assert(!io->use_coverage_mask);
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 275:
+ *
+ * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
+ * field must be set to disabled."
+ */
+ if (ilo_dev_gen(dev) == ILO_GEN(6) && io->pscdepth != GEN7_PSCDEPTH_OFF)
+ assert(info->cv_has_depth_buffer);
+
+ if (!info->per_sample_dispatch) {
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 281:
+ *
+ * "MSDISPMODE_PERSAMPLE is required in order to select
+ * POSOFFSET_SAMPLE."
+ */
+ assert(io->posoffset != GEN6_POSOFFSET_SAMPLE);
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 282:
+ *
+ * "MSDISPMODE_PERSAMPLE is required in order to select
+ * INTERP_SAMPLE."
+ *
+ * From the Sandy Bridge PRM, volume 2 part 1, page 283:
+ *
+ * "MSDISPMODE_PERSAMPLE is required in order to select Perspective
+ * Sample or Non-perspective Sample barycentric coordinates."
+ */
+ assert(!info->cv_per_sample_interp);
+ }
+
+ /*
+ *
+ * From the Sandy Bridge PRM, volume 2 part 1, page 314:
+ *
+ * "Pixel Shader Dispatch, Alpha... must all be disabled."
+ *
+ * Simply disallow any valid kernel when there is early-z op. Also, when
+ * there is no valid kernel, io should be zeroed.
+ */
+ if (info->valid_kernels)
+ assert(!info->cv_has_earlyz_op);
+ else
+ assert(ilo_is_zeroed(io, sizeof(*io)));
+
+ return true;
+}
+
+static uint8_t
+ps_get_gen6_dispatch_modes(const struct ilo_dev *dev,
+ const struct ilo_state_ps_info *info)
+{
+ const struct ilo_state_ps_io_info *io = &info->io;
+ uint8_t dispatch_modes = info->valid_kernels;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ if (!dispatch_modes)
+ return 0;
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 334:
+ *
+ * "Not valid on [DevSNB] if 4x PERPIXEL mode with pixel shader
+ * computed depth."
+ *
+ * "Valid on all products, except when in non-1x PERSAMPLE mode
+ * (applies to [DevSNB+] only)"
+ *
+ * From the Sandy Bridge PRM, volume 4 part 1, page 239:
+ *
+ * "[DevSNB]: When Pixel Shader outputs oDepth and PS invocation mode
+ * is PERPIXEL, Message Type for Render Target Write must be SIMD8.
+ *
+ * Errata: [DevSNB+]: When Pixel Shader outputs oMask, this message
+ * type is not supported: SIMD8 (including SIMD8_DUALSRC_xx)."
+ *
+ * It is really hard to follow what combinations are valid on what
+ * platforms. Judging from the restrictions on RT write messages on Gen6,
+ * oDepth and oMask related issues should be Gen6-specific. PERSAMPLE
+ * issue should be universal, and disallows multiple dispatch modes.
+ */
+ if (ilo_dev_gen(dev) == ILO_GEN(6)) {
+ if (io->pscdepth != GEN7_PSCDEPTH_OFF && !info->per_sample_dispatch)
+ dispatch_modes &= GEN6_PS_DISPATCH_8;
+ if (io->write_omask)
+ dispatch_modes &= ~GEN6_PS_DISPATCH_8;
+ }
+ if (info->per_sample_dispatch && !info->sample_count_one) {
+ /* prefer 32 over 16 over 8 */
+ if (dispatch_modes & GEN6_PS_DISPATCH_32)
+ dispatch_modes &= GEN6_PS_DISPATCH_32;
+ else if (dispatch_modes & GEN6_PS_DISPATCH_16)
+ dispatch_modes &= GEN6_PS_DISPATCH_16;
+ else
+ dispatch_modes &= GEN6_PS_DISPATCH_8;
+ }
+
+ /*
+ * From the Broadwell PRM, volume 2b, page 149:
+ *
+ * "When Render Target Fast Clear Enable is ENABLED or Render Target
+ * Resolve Type = RESOLVE_PARTIAL or RESOLVE_FULL, this bit (8 Pixel
+ * Dispatch or Dual-8 Pixel Dispatch Enable) must be DISABLED."
+ */
+ if (info->rt_clear_enable || info->rt_resolve_enable)
+ dispatch_modes &= ~GEN6_PS_DISPATCH_8;
+
+ assert(dispatch_modes);
+
+ return dispatch_modes;
+}
+
+static uint16_t
+ps_get_gen6_thread_count(const struct ilo_dev *dev,
+ const struct ilo_state_ps_info *info)
+{
+ uint16_t thread_count;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /* Maximum Number of Threads of 3DSTATE_PS */
+ switch (ilo_dev_gen(dev)) {
+ case ILO_GEN(8):
+ /* scaled automatically */
+ thread_count = 64 - 1;
+ break;
+ case ILO_GEN(7.5):
+ thread_count = (dev->gt == 3) ? 408 :
+ (dev->gt == 2) ? 204 : 102;
+ break;
+ case ILO_GEN(7):
+ thread_count = (dev->gt == 2) ? 172 : 48;
+ break;
+ case ILO_GEN(6):
+ default:
+ /* from the classic driver instead of the PRM */
+ thread_count = (dev->gt == 2) ? 80 : 40;
+ break;
+ }
+
+ return thread_count - 1;
+}
+
+static bool
+ps_params_get_gen6_kill_pixel(const struct ilo_dev *dev,
+ const struct ilo_state_ps_params_info *params,
+ const struct ilo_state_ps_dispatch_conds *conds)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 275:
+ *
+ * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
+ * PS kernel or color calculator has the ability to kill (discard)
+ * pixels or samples, other than due to depth or stencil testing.
+ * This bit is required to be ENABLED in the following situations:
+ *
+ * The API pixel shader program contains "killpix" or "discard"
+ * instructions, or other code in the pixel shader kernel that can
+ * cause the final pixel mask to differ from the pixel mask received
+ * on dispatch.
+ *
+ * A sampler with chroma key enabled with kill pixel mode is used by
+ * the pixel shader.
+ *
+ * Any render target has Alpha Test Enable or AlphaToCoverage Enable
+ * enabled.
+ *
+ * The pixel shader kernel generates and outputs oMask.
+ *
+ * Note: As ClipDistance clipping is fully supported in hardware and
+ * therefore not via PS instructions, there should be no need to
+ * ENABLE this bit due to ClipDistance clipping."
+ */
+ return (conds->ps_may_kill || params->alpha_may_kill);
+}
+
+static bool
+ps_params_get_gen6_dispatch_enable(const struct ilo_dev *dev,
+ const struct ilo_state_ps_params_info *params,
+ const struct ilo_state_ps_dispatch_conds *conds)
+{
+ /*
+ * We want to skip dispatching when EarlyZ suffices. The conditions that
+ * require dispatching are
+ *
+ * - PS writes RTs and RTs are writeable
+ * - PS changes depth value and depth test/write is enabled
+ * - PS changes stencil value and stencil test is enabled
+ * - PS writes UAVs
+ * - PS or CC kills pixels
+ * - EDSC is PSEXEC, and depth test/write or stencil test is enabled
+ */
+ bool dispatch_required =
+ ((conds->has_rt_write && params->has_writeable_rt) ||
+ conds->write_odepth ||
+ conds->write_ostencil ||
+ conds->has_uav_write ||
+ ps_params_get_gen6_kill_pixel(dev, params, conds) ||
+ params->earlyz_control_psexec);
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 280:
+ *
+ * "If EDSC_PSEXEC mode is selected, Thread Dispatch Enable must be
+ * set."
+ */
+ if (ilo_dev_gen(dev) < ILO_GEN(8) && params->earlyz_control_psexec)
+ dispatch_required = true;
+
+ /* assert it is valid to dispatch */
+ if (dispatch_required)
+ assert(conds->ps_valid);
+
+ return dispatch_required;
+}
+
+static bool
+ps_get_gen6_ff_kernels(const struct ilo_dev *dev,
+ const struct ilo_state_ps_info *info,
+ struct pixel_ff *ff)
+{
+ const struct ilo_state_shader_kernel_info *kernel_8 = &info->kernel_8;
+ const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16;
+ const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32;
+ uint32_t scratch_size;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ ff->dispatch_modes = ps_get_gen6_dispatch_modes(dev, info);
+
+ /* initialize kernel offsets and GRF starts */
+ if (util_is_power_of_two(ff->dispatch_modes)) {
+ if (ff->dispatch_modes & GEN6_PS_DISPATCH_8) {
+ ff->kernel_offsets[0] = kernel_8->offset;
+ ff->grf_starts[0] = kernel_8->grf_start;
+ } else if (ff->dispatch_modes & GEN6_PS_DISPATCH_16) {
+ ff->kernel_offsets[0] = kernel_16->offset;
+ ff->grf_starts[0] = kernel_16->grf_start;
+ } else if (ff->dispatch_modes & GEN6_PS_DISPATCH_32) {
+ ff->kernel_offsets[0] = kernel_32->offset;
+ ff->grf_starts[0] = kernel_32->grf_start;
+ }
+ } else {
+ ff->kernel_offsets[0] = kernel_8->offset;
+ ff->kernel_offsets[1] = kernel_32->offset;
+ ff->kernel_offsets[2] = kernel_16->offset;
+
+ ff->grf_starts[0] = kernel_8->grf_start;
+ ff->grf_starts[1] = kernel_32->grf_start;
+ ff->grf_starts[2] = kernel_16->grf_start;
+ }
+
+ /* we do not want to save it */
+ assert(ff->kernel_offsets[0] == 0);
+
+ ff->pcb_enable = (((ff->dispatch_modes & GEN6_PS_DISPATCH_8) &&
+ kernel_8->pcb_attr_count) ||
+ ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) &&
+ kernel_16->pcb_attr_count) ||
+ ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
+ kernel_32->pcb_attr_count));
+
+ scratch_size = 0;
+ if ((ff->dispatch_modes & GEN6_PS_DISPATCH_8) &&
+ scratch_size < kernel_8->scratch_size)
+ scratch_size = kernel_8->scratch_size;
+ if ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) &&
+ scratch_size < kernel_16->scratch_size)
+ scratch_size = kernel_16->scratch_size;
+ if ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
+ scratch_size < kernel_32->scratch_size)
+ scratch_size = kernel_32->scratch_size;
+
+ /* next power of two, starting from 1KB */
+ ff->scratch_space = (scratch_size > 1024) ?
+ (util_last_bit(scratch_size - 1) - 10): 0;
+
+ /* GPU hangs on Haswell if none of the dispatch mode bits is set */
+ if (ilo_dev_gen(dev) == ILO_GEN(7.5) && !ff->dispatch_modes)
+ ff->dispatch_modes |= GEN6_PS_DISPATCH_8;
+
+ return true;
+}
+
+static bool
+ps_get_gen6_ff(const struct ilo_dev *dev,
+ const struct ilo_state_ps_info *info,
+ struct pixel_ff *ff)
+{
+ const struct ilo_state_shader_resource_info *resource = &info->resource;
+ const struct ilo_state_ps_io_info *io = &info->io;
+ const struct ilo_state_ps_params_info *params = &info->params;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ memset(ff, 0, sizeof(*ff));
+
+ if (!ps_validate_gen6(dev, info) || !ps_get_gen6_ff_kernels(dev, info, ff))
+ return false;
+
+ ff->sampler_count = (resource->sampler_count <= 12) ?
+ (resource->sampler_count + 3) / 4 : 4;
+ ff->surface_count = resource->surface_count;
+ ff->has_uav = resource->has_uav;
+
+ ff->thread_count = ps_get_gen6_thread_count(dev, info);
+
+ ff->conds.ps_valid = (info->valid_kernels != 0x0);
+ ff->conds.has_rt_write = io->has_rt_write;
+ ff->conds.write_odepth = (io->pscdepth != GEN7_PSCDEPTH_OFF);
+ ff->conds.write_ostencil = false;
+ ff->conds.has_uav_write = resource->has_uav;
+ ff->conds.ps_may_kill = (io->write_pixel_mask || io->write_omask);
+
+ ff->kill_pixel = ps_params_get_gen6_kill_pixel(dev, params, &ff->conds);
+ ff->dispatch_enable =
+ ps_params_get_gen6_dispatch_enable(dev, params, &ff->conds);
+ ff->dual_source_blending = params->dual_source_blending;
+ ff->sample_mask = params->sample_mask;
+
+ return true;
+}
+
+static bool
+ps_set_gen6_3dstate_wm(struct ilo_state_ps *ps,
+ const struct ilo_dev *dev,
+ const struct ilo_state_ps_info *info,
+ const struct pixel_ff *ff)
+{
+ const struct ilo_state_ps_io_info *io = &info->io;
+ uint32_t dw2, dw3, dw4, dw5, dw6;
+
+ ILO_DEV_ASSERT(dev, 6, 6);
+
+ dw2 = ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
+ ff->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
+
+ if (false)
+ dw2 |= GEN6_THREADDISP_FP_MODE_ALT;
+
+ dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+
+ dw4 = ff->grf_starts[0] << GEN6_WM_DW4_URB_GRF_START0__SHIFT |
+ ff->grf_starts[1] << GEN6_WM_DW4_URB_GRF_START1__SHIFT |
+ ff->grf_starts[2] << GEN6_WM_DW4_URB_GRF_START2__SHIFT;
+
+ dw5 = ff->thread_count << GEN6_WM_DW5_MAX_THREADS__SHIFT |
+ ff->dispatch_modes << GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT;
+
+ if (ff->kill_pixel)
+ dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL;
+
+ if (io->pscdepth != GEN7_PSCDEPTH_OFF)
+ dw5 |= GEN6_WM_DW5_PS_COMPUTE_DEPTH;
+ if (io->use_z)
+ dw5 |= GEN6_WM_DW5_PS_USE_DEPTH;
+
+ if (ff->dispatch_enable)
+ dw5 |= GEN6_WM_DW5_PS_DISPATCH_ENABLE;
+
+ if (io->write_omask)
+ dw5 |= GEN6_WM_DW5_PS_COMPUTE_OMASK;
+ if (io->use_w)
+ dw5 |= GEN6_WM_DW5_PS_USE_W;
+
+ if (ff->dual_source_blending)
+ dw5 |= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND;
+
+ dw6 = io->attr_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT |
+ io->posoffset << GEN6_WM_DW6_PS_POSOFFSET__SHIFT;
+
+ dw6 |= (info->per_sample_dispatch) ?
+ GEN6_WM_DW6_MSDISPMODE_PERSAMPLE : GEN6_WM_DW6_MSDISPMODE_PERPIXEL;
+
+ STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 7);
+ ps->ps[0] = dw2;
+ ps->ps[1] = dw3;
+ ps->ps[2] = dw4;
+ ps->ps[3] = dw5;
+ ps->ps[4] = dw6;
+ ps->ps[5] = ff->kernel_offsets[1];
+ ps->ps[6] = ff->kernel_offsets[2];
+
+ return true;
+}
+
+static bool
+ps_set_gen7_3dstate_wm(struct ilo_state_ps *ps,
+ const struct ilo_dev *dev,
+ const struct ilo_state_ps_info *info,
+ const struct pixel_ff *ff)
+{
+ const struct ilo_state_ps_io_info *io = &info->io;
+ uint32_t dw1, dw2;
+
+ ILO_DEV_ASSERT(dev, 7, 7.5);
+
+ dw1 = io->pscdepth << GEN7_WM_DW1_PSCDEPTH__SHIFT;
+
+ if (ff->dispatch_enable)
+ dw1 |= GEN7_WM_DW1_PS_DISPATCH_ENABLE;
+ if (ff->kill_pixel)
+ dw1 |= GEN7_WM_DW1_PS_KILL_PIXEL;
+
+ if (io->use_z)
+ dw1 |= GEN7_WM_DW1_PS_USE_DEPTH;
+ if (io->use_w)
+ dw1 |= GEN7_WM_DW1_PS_USE_W;
+ if (io->use_coverage_mask)
+ dw1 |= GEN7_WM_DW1_PS_USE_COVERAGE_MASK;
+
+ dw2 = (info->per_sample_dispatch) ?
+ GEN7_WM_DW2_MSDISPMODE_PERSAMPLE : GEN7_WM_DW2_MSDISPMODE_PERPIXEL;
+
+ STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 2);
+ ps->ps[0] = dw1;
+ ps->ps[1] = dw2;
+
+ return true;
+}
+
+static bool
+ps_set_gen7_3DSTATE_PS(struct ilo_state_ps *ps,
+ const struct ilo_dev *dev,
+ const struct ilo_state_ps_info *info,
+ const struct pixel_ff *ff)
+{
+ const struct ilo_state_ps_io_info *io = &info->io;
+ uint32_t dw2, dw3, dw4, dw5;
+
+ ILO_DEV_ASSERT(dev, 7, 7.5);
+
+ dw2 = ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
+ ff->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
+
+ if (false)
+ dw2 |= GEN6_THREADDISP_FP_MODE_ALT;
+
+ dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+
+ dw4 = io->posoffset << GEN7_PS_DW4_POSOFFSET__SHIFT |
+ ff->dispatch_modes << GEN7_PS_DW4_DISPATCH_MODE__SHIFT;
+
+ if (ilo_dev_gen(dev) == ILO_GEN(7.5)) {
+ dw4 |= ff->thread_count << GEN75_PS_DW4_MAX_THREADS__SHIFT |
+ (ff->sample_mask & 0xff) << GEN75_PS_DW4_SAMPLE_MASK__SHIFT;
+ } else {
+ dw4 |= ff->thread_count << GEN7_PS_DW4_MAX_THREADS__SHIFT;
+ }
+
+ if (ff->pcb_enable)
+ dw4 |= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE;
+ if (io->attr_count)
+ dw4 |= GEN7_PS_DW4_ATTR_ENABLE;
+ if (io->write_omask)
+ dw4 |= GEN7_PS_DW4_COMPUTE_OMASK;
+ if (info->rt_clear_enable)
+ dw4 |= GEN7_PS_DW4_RT_FAST_CLEAR;
+ if (ff->dual_source_blending)
+ dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND;
+ if (info->rt_resolve_enable)
+ dw4 |= GEN7_PS_DW4_RT_RESOLVE;
+ if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff->has_uav)
+ dw4 |= GEN75_PS_DW4_ACCESS_UAV;
+
+ dw5 = ff->grf_starts[0] << GEN7_PS_DW5_URB_GRF_START0__SHIFT |
+ ff->grf_starts[1] << GEN7_PS_DW5_URB_GRF_START1__SHIFT |
+ ff->grf_starts[2] << GEN7_PS_DW5_URB_GRF_START2__SHIFT;
+
+ STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 8);
+ ps->ps[2] = dw2;
+ ps->ps[3] = dw3;
+ ps->ps[4] = dw4;
+ ps->ps[5] = dw5;
+ ps->ps[6] = ff->kernel_offsets[1];
+ ps->ps[7] = ff->kernel_offsets[2];
+
+ return true;
+}
+
+static bool
+ps_set_gen8_3DSTATE_PS(struct ilo_state_ps *ps,
+ const struct ilo_dev *dev,
+ const struct ilo_state_ps_info *info,
+ const struct pixel_ff *ff)
+{
+ const struct ilo_state_ps_io_info *io = &info->io;
+ uint32_t dw3, dw4, dw6, dw7;
+
+ ILO_DEV_ASSERT(dev, 8, 8);
+
+ dw3 = ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
+ ff->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
+
+ if (false)
+ dw3 |= GEN6_THREADDISP_FP_MODE_ALT;
+
+ dw4 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+
+ dw6 = ff->thread_count << GEN8_PS_DW6_MAX_THREADS__SHIFT |
+ io->posoffset << GEN8_PS_DW6_POSOFFSET__SHIFT |
+ ff->dispatch_modes << GEN8_PS_DW6_DISPATCH_MODE__SHIFT;
+
+ if (ff->pcb_enable)
+ dw6 |= GEN8_PS_DW6_PUSH_CONSTANT_ENABLE;
+
+ if (info->rt_clear_enable)
+ dw6 |= GEN8_PS_DW6_RT_FAST_CLEAR;
+ if (info->rt_resolve_enable)
+ dw6 |= GEN8_PS_DW6_RT_RESOLVE;
+
+ dw7 = ff->grf_starts[0] << GEN8_PS_DW7_URB_GRF_START0__SHIFT |
+ ff->grf_starts[1] << GEN8_PS_DW7_URB_GRF_START1__SHIFT |
+ ff->grf_starts[2] << GEN8_PS_DW7_URB_GRF_START2__SHIFT;
+
+ STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 6);
+ ps->ps[0] = dw3;
+ ps->ps[1] = dw4;
+ ps->ps[2] = dw6;
+ ps->ps[3] = dw7;
+ ps->ps[4] = ff->kernel_offsets[1];
+ ps->ps[5] = ff->kernel_offsets[2];
+
+ return true;
+}
+
+static bool
+ps_set_gen8_3DSTATE_PS_EXTRA(struct ilo_state_ps *ps,
+ const struct ilo_dev *dev,
+ const struct ilo_state_ps_info *info,
+ const struct pixel_ff *ff)
+{
+ const struct ilo_state_ps_io_info *io = &info->io;
+ uint32_t dw1;
+
+ ILO_DEV_ASSERT(dev, 8, 8);
+
+ dw1 = io->pscdepth << GEN8_PSX_DW1_PSCDEPTH__SHIFT;
+
+ if (info->valid_kernels)
+ dw1 |= GEN8_PSX_DW1_VALID;
+ if (!io->has_rt_write)
+ dw1 |= GEN8_PSX_DW1_UAV_ONLY;
+ if (io->write_omask)
+ dw1 |= GEN8_PSX_DW1_COMPUTE_OMASK;
+ if (io->write_pixel_mask)
+ dw1 |= GEN8_PSX_DW1_KILL_PIXEL;
+
+ if (io->use_z)
+ dw1 |= GEN8_PSX_DW1_USE_DEPTH;
+ if (io->use_w)
+ dw1 |= GEN8_PSX_DW1_USE_W;
+ if (io->attr_count)
+ dw1 |= GEN8_PSX_DW1_ATTR_ENABLE;
+
+ if (info->per_sample_dispatch)
+ dw1 |= GEN8_PSX_DW1_PER_SAMPLE;
+ if (ff->has_uav)
+ dw1 |= GEN8_PSX_DW1_ACCESS_UAV;
+ if (io->use_coverage_mask)
+ dw1 |= GEN8_PSX_DW1_USE_COVERAGE_MASK;
+
+ /*
+ * From the Broadwell PRM, volume 2b, page 151:
+ *
+ * "When this bit (Pixel Shader Valid) clear the rest of this command
+ * should also be clear.
+ */
+ if (!info->valid_kernels)
+ dw1 = 0;
+
+ STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 5);
+ ps->ps[4] = dw1;
+
+ return true;
+}
+
+bool
+ilo_state_ps_init(struct ilo_state_ps *ps,
+ const struct ilo_dev *dev,
+ const struct ilo_state_ps_info *info)
+{
+ struct pixel_ff ff;
+ bool ret = true;
+
+ assert(ilo_is_zeroed(ps, sizeof(*ps)));
+
+ ret &= ps_get_gen6_ff(dev, info, &ff);
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+ ret &= ps_set_gen8_3DSTATE_PS(ps, dev, info, &ff);
+ ret &= ps_set_gen8_3DSTATE_PS_EXTRA(ps, dev, info, &ff);
+ } else if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+ ret &= ps_set_gen7_3dstate_wm(ps, dev, info, &ff);
+ ret &= ps_set_gen7_3DSTATE_PS(ps, dev, info, &ff);
+ } else {
+ ret &= ps_set_gen6_3dstate_wm(ps, dev, info, &ff);
+ }
+
+ /* save conditions */
+ ps->conds = ff.conds;
+
+ assert(ret);
+
+ return ret;
+}
+
+bool
+ilo_state_ps_init_disabled(struct ilo_state_ps *ps,
+ const struct ilo_dev *dev)
+{
+ struct ilo_state_ps_info info;
+
+ memset(&info, 0, sizeof(info));
+
+ return ilo_state_ps_init(ps, dev, &info);
+}
+
+bool
+ilo_state_ps_set_params(struct ilo_state_ps *ps,
+ const struct ilo_dev *dev,
+ const struct ilo_state_ps_params_info *params)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /* modify sample mask */
+ if (ilo_dev_gen(dev) == ILO_GEN(7.5)) {
+ ps->ps[4] = (ps->ps[4] & ~GEN75_PS_DW4_SAMPLE_MASK__MASK) |
+ (params->sample_mask & 0xff) << GEN75_PS_DW4_SAMPLE_MASK__SHIFT;
+ }
+
+ /* modify dispatch enable, pixel kill, and dual source blending */
+ if (ilo_dev_gen(dev) < ILO_GEN(8)) {
+ if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+ if (ps_params_get_gen6_dispatch_enable(dev, params, &ps->conds))
+ ps->ps[0] |= GEN7_WM_DW1_PS_DISPATCH_ENABLE;
+ else
+ ps->ps[0] &= ~GEN7_WM_DW1_PS_DISPATCH_ENABLE;
+
+ if (ps_params_get_gen6_kill_pixel(dev, params, &ps->conds))
+ ps->ps[0] |= GEN7_WM_DW1_PS_KILL_PIXEL;
+ else
+ ps->ps[0] &= ~GEN7_WM_DW1_PS_KILL_PIXEL;
+
+ if (params->dual_source_blending)
+ ps->ps[4] |= GEN7_PS_DW4_DUAL_SOURCE_BLEND;
+ else
+ ps->ps[4] &= ~GEN7_PS_DW4_DUAL_SOURCE_BLEND;
+ } else {
+ if (ps_params_get_gen6_dispatch_enable(dev, params, &ps->conds))
+ ps->ps[3] |= GEN6_WM_DW5_PS_DISPATCH_ENABLE;
+ else
+ ps->ps[3] &= ~GEN6_WM_DW5_PS_DISPATCH_ENABLE;
+
+ if (ps_params_get_gen6_kill_pixel(dev, params, &ps->conds))
+ ps->ps[3] |= GEN6_WM_DW5_PS_KILL_PIXEL;
+ else
+ ps->ps[3] &= ~GEN6_WM_DW5_PS_KILL_PIXEL;
+
+ if (params->dual_source_blending)
+ ps->ps[3] |= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND;
+ else
+ ps->ps[3] &= ~GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND;
+ }
+ }
+
+ return true;
+}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_sol.c b/src/gallium/drivers/ilo/core/ilo_state_sol.c
new file mode 100644
index 00000000000..38c0b719ab3
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_sol.c
@@ -0,0 +1,464 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#include "ilo_debug.h"
+#include "ilo_buffer.h"
+#include "ilo_state_sol.h"
+
+/**
+ * Validate one SO stream against Gen7+ restrictions: the VUE read window
+ * and every output declaration.  Violations trip asserts; returns true
+ * otherwise.
+ */
+static bool
+sol_stream_validate_gen7(const struct ilo_dev *dev,
+                         const struct ilo_state_sol_stream_info *stream)
+{
+   uint8_t i;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   /* the read window must lie within the attributes the VUE provides */
+   assert(stream->vue_read_base + stream->vue_read_count <=
+         stream->cv_vue_attr_count);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 200:
+    *
+    *     "(Stream 0 Vertex Read Offset)
+    *      Format: U1 count of 256-bit units
+    *
+    *      Specifies amount of data to skip over before reading back Stream 0
+    *      vertex data.  Must be zero if the GS is enabled and the Output
+    *      Vertex Size field in 3DSTATE_GS is programmed to 0 (i.e., one 16B
+    *      unit)."
+    *
+    *     "(Stream 0 Vertex Read Length)
+    *      Format: U5-1 count of 256-bit units
+    *
+    *      Specifies amount of vertex data to read back for Stream 0 vertices,
+    *      starting at the Stream 0 Vertex Read Offset location.  Maximum
+    *      readback is 17 256-bit units (34 128-bit vertex attributes).  Read
+    *      data past the end of the valid vertex data has undefined contents,
+    *      and therefore shouldn't be used to source stream out data.  Must be
+    *      zero (i.e., read length = 256b) if the GS is enabled and the Output
+    *      Vertex Size field in 3DSTATE_GS is programmed to 0 (i.e., one 16B
+    *      unit)."
+    */
+   /* base is in 128-bit attrs; skip either nothing or one 256-bit unit */
+   assert(stream->vue_read_base == 0 || stream->vue_read_base == 2);
+   assert(stream->vue_read_count <= 34);
+
+   assert(stream->decl_count <= ILO_STATE_SOL_MAX_DECL_COUNT);
+
+   for (i = 0; i < stream->decl_count; i++) {
+      const struct ilo_state_sol_decl_info *decl = &stream->decls[i];
+
+      /* holes do not source an attribute; others must index the read window */
+      assert(decl->is_hole || decl->attr < stream->vue_read_count);
+
+      /*
+       * From the Ivy Bridge PRM, volume 2 part 1, page 205:
+       *
+       *     "There is only enough internal storage for the 128-bit vertex
+       *      header and 32 128-bit vertex attributes."
+       */
+      assert(decl->attr < 33);
+
+      assert(decl->component_base < 4 &&
+             decl->component_base + decl->component_count <= 4);
+      assert(decl->buffer < ILO_STATE_SOL_MAX_BUFFER_COUNT);
+   }
+
+   return true;
+}
+
+/**
+ * Validate SO state creation info: every stream and every buffer stride.
+ */
+static bool
+sol_validate_gen7(const struct ilo_dev *dev,
+                  const struct ilo_state_sol_info *info)
+{
+   uint8_t i;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 198:
+    *
+    *     "This bit (Render Stream Select) is used even if SO Function Enable
+    *      is DISABLED."
+    *
+    * From the Haswell PRM, volume 2b, page 796:
+    *
+    *     "SO Function Enable must also be ENABLED in order for this field
+    *      (Render Stream Select) to select a stream for rendering.  When SO
+    *      Function Enable is DISABLED and Rendering Disable is cleared (i.e.,
+    *      rendering is enabled), StreamID is ignored downstream of the SO
+    *      stage, allowing any stream to be rendered."
+    *
+    * We want Gen7 behavior, but we have to require users to follow Gen7.5
+    * behavior: info->sol_enable must be set for info->render_stream to work.
+    */
+
+   for (i = 0; i < ARRAY_SIZE(info->streams); i++) {
+      if (!sol_stream_validate_gen7(dev, &info->streams[i]))
+         return false;
+   }
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 208:
+    *
+    *     "(Surface Pitch)
+    *      [0,2048]  Must be 0 or a multiple of 4 Bytes."
+    */
+   for (i = 0; i < ARRAY_SIZE(info->buffer_strides); i++) {
+      assert(info->buffer_strides[i] <= 2048 &&
+             info->buffer_strides[i] % 4 == 0);
+   }
+
+   return true;
+}
+
+/**
+ * Pack 3DSTATE_STREAMOUT DW1 and DW2 into sol->streamout[] and record the
+ * per-buffer strides.
+ */
+static bool
+sol_set_gen7_3DSTATE_STREAMOUT(struct ilo_state_sol *sol,
+                               const struct ilo_dev *dev,
+                               const struct ilo_state_sol_info *info)
+{
+   struct {
+      uint8_t offset;
+      uint8_t len;
+   } vue_read[ILO_STATE_SOL_MAX_STREAM_COUNT];
+   uint8_t i;
+   uint32_t dw1, dw2;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   if (!sol_validate_gen7(dev, info))
+      return false;
+
+   for (i = 0; i < ARRAY_SIZE(info->streams); i++) {
+      const struct ilo_state_sol_stream_info *stream = &info->streams[i];
+
+      /* convert from 128-bit attributes to 256-bit units */
+      vue_read[i].offset = stream->vue_read_base / 2;
+      /*
+       * In pairs minus 1.  URB entries are aligned to 512-bits.  There is no
+       * need to worry about reading past entries.
+       */
+      vue_read[i].len = (stream->vue_read_count + 1) / 2;
+      if (vue_read[i].len)
+         vue_read[i].len--;
+   }
+
+   dw1 = info->render_stream << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT |
+         info->tristrip_reorder << GEN7_SO_DW1_REORDER_MODE__SHIFT;
+
+   if (info->sol_enable)
+      dw1 |= GEN7_SO_DW1_SO_ENABLE;
+
+   if (info->render_disable)
+      dw1 |= GEN7_SO_DW1_RENDER_DISABLE;
+
+   if (info->stats_enable)
+      dw1 |= GEN7_SO_DW1_STATISTICS;
+
+   /* on Gen8 the buffers are enabled in 3DSTATE_SO_BUFFER instead */
+   if (ilo_dev_gen(dev) < ILO_GEN(8)) {
+      const uint8_t buffer_enables = ((bool) info->buffer_strides[3]) << 3 |
+                                     ((bool) info->buffer_strides[2]) << 2 |
+                                     ((bool) info->buffer_strides[1]) << 1 |
+                                     ((bool) info->buffer_strides[0]);
+      dw1 |= buffer_enables << GEN7_SO_DW1_BUFFER_ENABLES__SHIFT;
+   }
+
+   dw2 = vue_read[3].offset << GEN7_SO_DW2_STREAM3_READ_OFFSET__SHIFT |
+         vue_read[3].len << GEN7_SO_DW2_STREAM3_READ_LEN__SHIFT |
+         vue_read[2].offset << GEN7_SO_DW2_STREAM2_READ_OFFSET__SHIFT |
+         vue_read[2].len << GEN7_SO_DW2_STREAM2_READ_LEN__SHIFT |
+         vue_read[1].offset << GEN7_SO_DW2_STREAM1_READ_OFFSET__SHIFT |
+         vue_read[1].len << GEN7_SO_DW2_STREAM1_READ_LEN__SHIFT |
+         vue_read[0].offset << GEN7_SO_DW2_STREAM0_READ_OFFSET__SHIFT |
+         vue_read[0].len << GEN7_SO_DW2_STREAM0_READ_LEN__SHIFT;
+
+   STATIC_ASSERT(ARRAY_SIZE(sol->streamout) >= 2);
+   sol->streamout[0] = dw1;
+   sol->streamout[1] = dw2;
+
+   memcpy(sol->strides, info->buffer_strides, sizeof(sol->strides));
+
+   return true;
+}
+
+/**
+ * Pack 3DSTATE_SO_DECL_LIST.  The four streams' decl entries are
+ * interleaved into 64-bit elements, 16 bits per stream, and stored in the
+ * caller-provided memory that sol->decl points to.
+ */
+static bool
+sol_set_gen7_3DSTATE_SO_DECL_LIST(struct ilo_state_sol *sol,
+                                  const struct ilo_dev *dev,
+                                  const struct ilo_state_sol_info *info,
+                                  uint8_t max_decl_count)
+{
+   uint64_t decl_list[ILO_STATE_SOL_MAX_DECL_COUNT];
+   uint8_t decl_counts[ILO_STATE_SOL_MAX_STREAM_COUNT];
+   uint8_t buffer_selects[ILO_STATE_SOL_MAX_STREAM_COUNT];
+   uint32_t dw1, dw2;
+   uint8_t i, j;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   memset(decl_list, 0, sizeof(decl_list[0]) * max_decl_count);
+
+   for (i = 0; i < ARRAY_SIZE(info->streams); i++) {
+      const struct ilo_state_sol_stream_info *stream = &info->streams[i];
+
+      assert(stream->decl_count <= max_decl_count);
+      decl_counts[i] = stream->decl_count;
+      buffer_selects[i] = 0;
+
+      for (j = 0; j < stream->decl_count; j++) {
+         const struct ilo_state_sol_decl_info *decl = &stream->decls[j];
+         /* bitmask of the components written */
+         const uint8_t mask = ((1 << decl->component_count) - 1) <<
+            decl->component_base;
+         uint16_t val;
+
+         val = decl->buffer << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
+               mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
+
+         if (decl->is_hole)
+            val |= GEN7_SO_DECL_HOLE_FLAG;
+         else
+            val |= decl->attr << GEN7_SO_DECL_REG_INDEX__SHIFT;
+
+         /* stream i occupies bits [16*i, 16*i+15] of each 64-bit element */
+         decl_list[j] |= (uint64_t) val << (16 * i);
+         buffer_selects[i] |= 1 << decl->buffer;
+      }
+   }
+
+   dw1 = buffer_selects[3] << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT |
+         buffer_selects[2] << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT |
+         buffer_selects[1] << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT |
+         buffer_selects[0] << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT;
+   dw2 = decl_counts[3] << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT |
+         decl_counts[2] << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT |
+         decl_counts[1] << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT |
+         decl_counts[0] << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT;
+
+   STATIC_ASSERT(ARRAY_SIZE(sol->so_decl) >= 2);
+   sol->so_decl[0] = dw1;
+   sol->so_decl[1] = dw2;
+
+   /* each 64-bit element is stored as a pair of DWords */
+   STATIC_ASSERT(ARRAY_SIZE(sol->decl[0]) == 2);
+   memcpy(sol->decl, decl_list, sizeof(sol->decl[0]) * max_decl_count);
+   sol->decl_count = max_decl_count;
+
+   return true;
+}
+
+/**
+ * Validate a SO buffer binding: bound range, offset alignment, and the
+ * Gen8-only write offset load/save options.
+ */
+static bool
+sol_buffer_validate_gen7(const struct ilo_dev *dev,
+                         const struct ilo_state_sol_buffer_info *info)
+{
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   if (info->buf)
+      assert(info->offset < info->buf->bo_size && info->size);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 208:
+    *
+    *     "(Surface Base Address) This field specifies the starting DWord
+    *      address..."
+    */
+   assert(info->offset % 4 == 0);
+
+   /* Gen8+ only */
+   if (info->write_offset_load || info->write_offset_save)
+      assert(ilo_dev_gen(dev) >= ILO_GEN(8));
+
+   /*
+    * From the Broadwell PRM, volume 2b, page 206:
+    *
+    *     "This field (Stream Offset) specifies the Offset in stream output
+    *      buffer to start at, or whether to append to the end of an existing
+    *      buffer.  The Offset must be DWORD aligned."
+    */
+   if (info->write_offset_imm_enable) {
+      /* an immediate offset is only consumed when a load is requested */
+      assert(info->write_offset_load);
+      assert(info->write_offset_imm % 4 == 0);
+   }
+
+   return true;
+}
+
+static uint32_t
+sol_buffer_get_gen6_size(const struct ilo_dev *dev,
+                         const struct ilo_state_sol_buffer_info *info)
+{
+   uint32_t avail;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* no buffer bound */
+   if (!info->buf)
+      return 0;
+
+   /* clamp the requested range to what the bo actually provides */
+   if (info->offset + info->size <= info->buf->bo_size)
+      avail = info->size;
+   else
+      avail = info->buf->bo_size - info->offset;
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 208:
+    *
+    *     "(Surface End Address) This field specifies the ending DWord
+    *      address..."
+    *
+    * Round down to a DWord boundary.
+    */
+   return avail & ~3;
+}
+
+static bool
+sol_buffer_set_gen7_3dstate_so_buffer(struct ilo_state_sol_buffer *sb,
+                                      const struct ilo_dev *dev,
+                                      const struct ilo_state_sol_buffer_info *info)
+{
+   uint32_t bound_size;
+
+   ILO_DEV_ASSERT(dev, 7, 7.5);
+
+   if (!sol_buffer_validate_gen7(dev, info))
+      return false;
+
+   bound_size = sol_buffer_get_gen6_size(dev, info);
+
+   /* DW1 is the start address; DW2 the end address (both zero when unbound) */
+   STATIC_ASSERT(ARRAY_SIZE(sb->so_buf) >= 2);
+   sb->so_buf[0] = info->offset;
+   sb->so_buf[1] = bound_size ? info->offset + bound_size : 0;
+
+   return true;
+}
+
+/**
+ * Pack Gen8 3DSTATE_SO_BUFFER (enable flags, base, size, stream offset)
+ * into sb->so_buf[].
+ */
+static bool
+sol_buffer_set_gen8_3dstate_so_buffer(struct ilo_state_sol_buffer *sb,
+                                      const struct ilo_dev *dev,
+                                      const struct ilo_state_sol_buffer_info *info)
+{
+   const uint32_t size = sol_buffer_get_gen6_size(dev, info);
+   uint32_t dw1;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   if (!sol_buffer_validate_gen7(dev, info))
+      return false;
+
+   dw1 = 0;
+
+   if (info->buf)
+      dw1 |= GEN8_SO_BUF_DW1_ENABLE;
+   if (info->write_offset_load)
+      dw1 |= GEN8_SO_BUF_DW1_OFFSET_WRITE_ENABLE;
+   if (info->write_offset_save)
+      dw1 |= GEN8_SO_BUF_DW1_OFFSET_ENABLE;
+
+   STATIC_ASSERT(ARRAY_SIZE(sb->so_buf) >= 4);
+   sb->so_buf[0] = dw1;
+   sb->so_buf[1] = info->offset;
+
+   /*
+    * From the Broadwell PRM, volume 2b, page 205:
+    *
+    *     "This field (Surface Size) specifies the size of buffer in number
+    *      DWords minus 1 of the buffer in Graphics Memory."
+    */
+   sb->so_buf[2] = (size) ? size / 4 - 1 : 0;
+
+   /* load from imm or sb->write_offset_bo (~0u selects the bo) */
+   sb->so_buf[3] = (info->write_offset_imm_enable) ?
+      info->write_offset_imm : ~0u;
+
+   return true;
+}
+
+/**
+ * Initialize SO state.  info->data must be zeroed storage of at least
+ * ilo_state_sol_data_size() bytes; sol->decl is made to point into it, so
+ * it must remain valid for the lifetime of the state.
+ */
+bool
+ilo_state_sol_init(struct ilo_state_sol *sol,
+                   const struct ilo_dev *dev,
+                   const struct ilo_state_sol_info *info)
+{
+   bool ret = true;
+
+   assert(ilo_is_zeroed(sol, sizeof(*sol)));
+   assert(ilo_is_zeroed(info->data, info->data_size));
+
+   /* nothing is packed on Gen6; SO_DECL_LIST/STREAMOUT are Gen7+ */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      uint8_t max_decl_count, i;
+
+      /* one shared decl list; size it for the most demanding stream */
+      max_decl_count = info->streams[0].decl_count;
+      for (i = 1; i < ARRAY_SIZE(info->streams); i++) {
+         if (max_decl_count < info->streams[i].decl_count)
+            max_decl_count = info->streams[i].decl_count;
+      }
+
+      assert(ilo_state_sol_data_size(dev, max_decl_count) <= info->data_size);
+      sol->decl = (uint32_t (*)[2]) info->data;
+
+      ret &= sol_set_gen7_3DSTATE_STREAMOUT(sol, dev, info);
+      ret &= sol_set_gen7_3DSTATE_SO_DECL_LIST(sol, dev, info, max_decl_count);
+   }
+
+   assert(ret);
+
+   return ret;
+}
+
+bool
+ilo_state_sol_init_disabled(struct ilo_state_sol *sol,
+                            const struct ilo_dev *dev,
+                            bool render_disable)
+{
+   /* an all-zero info disables SO; only the rendering switch varies */
+   struct ilo_state_sol_info disabled_info;
+
+   memset(&disabled_info, 0, sizeof(disabled_info));
+   disabled_info.render_disable = render_disable;
+
+   return ilo_state_sol_init(sol, dev, &disabled_info);
+}
+
+bool
+ilo_state_sol_buffer_init(struct ilo_state_sol_buffer *sb,
+                          const struct ilo_dev *dev,
+                          const struct ilo_state_sol_buffer_info *info)
+{
+   bool ok;
+
+   assert(ilo_is_zeroed(sb, sizeof(*sb)));
+
+   /* pick the packing routine for the target gen */
+   ok = (ilo_dev_gen(dev) >= ILO_GEN(8)) ?
+      sol_buffer_set_gen8_3dstate_so_buffer(sb, dev, info) :
+      sol_buffer_set_gen7_3dstate_so_buffer(sb, dev, info);
+
+   sb->need_bo = (info->size > 0);
+   /* a write offset bo is needed to save, or to load without an immediate */
+   sb->need_write_offset_bo = (info->write_offset_save ||
+         (info->write_offset_load && !info->write_offset_imm_enable));
+
+   assert(ok);
+
+   return ok;
+}
+
+bool
+ilo_state_sol_buffer_init_disabled(struct ilo_state_sol_buffer *sb,
+                                   const struct ilo_dev *dev)
+{
+   /* an all-zero info describes an unbound SO buffer */
+   struct ilo_state_sol_buffer_info no_buffer;
+
+   memset(&no_buffer, 0, sizeof(no_buffer));
+
+   return ilo_state_sol_buffer_init(sb, dev, &no_buffer);
+}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_sol.h b/src/gallium/drivers/ilo/core/ilo_state_sol.h
new file mode 100644
index 00000000000..2513fcb4979
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_sol.h
@@ -0,0 +1,166 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#ifndef ILO_STATE_SOL_H
+#define ILO_STATE_SOL_H
+
+#include "genhw/genhw.h"
+
+#include "ilo_core.h"
+#include "ilo_dev.h"
+
+/*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 193:
+ *
+ * "Incoming topologies are tagged with a 2-bit StreamID."
+ */
+#define ILO_STATE_SOL_MAX_STREAM_COUNT 4
+
+/*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 195:
+ *
+ * "Up to four SO buffers are supported."
+ */
+#define ILO_STATE_SOL_MAX_BUFFER_COUNT 4
+
+/*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 201:
+ *
+ * "All 128 decls..."
+ */
+#define ILO_STATE_SOL_MAX_DECL_COUNT 128
+
+/**
+ * Output a vertex attribute to a SO buffer.
+ */
+struct ilo_state_sol_decl_info {
+   /* select an attribute from read ones */
+   uint8_t attr;
+   /* when set, emit a hole instead of attribute data; attr is not used */
+   bool is_hole;
+
+   /* which components to write: [component_base, component_base + count) */
+   uint8_t component_base;
+   uint8_t component_count;
+
+   /* destination buffer */
+   uint8_t buffer;
+};
+
+struct ilo_state_sol_stream_info {
+   /* which VUE attributes to read, in 128-bit attribute units */
+   uint8_t cv_vue_attr_count;
+   uint8_t vue_read_base;
+   uint8_t vue_read_count;
+
+   /* output declarations for this stream; decls holds decl_count entries */
+   uint8_t decl_count;
+   const struct ilo_state_sol_decl_info *decls;
+};
+
+struct ilo_state_sol_info {
+   /*
+    * Zeroed storage for the packed SO_DECL list, at least
+    * ilo_state_sol_data_size() bytes; ilo_state_sol keeps a pointer into it
+    */
+   void *data;
+   size_t data_size;
+
+   bool sol_enable;
+   bool stats_enable;
+   enum gen_reorder_mode tristrip_reorder;
+
+   bool render_disable;
+   /* ignored when SOL is disabled */
+   uint8_t render_stream;
+
+   /* a buffer is disabled when its stride is zero */
+   uint16_t buffer_strides[ILO_STATE_SOL_MAX_BUFFER_COUNT];
+
+   struct ilo_state_sol_stream_info streams[ILO_STATE_SOL_MAX_STREAM_COUNT];
+};
+
+struct ilo_state_sol {
+   /* 3DSTATE_STREAMOUT DW1-DW2 */
+   uint32_t streamout[2];
+   /* per-buffer surface pitches in bytes */
+   uint16_t strides[4];
+
+   /* 3DSTATE_SO_DECL_LIST DW1-DW2 */
+   uint32_t so_decl[2];
+   /* packed decl elements (two DWords each), stored in caller-provided data */
+   uint32_t (*decl)[2];
+   uint8_t decl_count;
+};
+
+struct ilo_buffer;
+
+struct ilo_state_sol_buffer_info {
+   const struct ilo_buffer *buf;
+   /* bound range in bytes; the offset must be DWord-aligned */
+   uint32_t offset;
+   uint32_t size;
+
+   /*
+    * Gen8+ only.  When enabled, require a write offset bo of at least
+    * (sizeof(uint32_t) * ILO_STATE_SOL_MAX_BUFFER_COUNT) bytes
+    */
+   bool write_offset_load;
+   bool write_offset_save;
+
+   /* load the write offset from write_offset_imm instead of the bo */
+   bool write_offset_imm_enable;
+   /* must be DWord-aligned */
+   uint32_t write_offset_imm;
+};
+
+struct ilo_state_sol_buffer {
+   /* 3DSTATE_SO_BUFFER payload (two DWords used on Gen7, four on Gen8) */
+   uint32_t so_buf[4];
+
+   /* whether the user must bind a data bo / a write offset bo */
+   bool need_bo;
+   bool need_write_offset_bo;
+
+   /* managed by users */
+   struct intel_bo *bo;
+   struct intel_bo *write_offset_bo;
+};
+
+static inline size_t
+ilo_state_sol_data_size(const struct ilo_dev *dev, uint8_t max_decl_count)
+{
+   const struct ilo_state_sol *so = NULL;
+
+   /* Gen6 packs no SO_DECL list and needs no external storage */
+   if (ilo_dev_gen(dev) < ILO_GEN(7))
+      return 0;
+
+   /* one two-DWord element per decl (sizeof on NULL is compile-time only) */
+   return sizeof(so->decl[0]) * max_decl_count;
+}
+
+bool
+ilo_state_sol_init(struct ilo_state_sol *sol,
+ const struct ilo_dev *dev,
+ const struct ilo_state_sol_info *info);
+
+bool
+ilo_state_sol_init_disabled(struct ilo_state_sol *sol,
+ const struct ilo_dev *dev,
+ bool render_disable);
+
+bool
+ilo_state_sol_buffer_init(struct ilo_state_sol_buffer *sb,
+ const struct ilo_dev *dev,
+ const struct ilo_state_sol_buffer_info *info);
+
+bool
+ilo_state_sol_buffer_init_disabled(struct ilo_state_sol_buffer *sb,
+ const struct ilo_dev *dev);
+
+#endif /* ILO_STATE_SOL_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_state_surface.c b/src/gallium/drivers/ilo/core/ilo_state_surface.c
new file mode 100644
index 00000000000..5be9f8f6270
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_surface.c
@@ -0,0 +1,1179 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#include "ilo_debug.h"
+#include "ilo_buffer.h"
+#include "ilo_image.h"
+#include "ilo_state_surface.h"
+
+static bool
+surface_set_gen6_null_SURFACE_STATE(struct ilo_state_surface *surf,
+                                    const struct ilo_dev *dev)
+{
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 71:
+    *
+    *     "All of the remaining fields in surface state are ignored for null
+    *      surfaces, with the following exceptions:
+    *
+    *      - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
+    *        depth buffer's corresponding state for all render target
+    *        surfaces, including null.
+    *      - Surface Format must be R8G8B8A8_UNORM."
+    *
+    * From the Sandy Bridge PRM, volume 4 part 1, page 82:
+    *
+    *     "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must
+    *      be true"
+    *
+    * Note that we ignore the first exception for all surface types.
+    */
+   STATIC_ASSERT(ARRAY_SIZE(surf->surface) >= 6);
+   surf->surface[0] = GEN6_SURFTYPE_NULL << GEN6_SURFACE_DW0_TYPE__SHIFT |
+                      GEN6_FORMAT_R8G8B8A8_UNORM << GEN6_SURFACE_DW0_FORMAT__SHIFT;
+   surf->surface[1] = 0;
+   surf->surface[2] = 0;
+   surf->surface[3] = GEN6_TILING_X << GEN6_SURFACE_DW3_TILING__SHIFT;
+   surf->surface[4] = 0;
+   surf->surface[5] = 0;
+
+   return true;
+}
+
+static bool
+surface_set_gen7_null_SURFACE_STATE(struct ilo_state_surface *surf,
+                                    const struct ilo_dev *dev)
+{
+   const bool is_gen8 = (ilo_dev_gen(dev) >= ILO_GEN(8));
+   uint32_t dw0;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   dw0 = GEN6_SURFTYPE_NULL << GEN7_SURFACE_DW0_TYPE__SHIFT |
+         GEN6_FORMAT_R8G8B8A8_UNORM << GEN7_SURFACE_DW0_FORMAT__SHIFT;
+   /* the tiling field moved within DW0 on Gen8 */
+   if (is_gen8)
+      dw0 |= GEN6_TILING_X << GEN8_SURFACE_DW0_TILING__SHIFT;
+   else
+      dw0 |= GEN6_TILING_X << GEN7_SURFACE_DW0_TILING__SHIFT;
+
+   STATIC_ASSERT(ARRAY_SIZE(surf->surface) >= 13);
+   surf->surface[0] = dw0;
+   /* clear the remaining DWords: 13 total on Gen8, 8 on Gen7 */
+   memset(&surf->surface[1], 0,
+          sizeof(uint32_t) * ((is_gen8 ? 13 : 8) - 1));
+
+   return true;
+}
+
+/**
+ * Validate a buffer surface: access mode, bound range, struct size, and
+ * the alignment rules imposed by the data port messages.
+ */
+static bool
+surface_validate_gen6_buffer(const struct ilo_dev *dev,
+                             const struct ilo_state_surface_buffer_info *info)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* SVB writes are Gen6-only */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7))
+      assert(info->access != ILO_STATE_SURFACE_ACCESS_DP_SVB);
+
+   if (info->offset + info->size > info->buf->bo_size) {
+      ilo_warn("invalid buffer range\n");
+      return false;
+   }
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 81:
+    *
+    *     "For surfaces of type SURFTYPE_BUFFER: [0,2047] -> [1B, 2048B]
+    *      For surfaces of type SURFTYPE_STRBUF: [0,2047] -> [1B, 2048B]"
+    */
+   if (!info->struct_size || info->struct_size > 2048) {
+      ilo_warn("invalid buffer struct size\n");
+      return false;
+   }
+
+   /*
+    * From the Ivy Bridge PRM, volume 4 part 1, page 68:
+    *
+    *     "The Base Address for linear render target surfaces and surfaces
+    *      accessed with the typed surface read/write data port messages must
+    *      be element-size aligned, for non-YUV surface formats, or a multiple
+    *      of 2 element-sizes for YUV surface formats.  Other linear surfaces
+    *      have no alignment requirements (byte alignment is sufficient)."
+    *
+    *     "Certain message types used to access surfaces have more stringent
+    *      alignment requirements.  Please refer to the specific message
+    *      documentation for additional restrictions."
+    *
+    * From the Ivy Bridge PRM, volume 4 part 1, page 233, 235, and 237:
+    *
+    *     "the surface base address must be OWord aligned"
+    *
+    * for OWord Block Read/Write, Unaligned OWord Block Read, and OWord Dual
+    * Block Read/Write.
+    *
+    * From the Ivy Bridge PRM, volume 4 part 1, page 246 and 249:
+    *
+    *     "The surface base address must be DWord aligned"
+    *
+    * for DWord Scattered Read/Write and Byte Scattered Read/Write.
+    *
+    * We have to rely on users to correctly set info->struct_size here.  DWord
+    * Scattered Read/Write has conflicting pitch and alignment, but we do not
+    * use them yet so we are fine.
+    *
+    * It is unclear if sampling engine surfaces require aligned offsets.
+    */
+   if (info->access != ILO_STATE_SURFACE_ACCESS_DP_SVB) {
+      /* the pitch must hold a whole number of elements */
+      assert(info->struct_size % info->format_size == 0);
+
+      if (info->offset % info->struct_size) {
+         ilo_warn("bad buffer offset\n");
+         return false;
+      }
+   }
+
+   if (info->format == GEN6_FORMAT_RAW) {
+      /*
+       * From the Sandy Bridge PRM, volume 4 part 1, page 97:
+       *
+       *     ""RAW" is supported only with buffers and structured buffers
+       *      accessed via the untyped surface read/write and untyped atomic
+       *      operation messages, which do not have a column in the table."
+       *
+       * We do not have a specific access mode for untyped messages.
+       */
+      assert(info->access == ILO_STATE_SURFACE_ACCESS_DP_UNTYPED);
+
+      /*
+       * Nothing is said about Untyped* messages, but I guess they require the
+       * base address to be DWord aligned.
+       */
+      if (info->offset % 4) {
+         ilo_warn("bad RAW buffer offset\n");
+         return false;
+      }
+
+      if (info->struct_size > 1) {
+         /* no STRBUF on Gen6 */
+         if (ilo_dev_gen(dev) == ILO_GEN(6)) {
+            ilo_warn("no STRBUF support\n");
+            return false;
+         }
+
+         /*
+          * From the Ivy Bridge PRM, volume 4 part 1, page 70:
+          *
+          *     "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the
+          *      pitch must be a multiple of 4 bytes."
+          */
+         if (info->struct_size % 4) {
+            ilo_warn("bad STRBUF pitch\n");
+            return false;
+         }
+      }
+   }
+
+   return true;
+}
+
+/**
+ * Compute the encoded (minus-one) struct count that SURFACE_STATE spreads
+ * across its Width/Height/Depth fields.  Returns false when the count is
+ * zero or exceeds the hardware maximum.
+ */
+static bool
+surface_get_gen6_buffer_struct_count(const struct ilo_dev *dev,
+                                     const struct ilo_state_surface_buffer_info *info,
+                                     uint32_t *count)
+{
+   uint32_t max_struct, c;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   c = info->size / info->struct_size;
+   /*
+    * For SVB writes, a trailing partial struct still counts when at least
+    * one complete element fits in the leftover bytes.  The comparison must
+    * be <= (not <): a remainder exactly equal to the element size holds a
+    * whole element and must not be dropped.
+    */
+   if (info->access == ILO_STATE_SURFACE_ACCESS_DP_SVB &&
+       info->format_size <= info->size - info->struct_size * c)
+      c++;
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 77:
+    *
+    *     "For buffer surfaces, the number of entries in the buffer ranges
+    *      from 1 to 2^27."
+    *
+    * From the Ivy Bridge PRM, volume 4 part 1, page 68:
+    *
+    *     "For typed buffer and structured buffer surfaces, the number of
+    *      entries in the buffer ranges from 1 to 2^27.  For raw buffer
+    *      surfaces, the number of entries in the buffer is the number of
+    *      bytes which can range from 1 to 2^30."
+    *
+    * From the Ivy Bridge PRM, volume 4 part 1, page 69:
+    *
+    *     For SURFTYPE_BUFFER: The low two bits of this field (Width) must be
+    *     11 if the Surface Format is RAW (the size of the buffer must be a
+    *     multiple of 4 bytes)."
+    */
+   max_struct = 1 << 27;
+   if (info->format == GEN6_FORMAT_RAW && info->struct_size == 1) {
+      if (ilo_dev_gen(dev) >= ILO_GEN(7))
+         max_struct = 1 << 30;
+
+      /* a RAW buffer size must be a multiple of 4 bytes */
+      c &= ~3;
+   }
+
+   if (!c || c > max_struct) {
+      ilo_warn("too many or zero buffer structs\n");
+      return false;
+   }
+
+   /* the hardware fields are encoded minus one */
+   *count = c - 1;
+
+   return true;
+}
+
+/**
+ * Pack Gen6 SURFACE_STATE for a SURFTYPE_BUFFER surface.  The zero-based
+ * struct count is split across the Width (bits 6:0), Height (19:7), and
+ * Depth (26:20) fields.
+ */
+static bool
+surface_set_gen6_buffer_SURFACE_STATE(struct ilo_state_surface *surf,
+                                      const struct ilo_dev *dev,
+                                      const struct ilo_state_surface_buffer_info *info)
+{
+   uint32_t dw0, dw1, dw2, dw3;
+   uint32_t struct_count;
+   int width, height, depth;
+
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   if (!surface_validate_gen6_buffer(dev, info) ||
+       !surface_get_gen6_buffer_struct_count(dev, info, &struct_count))
+      return false;
+
+   /* bits [6:0] */
+   width = (struct_count & 0x0000007f);
+   /* bits [19:7] */
+   height = (struct_count & 0x000fff80) >> 7;
+   /* bits [26:20] */
+   depth = (struct_count & 0x07f00000) >> 20;
+
+   dw0 = GEN6_SURFTYPE_BUFFER << GEN6_SURFACE_DW0_TYPE__SHIFT |
+         info->format << GEN6_SURFACE_DW0_FORMAT__SHIFT;
+   /* DW1 is the surface base address (byte offset into the bo) */
+   dw1 = info->offset;
+   dw2 = height << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
+         width << GEN6_SURFACE_DW2_WIDTH__SHIFT;
+   /* the pitch field is encoded minus one */
+   dw3 = depth << GEN6_SURFACE_DW3_DEPTH__SHIFT |
+         (info->struct_size - 1) << GEN6_SURFACE_DW3_PITCH__SHIFT;
+
+   STATIC_ASSERT(ARRAY_SIZE(surf->surface) >= 6);
+   surf->surface[0] = dw0;
+   surf->surface[1] = dw1;
+   surf->surface[2] = dw2;
+   surf->surface[3] = dw3;
+   surf->surface[4] = 0;
+   surf->surface[5] = 0;
+
+   surf->type = GEN6_SURFTYPE_BUFFER;
+   surf->min_lod = 0;
+   surf->mip_count = 0;
+
+   return true;
+}
+
+/**
+ * Pack Gen7/Gen8 SURFACE_STATE for a buffer surface.  RAW buffers with a
+ * struct size greater than one become SURFTYPE_STRBUF.  The struct count
+ * spans Width (bits 6:0), Height (20:7), and Depth (30:21).
+ */
+static bool
+surface_set_gen7_buffer_SURFACE_STATE(struct ilo_state_surface *surf,
+                                      const struct ilo_dev *dev,
+                                      const struct ilo_state_surface_buffer_info *info)
+{
+   uint32_t dw0, dw1, dw2, dw3, dw7;
+   enum gen_surface_type type;
+   uint32_t struct_count;
+   int width, height, depth;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   if (!surface_validate_gen6_buffer(dev, info) ||
+       !surface_get_gen6_buffer_struct_count(dev, info, &struct_count))
+      return false;
+
+   type = (info->format == GEN6_FORMAT_RAW && info->struct_size > 1) ?
+      GEN7_SURFTYPE_STRBUF : GEN6_SURFTYPE_BUFFER;
+
+   /* bits [6:0] */
+   width = (struct_count & 0x0000007f);
+   /* bits [20:7] */
+   height = (struct_count & 0x001fff80) >> 7;
+   /* bits [30:21] */
+   depth = (struct_count & 0x7fe00000) >> 21;
+
+   dw0 = type << GEN7_SURFACE_DW0_TYPE__SHIFT |
+         info->format << GEN7_SURFACE_DW0_FORMAT__SHIFT;
+   /* on Gen8 the base address moves to DW8; DW1 stays zero there */
+   dw1 = (ilo_dev_gen(dev) >= ILO_GEN(8)) ? 0 : info->offset;
+   dw2 = GEN_SHIFT32(height, GEN7_SURFACE_DW2_HEIGHT) |
+         GEN_SHIFT32(width, GEN7_SURFACE_DW2_WIDTH);
+   dw3 = GEN_SHIFT32(depth, GEN7_SURFACE_DW3_DEPTH) |
+         GEN_SHIFT32(info->struct_size - 1, GEN7_SURFACE_DW3_PITCH);
+
+   dw7 = 0;
+   /* Gen7.5+ shader channel selects: identity swizzle */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
+      dw7 |= GEN_SHIFT32(GEN75_SCS_RED, GEN75_SURFACE_DW7_SCS_R) |
+             GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) |
+             GEN_SHIFT32(GEN75_SCS_BLUE, GEN75_SURFACE_DW7_SCS_B) |
+             GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A);
+   }
+
+   STATIC_ASSERT(ARRAY_SIZE(surf->surface) >= 13);
+   surf->surface[0] = dw0;
+   surf->surface[1] = dw1;
+   surf->surface[2] = dw2;
+   surf->surface[3] = dw3;
+   surf->surface[4] = 0;
+   surf->surface[5] = 0;
+   surf->surface[6] = 0;
+   surf->surface[7] = dw7;
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+      surf->surface[8] = info->offset;
+      surf->surface[9] = 0;
+      surf->surface[10] = 0;
+      surf->surface[11] = 0;
+      surf->surface[12] = 0;
+   }
+
+   surf->type = type;
+   surf->min_lod = 0;
+   surf->mip_count = 0;
+
+   return true;
+}
+
+static enum gen_surface_type
+get_gen6_surface_type(const struct ilo_dev *dev, const struct ilo_image *img)
+{
+   enum gen_surface_type type;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* map a gallium texture target to a hardware surface type */
+   switch (img->target) {
+   case PIPE_TEXTURE_1D:
+   case PIPE_TEXTURE_1D_ARRAY:
+      type = GEN6_SURFTYPE_1D;
+      break;
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_RECT:
+   case PIPE_TEXTURE_2D_ARRAY:
+   case PIPE_TEXTURE_CUBE_ARRAY:
+      /* cube maps are 2D surfaces with six (or 6*n) slices */
+      type = GEN6_SURFTYPE_2D;
+      break;
+   case PIPE_TEXTURE_3D:
+      type = GEN6_SURFTYPE_3D;
+      break;
+   default:
+      assert(!"unknown texture target");
+      type = GEN6_SURFTYPE_NULL;
+      break;
+   }
+
+   return type;
+}
+
+/**
+ * Validate an image surface: access mode, pitch and width limits, cube
+ * squareness, and tiling support.
+ */
+static bool
+surface_validate_gen6_image(const struct ilo_dev *dev,
+                            const struct ilo_state_surface_image_info *info)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   switch (info->access) {
+   case ILO_STATE_SURFACE_ACCESS_SAMPLER:
+   case ILO_STATE_SURFACE_ACCESS_DP_RENDER:
+      break;
+   case ILO_STATE_SURFACE_ACCESS_DP_TYPED:
+      /* typed data port messages are Gen7+ */
+      assert(ilo_dev_gen(dev) >= ILO_GEN(7));
+      break;
+   default:
+      assert(!"unsupported surface access");
+      break;
+   }
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 78:
+    *
+    *     "For surface types other than SURFTYPE_BUFFER, the Width specified
+    *      by this field must be less than or equal to the surface pitch
+    *      (specified in bytes via the Surface Pitch field)."
+    */
+   assert(info->img->bo_stride && info->img->bo_stride <= 512 * 1024 &&
+          info->img->width0 <= info->img->bo_stride);
+
+   if (info->is_cube_map) {
+      assert(get_gen6_surface_type(dev, info->img) == GEN6_SURFTYPE_2D);
+
+      /*
+       * From the Sandy Bridge PRM, volume 4 part 1, page 78:
+       *
+       *     "For cube maps, Width must be set equal to the Height."
+       */
+      assert(info->img->width0 == info->img->height0);
+   }
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 72:
+    *
+    *     "Tile Walk TILEWALK_YMAJOR is UNDEFINED for render target formats
+    *      that have 128 bits-per-element (BPE)."
+    *
+    *     "If Number of Multisamples is set to a value other than
+    *      MULTISAMPLECOUNT_1, this field cannot be set to the following
+    *      formats:
+    *
+    *      - any format with greater than 64 bits per element
+    *      - any compressed texture format (BC*)
+    *      - any YCRCB* format"
+    *
+    * From the Ivy Bridge PRM, volume 4 part 1, page 63:
+    *
+    *     If Number of Multisamples is set to a value other than
+    *     MULTISAMPLECOUNT_1, this field cannot be set to the following
+    *     formats: any format with greater than 64 bits per element, if
+    *     Number of Multisamples is MULTISAMPLECOUNT_8, any compressed
+    *     texture format (BC*), and any YCRCB* format.
+    *
+    * TODO
+    */
+
+   if (ilo_dev_gen(dev) < ILO_GEN(8) && info->img->tiling == GEN8_TILING_W) {
+      ilo_warn("tiling W is not supported\n");
+      return false;
+   }
+
+   return true;
+}
+
+/* Compute the maximum width/height allowed for img's surface type. */
+static void
+get_gen6_max_extent(const struct ilo_dev *dev,
+ const struct ilo_image *img,
+ uint16_t *max_w, uint16_t *max_h)
+{
+ /* Gen7+ raises the 1D/2D surface size limit from 8192 to 16384 */
+ const uint16_t max_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 16384 : 8192;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ switch (get_gen6_surface_type(dev, img)) {
+ case GEN6_SURFTYPE_1D:
+ *max_w = max_size;
+ *max_h = 1;
+ break;
+ case GEN6_SURFTYPE_2D:
+ *max_w = max_size;
+ *max_h = max_size;
+ break;
+ case GEN6_SURFTYPE_3D:
+ /* 3D surfaces are limited to 2048 on all gens handled here */
+ *max_w = 2048;
+ *max_h = 2048;
+ break;
+ default:
+ assert(!"invalid surface type");
+ *max_w = 1;
+ *max_h = 1;
+ break;
+ }
+}
+
+/*
+ * Validate the image dimensions against the per-type limits and return the
+ * Width/Height field values (the hardware stores both minus one).
+ */
+static bool
+surface_get_gen6_image_extent(const struct ilo_dev *dev,
+ const struct ilo_state_surface_image_info *info,
+ uint16_t *width, uint16_t *height)
+{
+ uint16_t w, h, max_w, max_h;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ w = info->img->width0;
+ h = info->img->height0;
+
+ get_gen6_max_extent(dev, info->img, &max_w, &max_h);
+ assert(w && h && w <= max_w && h <= max_h);
+
+ /* the SURFACE_STATE fields hold (dimension - 1) */
+ *width = w - 1;
+ *height = h - 1;
+
+ return true;
+}
+
+/*
+ * Validate the slice range and compute the Depth, Minimum Array Element,
+ * and Render Target View Extent field values for SURFACE_STATE.
+ */
+static bool
+surface_get_gen6_image_slices(const struct ilo_dev *dev,
+ const struct ilo_state_surface_image_info *info,
+ uint16_t *depth, uint16_t *min_array_elem,
+ uint16_t *rt_view_extent)
+{
+ uint16_t max_slice, d;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /*
+ * From the Ivy Bridge PRM, volume 4 part 1, page 63:
+ *
+ * "If this field (Surface Array) is enabled, the Surface Type must be
+ * SURFTYPE_1D, SURFTYPE_2D, or SURFTYPE_CUBE. If this field is
+ * disabled and Surface Type is SURFTYPE_1D, SURFTYPE_2D, or
+ * SURFTYPE_CUBE, the Depth field must be set to zero."
+ *
+ * From the Ivy Bridge PRM, volume 4 part 1, page 69:
+ *
+ * "This field (Depth) specifies the total number of levels for a
+ * volume texture or the number of array elements allowed to be
+ * accessed starting at the Minimum Array Element for arrayed
+ * surfaces. If the volume texture is MIP-mapped, this field
+ * specifies the depth of the base MIP level."
+ *
+ * "For SURFTYPE_CUBE:For Sampling Engine Surfaces, the range of this
+ * field is [0,340], indicating the number of cube array elements
+ * (equal to the number of underlying 2D array elements divided by 6).
+ * For other surfaces, this field must be zero."
+ *
+ * "Errata: For SURFTYPE_CUBE sampling engine surfaces, the range of
+ * this field is limited to [0,85].
+ *
+ * Errata: If Surface Array is enabled, and Depth is between 1024 and
+ * 2047, an incorrect array slice may be accessed if the requested
+ * array index in the message is greater than or equal to 4096."
+ *
+ * The errata are Gen7-specific, and they limit the number of useable
+ * layers to (86 * 6), about 512.
+ */
+
+ switch (get_gen6_surface_type(dev, info->img)) {
+ case GEN6_SURFTYPE_1D:
+ case GEN6_SURFTYPE_2D:
+ max_slice = (ilo_dev_gen(dev) >= ILO_GEN(7.5)) ? 2048 : 512;
+
+ assert(info->img->array_size <= max_slice);
+ max_slice = info->img->array_size;
+
+ d = info->slice_count;
+ if (info->is_cube_map) {
+ /* the Depth field counts cube array elements: 6 2D slices each */
+ if (info->access == ILO_STATE_SURFACE_ACCESS_SAMPLER) {
+ if (!d || d % 6) {
+ ilo_warn("invalid cube slice count\n");
+ return false;
+ }
+
+ /* Gen7 errata above: at most 86 cube array elements */
+ if (ilo_dev_gen(dev) == ILO_GEN(7) && d > 86 * 6) {
+ ilo_warn("cube slice count exceeds Gen7 limit\n");
+ return false;
+ }
+ } else {
+ /*
+ * Minimum Array Element and Depth must be 0; Render Target View
+ * Extent is ignored.
+ */
+ if (info->slice_base || d != 6) {
+ ilo_warn("no cube RT array support in data port\n");
+ return false;
+ }
+ }
+
+ d /= 6;
+ }
+
+ if (!info->is_array && d > 1) {
+ ilo_warn("non-array surface with non-zero depth\n");
+ return false;
+ }
+ break;
+ case GEN6_SURFTYPE_3D:
+ max_slice = 2048;
+
+ assert(info->img->depth0 <= max_slice);
+ /* the accessible slice range shrinks with the base level of the view */
+ max_slice = u_minify(info->img->depth0, info->level_base);
+
+ /* per the PRM quote above, Depth holds the depth of the base level */
+ d = info->img->depth0;
+
+ if (info->is_array) {
+ ilo_warn("3D surfaces cannot be arrays\n");
+ return false;
+ }
+ break;
+ default:
+ assert(!"invalid surface type");
+ return false;
+ break;
+ }
+
+ if (!info->slice_count ||
+ info->slice_base + info->slice_count > max_slice) {
+ ilo_warn("invalid slice range\n");
+ return false;
+ }
+
+ assert(d);
+ *depth = d - 1;
+
+ /*
+ * From the Sandy Bridge PRM, volume 4 part 1, page 84:
+ *
+ * "For Sampling Engine and Render Target 1D and 2D Surfaces:
+ * This field (Minimum Array Element) indicates the minimum array
+ * element that can be accessed as part of this surface. This field
+ * is added to the delivered array index before it is used to address
+ * the surface.
+ *
+ * For Render Target 3D Surfaces:
+ * This field indicates the minimum `R' coordinate on the LOD
+ * currently being rendered to. This field is added to the delivered
+ * array index before it is used to address the surface.
+ *
+ * For Sampling Engine Cube Surfaces on [DevSNB+] only:
+ * This field indicates the minimum array element in the underlying 2D
+ * surface array that can be accessed as part of this surface (the
+ * cube array index is multipled by 6 to compute this value, although
+ * this field is not restricted to only multiples of 6). This field is
+ * added to the delivered array index before it is used to address the
+ * surface.
+ *
+ * For Other Surfaces:
+ * This field must be set to zero."
+ *
+ * On Gen7+, typed surfaces are treated like sampling engine 1D and 2D
+ * surfaces.
+ */
+ *min_array_elem = info->slice_base;
+
+ /*
+ * From the Sandy Bridge PRM, volume 4 part 1, page 84:
+ *
+ * "For Render Target 3D Surfaces:
+ * This field (Render Target View Extent) indicates the extent of the
+ * accessible `R' coordinates minus 1 on the LOD currently being
+ * rendered to.
+ *
+ * For Render Target 1D and 2D Surfaces:
+ * This field must be set to the same value as the Depth field.
+ *
+ * For Other Surfaces:
+ * This field is ignored."
+ */
+ *rt_view_extent = info->slice_count - 1;
+
+ return true;
+}
+
+/*
+ * Validate the level range and compute the Surface Min LOD and
+ * MIP Count / LOD field values for SURFACE_STATE.
+ */
+static bool
+surface_get_gen6_image_levels(const struct ilo_dev *dev,
+ const struct ilo_state_surface_image_info *info,
+ uint8_t *min_lod, uint8_t *mip_count)
+{
+ uint8_t max_level = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 15 : 14;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ assert(info->img->level_count <= max_level);
+ max_level = info->img->level_count;
+
+ if (!info->level_count ||
+ info->level_base + info->level_count > max_level) {
+ ilo_warn("invalid level range\n");
+ return false;
+ }
+
+ /*
+ * From the Sandy Bridge PRM, volume 4 part 1, page 79:
+ *
+ * "For Sampling Engine Surfaces:
+ * This field (MIP Count / LOD) indicates the number of MIP levels
+ * allowed to be accessed starting at Surface Min LOD, which must be
+ * less than or equal to the number of MIP levels actually stored in
+ * memory for this surface.
+ *
+ * Force the mip map access to be between the mipmap specified by the
+ * integer bits of the Min LOD and the ceiling of the value specified
+ * here.
+ *
+ * For Render Target Surfaces:
+ * This field defines the MIP level that is currently being rendered
+ * into. This is the absolute MIP level on the surface and is not
+ * relative to the Surface Min LOD field, which is ignored for render
+ * target surfaces.
+ *
+ * For Other Surfaces:
+ * This field is reserved : MBZ"
+ *
+ * From the Sandy Bridge PRM, volume 4 part 1, page 83:
+ *
+ * "For Sampling Engine Surfaces:
+ *
+ * This field (Surface Min LOD) indicates the most detailed LOD that
+ * can be accessed as part of this surface. This field is added to
+ * the delivered LOD (sample_l, ld, or resinfo message types) before
+ * it is used to address the surface.
+ *
+ * For Other Surfaces:
+ * This field is ignored."
+ *
+ * On Gen7+, typed surfaces are treated like sampling engine surfaces.
+ */
+ if (info->access == ILO_STATE_SURFACE_ACCESS_DP_RENDER) {
+ /* RTs render to a single absolute LOD; Surface Min LOD is ignored */
+ assert(info->level_count == 1);
+
+ *min_lod = 0;
+ *mip_count = info->level_base;
+ } else {
+ *min_lod = info->level_base;
+ *mip_count = info->level_count - 1;
+ }
+
+ return true;
+}
+
+/*
+ * Map the image sample count to a MULTISAMPLECOUNT enum and check that the
+ * count is supported by this GPU generation.
+ */
+static bool
+surface_get_gen6_image_sample_count(const struct ilo_dev *dev,
+ const struct ilo_state_surface_image_info *info,
+ enum gen_sample_count *sample_count)
+{
+ int min_gen;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ switch (info->img->sample_count) {
+ case 1:
+ *sample_count = GEN6_NUMSAMPLES_1;
+ min_gen = ILO_GEN(6);
+ break;
+ case 2:
+ /* 2x MSAA is new on Gen8 */
+ *sample_count = GEN8_NUMSAMPLES_2;
+ min_gen = ILO_GEN(8);
+ break;
+ case 4:
+ *sample_count = GEN6_NUMSAMPLES_4;
+ min_gen = ILO_GEN(6);
+ break;
+ case 8:
+ *sample_count = GEN7_NUMSAMPLES_8;
+ min_gen = ILO_GEN(7);
+ break;
+ case 16:
+ *sample_count = GEN8_NUMSAMPLES_16;
+ min_gen = ILO_GEN(8);
+ break;
+ default:
+ assert(!"invalid sample count");
+ /*
+ * also initialize min_gen: in NDEBUG builds the assert above is
+ * compiled out and the check below would otherwise read an
+ * uninitialized variable (undefined behavior)
+ */
+ *sample_count = GEN6_NUMSAMPLES_1;
+ min_gen = ILO_GEN(6);
+ break;
+ }
+
+ assert(ilo_dev_gen(dev) >= min_gen);
+
+ return true;
+}
+
+/*
+ * Translate the image's horizontal/vertical alignments (align_i/align_j)
+ * into the gen-specific HALIGN/VALIGN SURFACE_STATE bits.
+ */
+static bool
+surface_get_gen6_image_alignments(const struct ilo_dev *dev,
+ const struct ilo_state_surface_image_info *info,
+ uint32_t *alignments)
+{
+ uint32_t a = 0;
+ bool err = false;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+ /* Gen8 supports HALIGN/VALIGN of 4, 8, and 16 */
+ switch (info->img->align_i) {
+ case 4:
+ a |= GEN8_SURFACE_DW0_HALIGN_4;
+ break;
+ case 8:
+ a |= GEN8_SURFACE_DW0_HALIGN_8;
+ break;
+ case 16:
+ a |= GEN8_SURFACE_DW0_HALIGN_16;
+ break;
+ default:
+ err = true;
+ break;
+ }
+
+ switch (info->img->align_j) {
+ case 4:
+ a |= GEN7_SURFACE_DW0_VALIGN_4;
+ break;
+ case 8:
+ a |= GEN8_SURFACE_DW0_VALIGN_8;
+ break;
+ case 16:
+ a |= GEN8_SURFACE_DW0_VALIGN_16;
+ break;
+ default:
+ err = true;
+ break;
+ }
+ } else if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+ /* Gen7/7.5: HALIGN of 4 or 8, VALIGN of 2 or 4 */
+ switch (info->img->align_i) {
+ case 4:
+ a |= GEN7_SURFACE_DW0_HALIGN_4;
+ break;
+ case 8:
+ a |= GEN7_SURFACE_DW0_HALIGN_8;
+ break;
+ default:
+ err = true;
+ break;
+ }
+
+ switch (info->img->align_j) {
+ case 2:
+ a |= GEN7_SURFACE_DW0_VALIGN_2;
+ break;
+ case 4:
+ a |= GEN7_SURFACE_DW0_VALIGN_4;
+ break;
+ default:
+ err = true;
+ break;
+ }
+ } else {
+ /* Gen6 has no HALIGN field; only HALIGN_4 layouts are expressible */
+ if (info->img->align_i != 4)
+ err = true;
+
+ switch (info->img->align_j) {
+ case 2:
+ a |= GEN6_SURFACE_DW5_VALIGN_2;
+ break;
+ case 4:
+ a |= GEN6_SURFACE_DW5_VALIGN_4;
+ break;
+ default:
+ err = true;
+ break;
+ }
+ }
+
+ if (err)
+ assert(!"invalid HALIGN or VALIGN");
+
+ *alignments = a;
+
+ return true;
+}
+
+/*
+ * Fill in the Gen6 SURFACE_STATE (6 dwords) for an image surface.
+ * Returns false when the image/view combination is invalid.
+ */
+static bool
+surface_set_gen6_image_SURFACE_STATE(struct ilo_state_surface *surf,
+ const struct ilo_dev *dev,
+ const struct ilo_state_surface_image_info *info)
+{
+ uint16_t width, height, depth, array_base, view_extent;
+ uint8_t min_lod, mip_count;
+ enum gen_sample_count sample_count;
+ uint32_t alignments;
+ enum gen_surface_type type;
+ uint32_t dw0, dw2, dw3, dw4, dw5;
+
+ ILO_DEV_ASSERT(dev, 6, 6);
+
+ /* validate the view and derive every field value up front */
+ if (!surface_validate_gen6_image(dev, info) ||
+ !surface_get_gen6_image_extent(dev, info, &width, &height) ||
+ !surface_get_gen6_image_slices(dev, info, &depth, &array_base,
+ &view_extent) ||
+ !surface_get_gen6_image_levels(dev, info, &min_lod, &mip_count) ||
+ !surface_get_gen6_image_sample_count(dev, info, &sample_count) ||
+ !surface_get_gen6_image_alignments(dev, info, &alignments))
+ return false;
+
+ /* no ARYSPC_LOD0 */
+ assert(info->img->walk != ILO_IMAGE_WALK_LOD);
+ /* no UMS/CMS */
+ if (info->img->sample_count > 1)
+ assert(info->img->interleaved_samples);
+
+ /* cube maps are laid out as 2D but reported as SURFTYPE_CUBE */
+ type = (info->is_cube_map) ? GEN6_SURFTYPE_CUBE :
+ get_gen6_surface_type(dev, info->img);
+
+ dw0 = type << GEN6_SURFACE_DW0_TYPE__SHIFT |
+ info->format << GEN6_SURFACE_DW0_FORMAT__SHIFT |
+ GEN6_SURFACE_DW0_MIPLAYOUT_BELOW;
+
+ /*
+ * From the Sandy Bridge PRM, volume 4 part 1, page 74:
+ *
+ * "CUBE_AVERAGE may only be selected if all of the Cube Face Enable
+ * fields are equal to one."
+ *
+ * From the Sandy Bridge PRM, volume 4 part 1, page 75-76:
+ *
+ * "For SURFTYPE_CUBE Surfaces accessed via the Sampling Engine:
+ * Bits 5:0 of this field (Cube Face Enables) enable the individual
+ * faces of a cube map. Enabling a face indicates that the face is
+ * present in the cube map, while disabling it indicates that that
+ * face is represented by the texture map's border color. Refer to
+ * Memory Data Formats for the correlation between faces and the cube
+ * map memory layout. Note that storage for disabled faces must be
+ * provided.
+ *
+ * For other surfaces:
+ * This field is reserved : MBZ"
+ *
+ * "When TEXCOORDMODE_CLAMP is used when accessing a cube map, this
+ * field must be programmed to 111111b (all faces enabled)."
+ */
+ if (info->is_cube_map &&
+ info->access == ILO_STATE_SURFACE_ACCESS_SAMPLER) {
+ dw0 |= GEN6_SURFACE_DW0_CUBE_MAP_CORNER_MODE_AVERAGE |
+ GEN6_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;
+ }
+
+ dw2 = height << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
+ width << GEN6_SURFACE_DW2_WIDTH__SHIFT |
+ mip_count << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT;
+
+ /* the Pitch field holds (bo_stride - 1) */
+ dw3 = depth << GEN6_SURFACE_DW3_DEPTH__SHIFT |
+ (info->img->bo_stride - 1) << GEN6_SURFACE_DW3_PITCH__SHIFT |
+ info->img->tiling << GEN6_SURFACE_DW3_TILING__SHIFT;
+
+ dw4 = min_lod << GEN6_SURFACE_DW4_MIN_LOD__SHIFT |
+ array_base << GEN6_SURFACE_DW4_MIN_ARRAY_ELEMENT__SHIFT |
+ view_extent << GEN6_SURFACE_DW4_RT_VIEW_EXTENT__SHIFT |
+ sample_count << GEN6_SURFACE_DW4_MULTISAMPLECOUNT__SHIFT;
+
+ dw5 = alignments;
+
+ STATIC_ASSERT(ARRAY_SIZE(surf->surface) >= 6);
+ surf->surface[0] = dw0;
+ surf->surface[1] = 0;
+ surf->surface[2] = dw2;
+ surf->surface[3] = dw3;
+ surf->surface[4] = dw4;
+ surf->surface[5] = dw5;
+
+ surf->type = type;
+ surf->min_lod = min_lod;
+ surf->mip_count = mip_count;
+
+ return true;
+}
+
+/*
+ * Fill in the Gen7/Gen7.5/Gen8 SURFACE_STATE for an image surface.
+ * Returns false when the image/view combination is invalid.
+ */
+static bool
+surface_set_gen7_image_SURFACE_STATE(struct ilo_state_surface *surf,
+ const struct ilo_dev *dev,
+ const struct ilo_state_surface_image_info *info)
+{
+ uint16_t width, height, depth, array_base, view_extent;
+ uint8_t min_lod, mip_count;
+ uint32_t alignments;
+ enum gen_sample_count sample_count;
+ enum gen_surface_type type;
+ uint32_t dw0, dw1, dw2, dw3, dw4, dw5, dw7;
+
+ ILO_DEV_ASSERT(dev, 7, 8);
+
+ /* validate the view and derive every field value up front */
+ if (!surface_validate_gen6_image(dev, info) ||
+ !surface_get_gen6_image_extent(dev, info, &width, &height) ||
+ !surface_get_gen6_image_slices(dev, info, &depth, &array_base,
+ &view_extent) ||
+ !surface_get_gen6_image_levels(dev, info, &min_lod, &mip_count) ||
+ !surface_get_gen6_image_sample_count(dev, info, &sample_count) ||
+ !surface_get_gen6_image_alignments(dev, info, &alignments))
+ return false;
+
+ /* cube maps are laid out as 2D but reported as SURFTYPE_CUBE */
+ type = (info->is_cube_map) ? GEN6_SURFTYPE_CUBE :
+ get_gen6_surface_type(dev, info->img);
+
+ dw0 = type << GEN7_SURFACE_DW0_TYPE__SHIFT |
+ info->format << GEN7_SURFACE_DW0_FORMAT__SHIFT |
+ alignments;
+
+ if (info->is_array)
+ dw0 |= GEN7_SURFACE_DW0_IS_ARRAY;
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+ dw0 |= info->img->tiling << GEN8_SURFACE_DW0_TILING__SHIFT;
+ } else {
+ dw0 |= info->img->tiling << GEN7_SURFACE_DW0_TILING__SHIFT;
+
+ /* Gen7 can express the LOD-major array layout; Gen8 cannot */
+ if (info->img->walk == ILO_IMAGE_WALK_LOD)
+ dw0 |= GEN7_SURFACE_DW0_ARYSPC_LOD0;
+ else
+ dw0 |= GEN7_SURFACE_DW0_ARYSPC_FULL;
+ }
+
+ /*
+ * From the Ivy Bridge PRM, volume 4 part 1, page 67:
+ *
+ * "For SURFTYPE_CUBE Surfaces accessed via the Sampling Engine: Bits
+ * 5:0 of this field (Cube Face Enables) enable the individual faces
+ * of a cube map. Enabling a face indicates that the face is present
+ * in the cube map, while disabling it indicates that that face is
+ * represented by the texture map's border color. Refer to Memory Data
+ * Formats for the correlation between faces and the cube map memory
+ * layout. Note that storage for disabled faces must be provided. For
+ * other surfaces this field is reserved and MBZ."
+ *
+ * "When TEXCOORDMODE_CLAMP is used when accessing a cube map, this
+ * field must be programmed to 111111b (all faces enabled). This field
+ * is ignored unless the Surface Type is SURFTYPE_CUBE."
+ */
+ if (info->is_cube_map &&
+ info->access == ILO_STATE_SURFACE_ACCESS_SAMPLER)
+ dw0 |= GEN7_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;
+
+ dw1 = 0;
+ if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+ /* the QPitch field is in units of 4 rows */
+ assert(info->img->walk_layer_height % 4 == 0);
+ dw1 |= info->img->walk_layer_height / 4 <<
+ GEN8_SURFACE_DW1_QPITCH__SHIFT;
+ }
+
+ dw2 = height << GEN7_SURFACE_DW2_HEIGHT__SHIFT |
+ width << GEN7_SURFACE_DW2_WIDTH__SHIFT;
+
+ /* the Pitch field holds (bo_stride - 1) */
+ dw3 = depth << GEN7_SURFACE_DW3_DEPTH__SHIFT |
+ (info->img->bo_stride - 1) << GEN7_SURFACE_DW3_PITCH__SHIFT;
+
+ if (ilo_dev_gen(dev) == ILO_GEN(7.5))
+ dw3 |= 0 << GEN75_SURFACE_DW3_INTEGER_SURFACE_FORMAT__SHIFT;
+
+ dw4 = array_base << GEN7_SURFACE_DW4_MIN_ARRAY_ELEMENT__SHIFT |
+ view_extent << GEN7_SURFACE_DW4_RT_VIEW_EXTENT__SHIFT |
+ sample_count << GEN7_SURFACE_DW4_MULTISAMPLECOUNT__SHIFT;
+
+ /*
+ * MSFMT_MSS means the samples are not interleaved and MSFMT_DEPTH_STENCIL
+ * means the samples are interleaved. The layouts are the same when the
+ * number of samples is 1.
+ */
+ if (info->img->interleaved_samples && info->img->sample_count > 1) {
+ assert(info->access != ILO_STATE_SURFACE_ACCESS_DP_RENDER);
+ dw4 |= GEN7_SURFACE_DW4_MSFMT_DEPTH_STENCIL;
+ } else {
+ dw4 |= GEN7_SURFACE_DW4_MSFMT_MSS;
+ }
+
+ dw5 = min_lod << GEN7_SURFACE_DW5_MIN_LOD__SHIFT |
+ mip_count << GEN7_SURFACE_DW5_MIP_COUNT_LOD__SHIFT;
+
+ dw7 = 0;
+ if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
+ /* identity channel selects; ilo_state_surface_set_scs() can override */
+ dw7 |= GEN_SHIFT32(GEN75_SCS_RED, GEN75_SURFACE_DW7_SCS_R) |
+ GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) |
+ GEN_SHIFT32(GEN75_SCS_BLUE, GEN75_SURFACE_DW7_SCS_B) |
+ GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A);
+ }
+
+ STATIC_ASSERT(ARRAY_SIZE(surf->surface) >= 13);
+ surf->surface[0] = dw0;
+ surf->surface[1] = dw1;
+ surf->surface[2] = dw2;
+ surf->surface[3] = dw3;
+ surf->surface[4] = dw4;
+ surf->surface[5] = dw5;
+ surf->surface[6] = 0;
+ surf->surface[7] = dw7;
+ if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+ surf->surface[8] = 0;
+ surf->surface[9] = 0;
+ surf->surface[10] = 0;
+ surf->surface[11] = 0;
+ surf->surface[12] = 0;
+ }
+
+ surf->type = type;
+ surf->min_lod = min_lod;
+ surf->mip_count = mip_count;
+
+ return true;
+}
+
+/* Initialize surf as a null surface.  surf must be zero-initialized. */
+bool
+ilo_state_surface_init_for_null(struct ilo_state_surface *surf,
+ const struct ilo_dev *dev)
+{
+ bool ret = true;
+
+ assert(ilo_is_zeroed(surf, sizeof(*surf)));
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(7))
+ ret &= surface_set_gen7_null_SURFACE_STATE(surf, dev);
+ else
+ ret &= surface_set_gen6_null_SURFACE_STATE(surf, dev);
+
+ surf->type = GEN6_SURFTYPE_NULL;
+ surf->readonly = true;
+
+ assert(ret);
+
+ return ret;
+}
+
+/* Initialize surf for a buffer view.  surf must be zero-initialized. */
+bool
+ilo_state_surface_init_for_buffer(struct ilo_state_surface *surf,
+ const struct ilo_dev *dev,
+ const struct ilo_state_surface_buffer_info *info)
+{
+ bool ret = true;
+
+ assert(ilo_is_zeroed(surf, sizeof(*surf)));
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(7))
+ ret &= surface_set_gen7_buffer_SURFACE_STATE(surf, dev, info);
+ else
+ ret &= surface_set_gen6_buffer_SURFACE_STATE(surf, dev, info);
+
+ surf->readonly = info->readonly;
+
+ assert(ret);
+
+ return ret;
+}
+
+/* Initialize surf for an image view.  surf must be zero-initialized. */
+bool
+ilo_state_surface_init_for_image(struct ilo_state_surface *surf,
+ const struct ilo_dev *dev,
+ const struct ilo_state_surface_image_info *info)
+{
+ bool ret = true;
+
+ assert(ilo_is_zeroed(surf, sizeof(*surf)));
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(7))
+ ret &= surface_set_gen7_image_SURFACE_STATE(surf, dev, info);
+ else
+ ret &= surface_set_gen6_image_SURFACE_STATE(surf, dev, info);
+
+ surf->is_integer = info->is_integer;
+ surf->readonly = info->readonly;
+ surf->scanout = info->img->scanout;
+
+ assert(ret);
+
+ return ret;
+}
+
+/*
+ * Replace the shader channel selects of an already-initialized surface.
+ * Gen7.5+ only, since the SCS fields live in DW7 of that layout.
+ */
+bool
+ilo_state_surface_set_scs(struct ilo_state_surface *surf,
+ const struct ilo_dev *dev,
+ enum gen_surface_scs rgba[4])
+{
+ const uint32_t scs = GEN_SHIFT32(rgba[0], GEN75_SURFACE_DW7_SCS_R) |
+ GEN_SHIFT32(rgba[1], GEN75_SURFACE_DW7_SCS_G) |
+ GEN_SHIFT32(rgba[2], GEN75_SURFACE_DW7_SCS_B) |
+ GEN_SHIFT32(rgba[3], GEN75_SURFACE_DW7_SCS_A);
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ assert(ilo_dev_gen(dev) >= ILO_GEN(7.5));
+
+ /* clear the previous selects before or-ing in the new ones */
+ surf->surface[7] = (surf->surface[7] & ~GEN75_SURFACE_DW7_SCS__MASK) | scs;
+
+ return true;
+}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_surface.h b/src/gallium/drivers/ilo/core/ilo_state_surface.h
new file mode 100644
index 00000000000..9c025428d50
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_surface.h
@@ -0,0 +1,121 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#ifndef ILO_STATE_SURFACE_H
+#define ILO_STATE_SURFACE_H
+
+#include "genhw/genhw.h"
+#include "intel_winsys.h"
+
+#include "ilo_core.h"
+#include "ilo_dev.h"
+
+struct ilo_buffer;
+struct ilo_image;
+
+/* How the surface will be accessed; affects validation and field layout. */
+enum ilo_state_surface_access {
+ ILO_STATE_SURFACE_ACCESS_SAMPLER, /* sampling engine surfaces */
+ ILO_STATE_SURFACE_ACCESS_DP_RENDER, /* render target surfaces */
+ ILO_STATE_SURFACE_ACCESS_DP_TYPED, /* typed surfaces */
+ ILO_STATE_SURFACE_ACCESS_DP_UNTYPED, /* untyped surfaces */
+ ILO_STATE_SURFACE_ACCESS_DP_DATA, /* data-port data cache? TODO confirm */
+ ILO_STATE_SURFACE_ACCESS_DP_SVB, /* streamed vertex buffer? TODO confirm */
+};
+
+/* Parameters for initializing a buffer SURFACE_STATE. */
+struct ilo_state_surface_buffer_info {
+ const struct ilo_buffer *buf;
+
+ enum ilo_state_surface_access access;
+
+ enum gen_surface_format format;
+ /* bytes per format element - presumably; confirm against callers */
+ uint8_t format_size;
+
+ bool readonly;
+ /* byte stride of one structure - presumably; confirm against callers */
+ uint16_t struct_size;
+
+ /* view range in bytes into buf */
+ uint32_t offset;
+ uint32_t size;
+};
+
+/* Parameters for initializing an image SURFACE_STATE. */
+struct ilo_state_surface_image_info {
+ const struct ilo_image *img;
+
+ enum ilo_state_surface_access access;
+
+ enum gen_surface_format format;
+ /* recorded on the resulting surface for later queries */
+ bool is_integer;
+
+ bool readonly;
+ bool is_cube_map;
+ /* sets the Surface Array bit on Gen7+ */
+ bool is_array;
+
+ /* mip level and slice ranges of the view */
+ uint8_t level_base;
+ uint8_t level_count;
+ uint16_t slice_base;
+ uint16_t slice_count;
+};
+
+/* A constructed SURFACE_STATE plus metadata queried by users. */
+struct ilo_state_surface {
+ /* raw dwords; 6 used on Gen6, 8 on Gen7/7.5, 13 on Gen8 */
+ uint32_t surface[13];
+
+ enum gen_surface_type type;
+ uint8_t min_lod;
+ uint8_t mip_count;
+ bool is_integer;
+
+ bool readonly;
+ bool scanout;
+
+ /* managed by users */
+ struct intel_bo *bo;
+};
+
+/* Check that format is legal for the given access method on dev. */
+bool
+ilo_state_surface_valid_format(const struct ilo_dev *dev,
+ enum ilo_state_surface_access access,
+ enum gen_surface_format format);
+
+/* Initialize surf as a null surface. */
+bool
+ilo_state_surface_init_for_null(struct ilo_state_surface *surf,
+ const struct ilo_dev *dev);
+
+/* Initialize surf for a buffer view described by info. */
+bool
+ilo_state_surface_init_for_buffer(struct ilo_state_surface *surf,
+ const struct ilo_dev *dev,
+ const struct ilo_state_surface_buffer_info *info);
+
+/* Initialize surf for an image view described by info. */
+bool
+ilo_state_surface_init_for_image(struct ilo_state_surface *surf,
+ const struct ilo_dev *dev,
+ const struct ilo_state_surface_image_info *info);
+
+/* Replace the shader channel selects of surf (Gen7.5+ only). */
+bool
+ilo_state_surface_set_scs(struct ilo_state_surface *surf,
+ const struct ilo_dev *dev,
+ enum gen_surface_scs rgba[4]);
+
+#endif /* ILO_STATE_SURFACE_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_state_surface_format.c b/src/gallium/drivers/ilo/core/ilo_state_surface_format.c
new file mode 100644
index 00000000000..a40c1b84d17
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_surface_format.c
@@ -0,0 +1,351 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2013 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#include "genhw/genhw.h"
+#include "ilo_state_surface.h"
+
+/*
+ * Return true if "format" can be sampled from on the current gen.  "access"
+ * is unused here; the caller dispatches to this function only for
+ * ILO_STATE_SURFACE_ACCESS_SAMPLER.
+ */
+static bool
+surface_valid_sampler_format(const struct ilo_dev *dev,
+ enum ilo_state_surface_access access,
+ enum gen_surface_format format)
+{
+ /*
+ * This table is based on:
+ *
+ * - the Sandy Bridge PRM, volume 4 part 1, page 88-97
+ * - the Ivy Bridge PRM, volume 4 part 1, page 84-87
+ */
+ /*
+ * Each column stores the first gen supporting the capability, in ILO_GEN()
+ * fixed-point form (e.g. 4.5); zero means never supported.  Only the
+ * "sampling" column is consulted below; the others are kept for reference.
+ */
+ static const struct sampler_cap {
+ int sampling;
+ int filtering;
+ int shadow_map;
+ int chroma_key;
+ } caps[] = {
+#define CAP(sampling, filtering, shadow_map, chroma_key) \
+ { ILO_GEN(sampling), ILO_GEN(filtering), ILO_GEN(shadow_map), ILO_GEN(chroma_key) }
+ [GEN6_FORMAT_R32G32B32A32_FLOAT] = CAP( 1, 5, 0, 0),
+ [GEN6_FORMAT_R32G32B32A32_SINT] = CAP( 1, 0, 0, 0),
+ [GEN6_FORMAT_R32G32B32A32_UINT] = CAP( 1, 0, 0, 0),
+ [GEN6_FORMAT_R32G32B32X32_FLOAT] = CAP( 1, 5, 0, 0),
+ [GEN6_FORMAT_R32G32B32_FLOAT] = CAP( 1, 5, 0, 0),
+ [GEN6_FORMAT_R32G32B32_SINT] = CAP( 1, 0, 0, 0),
+ [GEN6_FORMAT_R32G32B32_UINT] = CAP( 1, 0, 0, 0),
+ [GEN6_FORMAT_R16G16B16A16_UNORM] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_R16G16B16A16_SNORM] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_R16G16B16A16_SINT] = CAP( 1, 0, 0, 0),
+ [GEN6_FORMAT_R16G16B16A16_UINT] = CAP( 1, 0, 0, 0),
+ [GEN6_FORMAT_R16G16B16A16_FLOAT] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_R32G32_FLOAT] = CAP( 1, 5, 0, 0),
+ [GEN6_FORMAT_R32G32_SINT] = CAP( 1, 0, 0, 0),
+ [GEN6_FORMAT_R32G32_UINT] = CAP( 1, 0, 0, 0),
+ [GEN6_FORMAT_R32_FLOAT_X8X24_TYPELESS] = CAP( 1, 5, 1, 0),
+ [GEN6_FORMAT_X32_TYPELESS_G8X24_UINT] = CAP( 1, 0, 0, 0),
+ [GEN6_FORMAT_L32A32_FLOAT] = CAP( 1, 5, 0, 0),
+ [GEN6_FORMAT_R16G16B16X16_UNORM] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_R16G16B16X16_FLOAT] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_A32X32_FLOAT] = CAP( 1, 5, 0, 0),
+ [GEN6_FORMAT_L32X32_FLOAT] = CAP( 1, 5, 0, 0),
+ [GEN6_FORMAT_I32X32_FLOAT] = CAP( 1, 5, 0, 0),
+ [GEN6_FORMAT_B8G8R8A8_UNORM] = CAP( 1, 1, 0, 1),
+ [GEN6_FORMAT_B8G8R8A8_UNORM_SRGB] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_R10G10B10A2_UNORM] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_R10G10B10A2_UNORM_SRGB] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_R10G10B10A2_UINT] = CAP( 1, 0, 0, 0),
+ [GEN6_FORMAT_R10G10B10_SNORM_A2_UNORM] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_R8G8B8A8_UNORM] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_R8G8B8A8_UNORM_SRGB] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_R8G8B8A8_SNORM] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_R8G8B8A8_SINT] = CAP( 1, 0, 0, 0),
+ [GEN6_FORMAT_R8G8B8A8_UINT] = CAP( 1, 0, 0, 0),
+ [GEN6_FORMAT_R16G16_UNORM] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_R16G16_SNORM] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_R16G16_SINT] = CAP( 1, 0, 0, 0),
+ [GEN6_FORMAT_R16G16_UINT] = CAP( 1, 0, 0, 0),
+ [GEN6_FORMAT_R16G16_FLOAT] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_B10G10R10A2_UNORM] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_B10G10R10A2_UNORM_SRGB] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_R11G11B10_FLOAT] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_R32_SINT] = CAP( 1, 0, 0, 0),
+ [GEN6_FORMAT_R32_UINT] = CAP( 1, 0, 0, 0),
+ [GEN6_FORMAT_R32_FLOAT] = CAP( 1, 5, 1, 0),
+ [GEN6_FORMAT_R24_UNORM_X8_TYPELESS] = CAP( 1, 5, 1, 0),
+ [GEN6_FORMAT_X24_TYPELESS_G8_UINT] = CAP( 1, 0, 0, 0),
+ [GEN6_FORMAT_L16A16_UNORM] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_I24X8_UNORM] = CAP( 1, 5, 1, 0),
+ [GEN6_FORMAT_L24X8_UNORM] = CAP( 1, 5, 1, 0),
+ [GEN6_FORMAT_A24X8_UNORM] = CAP( 1, 5, 1, 0),
+ [GEN6_FORMAT_I32_FLOAT] = CAP( 1, 5, 1, 0),
+ [GEN6_FORMAT_L32_FLOAT] = CAP( 1, 5, 1, 0),
+ [GEN6_FORMAT_A32_FLOAT] = CAP( 1, 5, 1, 0),
+ [GEN6_FORMAT_B8G8R8X8_UNORM] = CAP( 1, 1, 0, 1),
+ [GEN6_FORMAT_B8G8R8X8_UNORM_SRGB] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_R8G8B8X8_UNORM] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_R8G8B8X8_UNORM_SRGB] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_R9G9B9E5_SHAREDEXP] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_B10G10R10X2_UNORM] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_L16A16_FLOAT] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_B5G6R5_UNORM] = CAP( 1, 1, 0, 1),
+ [GEN6_FORMAT_B5G6R5_UNORM_SRGB] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_B5G5R5A1_UNORM] = CAP( 1, 1, 0, 1),
+ [GEN6_FORMAT_B5G5R5A1_UNORM_SRGB] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_B4G4R4A4_UNORM] = CAP( 1, 1, 0, 1),
+ [GEN6_FORMAT_B4G4R4A4_UNORM_SRGB] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_R8G8_UNORM] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_R8G8_SNORM] = CAP( 1, 1, 0, 1),
+ [GEN6_FORMAT_R8G8_SINT] = CAP( 1, 0, 0, 0),
+ [GEN6_FORMAT_R8G8_UINT] = CAP( 1, 0, 0, 0),
+ [GEN6_FORMAT_R16_UNORM] = CAP( 1, 1, 1, 0),
+ [GEN6_FORMAT_R16_SNORM] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_R16_SINT] = CAP( 1, 0, 0, 0),
+ [GEN6_FORMAT_R16_UINT] = CAP( 1, 0, 0, 0),
+ [GEN6_FORMAT_R16_FLOAT] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_A8P8_UNORM_PALETTE0] = CAP( 5, 5, 0, 0),
+ [GEN6_FORMAT_A8P8_UNORM_PALETTE1] = CAP( 5, 5, 0, 0),
+ [GEN6_FORMAT_I16_UNORM] = CAP( 1, 1, 1, 0),
+ [GEN6_FORMAT_L16_UNORM] = CAP( 1, 1, 1, 0),
+ [GEN6_FORMAT_A16_UNORM] = CAP( 1, 1, 1, 0),
+ [GEN6_FORMAT_L8A8_UNORM] = CAP( 1, 1, 0, 1),
+ [GEN6_FORMAT_I16_FLOAT] = CAP( 1, 1, 1, 0),
+ [GEN6_FORMAT_L16_FLOAT] = CAP( 1, 1, 1, 0),
+ [GEN6_FORMAT_A16_FLOAT] = CAP( 1, 1, 1, 0),
+ [GEN6_FORMAT_L8A8_UNORM_SRGB] = CAP(4.5, 4.5, 0, 0),
+ [GEN6_FORMAT_R5G5_SNORM_B6_UNORM] = CAP( 1, 1, 0, 1),
+ [GEN6_FORMAT_P8A8_UNORM_PALETTE0] = CAP( 5, 5, 0, 0),
+ [GEN6_FORMAT_P8A8_UNORM_PALETTE1] = CAP( 5, 5, 0, 0),
+ [GEN6_FORMAT_R8_UNORM] = CAP( 1, 1, 0, 4.5),
+ [GEN6_FORMAT_R8_SNORM] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_R8_SINT] = CAP( 1, 0, 0, 0),
+ [GEN6_FORMAT_R8_UINT] = CAP( 1, 0, 0, 0),
+ [GEN6_FORMAT_A8_UNORM] = CAP( 1, 1, 0, 1),
+ [GEN6_FORMAT_I8_UNORM] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_L8_UNORM] = CAP( 1, 1, 0, 1),
+ [GEN6_FORMAT_P4A4_UNORM_PALETTE0] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_A4P4_UNORM_PALETTE0] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_P8_UNORM_PALETTE0] = CAP(4.5, 4.5, 0, 0),
+ [GEN6_FORMAT_L8_UNORM_SRGB] = CAP(4.5, 4.5, 0, 0),
+ [GEN6_FORMAT_P8_UNORM_PALETTE1] = CAP(4.5, 4.5, 0, 0),
+ [GEN6_FORMAT_P4A4_UNORM_PALETTE1] = CAP(4.5, 4.5, 0, 0),
+ [GEN6_FORMAT_A4P4_UNORM_PALETTE1] = CAP(4.5, 4.5, 0, 0),
+ [GEN6_FORMAT_DXT1_RGB_SRGB] = CAP(4.5, 4.5, 0, 0),
+ [GEN6_FORMAT_R1_UNORM] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_YCRCB_NORMAL] = CAP( 1, 1, 0, 1),
+ [GEN6_FORMAT_YCRCB_SWAPUVY] = CAP( 1, 1, 0, 1),
+ [GEN6_FORMAT_P2_UNORM_PALETTE0] = CAP(4.5, 4.5, 0, 0),
+ [GEN6_FORMAT_P2_UNORM_PALETTE1] = CAP(4.5, 4.5, 0, 0),
+ [GEN6_FORMAT_BC1_UNORM] = CAP( 1, 1, 0, 1),
+ [GEN6_FORMAT_BC2_UNORM] = CAP( 1, 1, 0, 1),
+ [GEN6_FORMAT_BC3_UNORM] = CAP( 1, 1, 0, 1),
+ [GEN6_FORMAT_BC4_UNORM] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_BC5_UNORM] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_BC1_UNORM_SRGB] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_BC2_UNORM_SRGB] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_BC3_UNORM_SRGB] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_MONO8] = CAP( 1, 0, 0, 0),
+ [GEN6_FORMAT_YCRCB_SWAPUV] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_YCRCB_SWAPY] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_DXT1_RGB] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_FXT1] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_BC4_SNORM] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_BC5_SNORM] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_R16G16B16_FLOAT] = CAP( 5, 5, 0, 0),
+ [GEN6_FORMAT_BC6H_SF16] = CAP( 7, 7, 0, 0),
+ [GEN6_FORMAT_BC7_UNORM] = CAP( 7, 7, 0, 0),
+ [GEN6_FORMAT_BC7_UNORM_SRGB] = CAP( 7, 7, 0, 0),
+ [GEN6_FORMAT_BC6H_UF16] = CAP( 7, 7, 0, 0),
+#undef CAP
+ };
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /* supported when the format has a non-zero entry and the gen is new enough */
+ return (format < ARRAY_SIZE(caps) && caps[format].sampling &&
+ ilo_dev_gen(dev) >= caps[format].sampling);
+}
+
+/*
+ * Return true if "format" is usable by the data port for the given access
+ * type (render target write, typed write, untyped, or data cache).
+ */
+static bool
+surface_valid_dp_format(const struct ilo_dev *dev,
+ enum ilo_state_surface_access access,
+ enum gen_surface_format format)
+{
+ /*
+ * This table is based on:
+ *
+ * - the Sandy Bridge PRM, volume 4 part 1, page 88-97
+ * - the Ivy Bridge PRM, volume 4 part 1, page 172, 252-253, and 277-278
+ * - the Haswell PRM, volume 7, page 262-264
+ */
+ /*
+ * Each column stores the first gen supporting the capability, in ILO_GEN()
+ * fixed-point form; zero means never supported.  The switch below consults
+ * "rt_write" and "typed_write"; the other columns are kept for reference.
+ */
+ static const struct dp_cap {
+ int rt_write;
+ int rt_write_blending;
+ int typed_write;
+ int media_color_processing;
+ } caps[] = {
+#define CAP(rt_write, rt_write_blending, typed_write, media_color_processing) \
+ { ILO_GEN(rt_write), ILO_GEN(rt_write_blending), ILO_GEN(typed_write), ILO_GEN(media_color_processing) }
+ [GEN6_FORMAT_R32G32B32A32_FLOAT] = CAP( 1, 1, 7, 0),
+ [GEN6_FORMAT_R32G32B32A32_SINT] = CAP( 1, 0, 7, 0),
+ [GEN6_FORMAT_R32G32B32A32_UINT] = CAP( 1, 0, 7, 0),
+ [GEN6_FORMAT_R16G16B16A16_UNORM] = CAP( 1, 4.5, 7, 6),
+ [GEN6_FORMAT_R16G16B16A16_SNORM] = CAP( 1, 6, 7, 0),
+ [GEN6_FORMAT_R16G16B16A16_SINT] = CAP( 1, 0, 7, 0),
+ [GEN6_FORMAT_R16G16B16A16_UINT] = CAP( 1, 0, 7, 0),
+ [GEN6_FORMAT_R16G16B16A16_FLOAT] = CAP( 1, 1, 7, 0),
+ [GEN6_FORMAT_R32G32_FLOAT] = CAP( 1, 1, 7, 0),
+ [GEN6_FORMAT_R32G32_SINT] = CAP( 1, 0, 7, 0),
+ [GEN6_FORMAT_R32G32_UINT] = CAP( 1, 0, 7, 0),
+ [GEN6_FORMAT_B8G8R8A8_UNORM] = CAP( 1, 1, 7, 6),
+ [GEN6_FORMAT_B8G8R8A8_UNORM_SRGB] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_R10G10B10A2_UNORM] = CAP( 1, 1, 7, 6),
+ [GEN6_FORMAT_R10G10B10A2_UNORM_SRGB] = CAP( 0, 0, 0, 6),
+ [GEN6_FORMAT_R10G10B10A2_UINT] = CAP( 1, 0, 7, 0),
+ [GEN6_FORMAT_R8G8B8A8_UNORM] = CAP( 1, 1, 7, 6),
+ [GEN6_FORMAT_R8G8B8A8_UNORM_SRGB] = CAP( 1, 1, 0, 6),
+ [GEN6_FORMAT_R8G8B8A8_SNORM] = CAP( 1, 6, 7, 0),
+ [GEN6_FORMAT_R8G8B8A8_SINT] = CAP( 1, 0, 7, 0),
+ [GEN6_FORMAT_R8G8B8A8_UINT] = CAP( 1, 0, 7, 0),
+ [GEN6_FORMAT_R16G16_UNORM] = CAP( 1, 4.5, 7, 0),
+ [GEN6_FORMAT_R16G16_SNORM] = CAP( 1, 6, 7, 0),
+ [GEN6_FORMAT_R16G16_SINT] = CAP( 1, 0, 7, 0),
+ [GEN6_FORMAT_R16G16_UINT] = CAP( 1, 0, 7, 0),
+ [GEN6_FORMAT_R16G16_FLOAT] = CAP( 1, 1, 7, 0),
+ [GEN6_FORMAT_B10G10R10A2_UNORM] = CAP( 1, 1, 7, 6),
+ [GEN6_FORMAT_B10G10R10A2_UNORM_SRGB] = CAP( 1, 1, 0, 6),
+ [GEN6_FORMAT_R11G11B10_FLOAT] = CAP( 1, 1, 7, 0),
+ [GEN6_FORMAT_R32_SINT] = CAP( 1, 0, 7, 0),
+ [GEN6_FORMAT_R32_UINT] = CAP( 1, 0, 7, 0),
+ [GEN6_FORMAT_R32_FLOAT] = CAP( 1, 1, 7, 0),
+ [GEN6_FORMAT_B8G8R8X8_UNORM] = CAP( 0, 0, 0, 6),
+ [GEN6_FORMAT_B5G6R5_UNORM] = CAP( 1, 1, 7, 0),
+ [GEN6_FORMAT_B5G6R5_UNORM_SRGB] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_B5G5R5A1_UNORM] = CAP( 1, 1, 7, 0),
+ [GEN6_FORMAT_B5G5R5A1_UNORM_SRGB] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_B4G4R4A4_UNORM] = CAP( 1, 1, 7, 0),
+ [GEN6_FORMAT_B4G4R4A4_UNORM_SRGB] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_R8G8_UNORM] = CAP( 1, 1, 7, 0),
+ [GEN6_FORMAT_R8G8_SNORM] = CAP( 1, 6, 7, 0),
+ [GEN6_FORMAT_R8G8_SINT] = CAP( 1, 0, 7, 0),
+ [GEN6_FORMAT_R8G8_UINT] = CAP( 1, 0, 7, 0),
+ [GEN6_FORMAT_R16_UNORM] = CAP( 1, 4.5, 7, 7),
+ [GEN6_FORMAT_R16_SNORM] = CAP( 1, 6, 7, 0),
+ [GEN6_FORMAT_R16_SINT] = CAP( 1, 0, 7, 0),
+ [GEN6_FORMAT_R16_UINT] = CAP( 1, 0, 7, 0),
+ [GEN6_FORMAT_R16_FLOAT] = CAP( 1, 1, 7, 0),
+ [GEN6_FORMAT_B5G5R5X1_UNORM] = CAP( 1, 1, 7, 0),
+ [GEN6_FORMAT_B5G5R5X1_UNORM_SRGB] = CAP( 1, 1, 0, 0),
+ [GEN6_FORMAT_R8_UNORM] = CAP( 1, 1, 7, 0),
+ [GEN6_FORMAT_R8_SNORM] = CAP( 1, 6, 7, 0),
+ [GEN6_FORMAT_R8_SINT] = CAP( 1, 0, 7, 0),
+ [GEN6_FORMAT_R8_UINT] = CAP( 1, 0, 7, 0),
+ [GEN6_FORMAT_A8_UNORM] = CAP( 1, 1, 7, 0),
+ [GEN6_FORMAT_YCRCB_NORMAL] = CAP( 1, 0, 0, 6),
+ [GEN6_FORMAT_YCRCB_SWAPUVY] = CAP( 1, 0, 0, 6),
+ [GEN6_FORMAT_YCRCB_SWAPUV] = CAP( 1, 0, 0, 6),
+ [GEN6_FORMAT_YCRCB_SWAPY] = CAP( 1, 0, 0, 6),
+#undef CAP
+ };
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ if (format >= ARRAY_SIZE(caps))
+ return false;
+
+ switch (access) {
+ case ILO_STATE_SURFACE_ACCESS_DP_RENDER:
+ return (caps[format].rt_write &&
+ ilo_dev_gen(dev) >= caps[format].rt_write);
+ case ILO_STATE_SURFACE_ACCESS_DP_TYPED:
+ return (caps[format].typed_write &&
+ ilo_dev_gen(dev) >= caps[format].typed_write);
+ case ILO_STATE_SURFACE_ACCESS_DP_UNTYPED:
+ /* untyped access requires the raw format */
+ return (format == GEN6_FORMAT_RAW);
+ case ILO_STATE_SURFACE_ACCESS_DP_DATA:
+ /* ignored, but can it be raw? */
+ assert(format != GEN6_FORMAT_RAW);
+ return true;
+ default:
+ return false;
+ }
+}
+
+/*
+ * Return true if "format" can be written by stream output (SVB).  Only the
+ * one- to four-component 32-bit formats are streamable.
+ */
+static bool
+surface_valid_svb_format(const struct ilo_dev *dev,
+                         enum gen_surface_format format)
+{
+   /*
+    * This list is based on:
+    *
+    *  - the Sandy Bridge PRM, volume 4 part 1, page 88-97
+    *  - the Ivy Bridge PRM, volume 2 part 1, page 195
+    *  - the Haswell PRM, volume 7, page 535
+    */
+   static const enum gen_surface_format streamable[] = {
+      GEN6_FORMAT_R32G32B32A32_FLOAT,
+      GEN6_FORMAT_R32G32B32A32_SINT,
+      GEN6_FORMAT_R32G32B32A32_UINT,
+      GEN6_FORMAT_R32G32B32_FLOAT,
+      GEN6_FORMAT_R32G32B32_SINT,
+      GEN6_FORMAT_R32G32B32_UINT,
+      GEN6_FORMAT_R32G32_FLOAT,
+      GEN6_FORMAT_R32G32_SINT,
+      GEN6_FORMAT_R32G32_UINT,
+      GEN6_FORMAT_R32_SINT,
+      GEN6_FORMAT_R32_UINT,
+      GEN6_FORMAT_R32_FLOAT,
+   };
+   int i;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   for (i = 0; i < (int) ARRAY_SIZE(streamable); i++) {
+      if (streamable[i] == format)
+         return true;
+   }
+
+   return false;
+}
+
+/*
+ * Return true if "format" is usable for the given access type on the
+ * current gen.
+ */
+bool
+ilo_state_surface_valid_format(const struct ilo_dev *dev,
+                               enum ilo_state_surface_access access,
+                               enum gen_surface_format format)
+{
+   /* dispatch on how the surface will be accessed */
+   switch (access) {
+   case ILO_STATE_SURFACE_ACCESS_SAMPLER:
+      return surface_valid_sampler_format(dev, access, format);
+   case ILO_STATE_SURFACE_ACCESS_DP_RENDER:
+   case ILO_STATE_SURFACE_ACCESS_DP_TYPED:
+   case ILO_STATE_SURFACE_ACCESS_DP_UNTYPED:
+   case ILO_STATE_SURFACE_ACCESS_DP_DATA:
+      return surface_valid_dp_format(dev, access, format);
+   case ILO_STATE_SURFACE_ACCESS_DP_SVB:
+      return surface_valid_svb_format(dev, format);
+   default:
+      return false;
+   }
+}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_urb.c b/src/gallium/drivers/ilo/core/ilo_state_urb.c
new file mode 100644
index 00000000000..cbd150c71c9
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_urb.c
@@ -0,0 +1,769 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#include "ilo_debug.h"
+#include "ilo_state_urb.h"
+
+/* how the PCB and the URB are divided among the shader stages */
+struct urb_configuration {
+ /* per-stage push constant buffer allocations, in KB */
+ uint8_t vs_pcb_alloc_kb;
+ uint8_t hs_pcb_alloc_kb;
+ uint8_t ds_pcb_alloc_kb;
+ uint8_t gs_pcb_alloc_kb;
+ uint8_t ps_pcb_alloc_kb;
+
+ /* where the URB region starts (past the PCB region), in 8KB units */
+ uint8_t urb_offset_8kb;
+
+ /* per-stage URB allocations, in 8KB units */
+ uint8_t vs_urb_alloc_8kb;
+ uint8_t hs_urb_alloc_8kb;
+ uint8_t ds_urb_alloc_8kb;
+ uint8_t gs_urb_alloc_8kb;
+
+ /* URB entry sizes, in rows (1024-bit rows on Gen6, 512-bit on Gen7+) */
+ uint8_t vs_entry_rows;
+ uint8_t hs_entry_rows;
+ uint8_t ds_entry_rows;
+ uint8_t gs_entry_rows;
+
+ /* URB entry counts per stage */
+ int vs_entry_count;
+ int hs_entry_count;
+ int ds_entry_count;
+ int gs_entry_count;
+};
+
+/*
+ * Divide the push constant buffer space among the stages and compute where
+ * the URB region starts.
+ */
+static void
+urb_alloc_gen7_pcb(const struct ilo_dev *dev,
+                   const struct ilo_state_urb_info *info,
+                   struct urb_configuration *conf)
+{
+   /*
+    * From the Haswell PRM, volume 2b, page 940:
+    *
+    *   "[0,16] (0KB - 16KB) Increments of 1KB DevHSW:GT1, DevHSW:GT2
+    *    [0,32] (0KB - 32KB) Increments of 2KB DevHSW:GT3"
+    */
+   uint8_t increment_kb = 1;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(8) ||
+       (ilo_dev_gen(dev) == ILO_GEN(7.5) && dev->gt == 3))
+      increment_kb = 2;
+
+   /*
+    * Keep the strategy simple as we do not know the workloads and how
+    * expensive it is to change the configuration frequently.
+    */
+   if (info->hs_const_data || info->ds_const_data) {
+      /* tessellation on: every stage gets a share */
+      conf->vs_pcb_alloc_kb = increment_kb * 4;
+      conf->hs_pcb_alloc_kb = increment_kb * 3;
+      conf->ds_pcb_alloc_kb = increment_kb * 3;
+      conf->gs_pcb_alloc_kb = increment_kb * 3;
+      conf->ps_pcb_alloc_kb = increment_kb * 3;
+   } else if (info->gs_const_data) {
+      conf->vs_pcb_alloc_kb = increment_kb * 6;
+      conf->gs_pcb_alloc_kb = increment_kb * 5;
+      conf->ps_pcb_alloc_kb = increment_kb * 5;
+   } else {
+      conf->vs_pcb_alloc_kb = increment_kb * 8;
+      conf->ps_pcb_alloc_kb = increment_kb * 8;
+   }
+
+   /* the URB starts right after the 16KB (or 32KB) PCB region */
+   conf->urb_offset_8kb = increment_kb * 16 / 8;
+}
+
+/* divide the remaining URB space among the enabled stages */
+static void
+urb_alloc_gen6_urb(const struct ilo_dev *dev,
+                   const struct ilo_state_urb_info *info,
+                   struct urb_configuration *conf)
+{
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 34:
+    *
+    *   "(VS URB Starting Address) Offset from the start of the URB memory
+    *    where VS starts its allocation, specified in multiples of 8 KB."
+    *
+    * Same for other stages.
+    */
+   const int space_avail_8kb = dev->urb_size / 8192 - conf->urb_offset_8kb;
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 173:
+    *
+    *   "Programming Note: If the GS stage is enabled, software must always
+    *    allocate at least one GS URB Entry. This is true even if the GS
+    *    thread never needs to output vertices to the urb, e.g., when only
+    *    performing stream output. This is an artifact of the need to pass
+    *    the GS thread an initial destination URB handle."
+    */
+   const bool force_gs_alloc =
+      (ilo_dev_gen(dev) == ILO_GEN(6) && info->gs_enable);
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (info->hs_entry_size || info->ds_entry_size) {
+      /* tessellation on: split evenly among VS/HS/DS/GS */
+      const int quarter_8kb = space_avail_8kb / 4;
+
+      conf->vs_urb_alloc_8kb = quarter_8kb;
+      conf->hs_urb_alloc_8kb = quarter_8kb;
+      conf->ds_urb_alloc_8kb = quarter_8kb;
+      conf->gs_urb_alloc_8kb = quarter_8kb;
+
+      if (space_avail_8kb % 4) {
+         /* give the leftover blocks to VS and GS */
+         assert(space_avail_8kb % 2 == 0);
+         conf->vs_urb_alloc_8kb++;
+         conf->gs_urb_alloc_8kb++;
+      }
+   } else if (info->gs_entry_size || force_gs_alloc) {
+      assert(space_avail_8kb % 2 == 0);
+      conf->vs_urb_alloc_8kb = space_avail_8kb / 2;
+      conf->gs_urb_alloc_8kb = space_avail_8kb / 2;
+   } else {
+      /* VS-only: it gets everything */
+      conf->vs_urb_alloc_8kb = space_avail_8kb;
+   }
+}
+
+/*
+ * Compute the Gen6 VS URB entry size and count.  Returns false when the
+ * entry is too large to be represented.
+ */
+static bool
+urb_init_gen6_vs_entry(const struct ilo_dev *dev,
+                       const struct ilo_state_urb_info *info,
+                       struct urb_configuration *conf)
+{
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 28:
+    *
+    *   "(VS URB Entry Allocation Size)
+    *     Range [0,4] = [1,5] 1024-bit URB rows"
+    *
+    *   "(VS Number of URB Entries)
+    *     Range [24,256] in multiples of 4
+    *           [24, 128] in multiples of 4[DevSNBGT1]"
+    */
+   /* per the quote above, GT1 is limited to 128 entries, not 252 */
+   const int max_entry_count = (dev->gt == 2) ? 256 : 128;
+   const int row_size = 1024 / 8;
+   int row_count, entry_count;
+   int entry_size;
+
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   /* VE and VS share the same VUE for each vertex */
+   entry_size = info->vs_entry_size;
+   if (entry_size < info->ve_entry_size)
+      entry_size = info->ve_entry_size;
+
+   /* entry size in 1024-bit rows; at least one row even when empty */
+   row_count = (entry_size + row_size - 1) / row_size;
+   if (row_count > 5)
+      return false;
+   else if (!row_count)
+      row_count++;
+
+   entry_count = conf->vs_urb_alloc_8kb * 8192 / (row_size * row_count);
+   if (entry_count > max_entry_count)
+      entry_count = max_entry_count;
+   /* round down to a multiple of 4, per the quoted range */
+   entry_count &= ~3;
+   assert(entry_count >= 24);
+
+   conf->vs_entry_rows = row_count;
+   conf->vs_entry_count = entry_count;
+
+   return true;
+}
+
+/*
+ * Compute the Gen6 GS URB entry size and count.  Returns false when the
+ * entry is too large to be represented.
+ */
+static bool
+urb_init_gen6_gs_entry(const struct ilo_dev *dev,
+                       const struct ilo_state_urb_info *info,
+                       struct urb_configuration *conf)
+{
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 29:
+    *
+    *   "(GS Number of URB Entries)
+    *     Range [0,256] in multiples of 4
+    *           [0, 254] in multiples of 4[DevSNBGT1]"
+    *
+    *   "(GS URB Entry Allocation Size)
+    *     Range [0,4] = [1,5] 1024-bit URB rows"
+    */
+   const int max_entry_count = (dev->gt == 2) ? 256 : 252;
+   const int row_size = 1024 / 8;
+   int rows, entries;
+
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   /* entry size in 1024-bit rows; at least one row even when empty */
+   rows = (info->gs_entry_size + row_size - 1) / row_size;
+   if (rows > 5)
+      return false;
+   if (!rows)
+      rows = 1;
+
+   /* as many entries as fit, clamped and rounded down to a multiple of 4 */
+   entries = conf->gs_urb_alloc_8kb * 8192 / (row_size * rows);
+   if (entries > max_entry_count)
+      entries = max_entry_count;
+   entries &= ~3;
+
+   conf->gs_entry_rows = rows;
+   conf->gs_entry_count = entries;
+
+   return true;
+}
+
+/*
+ * Compute the Gen7+ VS URB entry size and count.  Returns false when the
+ * allocation cannot satisfy the hardware minimum.
+ */
+static bool
+urb_init_gen7_vs_entry(const struct ilo_dev *dev,
+ const struct ilo_state_urb_info *info,
+ struct urb_configuration *conf)
+{
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 34-35:
+ *
+ * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
+ * cause performance to decrease due to banking in the URB. Element
+ * sizes of 16 to 20 should be programmed with six 512-bit URB rows."
+ *
+ * "(VS URB Entry Allocation Size)
+ * Format: U9-1 count of 512-bit units"
+ *
+ * "(VS Number of URB Entries)
+ * [32,704]
+ * [32,512]
+ *
+ * Programming Restriction: VS Number of URB Entries must be divisible
+ * by 8 if the VS URB Entry Allocation Size is less than 9 512-bit URB
+ * entries."2:0" = reserved "000b""
+ *
+ * From the Haswell PRM, volume 2b, page 847:
+ *
+ * "(VS Number of URB Entries)
+ * [64,1664] DevHSW:GT3
+ * [64,1664] DevHSW:GT2
+ * [32,640] DevHSW:GT1"
+ */
+ const int row_size = 512 / 8;
+ int row_count, entry_count;
+ int entry_size;
+ int max_entry_count, min_entry_count;
+
+ ILO_DEV_ASSERT(dev, 7, 8);
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 35:
+ *
+ * "Programming Restriction: As the VS URB entry serves as both the
+ * per-vertex input and output of the VS shader, the VS URB Allocation
+ * Size must be sized to the maximum of the vertex input and output
+ * structures."
+ *
+ * From the Ivy Bridge PRM, volume 2 part 1, page 42:
+ *
+ * "If the VS function is enabled, the VF-written VUEs are not required
+ * to have Vertex Headers, as the VS-incoming vertices are guaranteed
+ * to be consumed by the VS (i.e., the VS thread is responsible for
+ * overwriting the input vertex data)."
+ *
+ * VE and VS share the same VUE for each vertex.
+ */
+ entry_size = info->vs_entry_size;
+ if (entry_size < info->ve_entry_size)
+ entry_size = info->ve_entry_size;
+
+ /* avoid exactly 5 rows (URB banking, see the first quote above); at
+ * least one row even when empty
+ */
+ row_count = (entry_size + row_size - 1) / row_size;
+ if (row_count == 5 || !row_count)
+ row_count++;
+
+ /* entry count must be divisible by 8 for entries smaller than 9 rows */
+ entry_count = conf->vs_urb_alloc_8kb * 8192 / (row_size * row_count);
+ if (row_count < 9)
+ entry_count &= ~7;
+
+ /* gen/GT-specific entry count limits, quoted above */
+ switch (ilo_dev_gen(dev)) {
+ case ILO_GEN(8):
+ case ILO_GEN(7.5):
+ max_entry_count = (dev->gt >= 2) ? 1664 : 640;
+ min_entry_count = (dev->gt >= 2) ? 64 : 32;
+ break;
+ case ILO_GEN(7):
+ max_entry_count = (dev->gt == 2) ? 704 : 512;
+ min_entry_count = 32;
+ break;
+ default:
+ assert(!"unexpected gen");
+ return false;
+ break;
+ }
+
+ if (entry_count > max_entry_count)
+ entry_count = max_entry_count;
+ else if (entry_count < min_entry_count)
+ return false;
+
+ conf->vs_entry_rows = row_count;
+ conf->vs_entry_count = entry_count;
+
+ return true;
+}
+
+/*
+ * Compute the Gen7+ HS URB entry size and count.  Returns false when HS is
+ * enabled but no entry fits the allocation.
+ */
+static bool
+urb_init_gen7_hs_entry(const struct ilo_dev *dev,
+                       const struct ilo_state_urb_info *info,
+                       struct urb_configuration *conf)
+{
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 37:
+    *
+    *   "HS Number of URB Entries must be divisible by 8 if the HS URB Entry
+    *    Allocation Size is less than 9 512-bit URB
+    *    entries."2:0" = reserved "000"
+    *
+    *    [0,64]
+    *    [0,32]"
+    *
+    * From the Haswell PRM, volume 2b, page 849:
+    *
+    *   "(HS Number of URB Entries)
+    *    [0,128] DevHSW:GT2
+    *    [0,64] DevHSW:GT1"
+    */
+   const int row_size = 512 / 8;
+   int rows, entries;
+   int limit;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   /* entry size in 512-bit rows; at least one row even when empty */
+   rows = (info->hs_entry_size + row_size - 1) / row_size;
+   if (!rows)
+      rows = 1;
+
+   /* entry count must be divisible by 8 for entries smaller than 9 rows */
+   entries = conf->hs_urb_alloc_8kb * 8192 / (row_size * rows);
+   if (rows < 9)
+      entries &= ~7;
+
+   /* gen/GT-specific entry count limits, quoted above */
+   switch (ilo_dev_gen(dev)) {
+   case ILO_GEN(8):
+   case ILO_GEN(7.5):
+      limit = (dev->gt >= 2) ? 128 : 64;
+      break;
+   case ILO_GEN(7):
+      limit = (dev->gt == 2) ? 64 : 32;
+      break;
+   default:
+      assert(!"unexpected gen");
+      return false;
+   }
+
+   if (entries > limit)
+      entries = limit;
+   else if (info->hs_entry_size && !entries)
+      return false;
+
+   conf->hs_entry_rows = rows;
+   conf->hs_entry_count = entries;
+
+   return true;
+}
+
+/*
+ * Compute the Gen7+ DS URB entry size and count.  Returns false when the
+ * entry is too large or the allocation cannot satisfy the hardware minimum.
+ */
+static bool
+urb_init_gen7_ds_entry(const struct ilo_dev *dev,
+ const struct ilo_state_urb_info *info,
+ struct urb_configuration *conf)
+{
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 38:
+ *
+ * "(DS URB Entry Allocation Size)
+ * [0,9]"
+ *
+ * "(DS Number of URB Entries) If Domain Shader Thread Dispatch is
+ * Enabled then the minimum number handles that must be allocated is
+ * 138 URB entries.
+ * "2:0" = reserved "000"
+ *
+ * [0,448]
+ * [0,288]
+ *
+ * DS Number of URB Entries must be divisible by 8 if the DS URB Entry
+ * Allocation Size is less than 9 512-bit URB entries.If Domain Shader
+ * Thread Dispatch is Enabled then the minimum number of handles that
+ * must be allocated is 10 URB entries."
+ *
+ * From the Haswell PRM, volume 2b, page 851:
+ *
+ * "(DS Number of URB Entries)
+ * [0,960] DevHSW:GT2
+ * [0,384] DevHSW:GT1"
+ */
+ const int row_size = 512 / 8;
+ int row_count, entry_count;
+ int max_entry_count;
+
+ ILO_DEV_ASSERT(dev, 7, 8);
+
+ /* allocation size is [0,9], i.e. at most 10 rows; at least one row */
+ row_count = (info->ds_entry_size + row_size - 1) / row_size;
+ if (row_count > 10)
+ return false;
+ else if (!row_count)
+ row_count++;
+
+ /* entry count must be divisible by 8 for entries smaller than 9 rows */
+ entry_count = conf->ds_urb_alloc_8kb * 8192 / (row_size * row_count);
+ if (row_count < 9)
+ entry_count &= ~7;
+
+ /* gen/GT-specific entry count limits, quoted above */
+ switch (ilo_dev_gen(dev)) {
+ case ILO_GEN(8):
+ case ILO_GEN(7.5):
+ max_entry_count = (dev->gt >= 2) ? 960 : 384;
+ break;
+ case ILO_GEN(7):
+ max_entry_count = (dev->gt == 2) ? 448 : 288;
+ break;
+ default:
+ assert(!"unexpected gen");
+ return false;
+ break;
+ }
+
+ /* when DS is enabled, at least 10 handles must be allocated (see above) */
+ if (entry_count > max_entry_count)
+ entry_count = max_entry_count;
+ else if (info->ds_entry_size && entry_count < 10)
+ return false;
+
+ conf->ds_entry_rows = row_count;
+ conf->ds_entry_count = entry_count;
+
+ return true;
+}
+
+/*
+ * Compute the Gen7+ GS URB entry size and count.  Returns false when GS is
+ * enabled but fewer than two handles fit the allocation.
+ */
+static bool
+urb_init_gen7_gs_entry(const struct ilo_dev *dev,
+ const struct ilo_state_urb_info *info,
+ struct urb_configuration *conf)
+{
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 40:
+ *
+ * "(GS Number of URB Entries) GS Number of URB Entries must be
+ * divisible by 8 if the GS URB Entry Allocation Size is less than 9
+ * 512-bit URB entries.
+ * "2:0" = reserved "000"
+ *
+ * [0,320]
+ * [0,192]"
+ *
+ * From the Ivy Bridge PRM, volume 2 part 1, page 171:
+ *
+ * "(DUAL_INSTANCE and DUAL_OBJECT) The GS must be allocated at least
+ * two URB handles or behavior is UNDEFINED."
+ *
+ * From the Haswell PRM, volume 2b, page 853:
+ *
+ * "(GS Number of URB Entries)
+ * [0,640] DevHSW:GT2
+ * [0,256] DevHSW:GT1
+ *
+ * Only if GS is disabled can this field be programmed to 0. If GS is
+ * enabled this field shall be programmed to a value greater than 0.
+ * For GS Dispatch Mode "Single", this field shall be programmed to a
+ * value greater than or equal to 1. For other GS Dispatch Modes,
+ * refer to the definition of Dispatch Mode (3DSTATE_GS) for minimum
+ * values of this field."
+ */
+ const int row_size = 512 / 8;
+ int row_count, entry_count;
+ int max_entry_count;
+
+ ILO_DEV_ASSERT(dev, 7, 8);
+
+ /* entry size in 512-bit rows; at least one row even when empty */
+ row_count = (info->gs_entry_size + row_size - 1) / row_size;
+ if (!row_count)
+ row_count++;
+
+ /* entry count must be divisible by 8 for entries smaller than 9 rows */
+ entry_count = conf->gs_urb_alloc_8kb * 8192 / (row_size * row_count);
+ if (row_count < 9)
+ entry_count &= ~7;
+
+ /* gen/GT-specific entry count limits, quoted above */
+ switch (ilo_dev_gen(dev)) {
+ case ILO_GEN(8):
+ case ILO_GEN(7.5):
+ max_entry_count = (dev->gt >= 2) ? 640 : 256;
+ break;
+ case ILO_GEN(7):
+ max_entry_count = (dev->gt == 2) ? 320 : 192;
+ break;
+ default:
+ assert(!"unexpected gen");
+ return false;
+ break;
+ }
+
+ /* enabled GS needs at least two handles (DUAL_* modes, see above) */
+ if (entry_count > max_entry_count)
+ entry_count = max_entry_count;
+ else if (info->gs_entry_size && entry_count < 2)
+ return false;
+
+ conf->gs_entry_rows = row_count;
+ conf->gs_entry_count = entry_count;
+
+ return true;
+}
+
+/*
+ * Derive a full URB/PCB configuration from "info".  Returns false when any
+ * stage's entries cannot be made to fit.
+ */
+static bool
+urb_get_gen6_configuration(const struct ilo_dev *dev,
+                           const struct ilo_state_urb_info *info,
+                           struct urb_configuration *conf)
+{
+   const bool is_gen7_plus = (ilo_dev_gen(dev) >= ILO_GEN(7));
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   memset(conf, 0, sizeof(*conf));
+
+   /* carve out the PCB first; the URB gets what remains */
+   if (is_gen7_plus)
+      urb_alloc_gen7_pcb(dev, info, conf);
+
+   urb_alloc_gen6_urb(dev, info, conf);
+
+   if (is_gen7_plus) {
+      return urb_init_gen7_vs_entry(dev, info, conf) &&
+             urb_init_gen7_hs_entry(dev, info, conf) &&
+             urb_init_gen7_ds_entry(dev, info, conf) &&
+             urb_init_gen7_gs_entry(dev, info, conf);
+   }
+
+   return urb_init_gen6_vs_entry(dev, info, conf) &&
+          urb_init_gen6_gs_entry(dev, info, conf);
+}
+
+/* encode the five 3DSTATE_PUSH_CONSTANT_ALLOC_* DW1 values */
+static bool
+urb_set_gen7_3dstate_push_constant_alloc(struct ilo_state_urb *urb,
+                                         const struct ilo_dev *dev,
+                                         const struct ilo_state_urb_info *info,
+                                         const struct urb_configuration *conf)
+{
+   const uint8_t sizes_kb[5] = {
+      conf->vs_pcb_alloc_kb,
+      conf->hs_pcb_alloc_kb,
+      conf->ds_pcb_alloc_kb,
+      conf->gs_pcb_alloc_kb,
+      conf->ps_pcb_alloc_kb,
+   };
+   uint8_t offset_kb = 0;
+   int i;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   STATIC_ASSERT(ARRAY_SIZE(urb->pcb) >= 5);
+
+   for (i = 0; i < 5; i++) {
+      /* careful for the valid range of offsets: only advance the offset
+       * past stages that actually have an allocation
+       */
+      if (sizes_kb[i]) {
+         urb->pcb[i] = offset_kb << GEN7_PCB_ALLOC_DW1_OFFSET__SHIFT |
+                       sizes_kb[i] << GEN7_PCB_ALLOC_DW1_SIZE__SHIFT;
+         offset_kb += sizes_kb[i];
+      } else {
+         urb->pcb[i] = 0;
+      }
+   }
+
+   return true;
+}
+
+/* encode the Gen6 3DSTATE_URB DW1 (VS) and DW2 (GS) */
+static bool
+urb_set_gen6_3DSTATE_URB(struct ilo_state_urb *urb,
+                         const struct ilo_dev *dev,
+                         const struct ilo_state_urb_info *info,
+                         const struct urb_configuration *conf)
+{
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   /* both stages were given at least one row by the gen6 init functions */
+   assert(conf->vs_entry_rows && conf->gs_entry_rows);
+
+   STATIC_ASSERT(ARRAY_SIZE(urb->urb) >= 2);
+
+   /* entry sizes are programmed in rows minus one */
+   urb->urb[0] =
+      (conf->vs_entry_rows - 1) << GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT |
+      conf->vs_entry_count << GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT;
+   urb->urb[1] =
+      conf->gs_entry_count << GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT |
+      (conf->gs_entry_rows - 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT;
+
+   return true;
+}
+
+/* encode the four 3DSTATE_URB_{VS,HS,DS,GS} DW1 values */
+static bool
+urb_set_gen7_3dstate_urb(struct ilo_state_urb *urb,
+                         const struct ilo_dev *dev,
+                         const struct ilo_state_urb_info *info,
+                         const struct urb_configuration *conf)
+{
+   const uint8_t alloc_8kb[4] = {
+      conf->vs_urb_alloc_8kb,
+      conf->hs_urb_alloc_8kb,
+      conf->ds_urb_alloc_8kb,
+      conf->gs_urb_alloc_8kb,
+   };
+   const uint8_t entry_rows[4] = {
+      conf->vs_entry_rows,
+      conf->hs_entry_rows,
+      conf->ds_entry_rows,
+      conf->gs_entry_rows,
+   };
+   const int entry_counts[4] = {
+      conf->vs_entry_count,
+      conf->hs_entry_count,
+      conf->ds_entry_count,
+      conf->gs_entry_count,
+   };
+   uint8_t offset_8kb = conf->urb_offset_8kb;
+   int i;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   STATIC_ASSERT(ARRAY_SIZE(urb->urb) >= 4);
+
+   for (i = 0; i < 4; i++) {
+      /* careful for the valid range of offsets: only advance the offset
+       * past stages that actually have URB space allocated
+       */
+      if (alloc_8kb[i]) {
+         assert(entry_rows[i]);
+         urb->urb[i] =
+            offset_8kb << GEN7_URB_DW1_OFFSET__SHIFT |
+            (entry_rows[i] - 1) << GEN7_URB_DW1_ENTRY_SIZE__SHIFT |
+            entry_counts[i] << GEN7_URB_DW1_ENTRY_COUNT__SHIFT;
+         offset_8kb += alloc_8kb[i];
+      } else {
+         urb->urb[i] = 0;
+      }
+   }
+
+   return true;
+}
+
+/* initialize "urb" from "info"; "urb" must be zeroed beforehand */
+bool
+ilo_state_urb_init(struct ilo_state_urb *urb,
+                   const struct ilo_dev *dev,
+                   const struct ilo_state_urb_info *info)
+{
+   /* catch reuse of a non-zeroed struct */
+   assert(ilo_is_zeroed(urb, sizeof(*urb)));
+
+   return ilo_state_urb_set_info(urb, dev, info);
+}
+
+/* initialize "urb" for internal RECTLIST draws (blitter-style operations) */
+bool
+ilo_state_urb_init_for_rectlist(struct ilo_state_urb *urb,
+                                const struct ilo_dev *dev,
+                                uint8_t vf_attr_count)
+{
+   /* only the VE entry size matters: 4 dwords per VF attribute */
+   struct ilo_state_urb_info info = {
+      .ve_entry_size = sizeof(uint32_t) * 4 * vf_attr_count,
+   };
+
+   return ilo_state_urb_init(urb, dev, &info);
+}
+
+/* (re-)derive the hardware state in "urb" from "info" */
+bool
+ilo_state_urb_set_info(struct ilo_state_urb *urb,
+                       const struct ilo_dev *dev,
+                       const struct ilo_state_urb_info *info)
+{
+   struct urb_configuration conf;
+   bool ok;
+
+   /* derive the configuration first, then emit the hw state from it */
+   ok = urb_get_gen6_configuration(dev, info, &conf);
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      ok &= urb_set_gen7_3dstate_push_constant_alloc(urb, dev, info, &conf);
+      ok &= urb_set_gen7_3dstate_urb(urb, dev, info, &conf);
+   } else {
+      ok &= urb_set_gen6_3DSTATE_URB(urb, dev, info, &conf);
+   }
+
+   assert(ok);
+
+   return ok;
+}
+
+/* mark every URB-related state as dirty, forcing a full re-emission */
+void
+ilo_state_urb_full_delta(const struct ilo_state_urb *urb,
+                         const struct ilo_dev *dev,
+                         struct ilo_state_urb_delta *delta)
+{
+   uint32_t dirty = ILO_STATE_URB_3DSTATE_URB_VS |
+                    ILO_STATE_URB_3DSTATE_URB_GS;
+
+   /* Gen7+ adds HS/DS and the per-stage PCB allocations */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      dirty |= ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_VS |
+               ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_HS |
+               ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_DS |
+               ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_GS |
+               ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_PS |
+               ILO_STATE_URB_3DSTATE_URB_HS |
+               ILO_STATE_URB_3DSTATE_URB_DS;
+   }
+
+   delta->dirty = dirty;
+}
+
+/*
+ * Compare "urb" against "old" and set delta->dirty to the states that need
+ * to be re-emitted.
+ */
+void
+ilo_state_urb_get_delta(const struct ilo_state_urb *urb,
+ const struct ilo_dev *dev,
+ const struct ilo_state_urb *old,
+ struct ilo_state_urb_delta *delta)
+{
+ delta->dirty = 0;
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+ /* the five PCB allocations are dirtied as a group */
+ if (memcmp(urb->pcb, old->pcb, sizeof(urb->pcb))) {
+ delta->dirty |= ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_VS |
+ ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_HS |
+ ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_DS |
+ ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_GS |
+ ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_PS;
+ }
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 34:
+ *
+ * "3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be
+ * programmed in order for the programming of this state
+ * (3DSTATE_URB_VS) to be valid."
+ *
+ * The same is true for the other three states.
+ */
+ if (memcmp(urb->urb, old->urb, sizeof(urb->urb))) {
+ delta->dirty |= ILO_STATE_URB_3DSTATE_URB_VS |
+ ILO_STATE_URB_3DSTATE_URB_HS |
+ ILO_STATE_URB_3DSTATE_URB_DS |
+ ILO_STATE_URB_3DSTATE_URB_GS;
+ }
+ } else {
+ /* Gen6 3DSTATE_URB uses only the first two dwords of urb->urb */
+ if (memcmp(urb->urb, old->urb, sizeof(uint32_t) * 2)) {
+ delta->dirty |= ILO_STATE_URB_3DSTATE_URB_VS |
+ ILO_STATE_URB_3DSTATE_URB_GS;
+ }
+ }
+}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_urb.h b/src/gallium/drivers/ilo/core/ilo_state_urb.h
new file mode 100644
index 00000000000..9522b3bd681
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_urb.h
@@ -0,0 +1,103 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
#ifndef ILO_STATE_URB_H
#define ILO_STATE_URB_H

#include "genhw/genhw.h"

#include "ilo_core.h"
#include "ilo_dev.h"

/* one dirty bit per hardware command that ilo_state_urb backs */
enum ilo_state_urb_dirty_bits {
   ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_VS = (1 << 0),
   ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_HS = (1 << 1),
   ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_DS = (1 << 2),
   ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_GS = (1 << 3),
   ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_PS = (1 << 4),
   ILO_STATE_URB_3DSTATE_URB_VS                 = (1 << 5),
   ILO_STATE_URB_3DSTATE_URB_HS                 = (1 << 6),
   ILO_STATE_URB_3DSTATE_URB_DS                 = (1 << 7),
   ILO_STATE_URB_3DSTATE_URB_GS                 = (1 << 8),
};

/**
 * URB entry allocation sizes and sizes of constant data extracted from PCBs
 * to threads.
 */
struct ilo_state_urb_info {
   bool gs_enable;

   /* whether each stage reads push-constant data */
   bool vs_const_data;
   bool hs_const_data;
   bool ds_const_data;
   bool gs_const_data;
   bool ps_const_data;

   /* URB entry sizes; units presumably bytes — confirm in ilo_state_urb.c */
   uint16_t ve_entry_size;
   uint16_t vs_entry_size;
   uint16_t hs_entry_size;
   uint16_t ds_entry_size;
   uint16_t gs_entry_size;
};

struct ilo_state_urb {
   /* DWords for the five 3DSTATE_PUSH_CONSTANT_ALLOC_x commands (Gen7+) */
   uint32_t pcb[5];
   /* DWords for 3DSTATE_URB_{VS,HS,DS,GS}; only [0..1] used pre-Gen7 */
   uint32_t urb[4];
};

/* dirty bits from enum ilo_state_urb_dirty_bits */
struct ilo_state_urb_delta {
   uint32_t dirty;
};

bool
ilo_state_urb_init(struct ilo_state_urb *urb,
                   const struct ilo_dev *dev,
                   const struct ilo_state_urb_info *info);

bool
ilo_state_urb_init_for_rectlist(struct ilo_state_urb *urb,
                                const struct ilo_dev *dev,
                                uint8_t vf_attr_count);

bool
ilo_state_urb_set_info(struct ilo_state_urb *urb,
                       const struct ilo_dev *dev,
                       const struct ilo_state_urb_info *info);

/* mark every backing command dirty, e.g. after a context switch */
void
ilo_state_urb_full_delta(const struct ilo_state_urb *urb,
                         const struct ilo_dev *dev,
                         struct ilo_state_urb_delta *delta);

/* mark only the commands whose DWords differ from \p old */
void
ilo_state_urb_get_delta(const struct ilo_state_urb *urb,
                        const struct ilo_dev *dev,
                        const struct ilo_state_urb *old,
                        struct ilo_state_urb_delta *delta);

#endif /* ILO_STATE_URB_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_state_vf.c b/src/gallium/drivers/ilo/core/ilo_state_vf.c
new file mode 100644
index 00000000000..ddc75428ed7
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_vf.c
@@ -0,0 +1,984 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#include "ilo_debug.h"
+#include "ilo_buffer.h"
+#include "ilo_state_vf.h"
+
/**
 * Assert-validate the user vertex elements: buffer index, vertex offset,
 * and surface format.  Always returns true; violations only trip asserts
 * in debug builds.
 */
static bool
vf_validate_gen6_elements(const struct ilo_dev *dev,
                          const struct ilo_state_vf_info *info)
{
   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 95:
    *
    *     "(Source Element Offset (in bytes))
    *      Format: U11
    *      Range [0,2047]"
    *
    * From the Haswell PRM, volume 2d, page 415:
    *
    *     "(Source Element Offset)
    *      Format: U12 byte offset
    *      ...
    *      [0,4095]"
    *
    * From the Broadwell PRM, volume 2d, page 469:
    *
    *     "(Source Element Offset)
    *      Format: U12 byte offset
    *      ...
    *      [0,2047]"
    */
   /* only Haswell (Gen7.5) allows the wider U12 range */
   const uint16_t max_vertex_offset =
      (ilo_dev_gen(dev) == ILO_GEN(7.5)) ? 4096 : 2048;
   uint8_t i;

   ILO_DEV_ASSERT(dev, 6, 8);

   assert(info->element_count <= ILO_STATE_VF_MAX_ELEMENT_COUNT);

   for (i = 0; i < info->element_count; i++) {
      const struct ilo_state_vf_element_info *elem = &info->elements[i];

      assert(elem->buffer < ILO_STATE_VF_MAX_BUFFER_COUNT);
      assert(elem->vertex_offset < max_vertex_offset);
      assert(ilo_state_vf_valid_element_format(dev, elem->format));
   }

   return true;
}
+
+static uint32_t
+get_gen6_component_controls(const struct ilo_dev *dev,
+ enum gen_vf_component comp_x,
+ enum gen_vf_component comp_y,
+ enum gen_vf_component comp_z,
+ enum gen_vf_component comp_w)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ return comp_x << GEN6_VE_DW1_COMP0__SHIFT |
+ comp_y << GEN6_VE_DW1_COMP1__SHIFT |
+ comp_z << GEN6_VE_DW1_COMP2__SHIFT |
+ comp_w << GEN6_VE_DW1_COMP3__SHIFT;
+}
+
+static bool
+get_gen6_edge_flag_format(const struct ilo_dev *dev,
+ const struct ilo_state_vf_element_info *elem,
+ enum gen_surface_format *format)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 94:
+ *
+ * "The Source Element Format must be set to the UINT format."
+ *
+ * From the Haswell PRM, volume 2d, page 413:
+ *
+ * "The SourceElementFormat needs to be a single-component format with
+ * an element which has edge flag enabled."
+ */
+ if (elem->component_count != 1)
+ return false;
+
+ /* pick the format we like */
+ switch (elem->format_size) {
+ case 1:
+ *format = GEN6_FORMAT_R8_UINT;
+ break;
+ case 2:
+ *format = GEN6_FORMAT_R16_UINT;
+ break;
+ case 4:
+ *format = GEN6_FORMAT_R32_UINT;
+ break;
+ default:
+ return false;
+ break;
+ }
+
+ return true;
+}
+
/**
 * Translate the user vertex elements into VERTEX_ELEMENT_STATE DWord pairs
 * (vf->user_ve), and precompute both the edge-flag and non-edge-flag
 * variants of the last element (vf->last_user_ve), so that
 * ilo_state_vf_set_params() can later patch the last VE without revisiting
 * the element info.
 */
static bool
vf_set_gen6_3DSTATE_VERTEX_ELEMENTS(struct ilo_state_vf *vf,
                                    const struct ilo_dev *dev,
                                    const struct ilo_state_vf_info *info)
{
   enum gen_surface_format edge_flag_format;
   uint32_t dw0, dw1;
   uint8_t i;

   ILO_DEV_ASSERT(dev, 6, 8);

   if (!vf_validate_gen6_elements(dev, info))
      return false;

   for (i = 0; i < info->element_count; i++) {
      const struct ilo_state_vf_element_info *elem = &info->elements[i];
      /* unstored components read 0, 0, 0, 1 (int or fp depending on type) */
      enum gen_vf_component components[4] = {
         GEN6_VFCOMP_STORE_0,
         GEN6_VFCOMP_STORE_0,
         GEN6_VFCOMP_STORE_0,
         (elem->is_integer) ? GEN6_VFCOMP_STORE_1_INT :
                              GEN6_VFCOMP_STORE_1_FP,
      };

      /* store the first component_count components from the source */
      switch (elem->component_count) {
      case 4: components[3] = GEN6_VFCOMP_STORE_SRC; /* fall through */
      case 3: components[2] = GEN6_VFCOMP_STORE_SRC; /* fall through */
      case 2: components[1] = GEN6_VFCOMP_STORE_SRC; /* fall through */
      case 1: components[0] = GEN6_VFCOMP_STORE_SRC; break;
      default:
         assert(!"unexpected component count");
         break;
      }

      dw0 = elem->buffer << GEN6_VE_DW0_VB_INDEX__SHIFT |
            GEN6_VE_DW0_VALID |
            elem->format << GEN6_VE_DW0_FORMAT__SHIFT |
            elem->vertex_offset << GEN6_VE_DW0_VB_OFFSET__SHIFT;
      dw1 = get_gen6_component_controls(dev,
            components[0], components[1],
            components[2], components[3]);

      STATIC_ASSERT(ARRAY_SIZE(vf->user_ve[i]) >= 2);
      vf->user_ve[i][0] = dw0;
      vf->user_ve[i][1] = dw1;
   }

   vf->user_ve_count = i;

   vf->edge_flag_supported = (i && get_gen6_edge_flag_format(dev,
            &info->elements[i - 1], &edge_flag_format));
   if (vf->edge_flag_supported) {
      const struct ilo_state_vf_element_info *elem = &info->elements[i - 1];

      /*
       * without edge flag enable
       *
       * NOTE: dw0/dw1 intentionally still hold the values computed for the
       * last element in the loop above.
       */
      vf->last_user_ve[0][0] = dw0;
      vf->last_user_ve[0][1] = dw1;

      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 94:
       *
       *     "This bit (Edge Flag Enable) must only be ENABLED on the last
       *      valid VERTEX_ELEMENT structure.
       *
       *      When set, Component 0 Control must be set to
       *      VFCOMP_STORE_SRC, and Component 1-3 Control must be set to
       *      VFCOMP_NOSTORE."
       */
      dw0 = elem->buffer << GEN6_VE_DW0_VB_INDEX__SHIFT |
            GEN6_VE_DW0_VALID |
            edge_flag_format << GEN6_VE_DW0_FORMAT__SHIFT |
            GEN6_VE_DW0_EDGE_FLAG_ENABLE |
            elem->vertex_offset << GEN6_VE_DW0_VB_OFFSET__SHIFT;
      dw1 = get_gen6_component_controls(dev, GEN6_VFCOMP_STORE_SRC,
            GEN6_VFCOMP_NOSTORE, GEN6_VFCOMP_NOSTORE, GEN6_VFCOMP_NOSTORE);

      /* with edge flag enable */
      vf->last_user_ve[1][0] = dw0;
      vf->last_user_ve[1][1] = dw1;
   }

   return true;
}
+
/**
 * Record per-element instancing data in the Gen6 VERTEX_BUFFER_STATE layout
 * and build the VB-to-first-VE mapping.  Before Gen8 the instancing controls
 * live in the vertex buffer state, so all elements sourced from the same
 * buffer must agree on them (asserted below).
 */
static bool
vf_set_gen6_vertex_buffer_state(struct ilo_state_vf *vf,
                                const struct ilo_dev *dev,
                                const struct ilo_state_vf_info *info)
{
   uint8_t i;

   ILO_DEV_ASSERT(dev, 6, 7.5);

   /* -1 marks "no element uses this VB yet" */
   memset(vf->vb_to_first_elem, -1, sizeof(vf->vb_to_first_elem));

   for (i = 0; i < info->element_count; i++) {
      const struct ilo_state_vf_element_info *elem = &info->elements[i];

      STATIC_ASSERT(ARRAY_SIZE(vf->user_instancing[i]) >= 2);
      /* instancing enable only */
      vf->user_instancing[i][0] = (elem->instancing_enable) ?
         GEN6_VB_DW0_ACCESS_INSTANCEDATA :
         GEN6_VB_DW0_ACCESS_VERTEXDATA;
      vf->user_instancing[i][1] = elem->instancing_step_rate;

      /*
       * Instancing is per VB, not per VE, before Gen8.  Set up a VB-to-VE
       * mapping as well.
       */
      if (vf->vb_to_first_elem[elem->buffer] < 0) {
         vf->vb_to_first_elem[elem->buffer] = i;
      } else {
         /* later elements on the same VB must match the first one */
         const struct ilo_state_vf_element_info *first =
            &info->elements[vf->vb_to_first_elem[elem->buffer]];

         assert(elem->instancing_enable == first->instancing_enable &&
                elem->instancing_step_rate == first->instancing_step_rate);
      }
   }

   return true;
}
+
+static bool
+vf_set_gen8_3DSTATE_VF_INSTANCING(struct ilo_state_vf *vf,
+ const struct ilo_dev *dev,
+ const struct ilo_state_vf_info *info)
+{
+ uint8_t i;
+
+ ILO_DEV_ASSERT(dev, 8, 8);
+
+ for (i = 0; i < info->element_count; i++) {
+ const struct ilo_state_vf_element_info *elem = &info->elements[i];
+
+ STATIC_ASSERT(ARRAY_SIZE(vf->user_instancing[i]) >= 2);
+ vf->user_instancing[i][0] = (elem->instancing_enable) ?
+ GEN8_INSTANCING_DW1_ENABLE : 0;
+ vf->user_instancing[i][1] = elem->instancing_step_rate;
+ }
+
+ return true;
+}
+
+static uint32_t
+get_gen6_component_zeros(const struct ilo_dev *dev)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ return get_gen6_component_controls(dev,
+ GEN6_VFCOMP_STORE_0,
+ GEN6_VFCOMP_STORE_0,
+ GEN6_VFCOMP_STORE_0,
+ GEN6_VFCOMP_STORE_0);
+}
+
+static uint32_t
+get_gen6_component_ids(const struct ilo_dev *dev,
+ bool vertexid, bool instanceid)
+{
+ ILO_DEV_ASSERT(dev, 6, 7.5);
+
+ return get_gen6_component_controls(dev,
+ (vertexid) ? GEN6_VFCOMP_STORE_VID : GEN6_VFCOMP_STORE_0,
+ (instanceid) ? GEN6_VFCOMP_STORE_IID : GEN6_VFCOMP_STORE_0,
+ GEN6_VFCOMP_STORE_0,
+ GEN6_VFCOMP_STORE_0);
+}
+
/**
 * Set up the internal vertex elements prepended to the user ones: an
 * all-zero VE (for a zeroed VUE header, or as a mandatory placeholder when
 * there is no element at all) and/or a VE holding VertexID/InstanceID.
 * At most two internal VEs exist, matching the size of dw1[] below.
 */
static bool
vf_params_set_gen6_internal_ve(struct ilo_state_vf *vf,
                               const struct ilo_dev *dev,
                               const struct ilo_state_vf_params_info *params,
                               uint8_t user_ve_count)
{
   const bool prepend_ids =
      (params->prepend_vertexid || params->prepend_instanceid);
   uint8_t internal_ve_count = 0, i;
   uint32_t dw1[2];


   ILO_DEV_ASSERT(dev, 6, 8);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 92:
    *
    *    "- At least one VERTEX_ELEMENT_STATE structure must be included.
    *
    *     - Inclusion of partial VERTEX_ELEMENT_STATE structures is
    *       UNDEFINED.
    *
    *     - SW must ensure that at least one vertex element is defined prior
    *       to issuing a 3DPRIMTIVE command, or operation is UNDEFINED.
    *
    *     - There are no "holes" allowed in the destination vertex: NOSTORE
    *       components must be overwritten by subsequent components unless
    *       they are the trailing DWords of the vertex.  Software must
    *       explicitly chose some value (probably 0) to be written into
    *       DWords that would otherwise be "holes"."
    *
    *     - ...
    *
    *     - [DevILK+] Element[0] must be valid."
    */
   /* also emit a zero VE when there would otherwise be no VE at all */
   if (params->prepend_zeros || (!user_ve_count && !prepend_ids))
      dw1[internal_ve_count++] = get_gen6_component_zeros(dev);

   if (prepend_ids) {
      if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
         /* placeholder for 3DSTATE_VF_SGVS */
         dw1[internal_ve_count++] = get_gen6_component_zeros(dev);
      } else {
         dw1[internal_ve_count++] = get_gen6_component_ids(dev,
               params->prepend_vertexid, params->prepend_instanceid);
      }
   }

   for (i = 0; i < internal_ve_count; i++) {
      STATIC_ASSERT(ARRAY_SIZE(vf->internal_ve[i]) >= 2);
      /* internal VEs read no VB data: only the valid bit and DW1 controls */
      vf->internal_ve[i][0] = GEN6_VE_DW0_VALID;
      vf->internal_ve[i][1] = dw1[i];
   }

   vf->internal_ve_count = internal_ve_count;

   return true;
}
+
+static bool
+vf_params_set_gen8_3DSTATE_VF_SGVS(struct ilo_state_vf *vf,
+ const struct ilo_dev *dev,
+ const struct ilo_state_vf_params_info *params)
+{
+ const uint8_t attr = (params->prepend_zeros) ? 1 : 0;
+ uint32_t dw1;
+
+ ILO_DEV_ASSERT(dev, 8, 8);
+
+ dw1 = 0;
+
+ if (params->prepend_instanceid) {
+ dw1 |= GEN8_SGVS_DW1_IID_ENABLE |
+ 1 << GEN8_SGVS_DW1_IID_VE_COMP__SHIFT |
+ attr << GEN8_SGVS_DW1_IID_VE_INDEX__SHIFT;
+ }
+
+ if (params->prepend_vertexid) {
+ dw1 |= GEN8_SGVS_DW1_VID_ENABLE |
+ 0 << GEN8_SGVS_DW1_VID_VE_COMP__SHIFT |
+ attr << GEN8_SGVS_DW1_VID_VE_INDEX__SHIFT;
+ }
+
+ STATIC_ASSERT(ARRAY_SIZE(vf->sgvs) >= 1);
+ vf->sgvs[0] = dw1;
+
+ return true;
+}
+
+static uint32_t
+get_gen6_fixed_cut_index(const struct ilo_dev *dev,
+ enum gen_index_format format)
+{
+ const uint32_t fixed = ~0u;
+
+ ILO_DEV_ASSERT(dev, 6, 7);
+
+ switch (format) {
+ case GEN6_INDEX_BYTE: return (uint8_t) fixed;
+ case GEN6_INDEX_WORD: return (uint16_t) fixed;
+ case GEN6_INDEX_DWORD: return (uint32_t) fixed;
+ default:
+ assert(!"unknown index format");
+ return fixed;
+ }
+}
+
+static bool
+get_gen6_cut_index_supported(const struct ilo_dev *dev,
+ enum gen_3dprim_type topology)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /*
+ * See the Sandy Bridge PRM, volume 2 part 1, page 80 and the Haswell PRM,
+ * volume 7, page 456.
+ */
+ switch (topology) {
+ case GEN6_3DPRIM_TRIFAN:
+ case GEN6_3DPRIM_QUADLIST:
+ case GEN6_3DPRIM_QUADSTRIP:
+ case GEN6_3DPRIM_POLYGON:
+ case GEN6_3DPRIM_LINELOOP:
+ return (ilo_dev_gen(dev) >= ILO_GEN(7.5));
+ case GEN6_3DPRIM_RECTLIST:
+ case GEN6_3DPRIM_TRIFAN_NOSTIPPLE:
+ return false;
+ default:
+ return true;
+ }
+}
+
+static bool
+vf_params_set_gen6_3dstate_index_buffer(struct ilo_state_vf *vf,
+ const struct ilo_dev *dev,
+ const struct ilo_state_vf_params_info *params)
+{
+ uint32_t dw0 = 0;
+
+ ILO_DEV_ASSERT(dev, 6, 7);
+
+ /* cut index only, as in 3DSTATE_VF */
+ if (params->cut_index_enable) {
+ assert(get_gen6_cut_index_supported(dev, params->cv_topology));
+ assert(get_gen6_fixed_cut_index(dev, params->cv_index_format) ==
+ params->cut_index);
+
+ dw0 |= GEN6_IB_DW0_CUT_INDEX_ENABLE;
+ }
+
+ STATIC_ASSERT(ARRAY_SIZE(vf->cut) >= 1);
+ vf->cut[0] = dw0;
+
+ return true;
+}
+
+static bool
+vf_params_set_gen75_3DSTATE_VF(struct ilo_state_vf *vf,
+ const struct ilo_dev *dev,
+ const struct ilo_state_vf_params_info *params)
+{
+ uint32_t dw0 = 0;
+
+ ILO_DEV_ASSERT(dev, 7.5, 8);
+
+ if (params->cut_index_enable) {
+ assert(get_gen6_cut_index_supported(dev, params->cv_topology));
+ dw0 |= GEN75_VF_DW0_CUT_INDEX_ENABLE;
+ }
+
+ STATIC_ASSERT(ARRAY_SIZE(vf->cut) >= 2);
+ vf->cut[0] = dw0;
+ vf->cut[1] = params->cut_index;
+
+ return true;
+}
+
/**
 * Assert-validate a vertex buffer binding: offset within the BO, pitch
 * range, and 64-bit alignment when the buffer feeds double-precision
 * elements.  Always returns true; violations only trip asserts.
 */
static bool
vertex_buffer_validate_gen6(const struct ilo_dev *dev,
                            const struct ilo_state_vertex_buffer_info *info)
{
   ILO_DEV_ASSERT(dev, 6, 8);

   if (info->buf)
      assert(info->offset < info->buf->bo_size && info->size);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 86:
    *
    *     "(Buffer Pitch)
    *      Range  [DevCTG+]: [0,2048] Bytes"
    */
   assert(info->stride <= 2048);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 86:
    *
    *     "64-bit floating point values must be 64-bit aligned in memory, or
    *      UNPREDICTABLE data will be fetched.  When accessing an element
    *      containing 64-bit floating point values, the Buffer Starting
    *      Address and Source Element Offset values must add to a 64-bit
    *      aligned address, and BufferPitch must be a multiple of 64-bits."
    */
   if (info->cv_has_double) {
      assert(info->stride % 8 == 0);
      /* element offsets are not known here; the caller supplies them mod 8 */
      assert((info->offset + info->cv_double_vertex_offset_mod_8) % 8 == 0);
   }

   return true;
}
+
+static uint32_t
+vertex_buffer_get_gen6_size(const struct ilo_dev *dev,
+ const struct ilo_state_vertex_buffer_info *info)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ if (!info->buf)
+ return 0;
+
+ return (info->offset + info->size <= info->buf->bo_size) ? info->size :
+ info->buf->bo_size - info->offset;
+}
+
/**
 * Fill the VERTEX_BUFFER_STATE DWords (vb->vb): pitch and flags in DW0,
 * start offset in DW1, and either the buffer size (Gen8+) or the address
 * of the last valid byte (pre-Gen8) in DW2.  The BO address itself is
 * relocated at emit time; here only need_bo records whether one is bound.
 */
static bool
vertex_buffer_set_gen8_vertex_buffer_state(struct ilo_state_vertex_buffer *vb,
                                           const struct ilo_dev *dev,
                                           const struct ilo_state_vertex_buffer_info *info)
{
   const uint32_t size = vertex_buffer_get_gen6_size(dev, info);
   uint32_t dw0;

   ILO_DEV_ASSERT(dev, 6, 8);

   if (!vertex_buffer_validate_gen6(dev, info))
      return false;

   dw0 = info->stride << GEN6_VB_DW0_PITCH__SHIFT;

   if (ilo_dev_gen(dev) >= ILO_GEN(7))
      dw0 |= GEN7_VB_DW0_ADDR_MODIFIED;
   if (!info->buf)
      dw0 |= GEN6_VB_DW0_IS_NULL;

   STATIC_ASSERT(ARRAY_SIZE(vb->vb) >= 3);
   vb->vb[0] = dw0;
   vb->vb[1] = info->offset;

   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
      vb->vb[2] = size;
   } else {
      /* address of the last valid byte */
      vb->vb[2] = (size) ? info->offset + size - 1 : 0;
   }

   vb->need_bo = (info->buf != NULL);

   return true;
}
+
+static uint32_t
+get_index_format_size(enum gen_index_format format)
+{
+ switch (format) {
+ case GEN6_INDEX_BYTE: return 1;
+ case GEN6_INDEX_WORD: return 2;
+ case GEN6_INDEX_DWORD: return 4;
+ default:
+ assert(!"unknown index format");
+ return 1;
+ }
+}
+
/**
 * Assert-validate an index buffer binding: the start offset must be
 * index-size aligned and fall within the BO.  Always returns true;
 * violations only trip asserts in debug builds.
 */
static bool
index_buffer_validate_gen6(const struct ilo_dev *dev,
                           const struct ilo_state_index_buffer_info *info)
{
   const uint32_t format_size = get_index_format_size(info->format);

   ILO_DEV_ASSERT(dev, 6, 8);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 79:
    *
    *     "This field (Buffer Starting Address) contains the size-aligned (as
    *      specified by Index Format) Graphics Address of the first element of
    *      interest within the index buffer."
    */
   assert(info->offset % format_size == 0);

   if (info->buf)
      assert(info->offset < info->buf->bo_size && info->size);

   return true;
}
+
+static uint32_t
+index_buffer_get_gen6_size(const struct ilo_dev *dev,
+ const struct ilo_state_index_buffer_info *info)
+{
+ uint32_t size;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ if (!info->buf)
+ return 0;
+
+ size = (info->offset + info->size <= info->buf->bo_size) ? info->size :
+ info->buf->bo_size - info->offset;
+
+ if (ilo_dev_gen(dev) < ILO_GEN(8)) {
+ const uint32_t format_size = get_index_format_size(info->format);
+ size -= (size % format_size);
+ }
+
+ return size;
+}
+
/**
 * Fill the 3DSTATE_INDEX_BUFFER DWords (ib->ib): index format, start
 * offset, and either the buffer size (Gen8+) or the address of the last
 * valid byte (pre-Gen8).  The BO address is relocated at emit time;
 * need_bo records whether a buffer is bound.
 */
static bool
index_buffer_set_gen8_3DSTATE_INDEX_BUFFER(struct ilo_state_index_buffer *ib,
                                           const struct ilo_dev *dev,
                                           const struct ilo_state_index_buffer_info *info)
{
   const uint32_t size = index_buffer_get_gen6_size(dev, info);

   ILO_DEV_ASSERT(dev, 6, 8);

   if (!index_buffer_validate_gen6(dev, info))
      return false;

   STATIC_ASSERT(ARRAY_SIZE(ib->ib) >= 3);
   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
      ib->ib[0] = info->format << GEN8_IB_DW1_FORMAT__SHIFT;
      ib->ib[1] = info->offset;
      ib->ib[2] = size;
   } else {
      ib->ib[0] = info->format << GEN6_IB_DW0_FORMAT__SHIFT;
      ib->ib[1] = info->offset;
      /* address of the last valid byte, or 0 */
      ib->ib[2] = (size) ? info->offset + size - 1 : 0;
   }

   ib->need_bo = (info->buf != NULL);

   return true;
}
+
/**
 * Whether \p format may be used as a vertex element source format on this
 * device.  The table maps each supported format to the first gen that
 * supports it (ILO_GEN(1) meaning "all gens"); absent entries are zero and
 * thus never valid.
 */
bool
ilo_state_vf_valid_element_format(const struct ilo_dev *dev,
                                  enum gen_surface_format format)
{
   /*
    * This table is based on:
    *
    *  - the Sandy Bridge PRM, volume 4 part 1, page 88-97
    *  - the Ivy Bridge PRM, volume 2 part 1, page 97-99
    *  - the Haswell PRM, volume 7, page 467-470
    */
   static const int vf_element_formats[] = {
      [GEN6_FORMAT_R32G32B32A32_FLOAT]       = ILO_GEN(  1),
      [GEN6_FORMAT_R32G32B32A32_SINT]        = ILO_GEN(  1),
      [GEN6_FORMAT_R32G32B32A32_UINT]        = ILO_GEN(  1),
      [GEN6_FORMAT_R32G32B32A32_UNORM]       = ILO_GEN(  1),
      [GEN6_FORMAT_R32G32B32A32_SNORM]       = ILO_GEN(  1),
      [GEN6_FORMAT_R64G64_FLOAT]             = ILO_GEN(  1),
      [GEN6_FORMAT_R32G32B32A32_SSCALED]     = ILO_GEN(  1),
      [GEN6_FORMAT_R32G32B32A32_USCALED]     = ILO_GEN(  1),
      [GEN6_FORMAT_R32G32B32A32_SFIXED]      = ILO_GEN(7.5),
      [GEN6_FORMAT_R32G32B32_FLOAT]          = ILO_GEN(  1),
      [GEN6_FORMAT_R32G32B32_SINT]           = ILO_GEN(  1),
      [GEN6_FORMAT_R32G32B32_UINT]           = ILO_GEN(  1),
      [GEN6_FORMAT_R32G32B32_UNORM]          = ILO_GEN(  1),
      [GEN6_FORMAT_R32G32B32_SNORM]          = ILO_GEN(  1),
      [GEN6_FORMAT_R32G32B32_SSCALED]        = ILO_GEN(  1),
      [GEN6_FORMAT_R32G32B32_USCALED]        = ILO_GEN(  1),
      [GEN6_FORMAT_R32G32B32_SFIXED]         = ILO_GEN(7.5),
      [GEN6_FORMAT_R16G16B16A16_UNORM]       = ILO_GEN(  1),
      [GEN6_FORMAT_R16G16B16A16_SNORM]       = ILO_GEN(  1),
      [GEN6_FORMAT_R16G16B16A16_SINT]        = ILO_GEN(  1),
      [GEN6_FORMAT_R16G16B16A16_UINT]        = ILO_GEN(  1),
      [GEN6_FORMAT_R16G16B16A16_FLOAT]       = ILO_GEN(  1),
      [GEN6_FORMAT_R32G32_FLOAT]             = ILO_GEN(  1),
      [GEN6_FORMAT_R32G32_SINT]              = ILO_GEN(  1),
      [GEN6_FORMAT_R32G32_UINT]              = ILO_GEN(  1),
      [GEN6_FORMAT_R32G32_UNORM]             = ILO_GEN(  1),
      [GEN6_FORMAT_R32G32_SNORM]             = ILO_GEN(  1),
      [GEN6_FORMAT_R64_FLOAT]                = ILO_GEN(  1),
      [GEN6_FORMAT_R16G16B16A16_SSCALED]     = ILO_GEN(  1),
      [GEN6_FORMAT_R16G16B16A16_USCALED]     = ILO_GEN(  1),
      [GEN6_FORMAT_R32G32_SSCALED]           = ILO_GEN(  1),
      [GEN6_FORMAT_R32G32_USCALED]           = ILO_GEN(  1),
      [GEN6_FORMAT_R32G32_SFIXED]            = ILO_GEN(7.5),
      [GEN6_FORMAT_B8G8R8A8_UNORM]           = ILO_GEN(  1),
      [GEN6_FORMAT_R10G10B10A2_UNORM]        = ILO_GEN(  1),
      [GEN6_FORMAT_R10G10B10A2_UINT]         = ILO_GEN(  1),
      [GEN6_FORMAT_R10G10B10_SNORM_A2_UNORM] = ILO_GEN(  1),
      [GEN6_FORMAT_R8G8B8A8_UNORM]           = ILO_GEN(  1),
      [GEN6_FORMAT_R8G8B8A8_SNORM]           = ILO_GEN(  1),
      [GEN6_FORMAT_R8G8B8A8_SINT]            = ILO_GEN(  1),
      [GEN6_FORMAT_R8G8B8A8_UINT]            = ILO_GEN(  1),
      [GEN6_FORMAT_R16G16_UNORM]             = ILO_GEN(  1),
      [GEN6_FORMAT_R16G16_SNORM]             = ILO_GEN(  1),
      [GEN6_FORMAT_R16G16_SINT]              = ILO_GEN(  1),
      [GEN6_FORMAT_R16G16_UINT]              = ILO_GEN(  1),
      [GEN6_FORMAT_R16G16_FLOAT]             = ILO_GEN(  1),
      [GEN6_FORMAT_B10G10R10A2_UNORM]        = ILO_GEN(7.5),
      [GEN6_FORMAT_R11G11B10_FLOAT]          = ILO_GEN(  1),
      [GEN6_FORMAT_R32_SINT]                 = ILO_GEN(  1),
      [GEN6_FORMAT_R32_UINT]                 = ILO_GEN(  1),
      [GEN6_FORMAT_R32_FLOAT]                = ILO_GEN(  1),
      [GEN6_FORMAT_R32_UNORM]                = ILO_GEN(  1),
      [GEN6_FORMAT_R32_SNORM]                = ILO_GEN(  1),
      [GEN6_FORMAT_R10G10B10X2_USCALED]      = ILO_GEN(  1),
      [GEN6_FORMAT_R8G8B8A8_SSCALED]         = ILO_GEN(  1),
      [GEN6_FORMAT_R8G8B8A8_USCALED]         = ILO_GEN(  1),
      [GEN6_FORMAT_R16G16_SSCALED]           = ILO_GEN(  1),
      [GEN6_FORMAT_R16G16_USCALED]           = ILO_GEN(  1),
      [GEN6_FORMAT_R32_SSCALED]              = ILO_GEN(  1),
      [GEN6_FORMAT_R32_USCALED]              = ILO_GEN(  1),
      [GEN6_FORMAT_R8G8_UNORM]               = ILO_GEN(  1),
      [GEN6_FORMAT_R8G8_SNORM]               = ILO_GEN(  1),
      [GEN6_FORMAT_R8G8_SINT]                = ILO_GEN(  1),
      [GEN6_FORMAT_R8G8_UINT]                = ILO_GEN(  1),
      [GEN6_FORMAT_R16_UNORM]                = ILO_GEN(  1),
      [GEN6_FORMAT_R16_SNORM]                = ILO_GEN(  1),
      [GEN6_FORMAT_R16_SINT]                 = ILO_GEN(  1),
      [GEN6_FORMAT_R16_UINT]                 = ILO_GEN(  1),
      [GEN6_FORMAT_R16_FLOAT]                = ILO_GEN(  1),
      [GEN6_FORMAT_R8G8_SSCALED]             = ILO_GEN(  1),
      [GEN6_FORMAT_R8G8_USCALED]             = ILO_GEN(  1),
      [GEN6_FORMAT_R16_SSCALED]              = ILO_GEN(  1),
      [GEN6_FORMAT_R16_USCALED]              = ILO_GEN(  1),
      [GEN6_FORMAT_R8_UNORM]                 = ILO_GEN(  1),
      [GEN6_FORMAT_R8_SNORM]                 = ILO_GEN(  1),
      [GEN6_FORMAT_R8_SINT]                  = ILO_GEN(  1),
      [GEN6_FORMAT_R8_UINT]                  = ILO_GEN(  1),
      [GEN6_FORMAT_R8_SSCALED]               = ILO_GEN(  1),
      [GEN6_FORMAT_R8_USCALED]               = ILO_GEN(  1),
      [GEN6_FORMAT_R8G8B8_UNORM]             = ILO_GEN(  1),
      [GEN6_FORMAT_R8G8B8_SNORM]             = ILO_GEN(  1),
      [GEN6_FORMAT_R8G8B8_SSCALED]           = ILO_GEN(  1),
      [GEN6_FORMAT_R8G8B8_USCALED]           = ILO_GEN(  1),
      [GEN6_FORMAT_R64G64B64A64_FLOAT]       = ILO_GEN(  1),
      [GEN6_FORMAT_R64G64B64_FLOAT]          = ILO_GEN(  1),
      [GEN6_FORMAT_R16G16B16_FLOAT]          = ILO_GEN(  6),
      [GEN6_FORMAT_R16G16B16_UNORM]          = ILO_GEN(  1),
      [GEN6_FORMAT_R16G16B16_SNORM]          = ILO_GEN(  1),
      [GEN6_FORMAT_R16G16B16_SSCALED]        = ILO_GEN(  1),
      [GEN6_FORMAT_R16G16B16_USCALED]        = ILO_GEN(  1),
      [GEN6_FORMAT_R16G16B16_UINT]           = ILO_GEN(7.5),
      [GEN6_FORMAT_R16G16B16_SINT]           = ILO_GEN(7.5),
      [GEN6_FORMAT_R32_SFIXED]               = ILO_GEN(7.5),
      [GEN6_FORMAT_R10G10B10A2_SNORM]        = ILO_GEN(7.5),
      [GEN6_FORMAT_R10G10B10A2_USCALED]      = ILO_GEN(7.5),
      [GEN6_FORMAT_R10G10B10A2_SSCALED]      = ILO_GEN(7.5),
      [GEN6_FORMAT_R10G10B10A2_SINT]         = ILO_GEN(7.5),
      [GEN6_FORMAT_B10G10R10A2_SNORM]        = ILO_GEN(7.5),
      [GEN6_FORMAT_B10G10R10A2_USCALED]      = ILO_GEN(7.5),
      [GEN6_FORMAT_B10G10R10A2_SSCALED]      = ILO_GEN(7.5),
      [GEN6_FORMAT_B10G10R10A2_UINT]         = ILO_GEN(7.5),
      [GEN6_FORMAT_B10G10R10A2_SINT]         = ILO_GEN(7.5),
      [GEN6_FORMAT_R8G8B8_UINT]              = ILO_GEN(7.5),
      [GEN6_FORMAT_R8G8B8_SINT]              = ILO_GEN(7.5),
   };

   ILO_DEV_ASSERT(dev, 6, 8);

   /* out-of-range or zero (unlisted) entries are invalid on every gen */
   return (format < ARRAY_SIZE(vf_element_formats) &&
           vf_element_formats[format] &&
           ilo_dev_gen(dev) >= vf_element_formats[format]);
}
+
/**
 * Initialize \p vf from \p info.  The caller-provided, zeroed info->data
 * array backs both the user VE DWords and the per-element instancing
 * DWords; user_ve and user_instancing are carved out of it back-to-back.
 */
bool
ilo_state_vf_init(struct ilo_state_vf *vf,
                  const struct ilo_dev *dev,
                  const struct ilo_state_vf_info *info)
{
   bool ret = true;

   assert(ilo_is_zeroed(vf, sizeof(*vf)));
   assert(ilo_is_zeroed(info->data, info->data_size));

   /* the caller must size info->data via ilo_state_vf_data_size() */
   assert(ilo_state_vf_data_size(dev, info->element_count) <=
         info->data_size);
   vf->user_ve = (uint32_t (*)[2]) info->data;
   vf->user_instancing =
      (uint32_t (*)[2]) (vf->user_ve + info->element_count);

   ret &= vf_set_gen6_3DSTATE_VERTEX_ELEMENTS(vf, dev, info);

   /* pre-Gen8, instancing controls live in the VB state instead */
   if (ilo_dev_gen(dev) >= ILO_GEN(8))
      ret &= vf_set_gen8_3DSTATE_VF_INSTANCING(vf, dev, info);
   else
      ret &= vf_set_gen6_vertex_buffer_state(vf, dev, info);

   ret &= ilo_state_vf_set_params(vf, dev, &info->params);

   assert(ret);

   return ret;
}
+
+bool
+ilo_state_vf_init_for_rectlist(struct ilo_state_vf *vf,
+ const struct ilo_dev *dev,
+ void *data, size_t data_size,
+ const struct ilo_state_vf_element_info *elements,
+ uint8_t element_count)
+{
+ struct ilo_state_vf_info info;
+
+ memset(&info, 0, sizeof(info));
+
+ info.data = data;
+ info.data_size = data_size;
+
+ info.elements = elements;
+ info.element_count = element_count;
+
+ /*
+ * For VUE header,
+ *
+ * DW0: Reserved: MBZ
+ * DW1: Render Target Array Index
+ * DW2: Viewport Index
+ * DW3: Point Width
+ */
+ info.params.prepend_zeros = true;
+
+ return ilo_state_vf_init(vf, dev, &info);
+}
+
/**
 * Apply draw-time parameters to an initialized \p vf: internal VEs
 * (VUE-header zeros, VertexID/InstanceID), Gen8 SGVS, edge-flag patching of
 * the last user VE, and the cut-index state.
 */
bool
ilo_state_vf_set_params(struct ilo_state_vf *vf,
                        const struct ilo_dev *dev,
                        const struct ilo_state_vf_params_info *params)
{
   bool ret = true;

   ILO_DEV_ASSERT(dev, 6, 8);

   ret &= vf_params_set_gen6_internal_ve(vf, dev, params, vf->user_ve_count);
   if (ilo_dev_gen(dev) >= ILO_GEN(8))
      ret &= vf_params_set_gen8_3DSTATE_VF_SGVS(vf, dev, params);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 94:
    *
    *     "Edge flags are supported for the following primitive topology types
    *      only, otherwise EdgeFlagEnable must not be ENABLED.
    *
    *      - 3DPRIM_TRILIST*
    *      - 3DPRIM_TRISTRIP*
    *      - 3DPRIM_TRIFAN*
    *      - 3DPRIM_POLYGON"
    *
    *     "[DevSNB]: Edge Flags are not supported for QUADLIST primitives.
    *      Software may elect to convert QUADLIST primitives to some set of
    *      corresponding edge-flag-supported primitive types (e.g., POLYGONs)
    *      prior to submission to the 3D vf."
    *
    * From the Ivy Bridge PRM, volume 2 part 1, page 86:
    *
    *     "Edge flags are supported for all primitive topology types."
    *
    * Both PRMs are confusing...
    */
   if (params->last_element_edge_flag) {
      assert(vf->edge_flag_supported);
      if (ilo_dev_gen(dev) == ILO_GEN(6))
         assert(params->cv_topology != GEN6_3DPRIM_QUADLIST);
   }

   /* swap in the edge-flag or plain variant precomputed at init time */
   if (vf->edge_flag_supported) {
      assert(vf->user_ve_count);
      memcpy(vf->user_ve[vf->user_ve_count - 1],
            vf->last_user_ve[params->last_element_edge_flag],
            sizeof(vf->user_ve[vf->user_ve_count - 1]));
   }

   if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
      ret &= vf_params_set_gen75_3DSTATE_VF(vf, dev, params);
   else
      ret &= vf_params_set_gen6_3dstate_index_buffer(vf, dev, params);

   assert(ret);

   return ret;
}
+
+void
+ilo_state_vf_full_delta(const struct ilo_state_vf *vf,
+ const struct ilo_dev *dev,
+ struct ilo_state_vf_delta *delta)
+{
+ delta->dirty = ILO_STATE_VF_3DSTATE_VERTEX_ELEMENTS;
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+ delta->dirty |= ILO_STATE_VF_3DSTATE_VF_SGVS |
+ ILO_STATE_VF_3DSTATE_VF_INSTANCING;
+ } else {
+ delta->dirty |= ILO_STATE_VF_3DSTATE_VERTEX_BUFFERS;
+ }
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
+ delta->dirty |= ILO_STATE_VF_3DSTATE_VF;
+ else
+ delta->dirty |= ILO_STATE_VF_3DSTATE_INDEX_BUFFER;
+}
+
/**
 * Compute the dirty bits needed to go from VF state \p old to \p vf.
 * user_ve/user_instancing live in caller-provided storage, so the two
 * states must not share it (asserted below).
 */
void
ilo_state_vf_get_delta(const struct ilo_state_vf *vf,
                       const struct ilo_dev *dev,
                       const struct ilo_state_vf *old,
                       struct ilo_state_vf_delta *delta)
{
   /* no shallow copying */
   assert(vf->user_ve != old->user_ve &&
          vf->user_instancing != old->user_instancing);

   delta->dirty = 0;

   /* internal and user VEs are emitted by one 3DSTATE_VERTEX_ELEMENTS */
   if (vf->internal_ve_count != old->internal_ve_count ||
       vf->user_ve_count != old->user_ve_count ||
       memcmp(vf->internal_ve, old->internal_ve,
          sizeof(vf->internal_ve[0]) * vf->internal_ve_count) ||
       memcmp(vf->user_ve, old->user_ve,
          sizeof(vf->user_ve[0]) * vf->user_ve_count))
      delta->dirty |= ILO_STATE_VF_3DSTATE_VERTEX_ELEMENTS;

   /* instancing data is carried by different commands per gen */
   if (vf->user_ve_count != old->user_ve_count ||
       memcmp(vf->user_instancing, old->user_instancing,
          sizeof(vf->user_instancing[0]) * vf->user_ve_count)) {
      if (ilo_dev_gen(dev) >= ILO_GEN(8))
         delta->dirty |= ILO_STATE_VF_3DSTATE_VF_INSTANCING;
      else
         delta->dirty |= ILO_STATE_VF_3DSTATE_VERTEX_BUFFERS;
   }

   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
      if (vf->sgvs[0] != old->sgvs[0])
         delta->dirty |= ILO_STATE_VF_3DSTATE_VF_SGVS;
   }

   /* cut[1] (the cut index value) exists only on Gen7.5+ */
   if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
      if (memcmp(vf->cut, old->cut, sizeof(vf->cut)))
         delta->dirty |= ILO_STATE_VF_3DSTATE_VF;
   } else {
      if (vf->cut[0] != old->cut[0])
         delta->dirty |= ILO_STATE_VF_3DSTATE_INDEX_BUFFER;
   }
}
+
/**
 * No need to initialize first.
 */
bool
ilo_state_vertex_buffer_set_info(struct ilo_state_vertex_buffer *vb,
                                 const struct ilo_dev *dev,
                                 const struct ilo_state_vertex_buffer_info *info)
{
   const bool ok = vertex_buffer_set_gen8_vertex_buffer_state(vb, dev, info);

   assert(ok);

   return ok;
}
+
/**
 * No need to initialize first.
 */
bool
ilo_state_index_buffer_set_info(struct ilo_state_index_buffer *ib,
                                const struct ilo_dev *dev,
                                const struct ilo_state_index_buffer_info *info)
{
   const bool ok = index_buffer_set_gen8_3DSTATE_INDEX_BUFFER(ib, dev, info);

   assert(ok);

   return ok;
}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_vf.h b/src/gallium/drivers/ilo/core/ilo_state_vf.h
new file mode 100644
index 00000000000..f15c63a248a
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_vf.h
@@ -0,0 +1,228 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#ifndef ILO_STATE_VF_H
+#define ILO_STATE_VF_H
+
+#include "genhw/genhw.h"
+
+#include "ilo_core.h"
+#include "ilo_dev.h"
+
+/*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 93:
+ *
+ * "Up to 34 (DevSNB+) vertex elements are supported."
+ *
+ * "Up to 33 VBs are supported"
+ *
+ * Reserve two VEs and one VB for internal use.
+ */
+#define ILO_STATE_VF_MAX_ELEMENT_COUNT (34 - 2)
+#define ILO_STATE_VF_MAX_BUFFER_COUNT (33 - 1)
+
+enum ilo_state_vf_dirty_bits {
+ ILO_STATE_VF_3DSTATE_VERTEX_ELEMENTS = (1 << 0),
+ ILO_STATE_VF_3DSTATE_VF_SGVS = (1 << 1),
+ ILO_STATE_VF_3DSTATE_VF_INSTANCING = (1 << 2),
+ ILO_STATE_VF_3DSTATE_VERTEX_BUFFERS = (1 << 3),
+ ILO_STATE_VF_3DSTATE_VF = (1 << 4),
+ ILO_STATE_VF_3DSTATE_INDEX_BUFFER = (1 << 5),
+};
+
+/**
+ * Fetch a 128-bit vertex attribute.
+ */
+struct ilo_state_vf_element_info {
+   /* source vertex buffer index and byte offset of the attribute */
+   uint8_t buffer;
+   uint16_t vertex_offset;
+   enum gen_surface_format format;
+
+   /* NOTE(review): presumably derived from `format` by the caller — confirm */
+   uint8_t format_size;
+   uint8_t component_count;
+   bool is_integer;
+
+   /* must be the same for elements that share the same buffer before Gen8 */
+   bool instancing_enable;
+   uint32_t instancing_step_rate;
+};
+
+/**
+ * VF parameters.
+ *
+ * Fields prefixed with "cv_" are current values needed only for
+ * cross-validation against other states.
+ */
+struct ilo_state_vf_params_info {
+   enum gen_3dprim_type cv_topology;
+
+   /* prepend an attribute of zeros */
+   bool prepend_zeros;
+
+   /* prepend an attribute of VertexID and/or InstanceID */
+   bool prepend_vertexid;
+   bool prepend_instanceid;
+
+   bool last_element_edge_flag;
+
+   /* primitive-restart (cut index) control */
+   enum gen_index_format cv_index_format;
+   bool cut_index_enable;
+   uint32_t cut_index;
+};
+
+/**
+ * Vertex fetch.
+ */
+struct ilo_state_vf_info {
+   /* caller-provided storage for the VE dwords; sized by
+    * ilo_state_vf_data_size() */
+   void *data;
+   size_t data_size;
+
+   const struct ilo_state_vf_element_info *elements;
+   uint8_t element_count;
+
+   struct ilo_state_vf_params_info params;
+};
+
+struct ilo_state_vf {
+   /* per-element VERTEX_ELEMENT_STATE and instancing dword pairs;
+    * NOTE(review): presumably point into the caller-provided data
+    * buffer — confirm against ilo_state_vf_data_size() */
+   uint32_t (*user_ve)[2];
+   uint32_t (*user_instancing)[2];
+   int8_t vb_to_first_elem[ILO_STATE_VF_MAX_BUFFER_COUNT];
+   uint8_t user_ve_count;
+
+   bool edge_flag_supported;
+   /* two variants of the last user VE */
+   uint32_t last_user_ve[2][2];
+
+   /* two VEs are reserved for internal use */
+   uint32_t internal_ve[2][2];
+   uint8_t internal_ve_count;
+
+   /* Gen8+ 3DSTATE_VF_SGVS dword */
+   uint32_t sgvs[1];
+
+   /* cut index dwords; see ilo_state_vf_get_delta() for per-gen use */
+   uint32_t cut[2];
+};
+
+struct ilo_state_vf_delta {
+ uint32_t dirty;
+};
+
+struct ilo_buffer;
+
+struct ilo_state_vertex_buffer_info {
+   const struct ilo_buffer *buf;
+   /* byte offset into, and accessible size of, the buffer */
+   uint32_t offset;
+   uint32_t size;
+
+   /* distance in bytes between two consecutive vertices */
+   uint16_t stride;
+
+   /* doubles must be at 64-bit aligned addresses */
+   bool cv_has_double;
+   uint8_t cv_double_vertex_offset_mod_8;
+};
+
+struct ilo_state_vertex_buffer {
+   /* 3DSTATE_VERTEX_BUFFERS body dwords */
+   uint32_t vb[3];
+
+   bool need_bo;
+
+   /* managed by users */
+   struct intel_bo *bo;
+};
+
+struct ilo_state_index_buffer_info {
+   const struct ilo_buffer *buf;
+   /* byte offset into, and accessible size of, the buffer */
+   uint32_t offset;
+   uint32_t size;
+
+   enum gen_index_format format;
+};
+
+struct ilo_state_index_buffer {
+   /* 3DSTATE_INDEX_BUFFER body dwords */
+   uint32_t ib[3];
+
+   bool need_bo;
+
+   /* managed by users */
+   struct intel_bo *bo;
+};
+
+/**
+ * Return the size in bytes of the caller-provided storage needed for
+ * \p element_count user vertex elements: one VE dword pair plus one
+ * instancing dword pair per element.
+ */
+static inline size_t
+ilo_state_vf_data_size(const struct ilo_dev *dev, uint8_t element_count)
+{
+   /* never dereferenced; only used to get the member sizes */
+   const struct ilo_state_vf *vf = NULL;
+   return (sizeof(vf->user_ve[0]) +
+           sizeof(vf->user_instancing[0])) * element_count;
+}
+
+bool
+ilo_state_vf_valid_element_format(const struct ilo_dev *dev,
+ enum gen_surface_format format);
+
+bool
+ilo_state_vf_init(struct ilo_state_vf *vf,
+ const struct ilo_dev *dev,
+ const struct ilo_state_vf_info *info);
+
+bool
+ilo_state_vf_init_for_rectlist(struct ilo_state_vf *vf,
+ const struct ilo_dev *dev,
+ void *data, size_t data_size,
+ const struct ilo_state_vf_element_info *elements,
+ uint8_t element_count);
+
+bool
+ilo_state_vf_set_params(struct ilo_state_vf *vf,
+ const struct ilo_dev *dev,
+ const struct ilo_state_vf_params_info *params);
+
+/**
+ * Return the number of attributes in the VUE: the internally-generated
+ * VEs plus the user-specified VEs.
+ */
+static inline uint8_t
+ilo_state_vf_get_attr_count(const struct ilo_state_vf *vf)
+{
+   return vf->internal_ve_count + vf->user_ve_count;
+}
+
+void
+ilo_state_vf_full_delta(const struct ilo_state_vf *vf,
+ const struct ilo_dev *dev,
+ struct ilo_state_vf_delta *delta);
+
+void
+ilo_state_vf_get_delta(const struct ilo_state_vf *vf,
+ const struct ilo_dev *dev,
+ const struct ilo_state_vf *old,
+ struct ilo_state_vf_delta *delta);
+
+bool
+ilo_state_vertex_buffer_set_info(struct ilo_state_vertex_buffer *vb,
+ const struct ilo_dev *dev,
+ const struct ilo_state_vertex_buffer_info *info);
+
+bool
+ilo_state_index_buffer_set_info(struct ilo_state_index_buffer *ib,
+ const struct ilo_dev *dev,
+ const struct ilo_state_index_buffer_info *info);
+
+#endif /* ILO_STATE_VF_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_state_viewport.c b/src/gallium/drivers/ilo/core/ilo_state_viewport.c
new file mode 100644
index 00000000000..aae57334541
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_viewport.c
@@ -0,0 +1,378 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#include "ilo_debug.h"
+#include "ilo_state_viewport.h"
+
+/**
+ * Compute the guardband, in NDC space, for a viewport transform.
+ */
+static void
+viewport_matrix_get_gen6_guardband(const struct ilo_dev *dev,
+                                   const struct ilo_state_viewport_matrix_info *mat,
+                                   float *min_gbx, float *max_gbx,
+                                   float *min_gby, float *max_gby)
+{
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 234:
+    *
+    *   "Per-Device Guardband Extents
+    *
+    *      - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1]
+    *      - Maximum Post-Clamp Delta (X or Y): 16K"
+    *
+    *   "In addition, in order to be correctly rendered, objects must have a
+    *    screenspace bounding box not exceeding 8K in the X or Y direction.
+    *    This additional restriction must also be comprehended by software,
+    *    i.e., enforced by use of clipping."
+    *
+    * From the Ivy Bridge PRM, volume 2 part 1, page 248:
+    *
+    *   "Per-Device Guardband Extents
+    *
+    *      - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1]
+    *      - Maximum Post-Clamp Delta (X or Y): N/A"
+    *
+    *   "In addition, in order to be correctly rendered, objects must have a
+    *    screenspace bounding box not exceeding 8K in the X or Y direction.
+    *    This additional restriction must also be comprehended by software,
+    *    i.e., enforced by use of clipping."
+    *
+    * Combined, the bounding box of any object can not exceed 8K in both
+    * width and height.
+    *
+    * Below we set the guardband as a square of length 8K, centered at where
+    * the viewport is.  This makes sure all objects passing the GB test are
+    * valid to the renderer, and those failing the XY clipping have a
+    * better chance of passing the GB test.
+    */
+   const int max_extent = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 32768 : 16384;
+   const int half_len = 8192 / 2;
+   /* center of the viewport in screen space */
+   int center_x = (int) mat->translate[0];
+   int center_y = (int) mat->translate[1];
+   float scale_x, scale_y;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* make sure the guardband is within the valid range */
+   if (center_x - half_len < -max_extent)
+      center_x = -max_extent + half_len;
+   else if (center_x + half_len > max_extent - 1)
+      center_x = max_extent - half_len;
+
+   if (center_y - half_len < -max_extent)
+      center_y = -max_extent + half_len;
+   else if (center_y + half_len > max_extent - 1)
+      center_y = max_extent - half_len;
+
+   scale_x = fabsf(mat->scale[0]);
+   scale_y = fabsf(mat->scale[1]);
+   /*
+    * From the Haswell PRM, volume 2d, page 292-293:
+    *
+    *   "Note: Minimum allowed value for this field (X/Y Min Clip Guardband)
+    *    is -16384."
+    *
+    *   "Note: Maximum allowed value for this field (X/Y Max Clip Guardband)
+    *    is 16383."
+    *
+    * Avoid small scales.
+    */
+   if (scale_x < 1.0f)
+      scale_x = 1.0f;
+   if (scale_y < 1.0f)
+      scale_y = 1.0f;
+
+   /* in NDC space: invert the viewport transform on the clamped extents */
+   *min_gbx = ((float) (center_x - half_len) - mat->translate[0]) / scale_x;
+   *max_gbx = ((float) (center_x + half_len) - mat->translate[0]) / scale_x;
+   *min_gby = ((float) (center_y - half_len) - mat->translate[1]) / scale_y;
+   *max_gby = ((float) (center_y + half_len) - mat->translate[1]) / scale_y;
+}
+
+/**
+ * Return the screen-space extent of one axis of a viewport transform:
+ * the image of NDC [-1, 1] under scale-and-translate.
+ */
+static void
+viewport_matrix_get_extent(const struct ilo_state_viewport_matrix_info *mat,
+                           int axis, float *min, float *max)
+{
+   const float scale_abs = fabsf(mat->scale[axis]);
+
+   *min = -1.0f * scale_abs + mat->translate[axis];
+   *max =  1.0f * scale_abs + mat->translate[axis];
+}
+
+/**
+ * Fill out the SF_CLIP_VIEWPORT state array from the viewport matrices.
+ */
+static bool
+viewport_matrix_set_gen7_SF_CLIP_VIEWPORT(struct ilo_state_viewport *vp,
+                                          const struct ilo_dev *dev,
+                                          const struct ilo_state_viewport_matrix_info *matrices,
+                                          uint8_t count)
+{
+   uint8_t i;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   for (i = 0; i < count; i++) {
+      const struct ilo_state_viewport_matrix_info *mat = &matrices[i];
+      float min_gbx, max_gbx, min_gby, max_gby;
+      uint32_t dw[16];
+
+      viewport_matrix_get_gen6_guardband(dev, mat,
+            &min_gbx, &max_gbx, &min_gby, &max_gby);
+
+      /* dw0-5: viewport transform scale and translate */
+      dw[0] = fui(mat->scale[0]);
+      dw[1] = fui(mat->scale[1]);
+      dw[2] = fui(mat->scale[2]);
+      dw[3] = fui(mat->translate[0]);
+      dw[4] = fui(mat->translate[1]);
+      dw[5] = fui(mat->translate[2]);
+      dw[6] = 0;
+      dw[7] = 0;
+
+      /* dw8-11: guardband extents in NDC */
+      dw[8] = fui(min_gbx);
+      dw[9] = fui(max_gbx);
+      dw[10] = fui(min_gby);
+      dw[11] = fui(max_gby);
+
+      if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+         float min_x, max_x, min_y, max_y;
+
+         viewport_matrix_get_extent(mat, 0, &min_x, &max_x);
+         viewport_matrix_get_extent(mat, 1, &min_y, &max_y);
+
+         /* dw12-15: Gen8+ XY min/max viewport extents (max is inclusive) */
+         dw[12] = fui(min_x);
+         dw[13] = fui(max_x - 1.0f);
+         dw[14] = fui(min_y);
+         dw[15] = fui(max_y - 1.0f);
+      } else {
+         dw[12] = 0;
+         dw[13] = 0;
+         dw[14] = 0;
+         dw[15] = 0;
+      }
+
+      STATIC_ASSERT(ARRAY_SIZE(vp->sf_clip[i]) >= 16);
+      memcpy(vp->sf_clip[i], dw, sizeof(dw));
+   }
+
+   return true;
+}
+
+/**
+ * Fill out the CC_VIEWPORT state array (depth range) from the viewport
+ * matrices.
+ */
+static bool
+viewport_matrix_set_gen6_CC_VIEWPORT(struct ilo_state_viewport *vp,
+                                     const struct ilo_dev *dev,
+                                     const struct ilo_state_viewport_matrix_info *matrices,
+                                     uint8_t count)
+{
+   uint8_t i;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   for (i = 0; i < count; i++) {
+      const struct ilo_state_viewport_matrix_info *mat = &matrices[i];
+      float min_z, max_z;
+
+      /* axis 2 is the Z axis */
+      viewport_matrix_get_extent(mat, 2, &min_z, &max_z);
+
+      STATIC_ASSERT(ARRAY_SIZE(vp->cc[i]) >= 2);
+      vp->cc[i][0] = fui(min_z);
+      vp->cc[i][1] = fui(max_z);
+   }
+
+   return true;
+}
+
+/**
+ * Fill out the SCISSOR_RECT state array, clamping each rectangle to the
+ * maximum render target size of the generation.
+ */
+static bool
+viewport_scissor_set_gen6_SCISSOR_RECT(struct ilo_state_viewport *vp,
+                                       const struct ilo_dev *dev,
+                                       const struct ilo_state_viewport_scissor_info *scissors,
+                                       uint8_t count)
+{
+   const uint16_t max_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 16384 : 8192;
+   uint8_t i;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   for (i = 0; i < count; i++) {
+      const struct ilo_state_viewport_scissor_info *scissor = &scissors[i];
+      uint16_t min_x, min_y, max_x, max_y;
+      uint32_t dw0, dw1;
+
+      /* coordinates are all inclusive; clamp to the last valid pixel */
+      min_x = (scissor->min_x < max_size) ? scissor->min_x : max_size - 1;
+      min_y = (scissor->min_y < max_size) ? scissor->min_y : max_size - 1;
+      max_x = (scissor->max_x < max_size) ? scissor->max_x : max_size - 1;
+      max_y = (scissor->max_y < max_size) ? scissor->max_y : max_size - 1;
+
+      dw0 = min_y << GEN6_SCISSOR_DW0_MIN_Y__SHIFT |
+            min_x << GEN6_SCISSOR_DW0_MIN_X__SHIFT;
+      dw1 = max_y << GEN6_SCISSOR_DW1_MAX_Y__SHIFT |
+            max_x << GEN6_SCISSOR_DW1_MAX_X__SHIFT;
+
+      STATIC_ASSERT(ARRAY_SIZE(vp->scissor[i]) >= 2);
+      vp->scissor[i][0] = dw0;
+      vp->scissor[i][1] = dw1;
+   }
+
+   return true;
+}
+
+/**
+ * Initialize the viewport state.  \p info->data provides the storage for
+ * the HW state arrays and must be zeroed and sized by
+ * ilo_state_viewport_data_size().
+ */
+bool
+ilo_state_viewport_init(struct ilo_state_viewport *vp,
+                        const struct ilo_dev *dev,
+                        const struct ilo_state_viewport_info *info)
+{
+   const size_t elem_size = ilo_state_viewport_data_size(dev, 1);
+
+   assert(ilo_is_zeroed(vp, sizeof(*vp)));
+   assert(ilo_is_zeroed(info->data, info->data_size));
+
+   vp->data = info->data;
+
+   /* the data buffer bounds how many viewports can be stored */
+   if (info->data_size / elem_size < ILO_STATE_VIEWPORT_MAX_COUNT)
+      vp->array_size = info->data_size / elem_size;
+   else
+      vp->array_size = ILO_STATE_VIEWPORT_MAX_COUNT;
+
+   return ilo_state_viewport_set_params(vp, dev, &info->params, false);
+}
+
+/**
+ * Initialize the viewport state with storage only; no matrices or
+ * scissors are set (params are zeroed, count is 0).
+ */
+bool
+ilo_state_viewport_init_data_only(struct ilo_state_viewport *vp,
+                                  const struct ilo_dev *dev,
+                                  void *data, size_t data_size)
+{
+   struct ilo_state_viewport_info info;
+
+   memset(&info, 0, sizeof(info));
+   info.data = data;
+   info.data_size = data_size;
+
+   return ilo_state_viewport_init(vp, dev, &info);
+}
+
+/**
+ * Initialize the viewport state for internal RECTLIST draws: a single
+ * identity viewport transform and a zero-sized scissor.
+ */
+bool
+ilo_state_viewport_init_for_rectlist(struct ilo_state_viewport *vp,
+                                     const struct ilo_dev *dev,
+                                     void *data, size_t data_size)
+{
+   struct ilo_state_viewport_info info;
+   struct ilo_state_viewport_matrix_info mat;
+   struct ilo_state_viewport_scissor_info sci;
+
+   memset(&info, 0, sizeof(info));
+   memset(&mat, 0, sizeof(mat));
+   memset(&sci, 0, sizeof(sci));
+
+   info.data = data;
+   info.data_size = data_size;
+   info.params.matrices = &mat;
+   info.params.scissors = &sci;
+   info.params.count = 1;
+
+   /* identity transform (translate is zeroed above) */
+   mat.scale[0] = 1.0f;
+   mat.scale[1] = 1.0f;
+   mat.scale[2] = 1.0f;
+
+   return ilo_state_viewport_init(vp, dev, &info);
+}
+
+/**
+ * Set the viewport count and carve the caller-provided data buffer into
+ * the three state arrays, laid out back to back: sf_clip, then cc, then
+ * scissor.
+ */
+static void
+viewport_set_count(struct ilo_state_viewport *vp,
+                   const struct ilo_dev *dev,
+                   uint8_t count)
+{
+   assert(count <= vp->array_size);
+
+   vp->count = count;
+   vp->sf_clip = (uint32_t (*)[16]) vp->data;
+   vp->cc      = (uint32_t (*)[ 2]) (vp->sf_clip + count);
+   vp->scissor = (uint32_t (*)[ 2]) (vp->cc + count);
+}
+
+/**
+ * Set the viewport matrices and scissors.  When \p scissors_only is set,
+ * the viewport count must not change and only the scissors are updated.
+ */
+bool
+ilo_state_viewport_set_params(struct ilo_state_viewport *vp,
+                              const struct ilo_dev *dev,
+                              const struct ilo_state_viewport_params_info *params,
+                              bool scissors_only)
+{
+   bool ret = true;
+
+   if (scissors_only) {
+      assert(vp->count == params->count);
+
+      ret &= viewport_scissor_set_gen6_SCISSOR_RECT(vp, dev,
+            params->scissors, params->count);
+   } else {
+      /* re-carve the data buffer, then rebuild all three state arrays */
+      viewport_set_count(vp, dev, params->count);
+
+      ret &= viewport_matrix_set_gen7_SF_CLIP_VIEWPORT(vp, dev,
+            params->matrices, params->count);
+      ret &= viewport_matrix_set_gen6_CC_VIEWPORT(vp, dev,
+            params->matrices, params->count);
+      ret &= viewport_scissor_set_gen6_SCISSOR_RECT(vp, dev,
+            params->scissors, params->count);
+   }
+
+   /* failures indicate invalid params and are programming errors */
+   assert(ret);
+
+   return ret;
+}
+
+/**
+ * Mark all viewport states dirty, forcing them all to be re-emitted.
+ */
+void
+ilo_state_viewport_full_delta(const struct ilo_state_viewport *vp,
+                              const struct ilo_dev *dev,
+                              struct ilo_state_viewport_delta *delta)
+{
+   delta->dirty = ILO_STATE_VIEWPORT_SF_CLIP_VIEWPORT |
+                  ILO_STATE_VIEWPORT_CC_VIEWPORT |
+                  ILO_STATE_VIEWPORT_SCISSOR_RECT;
+}
+
+/**
+ * Compare \p vp against \p old and set the dirty bits for the state
+ * arrays that changed.
+ */
+void
+ilo_state_viewport_get_delta(const struct ilo_state_viewport *vp,
+                             const struct ilo_dev *dev,
+                             const struct ilo_state_viewport *old,
+                             struct ilo_state_viewport_delta *delta)
+{
+   const size_t sf_clip_size = sizeof(vp->sf_clip[0]) * vp->count;
+   const size_t cc_size = sizeof(vp->cc[0]) * vp->count;
+   const size_t scissor_size = sizeof(vp->scissor[0]) * vp->count;
+
+   /* no shallow copying */
+   assert(vp->data != old->data);
+
+   /* different counts mean different array layouts; re-emit everything */
+   if (vp->count != old->count) {
+      ilo_state_viewport_full_delta(vp, dev, delta);
+      return;
+   }
+
+   delta->dirty = 0;
+
+   if (memcmp(vp->sf_clip, old->sf_clip, sf_clip_size))
+      delta->dirty |= ILO_STATE_VIEWPORT_SF_CLIP_VIEWPORT;
+
+   if (memcmp(vp->cc, old->cc, cc_size))
+      delta->dirty |= ILO_STATE_VIEWPORT_CC_VIEWPORT;
+
+   if (memcmp(vp->scissor, old->scissor, scissor_size))
+      delta->dirty |= ILO_STATE_VIEWPORT_SCISSOR_RECT;
+}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_viewport.h b/src/gallium/drivers/ilo/core/ilo_state_viewport.h
new file mode 100644
index 00000000000..b42ad6571da
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_viewport.h
@@ -0,0 +1,132 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#ifndef ILO_STATE_VIEWPORT_H
+#define ILO_STATE_VIEWPORT_H
+
+#include "genhw/genhw.h"
+
+#include "ilo_core.h"
+#include "ilo_dev.h"
+
+/*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 38:
+ *
+ * "... 16 sets of viewport (VP) state parameters in the Clip unit's
+ * VertexClipTest function and in the SF unit's ViewportMapping and
+ * Scissor functions."
+ */
+#define ILO_STATE_VIEWPORT_MAX_COUNT 16
+
+enum ilo_state_viewport_dirty_bits {
+ ILO_STATE_VIEWPORT_SF_CLIP_VIEWPORT = (1 << 0),
+ ILO_STATE_VIEWPORT_CC_VIEWPORT = (1 << 1),
+ ILO_STATE_VIEWPORT_SCISSOR_RECT = (1 << 2),
+};
+
+struct ilo_state_viewport_matrix_info {
+   /* NDC-to-screen scale and translate; indices 0/1/2 are X/Y/Z */
+   float scale[3];
+   float translate[3];
+};
+
+struct ilo_state_viewport_scissor_info {
+   /* all inclusive */
+   uint16_t min_x;
+   uint16_t min_y;
+   uint16_t max_x;
+   uint16_t max_y;
+};
+
+struct ilo_state_viewport_params_info {
+   /* both arrays have `count` entries */
+   const struct ilo_state_viewport_matrix_info *matrices;
+   const struct ilo_state_viewport_scissor_info *scissors;
+   uint8_t count;
+};
+
+struct ilo_state_viewport_info {
+   /* caller-provided storage for the HW state arrays; sized by
+    * ilo_state_viewport_data_size() */
+   void *data;
+   size_t data_size;
+
+   struct ilo_state_viewport_params_info params;
+};
+
+struct ilo_state_viewport {
+   /* caller-provided storage and how many viewports it can hold */
+   void *data;
+   uint8_t array_size;
+
+   /* the arrays below point into `data`; see viewport_set_count() */
+   uint8_t count;
+   uint32_t (*sf_clip)[16];
+   uint32_t (*cc)[2];
+   uint32_t (*scissor)[2];
+};
+
+struct ilo_state_viewport_delta {
+   /* mask of ilo_state_viewport_dirty_bits */
+   uint32_t dirty;
+};
+
+/**
+ * Return the size in bytes of the caller-provided storage needed for
+ * \p array_size viewports (one SF_CLIP, CC, and SCISSOR entry each).
+ */
+static inline size_t
+ilo_state_viewport_data_size(const struct ilo_dev *dev, uint8_t array_size)
+{
+   /* never dereferenced; only used to get the member sizes */
+   const struct ilo_state_viewport *vp = NULL;
+   return (sizeof(vp->sf_clip[0]) +
+           sizeof(vp->cc[0]) +
+           sizeof(vp->scissor[0])) * array_size;
+}
+
+bool
+ilo_state_viewport_init(struct ilo_state_viewport *vp,
+ const struct ilo_dev *dev,
+ const struct ilo_state_viewport_info *info);
+
+bool
+ilo_state_viewport_init_data_only(struct ilo_state_viewport *vp,
+ const struct ilo_dev *dev,
+ void *data, size_t data_size);
+
+bool
+ilo_state_viewport_init_for_rectlist(struct ilo_state_viewport *vp,
+ const struct ilo_dev *dev,
+ void *data, size_t data_size);
+
+bool
+ilo_state_viewport_set_params(struct ilo_state_viewport *vp,
+ const struct ilo_dev *dev,
+ const struct ilo_state_viewport_params_info *params,
+ bool scissors_only);
+
+void
+ilo_state_viewport_full_delta(const struct ilo_state_viewport *vp,
+ const struct ilo_dev *dev,
+ struct ilo_state_viewport_delta *delta);
+
+void
+ilo_state_viewport_get_delta(const struct ilo_state_viewport *vp,
+ const struct ilo_dev *dev,
+ const struct ilo_state_viewport *old,
+ struct ilo_state_viewport_delta *delta);
+
+#endif /* ILO_STATE_VIEWPORT_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_state_zs.c b/src/gallium/drivers/ilo/core/ilo_state_zs.c
new file mode 100644
index 00000000000..901fedb5599
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_zs.c
@@ -0,0 +1,727 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#include "intel_winsys.h"
+
+#include "ilo_debug.h"
+#include "ilo_image.h"
+#include "ilo_state_zs.h"
+
+/**
+ * Fill out a null 3DSTATE_DEPTH_BUFFER: SURFTYPE_NULL with no BO.
+ */
+static bool
+zs_set_gen6_null_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
+                                      const struct ilo_dev *dev)
+{
+   const enum gen_depth_format format = GEN6_ZFORMAT_D32_FLOAT;
+   uint32_t dw1;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* the dword layout differs between Gen6 and Gen7+ */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      dw1 = GEN6_SURFTYPE_NULL << GEN7_DEPTH_DW1_TYPE__SHIFT |
+            format << GEN7_DEPTH_DW1_FORMAT__SHIFT;
+   } else {
+      dw1 = GEN6_SURFTYPE_NULL << GEN6_DEPTH_DW1_TYPE__SHIFT |
+            GEN6_TILING_Y << GEN6_DEPTH_DW1_TILING__SHIFT |
+            format << GEN6_DEPTH_DW1_FORMAT__SHIFT;
+   }
+
+   STATIC_ASSERT(ARRAY_SIZE(zs->depth) >= 5);
+   zs->depth[0] = dw1;
+   zs->depth[1] = 0;
+   zs->depth[2] = 0;
+   zs->depth[3] = 0;
+   zs->depth[4] = 0;
+
+   zs->depth_format = format;
+
+   return true;
+}
+
+/**
+ * Map a pipe texture target to the HW surface type.  Cube maps are
+ * treated as 2D here; callers special-case them separately.
+ */
+static enum gen_surface_type
+get_gen6_surface_type(const struct ilo_dev *dev, const struct ilo_image *img)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   switch (img->target) {
+   case PIPE_TEXTURE_1D:
+   case PIPE_TEXTURE_1D_ARRAY:
+      return GEN6_SURFTYPE_1D;
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_RECT:
+   case PIPE_TEXTURE_2D_ARRAY:
+   case PIPE_TEXTURE_CUBE_ARRAY:
+      return GEN6_SURFTYPE_2D;
+   case PIPE_TEXTURE_3D:
+      return GEN6_SURFTYPE_3D;
+   default:
+      assert(!"unknown texture target");
+      return GEN6_SURFTYPE_NULL;
+   }
+}
+
+/**
+ * Map a pipe depth format to the HW depth format.  On Gen7+, stencil is
+ * always in a separate buffer, so the combined depth/stencil formats are
+ * only accepted on Gen6.
+ */
+static enum gen_depth_format
+get_gen6_depth_format(const struct ilo_dev *dev, const struct ilo_image *img)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      switch (img->format) {
+      case PIPE_FORMAT_Z32_FLOAT:
+         return GEN6_ZFORMAT_D32_FLOAT;
+      case PIPE_FORMAT_Z24X8_UNORM:
+         return GEN6_ZFORMAT_D24_UNORM_X8_UINT;
+      case PIPE_FORMAT_Z16_UNORM:
+         return GEN6_ZFORMAT_D16_UNORM;
+      default:
+         assert(!"unknown depth format");
+         return GEN6_ZFORMAT_D32_FLOAT;
+      }
+   } else {
+      switch (img->format) {
+      case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+         return GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT;
+      case PIPE_FORMAT_Z32_FLOAT:
+         return GEN6_ZFORMAT_D32_FLOAT;
+      case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+         return GEN6_ZFORMAT_D24_UNORM_S8_UINT;
+      case PIPE_FORMAT_Z24X8_UNORM:
+         return GEN6_ZFORMAT_D24_UNORM_X8_UINT;
+      case PIPE_FORMAT_Z16_UNORM:
+         return GEN6_ZFORMAT_D16_UNORM;
+      default:
+         assert(!"unknown depth format");
+         return GEN6_ZFORMAT_D32_FLOAT;
+      }
+   }
+}
+
+/**
+ * Sanity-check the depth/stencil info.  All checks are asserts; the
+ * function always returns true in release builds.
+ */
+static bool
+zs_validate_gen6(const struct ilo_dev *dev,
+                 const struct ilo_state_zs_info *info)
+{
+   /* at least one of z_img and s_img must be set by the caller */
+   const struct ilo_image *img = (info->z_img) ? info->z_img : info->s_img;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 315:
+    *
+    *   The stencil buffer has a format of S8_UINT, and shares Surface
+    *   Type, Height, Width, and Depth, Minimum Array Element, Render
+    *   Target View Extent, Depth Coordinate Offset X/Y, LOD, and Depth
+    *   Buffer Object Control State fields of the depth buffer.
+    */
+   if (info->z_img == info->s_img) {
+      assert(info->z_img->target == info->s_img->target &&
+             info->z_img->width0 == info->s_img->width0 &&
+             info->z_img->height0 == info->s_img->height0 &&
+             info->z_img->depth0 == info->s_img->depth0);
+   }
+
+   assert(info->level < img->level_count);
+   assert(img->bo_stride);
+
+   /* HiZ requires a depth image with the aux buffer enabled for the level */
+   if (info->hiz_enable) {
+      assert(info->z_img &&
+             ilo_image_can_enable_aux(info->z_img, info->level));
+   }
+
+   if (info->is_cube_map) {
+      assert(get_gen6_surface_type(dev, img) == GEN6_SURFTYPE_2D);
+
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 323:
+       *
+       *   "For cube maps, Width must be set equal to Height."
+       */
+      assert(img->width0 == img->height0);
+   }
+
+   /* depth is Y-tiled, stencil is W-tiled */
+   if (info->z_img)
+      assert(info->z_img->tiling == GEN6_TILING_Y);
+   if (info->s_img)
+      assert(info->s_img->tiling == GEN8_TILING_W);
+
+   return true;
+}
+
+/**
+ * Return the maximum width and height of a depth/stencil buffer for the
+ * image's surface type on this generation.
+ */
+static void
+get_gen6_max_extent(const struct ilo_dev *dev,
+                    const struct ilo_image *img,
+                    uint16_t *max_w, uint16_t *max_h)
+{
+   const uint16_t max_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 16384 : 8192;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   switch (get_gen6_surface_type(dev, img)) {
+   case GEN6_SURFTYPE_1D:
+      *max_w = max_size;
+      *max_h = 1;
+      break;
+   case GEN6_SURFTYPE_2D:
+      *max_w = max_size;
+      *max_h = max_size;
+      break;
+   case GEN6_SURFTYPE_3D:
+      *max_w = 2048;
+      *max_h = 2048;
+      break;
+   default:
+      assert(!"invalid surface type");
+      *max_w = 1;
+      *max_h = 1;
+      break;
+   }
+}
+
+/**
+ * Return the pixel alignment the depth buffer extent must honor when HiZ
+ * is enabled, as a function of the sample count.
+ */
+static void
+get_gen6_hiz_alignments(const struct ilo_dev *dev,
+                        const struct ilo_image *img,
+                        uint16_t *align_w, uint16_t *align_h)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 313:
+    *
+    *   "A rectangle primitive representing the clear area is delivered. The
+    *    primitive must adhere to the following restrictions on size:
+    *
+    *      - If Number of Multisamples is NUMSAMPLES_1, the rectangle must be
+    *        aligned to an 8x4 pixel block relative to the upper left corner
+    *        of the depth buffer, and contain an integer number of these pixel
+    *        blocks, and all 8x4 pixels must be lit.
+    *      - If Number of Multisamples is NUMSAMPLES_4, the rectangle must be
+    *        aligned to a 4x2 pixel block (8x4 sample block) relative to the
+    *        upper left corner of the depth buffer, and contain an integer
+    *        number of these pixel blocks, and all samples of the 4x2 pixels
+    *        must be lit
+    *      - If Number of Multisamples is NUMSAMPLES_8, the rectangle must be
+    *        aligned to a 2x2 pixel block (8x4 sample block) relative to the
+    *        upper left corner of the depth buffer, and contain an integer
+    *        number of these pixel blocks, and all samples of the 2x2 pixels
+    *        must be lit."
+    *
+    * Experiments on Gen7.5 show that HiZ resolve also requires the rectangle
+    * to be aligned to 8x4 sample blocks.  But to be on the safe side, we
+    * always require a level to be aligned when HiZ is enabled.
+    *
+    * NOTE(review): the 2x and 16x entries below are not covered by the
+    * quote above; they extrapolate the 8x4-sample-block rule — confirm.
+    */
+   switch (img->sample_count) {
+   case 1:
+      *align_w = 8;
+      *align_h = 4;
+      break;
+   case 2:
+      *align_w = 4;
+      *align_h = 4;
+      break;
+   case 4:
+      *align_w = 4;
+      *align_h = 2;
+      break;
+   case 8:
+      *align_w = 2;
+      *align_h = 2;
+      break;
+   case 16:
+      *align_w = 2;
+      *align_h = 1;
+      break;
+   default:
+      assert(!"unknown sample count");
+      *align_w = 1;
+      *align_h = 1;
+      break;
+   }
+}
+
+/**
+ * Compute the Width and Height fields of 3DSTATE_DEPTH_BUFFER (both are
+ * the actual extent minus one), padding to HiZ alignment when needed.
+ */
+static bool
+zs_get_gen6_depth_extent(const struct ilo_dev *dev,
+                         const struct ilo_state_zs_info *info,
+                         uint16_t *width, uint16_t *height)
+{
+   const struct ilo_image *img = (info->z_img) ? info->z_img : info->s_img;
+   uint16_t w, h, max_w, max_h;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   w = img->width0;
+   h = img->height0;
+
+   if (info->hiz_enable) {
+      uint16_t align_w, align_h;
+
+      get_gen6_hiz_alignments(dev, info->z_img, &align_w, &align_h);
+
+      /*
+       * We want to force 8x4 alignment, but we can do so only for level 0 and
+       * only when it is padded.  ilo_image should know all these.
+       */
+      if (info->level)
+         assert(w % align_w == 0 && h % align_h == 0);
+
+      w = align(w, align_w);
+      h = align(h, align_h);
+   }
+
+   get_gen6_max_extent(dev, img, &max_w, &max_h);
+   assert(w && h && w <= max_w && h <= max_h);
+
+   /* the HW fields hold extent minus one */
+   *width = w - 1;
+   *height = h - 1;
+
+   return true;
+}
+
+/**
+ * Compute the Depth, Minimum Array Element, and Render Target View
+ * Extent fields of 3DSTATE_DEPTH_BUFFER from the requested slice range.
+ * Returns false and warns when the slice range is invalid.
+ */
+static bool
+zs_get_gen6_depth_slices(const struct ilo_dev *dev,
+                         const struct ilo_state_zs_info *info,
+                         uint16_t *depth, uint16_t *min_array_elem,
+                         uint16_t *rt_view_extent)
+{
+   const struct ilo_image *img = (info->z_img) ? info->z_img : info->s_img;
+   uint16_t max_slice, d;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 325:
+    *
+    *   "This field (Depth) specifies the total number of levels for a
+    *    volume texture or the number of array elements allowed to be
+    *    accessed starting at the Minimum Array Element for arrayed
+    *    surfaces. If the volume texture is MIP-mapped, this field specifies
+    *    the depth of the base MIP level."
+    */
+   switch (get_gen6_surface_type(dev, img)) {
+   case GEN6_SURFTYPE_1D:
+   case GEN6_SURFTYPE_2D:
+      max_slice = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 2048 : 512;
+
+      assert(img->array_size <= max_slice);
+      max_slice = img->array_size;
+
+      d = info->slice_count;
+      if (info->is_cube_map) {
+         /*
+          * Minimum Array Element and Depth must be 0; Render Target View
+          * Extent is ignored.
+          */
+         if (info->slice_base || d != 6) {
+            ilo_warn("no cube array depth buffer\n");
+            return false;
+         }
+
+         d /= 6;
+      }
+      break;
+   case GEN6_SURFTYPE_3D:
+      max_slice = 2048;
+
+      assert(img->depth0 <= max_slice);
+      /* valid slices are bounded by the depth of the LOD being rendered */
+      max_slice = u_minify(img->depth0, info->level);
+
+      /* Depth holds the depth of the base level for 3D surfaces */
+      d = img->depth0;
+      break;
+   default:
+      assert(!"invalid surface type");
+      return false;
+   }
+
+   if (!info->slice_count ||
+       info->slice_base + info->slice_count > max_slice) {
+      ilo_warn("invalid slice range\n");
+      return false;
+   }
+
+   assert(d);
+   *depth = d - 1;
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 325:
+    *
+    *   "For 1D and 2D Surfaces:
+    *    This field (Minimum Array Element) indicates the minimum array
+    *    element that can be accessed as part of this surface.  The delivered
+    *    array index is added to this field before being used to address the
+    *    surface.
+    *
+    *    For 3D Surfaces:
+    *    This field indicates the minimum `R' coordinate on the LOD
+    *    currently being rendered to.  This field is added to the delivered
+    *    array index before it is used to address the surface.
+    *
+    *    For Other Surfaces:
+    *    This field is ignored."
+    */
+   *min_array_elem = info->slice_base;
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 326:
+    *
+    *   "For 3D Surfaces:
+    *    This field (Render Target View Extent) indicates the extent of the
+    *    accessible `R' coordinates minus 1 on the LOD currently being
+    *    rendered to.
+    *
+    *    For 1D and 2D Surfaces:
+    *    This field must be set to the same value as the Depth field.
+    *
+    *    For Other Surfaces:
+    *    This field is ignored."
+    */
+   *rt_view_extent = info->slice_count - 1;
+
+   return true;
+}
+
+static bool
+zs_set_gen6_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
+ const struct ilo_dev *dev,
+ const struct ilo_state_zs_info *info)
+{
+ uint16_t width, height, depth, array_base, view_extent;
+ enum gen_surface_type type;
+ enum gen_depth_format format;
+ uint32_t dw1, dw2, dw3, dw4;
+
+ ILO_DEV_ASSERT(dev, 6, 6);
+
+ if (!zs_validate_gen6(dev, info) ||
+ !zs_get_gen6_depth_extent(dev, info, &width, &height) ||
+ !zs_get_gen6_depth_slices(dev, info, &depth, &array_base,
+ &view_extent))
+ return false;
+
+ type = (info->is_cube_map) ? GEN6_SURFTYPE_CUBE :
+ (info->z_img) ? get_gen6_surface_type(dev, info->z_img) :
+ get_gen6_surface_type(dev, info->s_img);
+
+ format = (info->z_img) ? get_gen6_depth_format(dev, info->z_img) :
+ GEN6_ZFORMAT_D32_FLOAT;
+
+ /*
+ * From the Ironlake PRM, volume 2 part 1, page 330:
+ *
+ * "If this field (Separate Stencil Buffer Enable) is disabled, the
+ * Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
+ *
+ * From the Sandy Bridge PRM, volume 2 part 1, page 321:
+ *
+ * "[DevSNB]: This field (Separate Stencil Buffer Enable) must be set
+ * to the same value (enabled or disabled) as Hierarchical Depth
+ * Buffer Enable."
+ */
+ if (!info->hiz_enable && format == GEN6_ZFORMAT_D24_UNORM_X8_UINT)
+ format = GEN6_ZFORMAT_D24_UNORM_S8_UINT;
+
+ /* info->z_readonly and info->s_readonly are ignored on Gen6 */
+ dw1 = type << GEN6_DEPTH_DW1_TYPE__SHIFT |
+ GEN6_TILING_Y << GEN6_DEPTH_DW1_TILING__SHIFT |
+ format << GEN6_DEPTH_DW1_FORMAT__SHIFT;
+
+ if (info->z_img)
+ dw1 |= (info->z_img->bo_stride - 1) << GEN6_DEPTH_DW1_PITCH__SHIFT;
+
+ if (info->hiz_enable || !info->z_img) {
+ dw1 |= GEN6_DEPTH_DW1_HIZ_ENABLE |
+ GEN6_DEPTH_DW1_SEPARATE_STENCIL;
+ }
+
+ dw2 = 0;
+ dw3 = height << GEN6_DEPTH_DW3_HEIGHT__SHIFT |
+ width << GEN6_DEPTH_DW3_WIDTH__SHIFT |
+ info->level << GEN6_DEPTH_DW3_LOD__SHIFT |
+ GEN6_DEPTH_DW3_MIPLAYOUT_BELOW;
+ dw4 = depth << GEN6_DEPTH_DW4_DEPTH__SHIFT |
+ array_base << GEN6_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT |
+ view_extent << GEN6_DEPTH_DW4_RT_VIEW_EXTENT__SHIFT;
+
+ STATIC_ASSERT(ARRAY_SIZE(zs->depth) >= 5);
+ zs->depth[0] = dw1;
+ zs->depth[1] = dw2;
+ zs->depth[2] = dw3;
+ zs->depth[3] = dw4;
+ zs->depth[4] = 0;
+
+ zs->depth_format = format;
+
+ return true;
+}
+
+static bool
+zs_set_gen7_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
+ const struct ilo_dev *dev,
+ const struct ilo_state_zs_info *info)
+{
+ enum gen_surface_type type;
+ enum gen_depth_format format;
+ uint16_t width, height, depth;
+ uint16_t array_base, view_extent;
+ uint32_t dw1, dw2, dw3, dw4, dw6;
+
+ ILO_DEV_ASSERT(dev, 7, 8);
+
+ if (!zs_validate_gen6(dev, info) ||
+ !zs_get_gen6_depth_extent(dev, info, &width, &height) ||
+ !zs_get_gen6_depth_slices(dev, info, &depth, &array_base,
+ &view_extent))
+ return false;
+
+ type = (info->is_cube_map) ? GEN6_SURFTYPE_CUBE :
+ (info->z_img) ? get_gen6_surface_type(dev, info->z_img) :
+ get_gen6_surface_type(dev, info->s_img);
+
+ format = (info->z_img) ? get_gen6_depth_format(dev, info->z_img) :
+ GEN6_ZFORMAT_D32_FLOAT;
+
+ dw1 = type << GEN7_DEPTH_DW1_TYPE__SHIFT |
+ format << GEN7_DEPTH_DW1_FORMAT__SHIFT;
+
+ if (info->z_img) {
+ if (!info->z_readonly)
+ dw1 |= GEN7_DEPTH_DW1_DEPTH_WRITE_ENABLE;
+ if (info->hiz_enable)
+ dw1 |= GEN7_DEPTH_DW1_HIZ_ENABLE;
+
+ dw1 |= (info->z_img->bo_stride - 1) << GEN7_DEPTH_DW1_PITCH__SHIFT;
+ }
+
+ if (info->s_img && !info->s_readonly)
+ dw1 |= GEN7_DEPTH_DW1_STENCIL_WRITE_ENABLE;
+
+ dw2 = 0;
+ dw3 = height << GEN7_DEPTH_DW3_HEIGHT__SHIFT |
+ width << GEN7_DEPTH_DW3_WIDTH__SHIFT |
+ info->level << GEN7_DEPTH_DW3_LOD__SHIFT;
+ dw4 = depth << GEN7_DEPTH_DW4_DEPTH__SHIFT |
+ array_base << GEN7_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT;
+ dw6 = view_extent << GEN7_DEPTH_DW6_RT_VIEW_EXTENT__SHIFT;
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(8) && info->z_img) {
+ assert(info->z_img->walk_layer_height % 4 == 0);
+ /* note that DW is off-by-one for Gen8+ */
+ dw6 |= (info->z_img->walk_layer_height / 4) <<
+ GEN8_DEPTH_DW7_QPITCH__SHIFT;
+ }
+
+ STATIC_ASSERT(ARRAY_SIZE(zs->depth) >= 5);
+ zs->depth[0] = dw1;
+ zs->depth[1] = dw2;
+ zs->depth[2] = dw3;
+ zs->depth[3] = dw4;
+ zs->depth[4] = dw6;
+
+ zs->depth_format = format;
+
+ return true;
+}
+
+static bool
+zs_set_gen6_null_3DSTATE_STENCIL_BUFFER(struct ilo_state_zs *zs,
+ const struct ilo_dev *dev)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ STATIC_ASSERT(ARRAY_SIZE(zs->stencil) >= 3);
+ zs->stencil[0] = 0;
+ zs->stencil[1] = 0;
+ if (ilo_dev_gen(dev) >= ILO_GEN(8))
+ zs->stencil[2] = 0;
+
+ return true;
+}
+
+static bool
+zs_set_gen6_3DSTATE_STENCIL_BUFFER(struct ilo_state_zs *zs,
+ const struct ilo_dev *dev,
+ const struct ilo_state_zs_info *info)
+{
+ const struct ilo_image *img = info->s_img;
+ uint32_t dw1, dw2;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ assert(img->bo_stride);
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 329:
+ *
+ * "The pitch must be set to 2x the value computed based on width, as
+ * the stencil buffer is stored with two rows interleaved."
+ *
+    * For Gen7+, we still double the stride because we did not double the
+ * slice widths when initializing ilo_image.
+ */
+ dw1 = (img->bo_stride * 2 - 1) << GEN6_STENCIL_DW1_PITCH__SHIFT;
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
+ dw1 |= GEN75_STENCIL_DW1_STENCIL_BUFFER_ENABLE;
+
+ dw2 = 0;
+ /* offset to the level as Gen6 does not support mipmapped stencil */
+ if (ilo_dev_gen(dev) == ILO_GEN(6)) {
+ unsigned x, y;
+
+ ilo_image_get_slice_pos(img, info->level, 0, &x, &y);
+ ilo_image_pos_to_mem(img, x, y, &x, &y);
+ dw2 |= ilo_image_mem_to_raw(img, x, y);
+ }
+
+ STATIC_ASSERT(ARRAY_SIZE(zs->stencil) >= 3);
+ zs->stencil[0] = dw1;
+ zs->stencil[1] = dw2;
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+ uint32_t dw4;
+
+ assert(img->walk_layer_height % 4 == 0);
+ dw4 = (img->walk_layer_height / 4) << GEN8_STENCIL_DW4_QPITCH__SHIFT;
+
+ zs->stencil[2] = dw4;
+ }
+
+ return true;
+}
+
+static bool
+zs_set_gen6_null_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_state_zs *zs,
+ const struct ilo_dev *dev)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ STATIC_ASSERT(ARRAY_SIZE(zs->hiz) >= 3);
+ zs->hiz[0] = 0;
+ zs->hiz[1] = 0;
+ if (ilo_dev_gen(dev) >= ILO_GEN(8))
+ zs->hiz[2] = 0;
+
+ return true;
+}
+
+static bool
+zs_set_gen6_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_state_zs *zs,
+ const struct ilo_dev *dev,
+ const struct ilo_state_zs_info *info)
+{
+ const struct ilo_image *img = info->z_img;
+ uint32_t dw1, dw2;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ assert(img->aux.bo_stride);
+
+ dw1 = (img->aux.bo_stride - 1) << GEN6_HIZ_DW1_PITCH__SHIFT;
+
+ dw2 = 0;
+ /* offset to the level as Gen6 does not support mipmapped HiZ */
+ if (ilo_dev_gen(dev) == ILO_GEN(6))
+ dw2 |= img->aux.walk_lod_offsets[info->level];
+
+ STATIC_ASSERT(ARRAY_SIZE(zs->hiz) >= 3);
+ zs->hiz[0] = dw1;
+ zs->hiz[1] = dw2;
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+ uint32_t dw4;
+
+ assert(img->aux.walk_layer_height % 4 == 0);
+ dw4 = (img->aux.walk_layer_height / 4) << GEN8_HIZ_DW4_QPITCH__SHIFT;
+
+ zs->hiz[2] = dw4;
+ }
+
+ return true;
+}
+
+bool
+ilo_state_zs_init(struct ilo_state_zs *zs, const struct ilo_dev *dev,
+ const struct ilo_state_zs_info *info)
+{
+ bool ret = true;
+
+ assert(ilo_is_zeroed(zs, sizeof(*zs)));
+
+ if (info->z_img || info->s_img) {
+ if (ilo_dev_gen(dev) >= ILO_GEN(7))
+ ret &= zs_set_gen7_3DSTATE_DEPTH_BUFFER(zs, dev, info);
+ else
+ ret &= zs_set_gen6_3DSTATE_DEPTH_BUFFER(zs, dev, info);
+ } else {
+ ret &= zs_set_gen6_null_3DSTATE_DEPTH_BUFFER(zs, dev);
+ }
+
+ if (info->s_img)
+ ret &= zs_set_gen6_3DSTATE_STENCIL_BUFFER(zs, dev, info);
+ else
+ ret &= zs_set_gen6_null_3DSTATE_STENCIL_BUFFER(zs, dev);
+
+ if (info->z_img && info->hiz_enable)
+ ret &= zs_set_gen6_3DSTATE_HIER_DEPTH_BUFFER(zs, dev, info);
+ else
+ ret &= zs_set_gen6_null_3DSTATE_HIER_DEPTH_BUFFER(zs, dev);
+
+ zs->z_readonly = info->z_readonly;
+ zs->s_readonly = info->s_readonly;
+
+ assert(ret);
+
+ return ret;
+}
+
+bool
+ilo_state_zs_init_for_null(struct ilo_state_zs *zs,
+ const struct ilo_dev *dev)
+{
+ struct ilo_state_zs_info info;
+
+ memset(&info, 0, sizeof(info));
+
+ return ilo_state_zs_init(zs, dev, &info);
+}
+
+bool
+ilo_state_zs_disable_hiz(struct ilo_state_zs *zs,
+ const struct ilo_dev *dev)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /*
+ * Separate stencil must be disabled simultaneously on Gen6. We can make
+ * it work when there is no stencil buffer, but it is probably not worth
+ * it.
+ */
+ assert(ilo_dev_gen(dev) >= ILO_GEN(7));
+
+ zs->depth[0] &= ~GEN7_DEPTH_DW1_HIZ_ENABLE;
+ zs_set_gen6_null_3DSTATE_HIER_DEPTH_BUFFER(zs, dev);
+
+ return true;
+}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_zs.h b/src/gallium/drivers/ilo/core/ilo_state_zs.h
new file mode 100644
index 00000000000..98212daf74f
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_zs.h
@@ -0,0 +1,93 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#ifndef ILO_STATE_ZS_H
+#define ILO_STATE_ZS_H
+
+#include "genhw/genhw.h"
+#include "intel_winsys.h"
+
+#include "ilo_core.h"
+#include "ilo_dev.h"
+
+struct ilo_image;
+
+struct ilo_state_zs_info {
+ /* both are optional */
+ const struct ilo_image *z_img;
+ const struct ilo_image *s_img;
+
+ /* ignored prior to Gen7 */
+ bool z_readonly;
+ bool s_readonly;
+
+ bool hiz_enable;
+ bool is_cube_map;
+
+ uint8_t level;
+ uint16_t slice_base;
+ uint16_t slice_count;
+};
+
+struct ilo_state_zs {
+ uint32_t depth[5];
+ uint32_t stencil[3];
+ uint32_t hiz[3];
+
+ /* TODO move this to ilo_image */
+ enum gen_depth_format depth_format;
+
+ bool z_readonly;
+ bool s_readonly;
+
+ /* managed by users */
+ struct intel_bo *depth_bo;
+ struct intel_bo *stencil_bo;
+ struct intel_bo *hiz_bo;
+};
+
+bool
+ilo_state_zs_init(struct ilo_state_zs *zs,
+ const struct ilo_dev *dev,
+ const struct ilo_state_zs_info *info);
+
+bool
+ilo_state_zs_init_for_null(struct ilo_state_zs *zs,
+ const struct ilo_dev *dev);
+
+bool
+ilo_state_zs_disable_hiz(struct ilo_state_zs *zs,
+ const struct ilo_dev *dev);
+
+static inline enum gen_depth_format
+ilo_state_zs_get_depth_format(const struct ilo_state_zs *zs,
+ const struct ilo_dev *dev)
+{
+ return zs->depth_format;
+}
+
+#endif /* ILO_STATE_ZS_H */
diff --git a/src/gallium/drivers/ilo/genhw/gen_mi.xml.h b/src/gallium/drivers/ilo/genhw/gen_mi.xml.h
index 24d726adcb3..5a0bb4f8d77 100644
--- a/src/gallium/drivers/ilo/genhw/gen_mi.xml.h
+++ b/src/gallium/drivers/ilo/genhw/gen_mi.xml.h
@@ -97,6 +97,9 @@ enum gen_mi_alu_operand {
#define GEN6_MI_LENGTH__MASK 0x0000003f
#define GEN6_MI_LENGTH__SHIFT 0
#define GEN6_MI_NOOP__SIZE 1
+#define GEN6_MI_NOOP_DW0_WRITE_NOPID (0x1 << 22)
+#define GEN6_MI_NOOP_DW0_VALUE__MASK 0x003fffff
+#define GEN6_MI_NOOP_DW0_VALUE__SHIFT 0
#define GEN75_MI_SET_PREDICATE__SIZE 1
#define GEN75_MI_SET_PREDICATE_DW0_PREDICATE__MASK 0x00000003
diff --git a/src/gallium/drivers/ilo/genhw/gen_regs.xml.h b/src/gallium/drivers/ilo/genhw/gen_regs.xml.h
index 2bdd72b29bc..c51e4f78bc0 100644
--- a/src/gallium/drivers/ilo/genhw/gen_regs.xml.h
+++ b/src/gallium/drivers/ilo/genhw/gen_regs.xml.h
@@ -35,6 +35,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define GEN6_REG_MASK__MASK 0xffff0000
#define GEN6_REG_MASK__SHIFT 16
#define GEN6_REG__SIZE 0x400000
+#define GEN6_REG_NOPID 0x2094
+
#define GEN7_REG_HS_INVOCATION_COUNT 0x2300
#define GEN7_REG_DS_INVOCATION_COUNT 0x2308
diff --git a/src/gallium/drivers/ilo/genhw/gen_render_3d.xml.h b/src/gallium/drivers/ilo/genhw/gen_render_3d.xml.h
index d25542e8cc2..52173fe5d07 100644
--- a/src/gallium/drivers/ilo/genhw/gen_render_3d.xml.h
+++ b/src/gallium/drivers/ilo/genhw/gen_render_3d.xml.h
@@ -32,7 +32,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-enum gen_prim_type {
+enum gen_3dprim_type {
GEN6_3DPRIM_POINTLIST = 0x1,
GEN6_3DPRIM_LINELIST = 0x2,
GEN6_3DPRIM_LINESTRIP = 0x3,
@@ -105,6 +105,12 @@ enum gen_state_alignment {
GEN8_ALIGNMENT_SURFACE_STATE = 0x40,
};
+enum gen_index_format {
+ GEN6_INDEX_BYTE = 0x0,
+ GEN6_INDEX_WORD = 0x1,
+ GEN6_INDEX_DWORD = 0x2,
+};
+
enum gen_vf_component {
GEN6_VFCOMP_NOSTORE = 0x0,
GEN6_VFCOMP_STORE_SRC = 0x1,
@@ -123,6 +129,87 @@ enum gen_depth_format {
GEN6_ZFORMAT_D16_UNORM = 0x5,
};
+enum gen_reorder_mode {
+ GEN7_REORDER_LEADING = 0x0,
+ GEN7_REORDER_TRAILING = 0x1,
+};
+
+enum gen_clip_mode {
+ GEN6_CLIPMODE_NORMAL = 0x0,
+ GEN6_CLIPMODE_REJECT_ALL = 0x3,
+ GEN6_CLIPMODE_ACCEPT_ALL = 0x4,
+};
+
+enum gen_front_winding {
+ GEN6_FRONTWINDING_CW = 0x0,
+ GEN6_FRONTWINDING_CCW = 0x1,
+};
+
+enum gen_fill_mode {
+ GEN6_FILLMODE_SOLID = 0x0,
+ GEN6_FILLMODE_WIREFRAME = 0x1,
+ GEN6_FILLMODE_POINT = 0x2,
+};
+
+enum gen_cull_mode {
+ GEN6_CULLMODE_BOTH = 0x0,
+ GEN6_CULLMODE_NONE = 0x1,
+ GEN6_CULLMODE_FRONT = 0x2,
+ GEN6_CULLMODE_BACK = 0x3,
+};
+
+enum gen_pixel_location {
+ GEN6_PIXLOC_CENTER = 0x0,
+ GEN6_PIXLOC_UL_CORNER = 0x1,
+};
+
+enum gen_sample_count {
+ GEN6_NUMSAMPLES_1 = 0x0,
+ GEN8_NUMSAMPLES_2 = 0x1,
+ GEN6_NUMSAMPLES_4 = 0x2,
+ GEN7_NUMSAMPLES_8 = 0x3,
+ GEN8_NUMSAMPLES_16 = 0x4,
+};
+
+enum gen_inputattr_select {
+ GEN6_INPUTATTR_NORMAL = 0x0,
+ GEN6_INPUTATTR_FACING = 0x1,
+ GEN6_INPUTATTR_W = 0x2,
+ GEN6_INPUTATTR_FACING_W = 0x3,
+};
+
+enum gen_zw_interp {
+ GEN6_ZW_INTERP_PIXEL = 0x0,
+ GEN6_ZW_INTERP_CENTROID = 0x2,
+ GEN6_ZW_INTERP_SAMPLE = 0x3,
+};
+
+enum gen_position_offset {
+ GEN6_POSOFFSET_NONE = 0x0,
+ GEN6_POSOFFSET_CENTROID = 0x2,
+ GEN6_POSOFFSET_SAMPLE = 0x3,
+};
+
+enum gen_edsc_mode {
+ GEN7_EDSC_NORMAL = 0x0,
+ GEN7_EDSC_PSEXEC = 0x1,
+ GEN7_EDSC_PREPS = 0x2,
+};
+
+enum gen_pscdepth_mode {
+ GEN7_PSCDEPTH_OFF = 0x0,
+ GEN7_PSCDEPTH_ON = 0x1,
+ GEN7_PSCDEPTH_ON_GE = 0x2,
+ GEN7_PSCDEPTH_ON_LE = 0x3,
+};
+
+enum gen_msrast_mode {
+ GEN6_MSRASTMODE_OFF_PIXEL = 0x0,
+ GEN6_MSRASTMODE_OFF_PATTERN = 0x1,
+ GEN6_MSRASTMODE_ON_PIXEL = 0x2,
+ GEN6_MSRASTMODE_ON_PATTERN = 0x3,
+};
+
#define GEN6_INTERP_NONPERSPECTIVE_SAMPLE (0x1 << 5)
#define GEN6_INTERP_NONPERSPECTIVE_CENTROID (0x1 << 4)
#define GEN6_INTERP_NONPERSPECTIVE_PIXEL (0x1 << 3)
@@ -285,9 +372,6 @@ enum gen_depth_format {
#define GEN6_IB_DW0_CUT_INDEX_ENABLE (0x1 << 10)
#define GEN6_IB_DW0_FORMAT__MASK 0x00000300
#define GEN6_IB_DW0_FORMAT__SHIFT 8
-#define GEN6_IB_DW0_FORMAT_BYTE (0x0 << 8)
-#define GEN6_IB_DW0_FORMAT_WORD (0x1 << 8)
-#define GEN6_IB_DW0_FORMAT_DWORD (0x2 << 8)
@@ -295,9 +379,6 @@ enum gen_depth_format {
#define GEN8_IB_DW1_FORMAT__MASK 0x00000300
#define GEN8_IB_DW1_FORMAT__SHIFT 8
-#define GEN8_IB_DW1_FORMAT_BYTE (0x0 << 8)
-#define GEN8_IB_DW1_FORMAT_WORD (0x1 << 8)
-#define GEN8_IB_DW1_FORMAT_DWORD (0x2 << 8)
#define GEN8_IB_DW1_MOCS__MASK 0x0000007f
#define GEN8_IB_DW1_MOCS__SHIFT 0
@@ -313,8 +394,8 @@ enum gen_depth_format {
#define GEN8_INSTANCING_DW1_ENABLE (0x1 << 8)
-#define GEN8_INSTANCING_DW1_VB_INDEX__MASK 0x0000003f
-#define GEN8_INSTANCING_DW1_VB_INDEX__SHIFT 0
+#define GEN8_INSTANCING_DW1_VE_INDEX__MASK 0x0000003f
+#define GEN8_INSTANCING_DW1_VE_INDEX__SHIFT 0
#define GEN8_3DSTATE_VF_SGVS__SIZE 2
@@ -614,7 +695,7 @@ enum gen_depth_format {
#define GEN6_GS_DW5_SO_STATISTICS (0x1 << 9)
#define GEN6_GS_DW5_RENDER_ENABLE (0x1 << 8)
-#define GEN6_GS_DW6_REORDER_ENABLE (0x1 << 30)
+#define GEN6_GS_DW6_REORDER_LEADING_ENABLE (0x1 << 30)
#define GEN6_GS_DW6_DISCARD_ADJACENCY (0x1 << 29)
#define GEN6_GS_DW6_SVBI_PAYLOAD_ENABLE (0x1 << 28)
#define GEN6_GS_DW6_SVBI_POST_INC_ENABLE (0x1 << 27)
@@ -666,11 +747,9 @@ enum gen_depth_format {
#define GEN7_GS_DW5_INVOCATION_INCR__SHIFT 5
#define GEN7_GS_DW5_INCLUDE_PRIMITIVE_ID (0x1 << 4)
#define GEN7_GS_DW5_HINT (0x1 << 3)
-#define GEN7_GS_DW5_REORDER_ENABLE (0x1 << 2)
-#define GEN75_GS_DW5_REORDER__MASK 0x00000004
-#define GEN75_GS_DW5_REORDER__SHIFT 2
-#define GEN75_GS_DW5_REORDER_LEADING (0x0 << 2)
-#define GEN75_GS_DW5_REORDER_TRAILING (0x1 << 2)
+#define GEN7_GS_DW5_REORDER_LEADING_ENABLE (0x1 << 2)
+#define GEN75_GS_DW5_REORDER_MODE__MASK 0x00000004
+#define GEN75_GS_DW5_REORDER_MODE__SHIFT 2
#define GEN7_GS_DW5_DISCARD_ADJACENCY (0x1 << 1)
#define GEN7_GS_DW5_GS_ENABLE (0x1 << 0)
@@ -727,10 +806,8 @@ enum gen_depth_format {
#define GEN8_GS_DW7_INVOCATION_INCR__SHIFT 5
#define GEN8_GS_DW7_INCLUDE_PRIMITIVE_ID (0x1 << 4)
#define GEN8_GS_DW7_HINT (0x1 << 3)
-#define GEN8_GS_DW7_REORDER__MASK 0x00000004
-#define GEN8_GS_DW7_REORDER__SHIFT 2
-#define GEN8_GS_DW7_REORDER_LEADING (0x0 << 2)
-#define GEN8_GS_DW7_REORDER_TRAILING (0x1 << 2)
+#define GEN8_GS_DW7_REORDER_MODE__MASK 0x00000004
+#define GEN8_GS_DW7_REORDER_MODE__SHIFT 2
#define GEN8_GS_DW7_DISCARD_ADJACENCY (0x1 << 1)
#define GEN8_GS_DW7_GS_ENABLE (0x1 << 0)
@@ -758,10 +835,8 @@ enum gen_depth_format {
#define GEN7_SO_DW1_RENDER_DISABLE (0x1 << 30)
#define GEN7_SO_DW1_RENDER_STREAM_SELECT__MASK 0x18000000
#define GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT 27
-#define GEN7_SO_DW1_REORDER__MASK 0x04000000
-#define GEN7_SO_DW1_REORDER__SHIFT 26
-#define GEN7_SO_DW1_REORDER_LEADING (0x0 << 26)
-#define GEN7_SO_DW1_REORDER_TRAILING (0x1 << 26)
+#define GEN7_SO_DW1_REORDER_MODE__MASK 0x04000000
+#define GEN7_SO_DW1_REORDER_MODE__SHIFT 26
#define GEN7_SO_DW1_STATISTICS (0x1 << 25)
#define GEN7_SO_DW1_BUFFER_ENABLES__MASK 0x00000f00
#define GEN7_SO_DW1_BUFFER_ENABLES__SHIFT 8
@@ -862,21 +937,15 @@ enum gen_depth_format {
#define GEN6_3DSTATE_CLIP__SIZE 4
-#define GEN7_CLIP_DW1_FRONTWINDING__MASK 0x00100000
-#define GEN7_CLIP_DW1_FRONTWINDING__SHIFT 20
-#define GEN7_CLIP_DW1_FRONTWINDING_CW (0x0 << 20)
-#define GEN7_CLIP_DW1_FRONTWINDING_CCW (0x1 << 20)
+#define GEN7_CLIP_DW1_FRONT_WINDING__MASK 0x00100000
+#define GEN7_CLIP_DW1_FRONT_WINDING__SHIFT 20
#define GEN7_CLIP_DW1_SUBPIXEL__MASK 0x00080000
#define GEN7_CLIP_DW1_SUBPIXEL__SHIFT 19
#define GEN7_CLIP_DW1_SUBPIXEL_8BITS (0x0 << 19)
#define GEN7_CLIP_DW1_SUBPIXEL_4BITS (0x1 << 19)
#define GEN7_CLIP_DW1_EARLY_CULL_ENABLE (0x1 << 18)
-#define GEN7_CLIP_DW1_CULLMODE__MASK 0x00030000
-#define GEN7_CLIP_DW1_CULLMODE__SHIFT 16
-#define GEN7_CLIP_DW1_CULLMODE_BOTH (0x0 << 16)
-#define GEN7_CLIP_DW1_CULLMODE_NONE (0x1 << 16)
-#define GEN7_CLIP_DW1_CULLMODE_FRONT (0x2 << 16)
-#define GEN7_CLIP_DW1_CULLMODE_BACK (0x3 << 16)
+#define GEN7_CLIP_DW1_CULL_MODE__MASK 0x00030000
+#define GEN7_CLIP_DW1_CULL_MODE__SHIFT 16
#define GEN6_CLIP_DW1_STATISTICS (0x1 << 10)
#define GEN6_CLIP_DW1_UCP_CULL_ENABLES__MASK 0x000000ff
#define GEN6_CLIP_DW1_UCP_CULL_ENABLES__SHIFT 0
@@ -891,11 +960,8 @@ enum gen_depth_format {
#define GEN6_CLIP_DW2_GB_TEST_ENABLE (0x1 << 26)
#define GEN6_CLIP_DW2_UCP_CLIP_ENABLES__MASK 0x00ff0000
#define GEN6_CLIP_DW2_UCP_CLIP_ENABLES__SHIFT 16
-#define GEN6_CLIP_DW2_CLIPMODE__MASK 0x0000e000
-#define GEN6_CLIP_DW2_CLIPMODE__SHIFT 13
-#define GEN6_CLIP_DW2_CLIPMODE_NORMAL (0x0 << 13)
-#define GEN6_CLIP_DW2_CLIPMODE_REJECT_ALL (0x3 << 13)
-#define GEN6_CLIP_DW2_CLIPMODE_ACCEPT_ALL (0x4 << 13)
+#define GEN6_CLIP_DW2_CLIP_MODE__MASK 0x0000e000
+#define GEN6_CLIP_DW2_CLIP_MODE__SHIFT 13
#define GEN6_CLIP_DW2_PERSPECTIVE_DIVIDE_DISABLE (0x1 << 9)
#define GEN6_CLIP_DW2_NONPERSPECTIVE_BARYCENTRIC_ENABLE (0x1 << 8)
#define GEN6_CLIP_DW2_TRI_PROVOKE__MASK 0x00000030
@@ -911,7 +977,7 @@ enum gen_depth_format {
#define GEN6_CLIP_DW3_MAX_POINT_WIDTH__MASK 0x0001ffc0
#define GEN6_CLIP_DW3_MAX_POINT_WIDTH__SHIFT 6
#define GEN6_CLIP_DW3_MAX_POINT_WIDTH__RADIX 3
-#define GEN6_CLIP_DW3_RTAINDEX_FORCED_ZERO (0x1 << 5)
+#define GEN6_CLIP_DW3_FORCE_RTAINDEX_ZERO (0x1 << 5)
#define GEN6_CLIP_DW3_MAX_VPINDEX__MASK 0x0000000f
#define GEN6_CLIP_DW3_MAX_VPINDEX__SHIFT 0
@@ -927,29 +993,17 @@ enum gen_depth_format {
#define GEN7_SF_DW1_DEPTH_OFFSET_SOLID (0x1 << 9)
#define GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME (0x1 << 8)
#define GEN7_SF_DW1_DEPTH_OFFSET_POINT (0x1 << 7)
-#define GEN7_SF_DW1_FRONTFACE__MASK 0x00000060
-#define GEN7_SF_DW1_FRONTFACE__SHIFT 5
-#define GEN7_SF_DW1_FRONTFACE_SOLID (0x0 << 5)
-#define GEN7_SF_DW1_FRONTFACE_WIREFRAME (0x1 << 5)
-#define GEN7_SF_DW1_FRONTFACE_POINT (0x2 << 5)
-#define GEN7_SF_DW1_BACKFACE__MASK 0x00000018
-#define GEN7_SF_DW1_BACKFACE__SHIFT 3
-#define GEN7_SF_DW1_BACKFACE_SOLID (0x0 << 3)
-#define GEN7_SF_DW1_BACKFACE_WIREFRAME (0x1 << 3)
-#define GEN7_SF_DW1_BACKFACE_POINT (0x2 << 3)
-#define GEN7_SF_DW1_VIEWPORT_ENABLE (0x1 << 1)
-#define GEN7_SF_DW1_FRONTWINDING__MASK 0x00000001
-#define GEN7_SF_DW1_FRONTWINDING__SHIFT 0
-#define GEN7_SF_DW1_FRONTWINDING_CW 0x0
-#define GEN7_SF_DW1_FRONTWINDING_CCW 0x1
+#define GEN7_SF_DW1_FILL_MODE_FRONT__MASK 0x00000060
+#define GEN7_SF_DW1_FILL_MODE_FRONT__SHIFT 5
+#define GEN7_SF_DW1_FILL_MODE_BACK__MASK 0x00000018
+#define GEN7_SF_DW1_FILL_MODE_BACK__SHIFT 3
+#define GEN7_SF_DW1_VIEWPORT_TRANSFORM (0x1 << 1)
+#define GEN7_SF_DW1_FRONT_WINDING__MASK 0x00000001
+#define GEN7_SF_DW1_FRONT_WINDING__SHIFT 0
#define GEN7_SF_DW2_AA_LINE_ENABLE (0x1 << 31)
-#define GEN7_SF_DW2_CULLMODE__MASK 0x60000000
-#define GEN7_SF_DW2_CULLMODE__SHIFT 29
-#define GEN7_SF_DW2_CULLMODE_BOTH (0x0 << 29)
-#define GEN7_SF_DW2_CULLMODE_NONE (0x1 << 29)
-#define GEN7_SF_DW2_CULLMODE_FRONT (0x2 << 29)
-#define GEN7_SF_DW2_CULLMODE_BACK (0x3 << 29)
+#define GEN7_SF_DW2_CULL_MODE__MASK 0x60000000
+#define GEN7_SF_DW2_CULL_MODE__SHIFT 29
#define GEN7_SF_DW2_LINE_WIDTH__MASK 0x0ffc0000
#define GEN7_SF_DW2_LINE_WIDTH__SHIFT 18
#define GEN7_SF_DW2_LINE_WIDTH__RADIX 7
@@ -963,10 +1017,6 @@ enum gen_depth_format {
#define GEN7_SF_DW2_SCISSOR_ENABLE (0x1 << 11)
#define GEN7_SF_DW2_MSRASTMODE__MASK 0x00000300
#define GEN7_SF_DW2_MSRASTMODE__SHIFT 8
-#define GEN7_SF_DW2_MSRASTMODE_OFF_PIXEL (0x0 << 8)
-#define GEN7_SF_DW2_MSRASTMODE_OFF_PATTERN (0x1 << 8)
-#define GEN7_SF_DW2_MSRASTMODE_ON_PIXEL (0x2 << 8)
-#define GEN7_SF_DW2_MSRASTMODE_ON_PATTERN (0x3 << 8)
#define GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE (0x1 << 31)
#define GEN7_SF_DW3_TRI_PROVOKE__MASK 0x60000000
@@ -1021,14 +1071,10 @@ enum gen_depth_format {
#define GEN8_SBE_SWIZ_CONST_0001_FLOAT (0x1 << 9)
#define GEN8_SBE_SWIZ_CONST_1111_FLOAT (0x2 << 9)
#define GEN8_SBE_SWIZ_CONST_PRIM_ID (0x3 << 9)
-#define GEN8_SBE_SWIZ_INPUTATTR__MASK 0x000000c0
-#define GEN8_SBE_SWIZ_INPUTATTR__SHIFT 6
-#define GEN8_SBE_SWIZ_INPUTATTR_NORMAL (0x0 << 6)
-#define GEN8_SBE_SWIZ_INPUTATTR_FACING (0x1 << 6)
-#define GEN8_SBE_SWIZ_INPUTATTR_W (0x2 << 6)
-#define GEN8_SBE_SWIZ_INPUTATTR_FACING_W (0x3 << 6)
-#define GEN8_SBE_SWIZ_URB_ENTRY_OFFSET__MASK 0x0000001f
-#define GEN8_SBE_SWIZ_URB_ENTRY_OFFSET__SHIFT 0
+#define GEN8_SBE_SWIZ_SWIZZLE_SELECT__MASK 0x000000c0
+#define GEN8_SBE_SWIZ_SWIZZLE_SELECT__SHIFT 6
+#define GEN8_SBE_SWIZ_SRC_ATTR__MASK 0x0000001f
+#define GEN8_SBE_SWIZ_SRC_ATTR__SHIFT 0
#define GEN6_3DSTATE_SF__SIZE 20
@@ -1080,31 +1126,19 @@ enum gen_depth_format {
#define GEN9_RASTER_DW1_Z_TEST_FAR_ENABLE (0x1 << 26)
-#define GEN8_RASTER_DW1_FRONTWINDING__MASK 0x00200000
-#define GEN8_RASTER_DW1_FRONTWINDING__SHIFT 21
-#define GEN8_RASTER_DW1_FRONTWINDING_CW (0x0 << 21)
-#define GEN8_RASTER_DW1_FRONTWINDING_CCW (0x1 << 21)
-#define GEN8_RASTER_DW1_CULLMODE__MASK 0x00030000
-#define GEN8_RASTER_DW1_CULLMODE__SHIFT 16
-#define GEN8_RASTER_DW1_CULLMODE_BOTH (0x0 << 16)
-#define GEN8_RASTER_DW1_CULLMODE_NONE (0x1 << 16)
-#define GEN8_RASTER_DW1_CULLMODE_FRONT (0x2 << 16)
-#define GEN8_RASTER_DW1_CULLMODE_BACK (0x3 << 16)
+#define GEN8_RASTER_DW1_FRONT_WINDING__MASK 0x00200000
+#define GEN8_RASTER_DW1_FRONT_WINDING__SHIFT 21
+#define GEN8_RASTER_DW1_CULL_MODE__MASK 0x00030000
+#define GEN8_RASTER_DW1_CULL_MODE__SHIFT 16
#define GEN8_RASTER_DW1_SMOOTH_POINT_ENABLE (0x1 << 13)
#define GEN8_RASTER_DW1_API_MULTISAMPLE_ENABLE (0x1 << 12)
#define GEN8_RASTER_DW1_DEPTH_OFFSET_SOLID (0x1 << 9)
#define GEN8_RASTER_DW1_DEPTH_OFFSET_WIREFRAME (0x1 << 8)
#define GEN8_RASTER_DW1_DEPTH_OFFSET_POINT (0x1 << 7)
-#define GEN8_RASTER_DW1_FRONTFACE__MASK 0x00000060
-#define GEN8_RASTER_DW1_FRONTFACE__SHIFT 5
-#define GEN8_RASTER_DW1_FRONTFACE_SOLID (0x0 << 5)
-#define GEN8_RASTER_DW1_FRONTFACE_WIREFRAME (0x1 << 5)
-#define GEN8_RASTER_DW1_FRONTFACE_POINT (0x2 << 5)
-#define GEN8_RASTER_DW1_BACKFACE__MASK 0x00000018
-#define GEN8_RASTER_DW1_BACKFACE__SHIFT 3
-#define GEN8_RASTER_DW1_BACKFACE_SOLID (0x0 << 3)
-#define GEN8_RASTER_DW1_BACKFACE_WIREFRAME (0x1 << 3)
-#define GEN8_RASTER_DW1_BACKFACE_POINT (0x2 << 3)
+#define GEN8_RASTER_DW1_FILL_MODE_FRONT__MASK 0x00000060
+#define GEN8_RASTER_DW1_FILL_MODE_FRONT__SHIFT 5
+#define GEN8_RASTER_DW1_FILL_MODE_BACK__MASK 0x00000018
+#define GEN8_RASTER_DW1_FILL_MODE_BACK__SHIFT 3
#define GEN8_RASTER_DW1_AA_LINE_ENABLE (0x1 << 2)
#define GEN8_RASTER_DW1_SCISSOR_ENABLE (0x1 << 1)
#define GEN8_RASTER_DW1_Z_TEST_ENABLE (0x1 << 0)
@@ -1164,14 +1198,8 @@ enum gen_depth_format {
#define GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT 20
#define GEN6_WM_DW6_PS_POSOFFSET__MASK 0x000c0000
#define GEN6_WM_DW6_PS_POSOFFSET__SHIFT 18
-#define GEN6_WM_DW6_PS_POSOFFSET_NONE (0x0 << 18)
-#define GEN6_WM_DW6_PS_POSOFFSET_CENTROID (0x2 << 18)
-#define GEN6_WM_DW6_PS_POSOFFSET_SAMPLE (0x3 << 18)
#define GEN6_WM_DW6_ZW_INTERP__MASK 0x00030000
#define GEN6_WM_DW6_ZW_INTERP__SHIFT 16
-#define GEN6_WM_DW6_ZW_INTERP_PIXEL (0x0 << 16)
-#define GEN6_WM_DW6_ZW_INTERP_CENTROID (0x2 << 16)
-#define GEN6_WM_DW6_ZW_INTERP_SAMPLE (0x3 << 16)
#define GEN6_WM_DW6_BARYCENTRIC_INTERP__MASK 0x0000fc00
#define GEN6_WM_DW6_BARYCENTRIC_INTERP__SHIFT 10
#define GEN6_WM_DW6_POINT_RASTRULE__MASK 0x00000200
@@ -1180,10 +1208,6 @@ enum gen_depth_format {
#define GEN6_WM_DW6_POINT_RASTRULE_UPPER_RIGHT (0x1 << 9)
#define GEN6_WM_DW6_MSRASTMODE__MASK 0x00000006
#define GEN6_WM_DW6_MSRASTMODE__SHIFT 1
-#define GEN6_WM_DW6_MSRASTMODE_OFF_PIXEL (0x0 << 1)
-#define GEN6_WM_DW6_MSRASTMODE_OFF_PATTERN (0x1 << 1)
-#define GEN6_WM_DW6_MSRASTMODE_ON_PIXEL (0x2 << 1)
-#define GEN6_WM_DW6_MSRASTMODE_ON_PATTERN (0x3 << 1)
#define GEN6_WM_DW6_MSDISPMODE__MASK 0x00000001
#define GEN6_WM_DW6_MSDISPMODE__SHIFT 0
#define GEN6_WM_DW6_MSDISPMODE_PERSAMPLE 0x0
@@ -1207,22 +1231,12 @@ enum gen_depth_format {
#define GEN7_WM_DW1_PS_KILL_PIXEL (0x1 << 25)
#define GEN7_WM_DW1_PSCDEPTH__MASK 0x01800000
#define GEN7_WM_DW1_PSCDEPTH__SHIFT 23
-#define GEN7_WM_DW1_PSCDEPTH_OFF (0x0 << 23)
-#define GEN7_WM_DW1_PSCDEPTH_ON (0x1 << 23)
-#define GEN7_WM_DW1_PSCDEPTH_ON_GE (0x2 << 23)
-#define GEN7_WM_DW1_PSCDEPTH_ON_LE (0x3 << 23)
#define GEN7_WM_DW1_EDSC__MASK 0x00600000
#define GEN7_WM_DW1_EDSC__SHIFT 21
-#define GEN7_WM_DW1_EDSC_NORMAL (0x0 << 21)
-#define GEN7_WM_DW1_EDSC_PSEXEC (0x1 << 21)
-#define GEN7_WM_DW1_EDSC_PREPS (0x2 << 21)
#define GEN7_WM_DW1_PS_USE_DEPTH (0x1 << 20)
#define GEN7_WM_DW1_PS_USE_W (0x1 << 19)
#define GEN7_WM_DW1_ZW_INTERP__MASK 0x00060000
#define GEN7_WM_DW1_ZW_INTERP__SHIFT 17
-#define GEN7_WM_DW1_ZW_INTERP_PIXEL (0x0 << 17)
-#define GEN7_WM_DW1_ZW_INTERP_CENTROID (0x2 << 17)
-#define GEN7_WM_DW1_ZW_INTERP_SAMPLE (0x3 << 17)
#define GEN7_WM_DW1_BARYCENTRIC_INTERP__MASK 0x0001f800
#define GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT 11
#define GEN7_WM_DW1_PS_USE_COVERAGE_MASK (0x1 << 10)
@@ -1247,10 +1261,6 @@ enum gen_depth_format {
#define GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT (0x1 << 2)
#define GEN7_WM_DW1_MSRASTMODE__MASK 0x00000003
#define GEN7_WM_DW1_MSRASTMODE__SHIFT 0
-#define GEN7_WM_DW1_MSRASTMODE_OFF_PIXEL 0x0
-#define GEN7_WM_DW1_MSRASTMODE_OFF_PATTERN 0x1
-#define GEN7_WM_DW1_MSRASTMODE_ON_PIXEL 0x2
-#define GEN7_WM_DW1_MSRASTMODE_ON_PATTERN 0x3
#define GEN7_WM_DW2_MSDISPMODE__MASK 0x80000000
#define GEN7_WM_DW2_MSDISPMODE__SHIFT 31
@@ -1265,12 +1275,12 @@ enum gen_depth_format {
#define GEN8_3DSTATE_WM_DEPTH_STENCIL__SIZE 4
-#define GEN8_ZS_DW1_STENCIL0_FAIL_OP__MASK 0xe0000000
-#define GEN8_ZS_DW1_STENCIL0_FAIL_OP__SHIFT 29
-#define GEN8_ZS_DW1_STENCIL0_ZFAIL_OP__MASK 0x1c000000
-#define GEN8_ZS_DW1_STENCIL0_ZFAIL_OP__SHIFT 26
-#define GEN8_ZS_DW1_STENCIL0_ZPASS_OP__MASK 0x03800000
-#define GEN8_ZS_DW1_STENCIL0_ZPASS_OP__SHIFT 23
+#define GEN8_ZS_DW1_STENCIL_FAIL_OP__MASK 0xe0000000
+#define GEN8_ZS_DW1_STENCIL_FAIL_OP__SHIFT 29
+#define GEN8_ZS_DW1_STENCIL_ZFAIL_OP__MASK 0x1c000000
+#define GEN8_ZS_DW1_STENCIL_ZFAIL_OP__SHIFT 26
+#define GEN8_ZS_DW1_STENCIL_ZPASS_OP__MASK 0x03800000
+#define GEN8_ZS_DW1_STENCIL_ZPASS_OP__SHIFT 23
#define GEN8_ZS_DW1_STENCIL1_FUNC__MASK 0x00700000
#define GEN8_ZS_DW1_STENCIL1_FUNC__SHIFT 20
#define GEN8_ZS_DW1_STENCIL1_FAIL_OP__MASK 0x000e0000
@@ -1279,8 +1289,8 @@ enum gen_depth_format {
#define GEN8_ZS_DW1_STENCIL1_ZFAIL_OP__SHIFT 14
#define GEN8_ZS_DW1_STENCIL1_ZPASS_OP__MASK 0x00003800
#define GEN8_ZS_DW1_STENCIL1_ZPASS_OP__SHIFT 11
-#define GEN8_ZS_DW1_STENCIL0_FUNC__MASK 0x00000700
-#define GEN8_ZS_DW1_STENCIL0_FUNC__SHIFT 8
+#define GEN8_ZS_DW1_STENCIL_FUNC__MASK 0x00000700
+#define GEN8_ZS_DW1_STENCIL_FUNC__SHIFT 8
#define GEN8_ZS_DW1_DEPTH_FUNC__MASK 0x000000e0
#define GEN8_ZS_DW1_DEPTH_FUNC__SHIFT 5
#define GEN8_ZS_DW1_STENCIL1_ENABLE (0x1 << 4)
@@ -1289,17 +1299,17 @@ enum gen_depth_format {
#define GEN8_ZS_DW1_DEPTH_TEST_ENABLE (0x1 << 1)
#define GEN8_ZS_DW1_DEPTH_WRITE_ENABLE (0x1 << 0)
-#define GEN8_ZS_DW2_STENCIL0_VALUEMASK__MASK 0xff000000
-#define GEN8_ZS_DW2_STENCIL0_VALUEMASK__SHIFT 24
-#define GEN8_ZS_DW2_STENCIL0_WRITEMASK__MASK 0x00ff0000
-#define GEN8_ZS_DW2_STENCIL0_WRITEMASK__SHIFT 16
-#define GEN8_ZS_DW2_STENCIL1_VALUEMASK__MASK 0x0000ff00
-#define GEN8_ZS_DW2_STENCIL1_VALUEMASK__SHIFT 8
-#define GEN8_ZS_DW2_STENCIL1_WRITEMASK__MASK 0x000000ff
-#define GEN8_ZS_DW2_STENCIL1_WRITEMASK__SHIFT 0
-
-#define GEN9_ZS_DW3_STENCIL0_REF__MASK 0x0000ff00
-#define GEN9_ZS_DW3_STENCIL0_REF__SHIFT 8
+#define GEN8_ZS_DW2_STENCIL_TEST_MASK__MASK 0xff000000
+#define GEN8_ZS_DW2_STENCIL_TEST_MASK__SHIFT 24
+#define GEN8_ZS_DW2_STENCIL_WRITE_MASK__MASK 0x00ff0000
+#define GEN8_ZS_DW2_STENCIL_WRITE_MASK__SHIFT 16
+#define GEN8_ZS_DW2_STENCIL1_TEST_MASK__MASK 0x0000ff00
+#define GEN8_ZS_DW2_STENCIL1_TEST_MASK__SHIFT 8
+#define GEN8_ZS_DW2_STENCIL1_WRITE_MASK__MASK 0x000000ff
+#define GEN8_ZS_DW2_STENCIL1_WRITE_MASK__SHIFT 0
+
+#define GEN9_ZS_DW3_STENCIL_REF__MASK 0x0000ff00
+#define GEN9_ZS_DW3_STENCIL_REF__SHIFT 8
#define GEN9_ZS_DW3_STENCIL1_REF__MASK 0x000000ff
#define GEN9_ZS_DW3_STENCIL1_REF__SHIFT 0
@@ -1314,13 +1324,8 @@ enum gen_depth_format {
#define GEN8_WM_HZ_DW1_FULL_SURFACE_DEPTH_CLEAR (0x1 << 25)
#define GEN8_WM_HZ_DW1_STENCIL_CLEAR_VALUE__MASK 0x00ff0000
#define GEN8_WM_HZ_DW1_STENCIL_CLEAR_VALUE__SHIFT 16
-#define GEN8_WM_HZ_DW1_NUMSAMPLES__MASK 0x0000e000
-#define GEN8_WM_HZ_DW1_NUMSAMPLES__SHIFT 13
-#define GEN8_WM_HZ_DW1_NUMSAMPLES_1 (0x0 << 13)
-#define GEN8_WM_HZ_DW1_NUMSAMPLES_2 (0x1 << 13)
-#define GEN8_WM_HZ_DW1_NUMSAMPLES_4 (0x2 << 13)
-#define GEN8_WM_HZ_DW1_NUMSAMPLES_8 (0x3 << 13)
-#define GEN8_WM_HZ_DW1_NUMSAMPLES_16 (0x4 << 13)
+#define GEN8_WM_HZ_DW1_NUM_SAMPLES__MASK 0x0000e000
+#define GEN8_WM_HZ_DW1_NUM_SAMPLES__SHIFT 13
#define GEN8_WM_HZ_DW2_RECT_MIN_Y__MASK 0xffff0000
#define GEN8_WM_HZ_DW2_RECT_MIN_Y__SHIFT 16
@@ -1359,9 +1364,6 @@ enum gen_depth_format {
#define GEN75_PS_DW4_ACCESS_UAV (0x1 << 5)
#define GEN7_PS_DW4_POSOFFSET__MASK 0x00000018
#define GEN7_PS_DW4_POSOFFSET__SHIFT 3
-#define GEN7_PS_DW4_POSOFFSET_NONE (0x0 << 3)
-#define GEN7_PS_DW4_POSOFFSET_CENTROID (0x2 << 3)
-#define GEN7_PS_DW4_POSOFFSET_SAMPLE (0x3 << 3)
#define GEN7_PS_DW4_DISPATCH_MODE__MASK 0x00000007
#define GEN7_PS_DW4_DISPATCH_MODE__SHIFT 0
@@ -1397,9 +1399,6 @@ enum gen_depth_format {
#define GEN8_PS_DW6_RT_RESOLVE (0x1 << 6)
#define GEN8_PS_DW6_POSOFFSET__MASK 0x00000018
#define GEN8_PS_DW6_POSOFFSET__SHIFT 3
-#define GEN8_PS_DW6_POSOFFSET_NONE (0x0 << 3)
-#define GEN8_PS_DW6_POSOFFSET_CENTROID (0x2 << 3)
-#define GEN8_PS_DW6_POSOFFSET_SAMPLE (0x3 << 3)
#define GEN8_PS_DW6_DISPATCH_MODE__MASK 0x00000007
#define GEN8_PS_DW6_DISPATCH_MODE__SHIFT 0
@@ -1423,16 +1422,12 @@ enum gen_depth_format {
#define GEN8_3DSTATE_PS_EXTRA__SIZE 2
-#define GEN8_PSX_DW1_DISPATCH_ENABLE (0x1 << 31)
+#define GEN8_PSX_DW1_VALID (0x1 << 31)
#define GEN8_PSX_DW1_UAV_ONLY (0x1 << 30)
#define GEN8_PSX_DW1_COMPUTE_OMASK (0x1 << 29)
#define GEN8_PSX_DW1_KILL_PIXEL (0x1 << 28)
#define GEN8_PSX_DW1_PSCDEPTH__MASK 0x0c000000
#define GEN8_PSX_DW1_PSCDEPTH__SHIFT 26
-#define GEN8_PSX_DW1_PSCDEPTH_OFF (0x0 << 26)
-#define GEN8_PSX_DW1_PSCDEPTH_ON (0x1 << 26)
-#define GEN8_PSX_DW1_PSCDEPTH_ON_GE (0x2 << 26)
-#define GEN8_PSX_DW1_PSCDEPTH_ON_LE (0x3 << 26)
#define GEN8_PSX_DW1_FORCE_COMPUTE_DEPTH (0x1 << 25)
#define GEN8_PSX_DW1_USE_DEPTH (0x1 << 24)
#define GEN8_PSX_DW1_USE_W (0x1 << 23)
@@ -1696,17 +1691,10 @@ enum gen_depth_format {
#define GEN75_MULTISAMPLE_DW1_DX9_MULTISAMPLE_ENABLE (0x1 << 5)
-#define GEN6_MULTISAMPLE_DW1_PIXLOC__MASK 0x00000010
-#define GEN6_MULTISAMPLE_DW1_PIXLOC__SHIFT 4
-#define GEN6_MULTISAMPLE_DW1_PIXLOC_CENTER (0x0 << 4)
-#define GEN6_MULTISAMPLE_DW1_PIXLOC_UL_CORNER (0x1 << 4)
-#define GEN6_MULTISAMPLE_DW1_NUMSAMPLES__MASK 0x0000000e
-#define GEN6_MULTISAMPLE_DW1_NUMSAMPLES__SHIFT 1
-#define GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1 (0x0 << 1)
-#define GEN8_MULTISAMPLE_DW1_NUMSAMPLES_2 (0x1 << 1)
-#define GEN6_MULTISAMPLE_DW1_NUMSAMPLES_4 (0x2 << 1)
-#define GEN7_MULTISAMPLE_DW1_NUMSAMPLES_8 (0x3 << 1)
-#define GEN8_MULTISAMPLE_DW1_NUMSAMPLES_16 (0x4 << 1)
+#define GEN6_MULTISAMPLE_DW1_PIXEL_LOCATION__MASK 0x00000010
+#define GEN6_MULTISAMPLE_DW1_PIXEL_LOCATION__SHIFT 4
+#define GEN6_MULTISAMPLE_DW1_NUM_SAMPLES__MASK 0x0000000e
+#define GEN6_MULTISAMPLE_DW1_NUM_SAMPLES__SHIFT 1
diff --git a/src/gallium/drivers/ilo/genhw/gen_render_dynamic.xml.h b/src/gallium/drivers/ilo/genhw/gen_render_dynamic.xml.h
index 6d815beecb3..b65b704adc6 100644
--- a/src/gallium/drivers/ilo/genhw/gen_render_dynamic.xml.h
+++ b/src/gallium/drivers/ilo/genhw/gen_render_dynamic.xml.h
@@ -84,7 +84,7 @@ enum gen_blend_function {
GEN6_BLENDFUNCTION_MAX = 0x4,
};
-enum gen_logicop_function {
+enum gen_logic_op {
GEN6_LOGICOP_CLEAR = 0x0,
GEN6_LOGICOP_NOR = 0x1,
GEN6_LOGICOP_AND_INVERTED = 0x2,
@@ -103,20 +103,31 @@ enum gen_logicop_function {
GEN6_LOGICOP_SET = 0xf,
};
-enum gen_sampler_mip_filter {
+enum gen_mip_filter {
GEN6_MIPFILTER_NONE = 0x0,
GEN6_MIPFILTER_NEAREST = 0x1,
GEN6_MIPFILTER_LINEAR = 0x3,
};
-enum gen_sampler_map_filter {
+enum gen_map_filter {
GEN6_MAPFILTER_NEAREST = 0x0,
GEN6_MAPFILTER_LINEAR = 0x1,
GEN6_MAPFILTER_ANISOTROPIC = 0x2,
GEN6_MAPFILTER_MONO = 0x6,
};
-enum gen_sampler_aniso_ratio {
+enum gen_prefilter_op {
+ GEN6_PREFILTEROP_ALWAYS = 0x0,
+ GEN6_PREFILTEROP_NEVER = 0x1,
+ GEN6_PREFILTEROP_LESS = 0x2,
+ GEN6_PREFILTEROP_EQUAL = 0x3,
+ GEN6_PREFILTEROP_LEQUAL = 0x4,
+ GEN6_PREFILTEROP_GREATER = 0x5,
+ GEN6_PREFILTEROP_NOTEQUAL = 0x6,
+ GEN6_PREFILTEROP_GEQUAL = 0x7,
+};
+
+enum gen_aniso_ratio {
GEN6_ANISORATIO_2 = 0x0,
GEN6_ANISORATIO_4 = 0x1,
GEN6_ANISORATIO_6 = 0x2,
@@ -127,7 +138,7 @@ enum gen_sampler_aniso_ratio {
GEN6_ANISORATIO_16 = 0x7,
};
-enum gen_sampler_texcoord_mode {
+enum gen_texcoord_mode {
GEN6_TEXCOORDMODE_WRAP = 0x0,
GEN6_TEXCOORDMODE_MIRROR = 0x1,
GEN6_TEXCOORDMODE_CLAMP = 0x2,
@@ -137,15 +148,15 @@ enum gen_sampler_texcoord_mode {
GEN8_TEXCOORDMODE_HALF_BORDER = 0x6,
};
-enum gen_sampler_key_filter {
+enum gen_key_filter {
GEN6_KEYFILTER_KILL_ON_ANY_MATCH = 0x0,
GEN6_KEYFILTER_REPLACE_BLACK = 0x1,
};
#define GEN6_COLOR_CALC_STATE__SIZE 6
-#define GEN6_CC_DW0_STENCIL0_REF__MASK 0xff000000
-#define GEN6_CC_DW0_STENCIL0_REF__SHIFT 24
+#define GEN6_CC_DW0_STENCIL_REF__MASK 0xff000000
+#define GEN6_CC_DW0_STENCIL_REF__SHIFT 24
#define GEN6_CC_DW0_STENCIL1_REF__MASK 0x00ff0000
#define GEN6_CC_DW0_STENCIL1_REF__SHIFT 16
#define GEN6_CC_DW0_ROUND_DISABLE_DISABLE (0x1 << 15)
@@ -162,14 +173,14 @@ enum gen_sampler_key_filter {
#define GEN6_DEPTH_STENCIL_STATE__SIZE 3
#define GEN6_ZS_DW0_STENCIL_TEST_ENABLE (0x1 << 31)
-#define GEN6_ZS_DW0_STENCIL0_FUNC__MASK 0x70000000
-#define GEN6_ZS_DW0_STENCIL0_FUNC__SHIFT 28
-#define GEN6_ZS_DW0_STENCIL0_FAIL_OP__MASK 0x0e000000
-#define GEN6_ZS_DW0_STENCIL0_FAIL_OP__SHIFT 25
-#define GEN6_ZS_DW0_STENCIL0_ZFAIL_OP__MASK 0x01c00000
-#define GEN6_ZS_DW0_STENCIL0_ZFAIL_OP__SHIFT 22
-#define GEN6_ZS_DW0_STENCIL0_ZPASS_OP__MASK 0x00380000
-#define GEN6_ZS_DW0_STENCIL0_ZPASS_OP__SHIFT 19
+#define GEN6_ZS_DW0_STENCIL_FUNC__MASK 0x70000000
+#define GEN6_ZS_DW0_STENCIL_FUNC__SHIFT 28
+#define GEN6_ZS_DW0_STENCIL_FAIL_OP__MASK 0x0e000000
+#define GEN6_ZS_DW0_STENCIL_FAIL_OP__SHIFT 25
+#define GEN6_ZS_DW0_STENCIL_ZFAIL_OP__MASK 0x01c00000
+#define GEN6_ZS_DW0_STENCIL_ZFAIL_OP__SHIFT 22
+#define GEN6_ZS_DW0_STENCIL_ZPASS_OP__MASK 0x00380000
+#define GEN6_ZS_DW0_STENCIL_ZPASS_OP__SHIFT 19
#define GEN6_ZS_DW0_STENCIL_WRITE_ENABLE (0x1 << 18)
#define GEN6_ZS_DW0_STENCIL1_ENABLE (0x1 << 15)
#define GEN6_ZS_DW0_STENCIL1_FUNC__MASK 0x00007000
@@ -181,14 +192,14 @@ enum gen_sampler_key_filter {
#define GEN6_ZS_DW0_STENCIL1_ZPASS_OP__MASK 0x00000038
#define GEN6_ZS_DW0_STENCIL1_ZPASS_OP__SHIFT 3
-#define GEN6_ZS_DW1_STENCIL0_VALUEMASK__MASK 0xff000000
-#define GEN6_ZS_DW1_STENCIL0_VALUEMASK__SHIFT 24
-#define GEN6_ZS_DW1_STENCIL0_WRITEMASK__MASK 0x00ff0000
-#define GEN6_ZS_DW1_STENCIL0_WRITEMASK__SHIFT 16
-#define GEN6_ZS_DW1_STENCIL1_VALUEMASK__MASK 0x0000ff00
-#define GEN6_ZS_DW1_STENCIL1_VALUEMASK__SHIFT 8
-#define GEN6_ZS_DW1_STENCIL1_WRITEMASK__MASK 0x000000ff
-#define GEN6_ZS_DW1_STENCIL1_WRITEMASK__SHIFT 0
+#define GEN6_ZS_DW1_STENCIL_TEST_MASK__MASK 0xff000000
+#define GEN6_ZS_DW1_STENCIL_TEST_MASK__SHIFT 24
+#define GEN6_ZS_DW1_STENCIL_WRITE_MASK__MASK 0x00ff0000
+#define GEN6_ZS_DW1_STENCIL_WRITE_MASK__SHIFT 16
+#define GEN6_ZS_DW1_STENCIL1_TEST_MASK__MASK 0x0000ff00
+#define GEN6_ZS_DW1_STENCIL1_TEST_MASK__SHIFT 8
+#define GEN6_ZS_DW1_STENCIL1_WRITE_MASK__MASK 0x000000ff
+#define GEN6_ZS_DW1_STENCIL1_WRITE_MASK__SHIFT 0
#define GEN6_ZS_DW2_DEPTH_TEST_ENABLE (0x1 << 31)
#define GEN6_ZS_DW2_DEPTH_FUNC__MASK 0x38000000
@@ -216,10 +227,12 @@ enum gen_sampler_key_filter {
#define GEN6_RT_DW1_ALPHA_TO_COVERAGE (0x1 << 31)
#define GEN6_RT_DW1_ALPHA_TO_ONE (0x1 << 30)
#define GEN6_RT_DW1_ALPHA_TO_COVERAGE_DITHER (0x1 << 29)
-#define GEN6_RT_DW1_WRITE_DISABLE_A (0x1 << 27)
-#define GEN6_RT_DW1_WRITE_DISABLE_R (0x1 << 26)
-#define GEN6_RT_DW1_WRITE_DISABLE_G (0x1 << 25)
-#define GEN6_RT_DW1_WRITE_DISABLE_B (0x1 << 24)
+#define GEN6_RT_DW1_WRITE_DISABLES__MASK 0x0f000000
+#define GEN6_RT_DW1_WRITE_DISABLES__SHIFT 24
+#define GEN6_RT_DW1_WRITE_DISABLES_A (0x1 << 27)
+#define GEN6_RT_DW1_WRITE_DISABLES_R (0x1 << 26)
+#define GEN6_RT_DW1_WRITE_DISABLES_G (0x1 << 25)
+#define GEN6_RT_DW1_WRITE_DISABLES_B (0x1 << 24)
#define GEN6_RT_DW1_LOGICOP_ENABLE (0x1 << 22)
#define GEN6_RT_DW1_LOGICOP_FUNC__MASK 0x003c0000
#define GEN6_RT_DW1_LOGICOP_FUNC__SHIFT 18
@@ -267,10 +280,12 @@ enum gen_sampler_key_filter {
#define GEN8_RT_DW0_DST_ALPHA_FACTOR__SHIFT 8
#define GEN8_RT_DW0_ALPHA_FUNC__MASK 0x000000e0
#define GEN8_RT_DW0_ALPHA_FUNC__SHIFT 5
-#define GEN8_RT_DW0_WRITE_DISABLE_A (0x1 << 3)
-#define GEN8_RT_DW0_WRITE_DISABLE_R (0x1 << 2)
-#define GEN8_RT_DW0_WRITE_DISABLE_G (0x1 << 1)
-#define GEN8_RT_DW0_WRITE_DISABLE_B (0x1 << 0)
+#define GEN8_RT_DW0_WRITE_DISABLES__MASK 0x0000000f
+#define GEN8_RT_DW0_WRITE_DISABLES__SHIFT 0
+#define GEN8_RT_DW0_WRITE_DISABLES_A (0x1 << 3)
+#define GEN8_RT_DW0_WRITE_DISABLES_R (0x1 << 2)
+#define GEN8_RT_DW0_WRITE_DISABLES_G (0x1 << 1)
+#define GEN8_RT_DW0_WRITE_DISABLES_B (0x1 << 0)
#define GEN8_RT_DW1_LOGICOP_ENABLE (0x1 << 31)
#define GEN8_RT_DW1_LOGICOP_FUNC__MASK 0x78000000
@@ -419,6 +434,7 @@ enum gen_sampler_key_filter {
#define GEN8_SAMPLER_DW0_LOD_PRECLAMP_ENABLE__SHIFT 27
#define GEN6_SAMPLER_DW0_BASE_LOD__MASK 0x07c00000
#define GEN6_SAMPLER_DW0_BASE_LOD__SHIFT 22
+#define GEN6_SAMPLER_DW0_BASE_LOD__RADIX 1
#define GEN6_SAMPLER_DW0_MIP_FILTER__MASK 0x00300000
#define GEN6_SAMPLER_DW0_MIP_FILTER__SHIFT 20
#define GEN6_SAMPLER_DW0_MAG_FILTER__MASK 0x000e0000
diff --git a/src/gallium/drivers/ilo/genhw/gen_render_surface.xml.h b/src/gallium/drivers/ilo/genhw/gen_render_surface.xml.h
index 7c2349f2447..b5d09f64429 100644
--- a/src/gallium/drivers/ilo/genhw/gen_render_surface.xml.h
+++ b/src/gallium/drivers/ilo/genhw/gen_render_surface.xml.h
@@ -299,7 +299,10 @@ enum gen_surface_scs {
#define GEN6_SURFACE_DW0_MIPLAYOUT__SHIFT 10
#define GEN6_SURFACE_DW0_MIPLAYOUT_BELOW (0x0 << 10)
#define GEN6_SURFACE_DW0_MIPLAYOUT_RIGHT (0x1 << 10)
-#define GEN6_SURFACE_DW0_CUBE_MAP_CORNER_MODE (0x1 << 9)
+#define GEN6_SURFACE_DW0_CUBE_MAP_CORNER_MODE__MASK 0x00000200
+#define GEN6_SURFACE_DW0_CUBE_MAP_CORNER_MODE__SHIFT 9
+#define GEN6_SURFACE_DW0_CUBE_MAP_CORNER_MODE_REPLICATE (0x0 << 9)
+#define GEN6_SURFACE_DW0_CUBE_MAP_CORNER_MODE_AVERAGE (0x1 << 9)
#define GEN6_SURFACE_DW0_RENDER_CACHE_RW (0x1 << 8)
#define GEN6_SURFACE_DW0_MEDIA_BOUNDARY_PIXEL_MODE__MASK 0x000000c0
#define GEN6_SURFACE_DW0_MEDIA_BOUNDARY_PIXEL_MODE__SHIFT 6
@@ -485,6 +488,8 @@ enum gen_surface_scs {
#define GEN7_SURFACE_DW7_CC_B__SHIFT 29
#define GEN7_SURFACE_DW7_CC_A__MASK 0x10000000
#define GEN7_SURFACE_DW7_CC_A__SHIFT 28
+#define GEN75_SURFACE_DW7_SCS__MASK 0x0fff0000
+#define GEN75_SURFACE_DW7_SCS__SHIFT 16
#define GEN75_SURFACE_DW7_SCS_R__MASK 0x0e000000
#define GEN75_SURFACE_DW7_SCS_R__SHIFT 25
#define GEN75_SURFACE_DW7_SCS_G__MASK 0x01c00000
diff --git a/src/gallium/drivers/ilo/genhw/genhw.h b/src/gallium/drivers/ilo/genhw/genhw.h
index 9e05bf5beca..3a777a18c2a 100644
--- a/src/gallium/drivers/ilo/genhw/genhw.h
+++ b/src/gallium/drivers/ilo/genhw/genhw.h
@@ -1,6 +1,4 @@
/*
- * Mesa 3-D graphics library
- *
* Copyright (C) 2014 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -25,8 +23,9 @@
#ifndef GENHW_H
#define GENHW_H
-#include "pipe/p_compiler.h"
-#include "util/u_debug.h"
+#include <stdbool.h>
+#include <stdint.h>
+#include <assert.h>
#include "gen_regs.xml.h"
#include "gen_mi.xml.h"
diff --git a/src/gallium/drivers/ilo/ilo_blitter.h b/src/gallium/drivers/ilo/ilo_blitter.h
index 4284f415c1c..4eba8481c28 100644
--- a/src/gallium/drivers/ilo/ilo_blitter.h
+++ b/src/gallium/drivers/ilo/ilo_blitter.h
@@ -39,12 +39,6 @@ enum ilo_blitter_uses {
ILO_BLITTER_USE_FB_STENCIL = 1 << 4,
};
-enum ilo_blitter_rectlist_op {
- ILO_BLITTER_RECTLIST_CLEAR_ZS,
- ILO_BLITTER_RECTLIST_RESOLVE_Z,
- ILO_BLITTER_RECTLIST_RESOLVE_HIZ,
-};
-
struct blitter_context;
struct pipe_resource;
struct pipe_surface;
@@ -57,30 +51,42 @@ struct ilo_blitter {
/*
* A minimal context with the goal to send RECTLISTs down the pipeline.
*/
- enum ilo_blitter_rectlist_op op;
+ enum ilo_state_raster_earlyz_op earlyz_op;
+ bool earlyz_stencil_clear;
uint32_t uses;
bool initialized;
float vertices[3][2];
- struct ilo_ve_state ve;
- struct pipe_draw_info draw;
+ struct gen6_3dprimitive_info draw_info;
- struct ilo_viewport_cso viewport;
- struct ilo_dsa_state dsa;
+ uint32_t vf_data[4];
+ struct ilo_state_vf vf;
- struct {
- struct pipe_stencil_ref stencil_ref;
- ubyte alpha_ref;
- struct pipe_blend_color blend_color;
- } cc;
+ struct ilo_state_vs vs;
+ struct ilo_state_hs hs;
+ struct ilo_state_ds ds;
+ struct ilo_state_gs gs;
+
+ struct ilo_state_sol sol;
+
+ struct ilo_state_viewport vp;
+ uint32_t vp_data[20];
+
+ struct ilo_state_sbe sbe;
+ struct ilo_state_ps ps;
+ struct ilo_state_cc cc;
uint32_t depth_clear_value;
+ struct ilo_state_urb urb;
+
struct {
struct ilo_surface_cso dst;
unsigned width, height;
unsigned num_samples;
+
+ struct ilo_state_raster rs;
} fb;
};
diff --git a/src/gallium/drivers/ilo/ilo_blitter_pipe.c b/src/gallium/drivers/ilo/ilo_blitter_pipe.c
index c4c02bd3e53..0bfe7827f11 100644
--- a/src/gallium/drivers/ilo/ilo_blitter_pipe.c
+++ b/src/gallium/drivers/ilo/ilo_blitter_pipe.c
@@ -63,7 +63,7 @@ ilo_blitter_pipe_begin(struct ilo_blitter *blitter,
util_blitter_save_viewport(b, &vec->viewport.viewport0);
if (scissor_enable)
- util_blitter_save_scissor(b, &vec->scissor.scissor0);
+ util_blitter_save_scissor(b, &vec->viewport.scissor0);
switch (op) {
case ILO_BLITTER_PIPE_BLIT:
diff --git a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c
index 6d8afed9dca..13c8f500680 100644
--- a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c
+++ b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c
@@ -25,7 +25,6 @@
* Chia-I Wu <[email protected]>
*/
-#include "core/ilo_state_3d.h"
#include "util/u_draw.h"
#include "util/u_pack_color.h"
@@ -40,45 +39,48 @@
static bool
ilo_blitter_set_invariants(struct ilo_blitter *blitter)
{
- struct pipe_vertex_element velem;
- struct pipe_viewport_state vp;
+ struct ilo_state_vf_element_info elem;
if (blitter->initialized)
return true;
+ /* a rectangle has 3 vertices in a RECTLIST */
+ blitter->draw_info.topology = GEN6_3DPRIM_RECTLIST;
+ blitter->draw_info.vertex_count = 3;
+ blitter->draw_info.instance_count = 1;
+
+ memset(&elem, 0, sizeof(elem));
/* only vertex X and Y */
- memset(&velem, 0, sizeof(velem));
- velem.src_format = PIPE_FORMAT_R32G32_FLOAT;
- ilo_gpe_init_ve(blitter->ilo->dev, 1, &velem, &blitter->ve);
-
- /* generate VUE header */
- ilo_gpe_init_ve_nosrc(blitter->ilo->dev,
- GEN6_VFCOMP_STORE_0, /* Reserved */
- GEN6_VFCOMP_STORE_0, /* Render Target Array Index */
- GEN6_VFCOMP_STORE_0, /* Viewport Index */
- GEN6_VFCOMP_STORE_0, /* Point Width */
- &blitter->ve.nosrc_cso);
- blitter->ve.prepend_nosrc_cso = true;
+ elem.format = GEN6_FORMAT_R32G32_FLOAT;
+ elem.format_size = 8;
+ elem.component_count = 2;
- /* a rectangle has 3 vertices in a RECTLIST */
- util_draw_init_info(&blitter->draw);
- blitter->draw.mode = ILO_PRIM_RECTANGLES;
- blitter->draw.count = 3;
+ ilo_state_vf_init_for_rectlist(&blitter->vf, blitter->ilo->dev,
+ blitter->vf_data, sizeof(blitter->vf_data), &elem, 1);
+
+ ilo_state_vs_init_disabled(&blitter->vs, blitter->ilo->dev);
+ ilo_state_hs_init_disabled(&blitter->hs, blitter->ilo->dev);
+ ilo_state_ds_init_disabled(&blitter->ds, blitter->ilo->dev);
+ ilo_state_gs_init_disabled(&blitter->gs, blitter->ilo->dev);
+ ilo_state_sol_init_disabled(&blitter->sol, blitter->ilo->dev, false);
/**
* From the Haswell PRM, volume 7, page 615:
*
* "The clear value must be between the min and max depth values
- * (inclusive) defined in the CC_VIEWPORT."
+ * (inclusive) defined in the CC_VIEWPORT."
*
* Even though clipping and viewport transformation will be disabled, we
* still need to set up the viewport states.
*/
- memset(&vp, 0, sizeof(vp));
- vp.scale[0] = 1.0f;
- vp.scale[1] = 1.0f;
- vp.scale[2] = 1.0f;
- ilo_gpe_set_viewport_cso(blitter->ilo->dev, &vp, &blitter->viewport);
+ ilo_state_viewport_init_for_rectlist(&blitter->vp, blitter->ilo->dev,
+ blitter->vp_data, sizeof(blitter->vp_data));
+
+ ilo_state_sbe_init_for_rectlist(&blitter->sbe, blitter->ilo->dev, 0, 0);
+ ilo_state_ps_init_disabled(&blitter->ps, blitter->ilo->dev);
+
+ ilo_state_urb_init_for_rectlist(&blitter->urb, blitter->ilo->dev,
+ ilo_state_vf_get_attr_count(&blitter->vf));
blitter->initialized = true;
@@ -86,10 +88,12 @@ ilo_blitter_set_invariants(struct ilo_blitter *blitter)
}
static void
-ilo_blitter_set_op(struct ilo_blitter *blitter,
- enum ilo_blitter_rectlist_op op)
+ilo_blitter_set_earlyz_op(struct ilo_blitter *blitter,
+ enum ilo_state_raster_earlyz_op op,
+ bool earlyz_stencil_clear)
{
- blitter->op = op;
+ blitter->earlyz_op = op;
+ blitter->earlyz_stencil_clear = earlyz_stencil_clear;
}
/**
@@ -117,18 +121,27 @@ ilo_blitter_set_rectlist(struct ilo_blitter *blitter,
}
static void
-ilo_blitter_set_clear_values(struct ilo_blitter *blitter,
- uint32_t depth, ubyte stencil)
+ilo_blitter_set_depth_clear_value(struct ilo_blitter *blitter,
+ uint32_t depth)
{
blitter->depth_clear_value = depth;
- blitter->cc.stencil_ref.ref_value[0] = stencil;
}
static void
-ilo_blitter_set_dsa(struct ilo_blitter *blitter,
- const struct pipe_depth_stencil_alpha_state *state)
+ilo_blitter_set_cc(struct ilo_blitter *blitter,
+ const struct ilo_state_cc_info *info)
+{
+ memset(&blitter->cc, 0, sizeof(blitter->cc));
+ ilo_state_cc_init(&blitter->cc, blitter->ilo->dev, info);
+}
+
+static void
+ilo_blitter_set_fb_rs(struct ilo_blitter *blitter)
{
- ilo_gpe_init_dsa(blitter->ilo->dev, state, &blitter->dsa);
+ memset(&blitter->fb.rs, 0, sizeof(blitter->fb.rs));
+ ilo_state_raster_init_for_rectlist(&blitter->fb.rs, blitter->ilo->dev,
+ blitter->fb.num_samples, blitter->earlyz_op,
+ blitter->earlyz_stencil_clear);
}
static void
@@ -146,6 +159,8 @@ ilo_blitter_set_fb(struct ilo_blitter *blitter,
blitter->fb.num_samples = 1;
memcpy(&blitter->fb.dst, cso, sizeof(*cso));
+
+ ilo_blitter_set_fb_rs(blitter);
}
static void
@@ -191,9 +206,9 @@ hiz_align_fb(struct ilo_blitter *blitter)
{
unsigned align_w, align_h;
- switch (blitter->op) {
- case ILO_BLITTER_RECTLIST_CLEAR_ZS:
- case ILO_BLITTER_RECTLIST_RESOLVE_Z:
+ switch (blitter->earlyz_op) {
+ case ILO_STATE_RASTER_EARLYZ_DEPTH_CLEAR:
+ case ILO_STATE_RASTER_EARLYZ_DEPTH_RESOLVE:
break;
default:
return;
@@ -328,7 +343,7 @@ ilo_blitter_rectlist_clear_zs(struct ilo_blitter *blitter,
double depth, unsigned stencil)
{
struct ilo_texture *tex = ilo_texture(zs->texture);
- struct pipe_depth_stencil_alpha_state dsa_state;
+ struct ilo_state_cc_info info;
uint32_t uses, clear_value;
if (!ilo_image_can_enable_aux(&tex->image, zs->u.tex.level))
@@ -368,17 +383,20 @@ ilo_blitter_rectlist_clear_zs(struct ilo_blitter *blitter,
* - [DevSNB] errata: For stencil buffer only clear, the previous
* depth clear value must be delivered during the clear."
*/
- memset(&dsa_state, 0, sizeof(dsa_state));
+ memset(&info, 0, sizeof(info));
- if (clear_flags & PIPE_CLEAR_DEPTH)
- dsa_state.depth.writemask = true;
+ if (clear_flags & PIPE_CLEAR_DEPTH) {
+ info.depth.cv_has_buffer = true;
+ info.depth.write_enable = true;
+ }
if (clear_flags & PIPE_CLEAR_STENCIL) {
- dsa_state.stencil[0].enabled = true;
- dsa_state.stencil[0].func = PIPE_FUNC_ALWAYS;
- dsa_state.stencil[0].fail_op = PIPE_STENCIL_OP_KEEP;
- dsa_state.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE;
- dsa_state.stencil[0].zfail_op = PIPE_STENCIL_OP_KEEP;
+ info.stencil.cv_has_buffer = true;
+ info.stencil.test_enable = true;
+ info.stencil.front.test_func = GEN6_COMPAREFUNCTION_ALWAYS;
+ info.stencil.front.fail_op = GEN6_STENCILOP_KEEP;
+ info.stencil.front.zfail_op = GEN6_STENCILOP_KEEP;
+ info.stencil.front.zpass_op = GEN6_STENCILOP_REPLACE;
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 277:
@@ -389,18 +407,21 @@ ilo_blitter_rectlist_clear_zs(struct ilo_blitter *blitter,
* - DEPTH_STENCIL_STATE::Stencil Test Mask must be 0xFF
* - DEPTH_STENCIL_STATE::Back Face Stencil Write Mask must be 0xFF
* - DEPTH_STENCIL_STATE::Back Face Stencil Test Mask must be 0xFF"
+ *
+ * Back face masks will be copied from front face masks.
*/
- dsa_state.stencil[0].valuemask = 0xff;
- dsa_state.stencil[0].writemask = 0xff;
- dsa_state.stencil[1].valuemask = 0xff;
- dsa_state.stencil[1].writemask = 0xff;
+ info.params.stencil_front.test_ref = (uint8_t) stencil;
+ info.params.stencil_front.test_mask = 0xff;
+ info.params.stencil_front.write_mask = 0xff;
}
ilo_blitter_set_invariants(blitter);
- ilo_blitter_set_op(blitter, ILO_BLITTER_RECTLIST_CLEAR_ZS);
+ ilo_blitter_set_earlyz_op(blitter,
+ ILO_STATE_RASTER_EARLYZ_DEPTH_CLEAR,
+ clear_flags & PIPE_CLEAR_STENCIL);
- ilo_blitter_set_dsa(blitter, &dsa_state);
- ilo_blitter_set_clear_values(blitter, clear_value, (ubyte) stencil);
+ ilo_blitter_set_cc(blitter, &info);
+ ilo_blitter_set_depth_clear_value(blitter, clear_value);
ilo_blitter_set_fb_from_surface(blitter, zs);
uses = ILO_BLITTER_USE_DSA;
@@ -421,7 +442,7 @@ ilo_blitter_rectlist_resolve_z(struct ilo_blitter *blitter,
unsigned level, unsigned slice)
{
struct ilo_texture *tex = ilo_texture(res);
- struct pipe_depth_stencil_alpha_state dsa_state;
+ struct ilo_state_cc_info info;
const struct ilo_texture_slice *s =
ilo_texture_get_slice(tex, level, slice);
@@ -435,16 +456,18 @@ ilo_blitter_rectlist_resolve_z(struct ilo_blitter *blitter,
* to NEVER. Depth Buffer Write Enable must be enabled. Stencil Test
* Enable and Stencil Buffer Write Enable must be disabled."
*/
- memset(&dsa_state, 0, sizeof(dsa_state));
- dsa_state.depth.writemask = true;
- dsa_state.depth.enabled = true;
- dsa_state.depth.func = PIPE_FUNC_NEVER;
+ memset(&info, 0, sizeof(info));
+ info.depth.cv_has_buffer = true;
+ info.depth.test_enable = true;
+ info.depth.write_enable = true;
+ info.depth.test_func = GEN6_COMPAREFUNCTION_NEVER;
ilo_blitter_set_invariants(blitter);
- ilo_blitter_set_op(blitter, ILO_BLITTER_RECTLIST_RESOLVE_Z);
+ ilo_blitter_set_earlyz_op(blitter,
+ ILO_STATE_RASTER_EARLYZ_DEPTH_RESOLVE, false);
- ilo_blitter_set_dsa(blitter, &dsa_state);
- ilo_blitter_set_clear_values(blitter, s->clear_value, 0);
+ ilo_blitter_set_cc(blitter, &info);
+ ilo_blitter_set_depth_clear_value(blitter, s->clear_value);
ilo_blitter_set_fb_from_resource(blitter, res, res->format, level, slice);
ilo_blitter_set_uses(blitter,
ILO_BLITTER_USE_DSA | ILO_BLITTER_USE_FB_DEPTH);
@@ -458,7 +481,7 @@ ilo_blitter_rectlist_resolve_hiz(struct ilo_blitter *blitter,
unsigned level, unsigned slice)
{
struct ilo_texture *tex = ilo_texture(res);
- struct pipe_depth_stencil_alpha_state dsa_state;
+ struct ilo_state_cc_info info;
if (!ilo_image_can_enable_aux(&tex->image, level))
return;
@@ -470,13 +493,15 @@ ilo_blitter_rectlist_resolve_hiz(struct ilo_blitter *blitter,
* disabled. Depth Buffer Write Enable must be enabled. Stencil Test
* Enable and Stencil Buffer Write Enable must be disabled."
*/
- memset(&dsa_state, 0, sizeof(dsa_state));
- dsa_state.depth.writemask = true;
+ memset(&info, 0, sizeof(info));
+ info.depth.cv_has_buffer = true;
+ info.depth.write_enable = true;
ilo_blitter_set_invariants(blitter);
- ilo_blitter_set_op(blitter, ILO_BLITTER_RECTLIST_RESOLVE_HIZ);
+ ilo_blitter_set_earlyz_op(blitter,
+ ILO_STATE_RASTER_EARLYZ_HIZ_RESOLVE, false);
- ilo_blitter_set_dsa(blitter, &dsa_state);
+ ilo_blitter_set_cc(blitter, &info);
ilo_blitter_set_fb_from_resource(blitter, res, res->format, level, slice);
ilo_blitter_set_uses(blitter,
ILO_BLITTER_USE_DSA | ILO_BLITTER_USE_FB_DEPTH);
diff --git a/src/gallium/drivers/ilo/ilo_draw.c b/src/gallium/drivers/ilo/ilo_draw.c
index fc91fd312d2..e8e1a4cd14c 100644
--- a/src/gallium/drivers/ilo/ilo_draw.c
+++ b/src/gallium/drivers/ilo/ilo_draw.c
@@ -452,12 +452,12 @@ draw_vbo_with_sw_restart(struct ilo_context *ilo,
} u;
/* we will draw with IB mapped */
- if (ib->buffer) {
- u.ptr = intel_bo_map(ilo_buffer(ib->buffer)->bo, false);
+ if (ib->state.buffer) {
+ u.ptr = intel_bo_map(ilo_buffer(ib->state.buffer)->bo, false);
if (u.ptr)
- u.u8 += ib->offset;
+ u.u8 += ib->state.offset;
} else {
- u.ptr = ib->user_buffer;
+ u.ptr = ib->state.user_buffer;
}
if (!u.ptr)
@@ -483,7 +483,7 @@ draw_vbo_with_sw_restart(struct ilo_context *ilo,
(pipe)->draw_vbo(pipe, &subinfo); \
} while (0)
- switch (ib->index_size) {
+ switch (ib->state.index_size) {
case 1:
DRAW_VBO_WITH_SW_RESTART(&ilo->base, info, u.u8);
break;
@@ -500,8 +500,8 @@ draw_vbo_with_sw_restart(struct ilo_context *ilo,
#undef DRAW_VBO_WITH_SW_RESTART
- if (ib->buffer)
- intel_bo_unmap(ilo_buffer(ib->buffer)->bo);
+ if (ib->state.buffer)
+ intel_bo_unmap(ilo_buffer(ib->state.buffer)->bo);
}
static bool
@@ -511,9 +511,9 @@ draw_vbo_need_sw_restart(const struct ilo_context *ilo,
/* the restart index is fixed prior to GEN7.5 */
if (ilo_dev_gen(ilo->dev) < ILO_GEN(7.5)) {
const unsigned cut_index =
- (ilo->state_vector.ib.index_size == 1) ? 0xff :
- (ilo->state_vector.ib.index_size == 2) ? 0xffff :
- (ilo->state_vector.ib.index_size == 4) ? 0xffffffff : 0;
+ (ilo->state_vector.ib.state.index_size == 1) ? 0xff :
+ (ilo->state_vector.ib.state.index_size == 2) ? 0xffff :
+ (ilo->state_vector.ib.state.index_size == 4) ? 0xffffffff : 0;
if (info->restart_index < cut_index)
return true;
diff --git a/src/gallium/drivers/ilo/ilo_format.c b/src/gallium/drivers/ilo/ilo_format.c
new file mode 100644
index 00000000000..ca7e6b55ca1
--- /dev/null
+++ b/src/gallium/drivers/ilo/ilo_format.c
@@ -0,0 +1,356 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2013 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#include "genhw/genhw.h"
+#include "core/ilo_state_surface.h"
+#include "core/ilo_state_vf.h"
+#include "ilo_format.h"
+
+bool
+ilo_format_support_vb(const struct ilo_dev *dev,
+ enum pipe_format format)
+{
+ const int idx = ilo_format_translate(dev, format, PIPE_BIND_VERTEX_BUFFER);
+
+ return (idx >= 0 && ilo_state_vf_valid_element_format(dev, idx));
+}
+
+bool
+ilo_format_support_sol(const struct ilo_dev *dev,
+ enum pipe_format format)
+{
+ const int idx = ilo_format_translate(dev, format, PIPE_BIND_STREAM_OUTPUT);
+
+ return (idx >= 0 && ilo_state_surface_valid_format(dev,
+ ILO_STATE_SURFACE_ACCESS_DP_SVB, idx));
+}
+
+bool
+ilo_format_support_sampler(const struct ilo_dev *dev,
+ enum pipe_format format)
+{
+ const int idx = ilo_format_translate(dev, format, PIPE_BIND_SAMPLER_VIEW);
+
+ return (idx >= 0 && ilo_state_surface_valid_format(dev,
+ ILO_STATE_SURFACE_ACCESS_SAMPLER, idx));
+}
+
+bool
+ilo_format_support_rt(const struct ilo_dev *dev,
+ enum pipe_format format)
+{
+ const int idx = ilo_format_translate(dev, format, PIPE_BIND_RENDER_TARGET);
+
+ return (idx >= 0 && ilo_state_surface_valid_format(dev,
+ ILO_STATE_SURFACE_ACCESS_DP_RENDER, idx));
+}
+
+bool
+ilo_format_support_zs(const struct ilo_dev *dev,
+ enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z32_FLOAT:
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ return true;
+ case PIPE_FORMAT_S8_UINT:
+ /* TODO separate stencil */
+ default:
+ return false;
+ }
+}
+
+/**
+ * Translate a color (non-depth/stencil) pipe format to the matching hardware
+ * format. Return -1 on errors.
+ */
+int
+ilo_format_translate_color(const struct ilo_dev *dev,
+ enum pipe_format format)
+{
+ static const int format_mapping[PIPE_FORMAT_COUNT] = {
+ [PIPE_FORMAT_NONE] = 0,
+ [PIPE_FORMAT_B8G8R8A8_UNORM] = GEN6_FORMAT_B8G8R8A8_UNORM,
+ [PIPE_FORMAT_B8G8R8X8_UNORM] = GEN6_FORMAT_B8G8R8X8_UNORM,
+ [PIPE_FORMAT_A8R8G8B8_UNORM] = 0,
+ [PIPE_FORMAT_X8R8G8B8_UNORM] = 0,
+ [PIPE_FORMAT_B5G5R5A1_UNORM] = GEN6_FORMAT_B5G5R5A1_UNORM,
+ [PIPE_FORMAT_B4G4R4A4_UNORM] = GEN6_FORMAT_B4G4R4A4_UNORM,
+ [PIPE_FORMAT_B5G6R5_UNORM] = GEN6_FORMAT_B5G6R5_UNORM,
+ [PIPE_FORMAT_R10G10B10A2_UNORM] = GEN6_FORMAT_R10G10B10A2_UNORM,
+ [PIPE_FORMAT_L8_UNORM] = GEN6_FORMAT_L8_UNORM,
+ [PIPE_FORMAT_A8_UNORM] = GEN6_FORMAT_A8_UNORM,
+ [PIPE_FORMAT_I8_UNORM] = GEN6_FORMAT_I8_UNORM,
+ [PIPE_FORMAT_L8A8_UNORM] = GEN6_FORMAT_L8A8_UNORM,
+ [PIPE_FORMAT_L16_UNORM] = GEN6_FORMAT_L16_UNORM,
+ [PIPE_FORMAT_UYVY] = GEN6_FORMAT_YCRCB_SWAPUVY,
+ [PIPE_FORMAT_YUYV] = GEN6_FORMAT_YCRCB_NORMAL,
+ [PIPE_FORMAT_Z16_UNORM] = 0,
+ [PIPE_FORMAT_Z32_UNORM] = 0,
+ [PIPE_FORMAT_Z32_FLOAT] = 0,
+ [PIPE_FORMAT_Z24_UNORM_S8_UINT] = 0,
+ [PIPE_FORMAT_S8_UINT_Z24_UNORM] = 0,
+ [PIPE_FORMAT_Z24X8_UNORM] = 0,
+ [PIPE_FORMAT_X8Z24_UNORM] = 0,
+ [PIPE_FORMAT_S8_UINT] = 0,
+ [PIPE_FORMAT_R64_FLOAT] = GEN6_FORMAT_R64_FLOAT,
+ [PIPE_FORMAT_R64G64_FLOAT] = GEN6_FORMAT_R64G64_FLOAT,
+ [PIPE_FORMAT_R64G64B64_FLOAT] = GEN6_FORMAT_R64G64B64_FLOAT,
+ [PIPE_FORMAT_R64G64B64A64_FLOAT] = GEN6_FORMAT_R64G64B64A64_FLOAT,
+ [PIPE_FORMAT_R32_FLOAT] = GEN6_FORMAT_R32_FLOAT,
+ [PIPE_FORMAT_R32G32_FLOAT] = GEN6_FORMAT_R32G32_FLOAT,
+ [PIPE_FORMAT_R32G32B32_FLOAT] = GEN6_FORMAT_R32G32B32_FLOAT,
+ [PIPE_FORMAT_R32G32B32A32_FLOAT] = GEN6_FORMAT_R32G32B32A32_FLOAT,
+ [PIPE_FORMAT_R32_UNORM] = GEN6_FORMAT_R32_UNORM,
+ [PIPE_FORMAT_R32G32_UNORM] = GEN6_FORMAT_R32G32_UNORM,
+ [PIPE_FORMAT_R32G32B32_UNORM] = GEN6_FORMAT_R32G32B32_UNORM,
+ [PIPE_FORMAT_R32G32B32A32_UNORM] = GEN6_FORMAT_R32G32B32A32_UNORM,
+ [PIPE_FORMAT_R32_USCALED] = GEN6_FORMAT_R32_USCALED,
+ [PIPE_FORMAT_R32G32_USCALED] = GEN6_FORMAT_R32G32_USCALED,
+ [PIPE_FORMAT_R32G32B32_USCALED] = GEN6_FORMAT_R32G32B32_USCALED,
+ [PIPE_FORMAT_R32G32B32A32_USCALED] = GEN6_FORMAT_R32G32B32A32_USCALED,
+ [PIPE_FORMAT_R32_SNORM] = GEN6_FORMAT_R32_SNORM,
+ [PIPE_FORMAT_R32G32_SNORM] = GEN6_FORMAT_R32G32_SNORM,
+ [PIPE_FORMAT_R32G32B32_SNORM] = GEN6_FORMAT_R32G32B32_SNORM,
+ [PIPE_FORMAT_R32G32B32A32_SNORM] = GEN6_FORMAT_R32G32B32A32_SNORM,
+ [PIPE_FORMAT_R32_SSCALED] = GEN6_FORMAT_R32_SSCALED,
+ [PIPE_FORMAT_R32G32_SSCALED] = GEN6_FORMAT_R32G32_SSCALED,
+ [PIPE_FORMAT_R32G32B32_SSCALED] = GEN6_FORMAT_R32G32B32_SSCALED,
+ [PIPE_FORMAT_R32G32B32A32_SSCALED] = GEN6_FORMAT_R32G32B32A32_SSCALED,
+ [PIPE_FORMAT_R16_UNORM] = GEN6_FORMAT_R16_UNORM,
+ [PIPE_FORMAT_R16G16_UNORM] = GEN6_FORMAT_R16G16_UNORM,
+ [PIPE_FORMAT_R16G16B16_UNORM] = GEN6_FORMAT_R16G16B16_UNORM,
+ [PIPE_FORMAT_R16G16B16A16_UNORM] = GEN6_FORMAT_R16G16B16A16_UNORM,
+ [PIPE_FORMAT_R16_USCALED] = GEN6_FORMAT_R16_USCALED,
+ [PIPE_FORMAT_R16G16_USCALED] = GEN6_FORMAT_R16G16_USCALED,
+ [PIPE_FORMAT_R16G16B16_USCALED] = GEN6_FORMAT_R16G16B16_USCALED,
+ [PIPE_FORMAT_R16G16B16A16_USCALED] = GEN6_FORMAT_R16G16B16A16_USCALED,
+ [PIPE_FORMAT_R16_SNORM] = GEN6_FORMAT_R16_SNORM,
+ [PIPE_FORMAT_R16G16_SNORM] = GEN6_FORMAT_R16G16_SNORM,
+ [PIPE_FORMAT_R16G16B16_SNORM] = GEN6_FORMAT_R16G16B16_SNORM,
+ [PIPE_FORMAT_R16G16B16A16_SNORM] = GEN6_FORMAT_R16G16B16A16_SNORM,
+ [PIPE_FORMAT_R16_SSCALED] = GEN6_FORMAT_R16_SSCALED,
+ [PIPE_FORMAT_R16G16_SSCALED] = GEN6_FORMAT_R16G16_SSCALED,
+ [PIPE_FORMAT_R16G16B16_SSCALED] = GEN6_FORMAT_R16G16B16_SSCALED,
+ [PIPE_FORMAT_R16G16B16A16_SSCALED] = GEN6_FORMAT_R16G16B16A16_SSCALED,
+ [PIPE_FORMAT_R8_UNORM] = GEN6_FORMAT_R8_UNORM,
+ [PIPE_FORMAT_R8G8_UNORM] = GEN6_FORMAT_R8G8_UNORM,
+ [PIPE_FORMAT_R8G8B8_UNORM] = GEN6_FORMAT_R8G8B8_UNORM,
+ [PIPE_FORMAT_R8G8B8A8_UNORM] = GEN6_FORMAT_R8G8B8A8_UNORM,
+ [PIPE_FORMAT_X8B8G8R8_UNORM] = 0,
+ [PIPE_FORMAT_R8_USCALED] = GEN6_FORMAT_R8_USCALED,
+ [PIPE_FORMAT_R8G8_USCALED] = GEN6_FORMAT_R8G8_USCALED,
+ [PIPE_FORMAT_R8G8B8_USCALED] = GEN6_FORMAT_R8G8B8_USCALED,
+ [PIPE_FORMAT_R8G8B8A8_USCALED] = GEN6_FORMAT_R8G8B8A8_USCALED,
+ [PIPE_FORMAT_R8_SNORM] = GEN6_FORMAT_R8_SNORM,
+ [PIPE_FORMAT_R8G8_SNORM] = GEN6_FORMAT_R8G8_SNORM,
+ [PIPE_FORMAT_R8G8B8_SNORM] = GEN6_FORMAT_R8G8B8_SNORM,
+ [PIPE_FORMAT_R8G8B8A8_SNORM] = GEN6_FORMAT_R8G8B8A8_SNORM,
+ [PIPE_FORMAT_R8_SSCALED] = GEN6_FORMAT_R8_SSCALED,
+ [PIPE_FORMAT_R8G8_SSCALED] = GEN6_FORMAT_R8G8_SSCALED,
+ [PIPE_FORMAT_R8G8B8_SSCALED] = GEN6_FORMAT_R8G8B8_SSCALED,
+ [PIPE_FORMAT_R8G8B8A8_SSCALED] = GEN6_FORMAT_R8G8B8A8_SSCALED,
+ [PIPE_FORMAT_R32_FIXED] = GEN6_FORMAT_R32_SFIXED,
+ [PIPE_FORMAT_R32G32_FIXED] = GEN6_FORMAT_R32G32_SFIXED,
+ [PIPE_FORMAT_R32G32B32_FIXED] = GEN6_FORMAT_R32G32B32_SFIXED,
+ [PIPE_FORMAT_R32G32B32A32_FIXED] = GEN6_FORMAT_R32G32B32A32_SFIXED,
+ [PIPE_FORMAT_R16_FLOAT] = GEN6_FORMAT_R16_FLOAT,
+ [PIPE_FORMAT_R16G16_FLOAT] = GEN6_FORMAT_R16G16_FLOAT,
+ [PIPE_FORMAT_R16G16B16_FLOAT] = GEN6_FORMAT_R16G16B16_FLOAT,
+ [PIPE_FORMAT_R16G16B16A16_FLOAT] = GEN6_FORMAT_R16G16B16A16_FLOAT,
+ [PIPE_FORMAT_L8_SRGB] = GEN6_FORMAT_L8_UNORM_SRGB,
+ [PIPE_FORMAT_L8A8_SRGB] = GEN6_FORMAT_L8A8_UNORM_SRGB,
+ [PIPE_FORMAT_R8G8B8_SRGB] = GEN6_FORMAT_R8G8B8_UNORM_SRGB,
+ [PIPE_FORMAT_A8B8G8R8_SRGB] = 0,
+ [PIPE_FORMAT_X8B8G8R8_SRGB] = 0,
+ [PIPE_FORMAT_B8G8R8A8_SRGB] = GEN6_FORMAT_B8G8R8A8_UNORM_SRGB,
+ [PIPE_FORMAT_B8G8R8X8_SRGB] = GEN6_FORMAT_B8G8R8X8_UNORM_SRGB,
+ [PIPE_FORMAT_A8R8G8B8_SRGB] = 0,
+ [PIPE_FORMAT_X8R8G8B8_SRGB] = 0,
+ [PIPE_FORMAT_R8G8B8A8_SRGB] = GEN6_FORMAT_R8G8B8A8_UNORM_SRGB,
+ [PIPE_FORMAT_DXT1_RGB] = GEN6_FORMAT_DXT1_RGB,
+ [PIPE_FORMAT_DXT1_RGBA] = GEN6_FORMAT_BC1_UNORM,
+ [PIPE_FORMAT_DXT3_RGBA] = GEN6_FORMAT_BC2_UNORM,
+ [PIPE_FORMAT_DXT5_RGBA] = GEN6_FORMAT_BC3_UNORM,
+ [PIPE_FORMAT_DXT1_SRGB] = GEN6_FORMAT_DXT1_RGB_SRGB,
+ [PIPE_FORMAT_DXT1_SRGBA] = GEN6_FORMAT_BC1_UNORM_SRGB,
+ [PIPE_FORMAT_DXT3_SRGBA] = GEN6_FORMAT_BC2_UNORM_SRGB,
+ [PIPE_FORMAT_DXT5_SRGBA] = GEN6_FORMAT_BC3_UNORM_SRGB,
+ [PIPE_FORMAT_RGTC1_UNORM] = GEN6_FORMAT_BC4_UNORM,
+ [PIPE_FORMAT_RGTC1_SNORM] = GEN6_FORMAT_BC4_SNORM,
+ [PIPE_FORMAT_RGTC2_UNORM] = GEN6_FORMAT_BC5_UNORM,
+ [PIPE_FORMAT_RGTC2_SNORM] = GEN6_FORMAT_BC5_SNORM,
+ [PIPE_FORMAT_R8G8_B8G8_UNORM] = 0,
+ [PIPE_FORMAT_G8R8_G8B8_UNORM] = 0,
+ [PIPE_FORMAT_R8SG8SB8UX8U_NORM] = 0,
+ [PIPE_FORMAT_R5SG5SB6U_NORM] = 0,
+ [PIPE_FORMAT_A8B8G8R8_UNORM] = 0,
+ [PIPE_FORMAT_B5G5R5X1_UNORM] = GEN6_FORMAT_B5G5R5X1_UNORM,
+ [PIPE_FORMAT_R10G10B10A2_USCALED] = GEN6_FORMAT_R10G10B10A2_USCALED,
+ [PIPE_FORMAT_R11G11B10_FLOAT] = GEN6_FORMAT_R11G11B10_FLOAT,
+ [PIPE_FORMAT_R9G9B9E5_FLOAT] = GEN6_FORMAT_R9G9B9E5_SHAREDEXP,
+ [PIPE_FORMAT_Z32_FLOAT_S8X24_UINT] = 0,
+ [PIPE_FORMAT_R1_UNORM] = GEN6_FORMAT_R1_UNORM,
+ [PIPE_FORMAT_R10G10B10X2_USCALED] = GEN6_FORMAT_R10G10B10X2_USCALED,
+ [PIPE_FORMAT_R10G10B10X2_SNORM] = 0,
+ [PIPE_FORMAT_L4A4_UNORM] = 0,
+ [PIPE_FORMAT_B10G10R10A2_UNORM] = GEN6_FORMAT_B10G10R10A2_UNORM,
+ [PIPE_FORMAT_R10SG10SB10SA2U_NORM] = 0,
+ [PIPE_FORMAT_R8G8Bx_SNORM] = 0,
+ [PIPE_FORMAT_R8G8B8X8_UNORM] = GEN6_FORMAT_R8G8B8X8_UNORM,
+ [PIPE_FORMAT_B4G4R4X4_UNORM] = 0,
+ [PIPE_FORMAT_X24S8_UINT] = 0,
+ [PIPE_FORMAT_S8X24_UINT] = 0,
+ [PIPE_FORMAT_X32_S8X24_UINT] = 0,
+ [PIPE_FORMAT_B2G3R3_UNORM] = 0,
+ [PIPE_FORMAT_L16A16_UNORM] = GEN6_FORMAT_L16A16_UNORM,
+ [PIPE_FORMAT_A16_UNORM] = GEN6_FORMAT_A16_UNORM,
+ [PIPE_FORMAT_I16_UNORM] = GEN6_FORMAT_I16_UNORM,
+ [PIPE_FORMAT_LATC1_UNORM] = 0,
+ [PIPE_FORMAT_LATC1_SNORM] = 0,
+ [PIPE_FORMAT_LATC2_UNORM] = 0,
+ [PIPE_FORMAT_LATC2_SNORM] = 0,
+ [PIPE_FORMAT_A8_SNORM] = 0,
+ [PIPE_FORMAT_L8_SNORM] = 0,
+ [PIPE_FORMAT_L8A8_SNORM] = 0,
+ [PIPE_FORMAT_I8_SNORM] = 0,
+ [PIPE_FORMAT_A16_SNORM] = 0,
+ [PIPE_FORMAT_L16_SNORM] = 0,
+ [PIPE_FORMAT_L16A16_SNORM] = 0,
+ [PIPE_FORMAT_I16_SNORM] = 0,
+ [PIPE_FORMAT_A16_FLOAT] = GEN6_FORMAT_A16_FLOAT,
+ [PIPE_FORMAT_L16_FLOAT] = GEN6_FORMAT_L16_FLOAT,
+ [PIPE_FORMAT_L16A16_FLOAT] = GEN6_FORMAT_L16A16_FLOAT,
+ [PIPE_FORMAT_I16_FLOAT] = GEN6_FORMAT_I16_FLOAT,
+ [PIPE_FORMAT_A32_FLOAT] = GEN6_FORMAT_A32_FLOAT,
+ [PIPE_FORMAT_L32_FLOAT] = GEN6_FORMAT_L32_FLOAT,
+ [PIPE_FORMAT_L32A32_FLOAT] = GEN6_FORMAT_L32A32_FLOAT,
+ [PIPE_FORMAT_I32_FLOAT] = GEN6_FORMAT_I32_FLOAT,
+ [PIPE_FORMAT_YV12] = 0,
+ [PIPE_FORMAT_YV16] = 0,
+ [PIPE_FORMAT_IYUV] = 0,
+ [PIPE_FORMAT_NV12] = 0,
+ [PIPE_FORMAT_NV21] = 0,
+ [PIPE_FORMAT_A4R4_UNORM] = 0,
+ [PIPE_FORMAT_R4A4_UNORM] = 0,
+ [PIPE_FORMAT_R8A8_UNORM] = 0,
+ [PIPE_FORMAT_A8R8_UNORM] = 0,
+ [PIPE_FORMAT_R10G10B10A2_SSCALED] = GEN6_FORMAT_R10G10B10A2_SSCALED,
+ [PIPE_FORMAT_R10G10B10A2_SNORM] = GEN6_FORMAT_R10G10B10A2_SNORM,
+ [PIPE_FORMAT_B10G10R10A2_USCALED] = GEN6_FORMAT_B10G10R10A2_USCALED,
+ [PIPE_FORMAT_B10G10R10A2_SSCALED] = GEN6_FORMAT_B10G10R10A2_SSCALED,
+ [PIPE_FORMAT_B10G10R10A2_SNORM] = GEN6_FORMAT_B10G10R10A2_SNORM,
+ [PIPE_FORMAT_R8_UINT] = GEN6_FORMAT_R8_UINT,
+ [PIPE_FORMAT_R8G8_UINT] = GEN6_FORMAT_R8G8_UINT,
+ [PIPE_FORMAT_R8G8B8_UINT] = GEN6_FORMAT_R8G8B8_UINT,
+ [PIPE_FORMAT_R8G8B8A8_UINT] = GEN6_FORMAT_R8G8B8A8_UINT,
+ [PIPE_FORMAT_R8_SINT] = GEN6_FORMAT_R8_SINT,
+ [PIPE_FORMAT_R8G8_SINT] = GEN6_FORMAT_R8G8_SINT,
+ [PIPE_FORMAT_R8G8B8_SINT] = GEN6_FORMAT_R8G8B8_SINT,
+ [PIPE_FORMAT_R8G8B8A8_SINT] = GEN6_FORMAT_R8G8B8A8_SINT,
+ [PIPE_FORMAT_R16_UINT] = GEN6_FORMAT_R16_UINT,
+ [PIPE_FORMAT_R16G16_UINT] = GEN6_FORMAT_R16G16_UINT,
+ [PIPE_FORMAT_R16G16B16_UINT] = GEN6_FORMAT_R16G16B16_UINT,
+ [PIPE_FORMAT_R16G16B16A16_UINT] = GEN6_FORMAT_R16G16B16A16_UINT,
+ [PIPE_FORMAT_R16_SINT] = GEN6_FORMAT_R16_SINT,
+ [PIPE_FORMAT_R16G16_SINT] = GEN6_FORMAT_R16G16_SINT,
+ [PIPE_FORMAT_R16G16B16_SINT] = GEN6_FORMAT_R16G16B16_SINT,
+ [PIPE_FORMAT_R16G16B16A16_SINT] = GEN6_FORMAT_R16G16B16A16_SINT,
+ [PIPE_FORMAT_R32_UINT] = GEN6_FORMAT_R32_UINT,
+ [PIPE_FORMAT_R32G32_UINT] = GEN6_FORMAT_R32G32_UINT,
+ [PIPE_FORMAT_R32G32B32_UINT] = GEN6_FORMAT_R32G32B32_UINT,
+ [PIPE_FORMAT_R32G32B32A32_UINT] = GEN6_FORMAT_R32G32B32A32_UINT,
+ [PIPE_FORMAT_R32_SINT] = GEN6_FORMAT_R32_SINT,
+ [PIPE_FORMAT_R32G32_SINT] = GEN6_FORMAT_R32G32_SINT,
+ [PIPE_FORMAT_R32G32B32_SINT] = GEN6_FORMAT_R32G32B32_SINT,
+ [PIPE_FORMAT_R32G32B32A32_SINT] = GEN6_FORMAT_R32G32B32A32_SINT,
+ [PIPE_FORMAT_A8_UINT] = 0,
+ [PIPE_FORMAT_I8_UINT] = GEN6_FORMAT_I8_UINT,
+ [PIPE_FORMAT_L8_UINT] = GEN6_FORMAT_L8_UINT,
+ [PIPE_FORMAT_L8A8_UINT] = GEN6_FORMAT_L8A8_UINT,
+ [PIPE_FORMAT_A8_SINT] = 0,
+ [PIPE_FORMAT_I8_SINT] = GEN6_FORMAT_I8_SINT,
+ [PIPE_FORMAT_L8_SINT] = GEN6_FORMAT_L8_SINT,
+ [PIPE_FORMAT_L8A8_SINT] = GEN6_FORMAT_L8A8_SINT,
+ [PIPE_FORMAT_A16_UINT] = 0,
+ [PIPE_FORMAT_I16_UINT] = 0,
+ [PIPE_FORMAT_L16_UINT] = 0,
+ [PIPE_FORMAT_L16A16_UINT] = 0,
+ [PIPE_FORMAT_A16_SINT] = 0,
+ [PIPE_FORMAT_I16_SINT] = 0,
+ [PIPE_FORMAT_L16_SINT] = 0,
+ [PIPE_FORMAT_L16A16_SINT] = 0,
+ [PIPE_FORMAT_A32_UINT] = 0,
+ [PIPE_FORMAT_I32_UINT] = 0,
+ [PIPE_FORMAT_L32_UINT] = 0,
+ [PIPE_FORMAT_L32A32_UINT] = 0,
+ [PIPE_FORMAT_A32_SINT] = 0,
+ [PIPE_FORMAT_I32_SINT] = 0,
+ [PIPE_FORMAT_L32_SINT] = 0,
+ [PIPE_FORMAT_L32A32_SINT] = 0,
+ [PIPE_FORMAT_B10G10R10A2_UINT] = GEN6_FORMAT_B10G10R10A2_UINT,
+ [PIPE_FORMAT_ETC1_RGB8] = GEN6_FORMAT_ETC1_RGB8,
+ [PIPE_FORMAT_R8G8_R8B8_UNORM] = 0,
+ [PIPE_FORMAT_G8R8_B8R8_UNORM] = 0,
+ [PIPE_FORMAT_R8G8B8X8_SNORM] = 0,
+ [PIPE_FORMAT_R8G8B8X8_SRGB] = 0,
+ [PIPE_FORMAT_R8G8B8X8_UINT] = 0,
+ [PIPE_FORMAT_R8G8B8X8_SINT] = 0,
+ [PIPE_FORMAT_B10G10R10X2_UNORM] = GEN6_FORMAT_B10G10R10X2_UNORM,
+ [PIPE_FORMAT_R16G16B16X16_UNORM] = GEN6_FORMAT_R16G16B16X16_UNORM,
+ [PIPE_FORMAT_R16G16B16X16_SNORM] = 0,
+ [PIPE_FORMAT_R16G16B16X16_FLOAT] = GEN6_FORMAT_R16G16B16X16_FLOAT,
+ [PIPE_FORMAT_R16G16B16X16_UINT] = 0,
+ [PIPE_FORMAT_R16G16B16X16_SINT] = 0,
+ [PIPE_FORMAT_R32G32B32X32_FLOAT] = GEN6_FORMAT_R32G32B32X32_FLOAT,
+ [PIPE_FORMAT_R32G32B32X32_UINT] = 0,
+ [PIPE_FORMAT_R32G32B32X32_SINT] = 0,
+ [PIPE_FORMAT_R8A8_SNORM] = 0,
+ [PIPE_FORMAT_R16A16_UNORM] = 0,
+ [PIPE_FORMAT_R16A16_SNORM] = 0,
+ [PIPE_FORMAT_R16A16_FLOAT] = 0,
+ [PIPE_FORMAT_R32A32_FLOAT] = 0,
+ [PIPE_FORMAT_R8A8_UINT] = 0,
+ [PIPE_FORMAT_R8A8_SINT] = 0,
+ [PIPE_FORMAT_R16A16_UINT] = 0,
+ [PIPE_FORMAT_R16A16_SINT] = 0,
+ [PIPE_FORMAT_R32A32_UINT] = 0,
+ [PIPE_FORMAT_R32A32_SINT] = 0,
+ [PIPE_FORMAT_R10G10B10A2_UINT] = GEN6_FORMAT_R10G10B10A2_UINT,
+ [PIPE_FORMAT_B5G6R5_SRGB] = GEN6_FORMAT_B5G6R5_UNORM_SRGB,
+ };
+ int sfmt = format_mapping[format];
+
+ /* GEN6_FORMAT_R32G32B32A32_FLOAT happens to be 0 */
+ if (!sfmt && format != PIPE_FORMAT_R32G32B32A32_FLOAT)
+ sfmt = -1;
+
+ return sfmt;
+}
diff --git a/src/gallium/drivers/ilo/core/ilo_format.h b/src/gallium/drivers/ilo/ilo_format.h
index 6b73ea1dad7..4e955c09c14 100644
--- a/src/gallium/drivers/ilo/core/ilo_format.h
+++ b/src/gallium/drivers/ilo/ilo_format.h
@@ -29,8 +29,8 @@
#define ILO_FORMAT_H
#include "genhw/genhw.h"
-#include "ilo_core.h"
-#include "ilo_dev.h"
+
+#include "ilo_common.h"
bool
ilo_format_support_vb(const struct ilo_dev *dev,
diff --git a/src/gallium/drivers/ilo/ilo_render.c b/src/gallium/drivers/ilo/ilo_render.c
index f5be3360f05..21f75de11a0 100644
--- a/src/gallium/drivers/ilo/ilo_render.c
+++ b/src/gallium/drivers/ilo/ilo_render.c
@@ -35,76 +35,10 @@
#include "ilo_query.h"
#include "ilo_render_gen.h"
-/* in S1.3 */
-struct sample_position {
- int8_t x, y;
-};
-
-static const struct sample_position ilo_sample_pattern_1x[1] = {
- { 0, 0 },
-};
-
-static const struct sample_position ilo_sample_pattern_2x[2] = {
- { -4, -4 },
- { 4, 4 },
-};
-
-static const struct sample_position ilo_sample_pattern_4x[4] = {
- { -2, -6 },
- { 6, -2 },
- { -6, 2 },
- { 2, 6 },
-};
-
-/* \see brw_multisample_positions_8x */
-static const struct sample_position ilo_sample_pattern_8x[8] = {
- { -1, 1 },
- { 1, 5 },
- { 3, -5 },
- { 5, 3 },
- { -7, -1 },
- { -3, -7 },
- { 7, -3 },
- { -5, 7 },
-};
-
-static const struct sample_position ilo_sample_pattern_16x[16] = {
- { 0, 2 },
- { 3, 0 },
- { -3, -2 },
- { -2, -4 },
- { 4, 3 },
- { 5, 1 },
- { 6, -1 },
- { 2, -6 },
- { -4, 5 },
- { -5, -5 },
- { -1, -7 },
- { 7, -3 },
- { -7, 4 },
- { 1, -8 },
- { -6, 6 },
- { -8, 7 },
-};
-
-static uint8_t
-pack_sample_position(const struct sample_position *pos)
-{
- return (pos->x + 8) << 4 | (pos->y + 8);
-}
-
-static void
-get_sample_position(const struct sample_position *pos, float *x, float *y)
-{
- *x = (float) (pos->x + 8) / 16.0f;
- *y = (float) (pos->y + 8) / 16.0f;
-}
-
struct ilo_render *
ilo_render_create(struct ilo_builder *builder)
{
struct ilo_render *render;
- int i;
render = CALLOC_STRUCT(ilo_render);
if (!render)
@@ -121,29 +55,8 @@ ilo_render_create(struct ilo_builder *builder)
return NULL;
}
- /* pack into dwords */
- render->sample_pattern_1x = pack_sample_position(ilo_sample_pattern_1x);
- render->sample_pattern_2x =
- pack_sample_position(&ilo_sample_pattern_2x[1]) << 8 |
- pack_sample_position(&ilo_sample_pattern_2x[0]);
- for (i = 0; i < 4; i++) {
- render->sample_pattern_4x |=
- pack_sample_position(&ilo_sample_pattern_4x[i]) << (8 * i);
-
- render->sample_pattern_8x[0] |=
- pack_sample_position(&ilo_sample_pattern_8x[i]) << (8 * i);
- render->sample_pattern_8x[1] |=
- pack_sample_position(&ilo_sample_pattern_8x[i + 4]) << (8 * i);
-
- render->sample_pattern_16x[0] |=
- pack_sample_position(&ilo_sample_pattern_16x[i]) << (8 * i);
- render->sample_pattern_16x[1] |=
- pack_sample_position(&ilo_sample_pattern_16x[i + 4]) << (8 * i);
- render->sample_pattern_16x[2] |=
- pack_sample_position(&ilo_sample_pattern_16x[i + 8]) << (8 * i);
- render->sample_pattern_16x[3] |=
- pack_sample_position(&ilo_sample_pattern_16x[i + 12]) << (8 * i);
- }
+ ilo_state_sample_pattern_init_default(&render->sample_pattern,
+ render->dev);
ilo_render_invalidate_hw(render);
ilo_render_invalidate_builder(render);
@@ -164,38 +77,13 @@ ilo_render_get_sample_position(const struct ilo_render *render,
unsigned sample_index,
float *x, float *y)
{
- const struct sample_position *pattern;
+ uint8_t off_x, off_y;
- switch (sample_count) {
- case 1:
- assert(sample_index < Elements(ilo_sample_pattern_1x));
- pattern = ilo_sample_pattern_1x;
- break;
- case 2:
- assert(sample_index < Elements(ilo_sample_pattern_2x));
- pattern = ilo_sample_pattern_2x;
- break;
- case 4:
- assert(sample_index < Elements(ilo_sample_pattern_4x));
- pattern = ilo_sample_pattern_4x;
- break;
- case 8:
- assert(sample_index < Elements(ilo_sample_pattern_8x));
- pattern = ilo_sample_pattern_8x;
- break;
- case 16:
- assert(sample_index < Elements(ilo_sample_pattern_16x));
- pattern = ilo_sample_pattern_16x;
- break;
- default:
- assert(!"unknown sample count");
- *x = 0.5f;
- *y = 0.5f;
- return;
- break;
- }
+ ilo_state_sample_pattern_get_offset(&render->sample_pattern, render->dev,
+ sample_count, sample_index, &off_x, &off_y);
- get_sample_position(&pattern[sample_index], x, y);
+ *x = (float) off_x / 16.0f;
+ *y = (float) off_y / 16.0f;
}
void
@@ -446,12 +334,44 @@ draw_session_prepare(struct ilo_render *render,
render->instruction_bo_changed = true;
session->prim_changed = true;
- session->primitive_restart_changed = true;
+
+ ilo_state_urb_full_delta(&vec->urb, render->dev, &session->urb_delta);
+ ilo_state_vf_full_delta(&vec->ve->vf, render->dev, &session->vf_delta);
+
+ ilo_state_raster_full_delta(&vec->rasterizer->rs, render->dev,
+ &session->rs_delta);
+
+ ilo_state_viewport_full_delta(&vec->viewport.vp, render->dev,
+ &session->vp_delta);
+
+ ilo_state_cc_full_delta(&vec->blend->cc, render->dev,
+ &session->cc_delta);
} else {
session->prim_changed =
(render->state.reduced_prim != session->reduced_prim);
- session->primitive_restart_changed =
- (render->state.primitive_restart != vec->draw->primitive_restart);
+
+ ilo_state_urb_get_delta(&vec->urb, render->dev,
+ &render->state.urb, &session->urb_delta);
+
+ if (vec->dirty & ILO_DIRTY_VE) {
+ ilo_state_vf_full_delta(&vec->ve->vf, render->dev,
+ &session->vf_delta);
+ }
+
+ if (vec->dirty & ILO_DIRTY_RASTERIZER) {
+ ilo_state_raster_get_delta(&vec->rasterizer->rs, render->dev,
+ &render->state.rs, &session->rs_delta);
+ }
+
+ if (vec->dirty & ILO_DIRTY_VIEWPORT) {
+ ilo_state_viewport_full_delta(&vec->viewport.vp, render->dev,
+ &session->vp_delta);
+ }
+
+ if (vec->dirty & ILO_DIRTY_BLEND) {
+ ilo_state_cc_get_delta(&vec->blend->cc, render->dev,
+ &render->state.cc, &session->cc_delta);
+ }
}
}
@@ -467,7 +387,10 @@ draw_session_end(struct ilo_render *render,
render->instruction_bo_changed = false;
render->state.reduced_prim = session->reduced_prim;
- render->state.primitive_restart = vec->draw->primitive_restart;
+
+ render->state.urb = vec->urb;
+ render->state.rs = vec->rasterizer->rs;
+ render->state.cc = vec->blend->cc;
}
void
diff --git a/src/gallium/drivers/ilo/ilo_render.h b/src/gallium/drivers/ilo/ilo_render.h
index a85b2800fb1..098af73ec9b 100644
--- a/src/gallium/drivers/ilo/ilo_render.h
+++ b/src/gallium/drivers/ilo/ilo_render.h
@@ -43,9 +43,6 @@ ilo_render_create(struct ilo_builder *builder);
void
ilo_render_destroy(struct ilo_render *render);
-/**
- * Estimate the size of an action.
- */
void
ilo_render_get_sample_position(const struct ilo_render *render,
unsigned sample_count,
diff --git a/src/gallium/drivers/ilo/ilo_render_dynamic.c b/src/gallium/drivers/ilo/ilo_render_dynamic.c
index ef92b12da83..3b4c80227a6 100644
--- a/src/gallium/drivers/ilo/ilo_render_dynamic.c
+++ b/src/gallium/drivers/ilo/ilo_render_dynamic.c
@@ -30,6 +30,7 @@
#include "ilo_common.h"
#include "ilo_blitter.h"
+#include "ilo_shader.h"
#include "ilo_state.h"
#include "ilo_render_gen.h"
@@ -42,16 +43,14 @@ gen6_emit_draw_dynamic_viewports(struct ilo_render *r,
{
ILO_DEV_ASSERT(r->dev, 6, 6);
- /* SF_VIEWPORT, CLIP_VIEWPORT, and CC_VIEWPORT */
- if (DIRTY(VIEWPORT)) {
+ /* CLIP_VIEWPORT, SF_VIEWPORT, and CC_VIEWPORT */
+ if ((session->vp_delta.dirty & (ILO_STATE_VIEWPORT_SF_CLIP_VIEWPORT |
+ ILO_STATE_VIEWPORT_CC_VIEWPORT)) ||
+ r->state_bo_changed) {
r->state.CLIP_VIEWPORT = gen6_CLIP_VIEWPORT(r->builder,
- vec->viewport.cso, vec->viewport.count);
-
- r->state.SF_VIEWPORT = gen6_SF_VIEWPORT(r->builder,
- vec->viewport.cso, vec->viewport.count);
-
- r->state.CC_VIEWPORT = gen6_CC_VIEWPORT(r->builder,
- vec->viewport.cso, vec->viewport.count);
+ &vec->viewport.vp);
+ r->state.SF_VIEWPORT = gen6_SF_VIEWPORT(r->builder, &vec->viewport.vp);
+ r->state.CC_VIEWPORT = gen6_CC_VIEWPORT(r->builder, &vec->viewport.vp);
session->viewport_changed = true;
}
@@ -65,12 +64,12 @@ gen7_emit_draw_dynamic_viewports(struct ilo_render *r,
ILO_DEV_ASSERT(r->dev, 7, 8);
/* SF_CLIP_VIEWPORT and CC_VIEWPORT */
- if (DIRTY(VIEWPORT)) {
+ if ((session->vp_delta.dirty & (ILO_STATE_VIEWPORT_SF_CLIP_VIEWPORT |
+ ILO_STATE_VIEWPORT_CC_VIEWPORT)) ||
+ r->state_bo_changed) {
r->state.SF_CLIP_VIEWPORT = gen7_SF_CLIP_VIEWPORT(r->builder,
- vec->viewport.cso, vec->viewport.count);
-
- r->state.CC_VIEWPORT = gen6_CC_VIEWPORT(r->builder,
- vec->viewport.cso, vec->viewport.count);
+ &vec->viewport.vp);
+ r->state.CC_VIEWPORT = gen6_CC_VIEWPORT(r->builder, &vec->viewport.vp);
session->viewport_changed = true;
}
@@ -84,10 +83,10 @@ gen6_emit_draw_dynamic_scissors(struct ilo_render *r,
ILO_DEV_ASSERT(r->dev, 6, 8);
/* SCISSOR_RECT */
- if (DIRTY(SCISSOR) || DIRTY(VIEWPORT)) {
- /* there should be as many scissors as there are viewports */
+ if ((session->vp_delta.dirty & ILO_STATE_VIEWPORT_SCISSOR_RECT) ||
+ r->state_bo_changed) {
r->state.SCISSOR_RECT = gen6_SCISSOR_RECT(r->builder,
- &vec->scissor, vec->viewport.count);
+ &vec->viewport.vp);
session->scissor_changed = true;
}
@@ -101,32 +100,30 @@ gen6_emit_draw_dynamic_cc(struct ilo_render *r,
ILO_DEV_ASSERT(r->dev, 6, 8);
/* BLEND_STATE */
- if (DIRTY(BLEND) || DIRTY(FB) || DIRTY(DSA)) {
- if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) {
- r->state.BLEND_STATE = gen8_BLEND_STATE(r->builder,
- vec->blend, &vec->fb, vec->dsa);
- } else {
- r->state.BLEND_STATE = gen6_BLEND_STATE(r->builder,
- vec->blend, &vec->fb, vec->dsa);
- }
+ if ((session->cc_delta.dirty & ILO_STATE_CC_BLEND_STATE) ||
+ r->state_bo_changed) {
+ if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
+ r->state.BLEND_STATE = gen8_BLEND_STATE(r->builder, &vec->blend->cc);
+ else
+ r->state.BLEND_STATE = gen6_BLEND_STATE(r->builder, &vec->blend->cc);
session->blend_changed = true;
}
/* COLOR_CALC_STATE */
- if (DIRTY(DSA) || DIRTY(STENCIL_REF) || DIRTY(BLEND_COLOR)) {
+ if ((session->cc_delta.dirty & ILO_STATE_CC_COLOR_CALC_STATE) ||
+ r->state_bo_changed) {
r->state.COLOR_CALC_STATE =
- gen6_COLOR_CALC_STATE(r->builder, &vec->stencil_ref,
- vec->dsa->alpha_ref, &vec->blend_color);
-
+ gen6_COLOR_CALC_STATE(r->builder, &vec->blend->cc);
session->cc_changed = true;
}
/* DEPTH_STENCIL_STATE */
- if (ilo_dev_gen(r->dev) < ILO_GEN(8) && DIRTY(DSA)) {
+ if (ilo_dev_gen(r->dev) < ILO_GEN(8) &&
+ ((session->cc_delta.dirty & ILO_STATE_CC_DEPTH_STENCIL_STATE) ||
+ r->state_bo_changed)) {
r->state.DEPTH_STENCIL_STATE =
- gen6_DEPTH_STENCIL_STATE(r->builder, vec->dsa);
-
+ gen6_DEPTH_STENCIL_STATE(r->builder, &vec->blend->cc);
session->dsa_changed = true;
}
}
@@ -137,12 +134,11 @@ gen6_emit_draw_dynamic_samplers(struct ilo_render *r,
int shader_type,
struct ilo_render_draw_session *session)
{
- const struct ilo_sampler_cso * const *samplers =
- vec->sampler[shader_type].cso;
- const struct pipe_sampler_view * const *views =
- (const struct pipe_sampler_view **) vec->view[shader_type].states;
+ const struct ilo_view_cso * const *views =
+ (const struct ilo_view_cso **) vec->view[shader_type].states;
+ struct ilo_state_sampler samplers[ILO_MAX_SAMPLERS];
uint32_t *sampler_state, *border_color_state;
- int sampler_count;
+ int sampler_count, i;
bool emit_border_color = false;
bool skip = false;
@@ -194,16 +190,28 @@ gen6_emit_draw_dynamic_samplers(struct ilo_render *r,
sampler_count <= Elements(vec->sampler[shader_type].cso));
if (emit_border_color) {
- int i;
-
for (i = 0; i < sampler_count; i++) {
- border_color_state[i] = (samplers[i]) ?
- gen6_SAMPLER_BORDER_COLOR_STATE(r->builder, samplers[i]) : 0;
+ const struct ilo_sampler_cso *cso = vec->sampler[shader_type].cso[i];
+
+ border_color_state[i] = (cso) ?
+ gen6_SAMPLER_BORDER_COLOR_STATE(r->builder, &cso->border) : 0;
+ }
+ }
+
+ for (i = 0; i < sampler_count; i++) {
+ const struct ilo_sampler_cso *cso = vec->sampler[shader_type].cso[i];
+
+ if (cso && views[i]) {
+ samplers[i] = cso->sampler;
+ ilo_state_sampler_set_surface(&samplers[i],
+ r->dev, &views[i]->surface);
+ } else {
+ samplers[i] = vec->disabled_sampler;
}
}
- *sampler_state = gen6_SAMPLER_STATE(r->builder,
- samplers, views, border_color_state, sampler_count);
+ *sampler_state = gen6_SAMPLER_STATE(r->builder, samplers,
+ border_color_state, sampler_count);
}
static void
@@ -234,13 +242,13 @@ gen6_emit_draw_dynamic_pcb(struct ilo_render *r,
const struct ilo_cbuf_state *cbuf =
&vec->cbuf[PIPE_SHADER_VERTEX];
- if (cbuf0_size <= cbuf->cso[0].user_buffer_size) {
+ if (cbuf0_size <= cbuf->cso[0].info.size) {
memcpy(pcb, cbuf->cso[0].user_buffer, cbuf0_size);
} else {
memcpy(pcb, cbuf->cso[0].user_buffer,
- cbuf->cso[0].user_buffer_size);
- memset(pcb + cbuf->cso[0].user_buffer_size, 0,
- cbuf0_size - cbuf->cso[0].user_buffer_size);
+ cbuf->cso[0].info.size);
+ memset(pcb + cbuf->cso[0].info.size, 0,
+ cbuf0_size - cbuf->cso[0].info.size);
}
pcb += cbuf0_size;
@@ -271,13 +279,13 @@ gen6_emit_draw_dynamic_pcb(struct ilo_render *r,
gen6_push_constant_buffer(r->builder, cbuf0_size, &pcb);
r->state.wm.PUSH_CONSTANT_BUFFER_size = cbuf0_size;
- if (cbuf0_size <= cbuf->cso[0].user_buffer_size) {
+ if (cbuf0_size <= cbuf->cso[0].info.size) {
memcpy(pcb, cbuf->cso[0].user_buffer, cbuf0_size);
} else {
memcpy(pcb, cbuf->cso[0].user_buffer,
- cbuf->cso[0].user_buffer_size);
- memset(pcb + cbuf->cso[0].user_buffer_size, 0,
- cbuf0_size - cbuf->cso[0].user_buffer_size);
+ cbuf->cso[0].info.size);
+ memset(pcb + cbuf->cso[0].info.size, 0,
+ cbuf0_size - cbuf->cso[0].info.size);
}
session->pcb_fs_changed = true;
@@ -441,18 +449,17 @@ ilo_render_emit_rectlist_dynamic_states(struct ilo_render *render,
if (blitter->uses & ILO_BLITTER_USE_DSA) {
render->state.DEPTH_STENCIL_STATE =
- gen6_DEPTH_STENCIL_STATE(render->builder, &blitter->dsa);
+ gen6_DEPTH_STENCIL_STATE(render->builder, &blitter->cc);
}
if (blitter->uses & ILO_BLITTER_USE_CC) {
render->state.COLOR_CALC_STATE =
- gen6_COLOR_CALC_STATE(render->builder, &blitter->cc.stencil_ref,
- blitter->cc.alpha_ref, &blitter->cc.blend_color);
+ gen6_COLOR_CALC_STATE(render->builder, &blitter->cc);
}
if (blitter->uses & ILO_BLITTER_USE_VIEWPORT) {
render->state.CC_VIEWPORT =
- gen6_CC_VIEWPORT(render->builder, &blitter->viewport, 1);
+ gen6_CC_VIEWPORT(render->builder, &blitter->vp);
}
assert(ilo_builder_dynamic_used(render->builder) <= dynamic_used +
@@ -466,10 +473,9 @@ gen6_emit_launch_grid_dynamic_samplers(struct ilo_render *r,
{
const unsigned shader_type = PIPE_SHADER_COMPUTE;
const struct ilo_shader_state *cs = vec->cs;
- const struct ilo_sampler_cso * const *samplers =
- vec->sampler[shader_type].cso;
- const struct pipe_sampler_view * const *views =
- (const struct pipe_sampler_view **) vec->view[shader_type].states;
+ const struct ilo_view_cso * const *views =
+ (const struct ilo_view_cso **) vec->view[shader_type].states;
+ struct ilo_state_sampler samplers[ILO_MAX_SAMPLERS];
int sampler_count, i;
ILO_DEV_ASSERT(r->dev, 7, 7.5);
@@ -480,11 +486,25 @@ gen6_emit_launch_grid_dynamic_samplers(struct ilo_render *r,
sampler_count <= Elements(vec->sampler[shader_type].cso));
for (i = 0; i < sampler_count; i++) {
- r->state.cs.SAMPLER_BORDER_COLOR_STATE[i] = (samplers[i]) ?
- gen6_SAMPLER_BORDER_COLOR_STATE(r->builder, samplers[i]) : 0;
+ const struct ilo_sampler_cso *cso = vec->sampler[shader_type].cso[i];
+
+ r->state.cs.SAMPLER_BORDER_COLOR_STATE[i] = (cso) ?
+ gen6_SAMPLER_BORDER_COLOR_STATE(r->builder, &cso->border) : 0;
}
- r->state.cs.SAMPLER_STATE = gen6_SAMPLER_STATE(r->builder, samplers, views,
+ for (i = 0; i < sampler_count; i++) {
+ const struct ilo_sampler_cso *cso = vec->sampler[shader_type].cso[i];
+
+ if (cso && views[i]) {
+ samplers[i] = cso->sampler;
+ ilo_state_sampler_set_surface(&samplers[i],
+ r->dev, &views[i]->surface);
+ } else {
+ samplers[i] = vec->disabled_sampler;
+ }
+ }
+
+ r->state.cs.SAMPLER_STATE = gen6_SAMPLER_STATE(r->builder, samplers,
r->state.cs.SAMPLER_BORDER_COLOR_STATE, sampler_count);
}
@@ -503,20 +523,39 @@ gen6_emit_launch_grid_dynamic_idrt(struct ilo_render *r,
struct ilo_render_launch_grid_session *session)
{
const struct ilo_shader_state *cs = vec->cs;
- struct gen6_idrt_data data;
+ struct ilo_state_compute_interface_info interface;
+ struct ilo_state_compute_info info;
+ uint32_t kernel_offset;
ILO_DEV_ASSERT(r->dev, 7, 7.5);
- memset(&data, 0, sizeof(data));
+ memset(&interface, 0, sizeof(interface));
+
+ interface.sampler_count =
+ ilo_shader_get_kernel_param(cs, ILO_KERNEL_SAMPLER_COUNT);
+ interface.surface_count =
+ ilo_shader_get_kernel_param(cs, ILO_KERNEL_SURFACE_TOTAL_COUNT);
+ interface.thread_group_size = session->thread_group_size;
+ interface.slm_size =
+ ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_LOCAL_SIZE);
+ interface.curbe_read_length = r->state.cs.PUSH_CONSTANT_BUFFER_size;
+
+ memset(&info, 0, sizeof(info));
+ info.data = session->compute_data;
+ info.data_size = sizeof(session->compute_data);
+ info.interfaces = &interface;
+ info.interface_count = 1;
+ info.cv_urb_alloc_size = r->dev->urb_size;
+ info.curbe_alloc_size = r->state.cs.PUSH_CONSTANT_BUFFER_size;
+
+ ilo_state_compute_init(&session->compute, r->dev, &info);
- data.cs = cs;
- data.sampler_offset = r->state.cs.SAMPLER_STATE;
- data.binding_table_offset = r->state.cs.BINDING_TABLE_STATE;
+ kernel_offset = ilo_shader_get_kernel_offset(cs);
- data.curbe_size = r->state.cs.PUSH_CONSTANT_BUFFER_size;
- data.thread_group_size = session->thread_group_size;
+ session->idrt = gen6_INTERFACE_DESCRIPTOR_DATA(r->builder,
+ &session->compute, &kernel_offset,
+ &r->state.cs.SAMPLER_STATE, &r->state.cs.BINDING_TABLE_STATE);
- session->idrt = gen6_INTERFACE_DESCRIPTOR_DATA(r->builder, &data, 1);
session->idrt_size = 32;
}
diff --git a/src/gallium/drivers/ilo/ilo_render_gen.h b/src/gallium/drivers/ilo/ilo_render_gen.h
index acfe8be3088..6b133750043 100644
--- a/src/gallium/drivers/ilo/ilo_render_gen.h
+++ b/src/gallium/drivers/ilo/ilo_render_gen.h
@@ -31,6 +31,7 @@
#include "core/ilo_builder.h"
#include "core/ilo_builder_3d.h"
#include "core/ilo_builder_render.h"
+#include "core/ilo_state_raster.h"
#include "ilo_common.h"
#include "ilo_state.h"
@@ -50,11 +51,7 @@ struct ilo_render {
struct intel_bo *workaround_bo;
- uint32_t sample_pattern_1x;
- uint32_t sample_pattern_2x;
- uint32_t sample_pattern_4x;
- uint32_t sample_pattern_8x[2];
- uint32_t sample_pattern_16x[4];
+ struct ilo_state_sample_pattern sample_pattern;
bool hw_ctx_changed;
@@ -85,10 +82,13 @@ struct ilo_render {
*/
uint32_t deferred_pipe_control_dw1;
- bool primitive_restart;
int reduced_prim;
int so_max_vertices;
+ struct ilo_state_urb urb;
+ struct ilo_state_raster rs;
+ struct ilo_state_cc cc;
+
uint32_t SF_VIEWPORT;
uint32_t CLIP_VIEWPORT;
uint32_t SF_CLIP_VIEWPORT; /* GEN7+ */
@@ -142,7 +142,12 @@ struct ilo_render_draw_session {
int reduced_prim;
bool prim_changed;
- bool primitive_restart_changed;
+
+ struct ilo_state_urb_delta urb_delta;
+ struct ilo_state_vf_delta vf_delta;
+ struct ilo_state_raster_delta rs_delta;
+ struct ilo_state_viewport_delta vp_delta;
+ struct ilo_state_cc_delta cc_delta;
/* dynamic states */
bool viewport_changed;
@@ -180,6 +185,9 @@ struct ilo_render_launch_grid_session {
uint32_t idrt;
int idrt_size;
+
+ uint32_t compute_data[6];
+ struct ilo_state_compute compute;
};
int
@@ -381,8 +389,7 @@ ilo_render_pipe_control(struct ilo_render *r, uint32_t dw1)
*/
static inline void
ilo_render_3dprimitive(struct ilo_render *r,
- const struct pipe_draw_info *info,
- const struct ilo_ib_state *ib)
+ const struct gen6_3dprimitive_info *info)
{
ILO_DEV_ASSERT(r->dev, 6, 8);
@@ -391,9 +398,9 @@ ilo_render_3dprimitive(struct ilo_render *r,
/* 3DPRIMITIVE */
if (ilo_dev_gen(r->dev) >= ILO_GEN(7))
- gen7_3DPRIMITIVE(r->builder, info, ib);
+ gen7_3DPRIMITIVE(r->builder, info);
else
- gen6_3DPRIMITIVE(r->builder, info, ib);
+ gen6_3DPRIMITIVE(r->builder, info);
r->state.current_pipe_control_dw1 = 0;
assert(!r->state.deferred_pipe_control_dw1);
diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c
index 47f711e7956..c1f759f3043 100644
--- a/src/gallium/drivers/ilo/ilo_render_gen6.c
+++ b/src/gallium/drivers/ilo/ilo_render_gen6.c
@@ -29,11 +29,11 @@
#include "core/ilo_builder_3d.h"
#include "core/ilo_builder_mi.h"
#include "core/ilo_builder_render.h"
-#include "util/u_dual_blend.h"
#include "util/u_prim.h"
#include "ilo_blitter.h"
#include "ilo_query.h"
+#include "ilo_resource.h"
#include "ilo_shader.h"
#include "ilo_state.h"
#include "ilo_render_gen.h"
@@ -330,64 +330,19 @@ gen6_draw_common_urb(struct ilo_render *r,
const struct ilo_state_vector *vec,
struct ilo_render_draw_session *session)
{
- /* 3DSTATE_URB */
- if (DIRTY(VE) || DIRTY(VS) || DIRTY(GS)) {
- const bool gs_active = (vec->gs || (vec->vs &&
- ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)));
- int vs_entry_size, gs_entry_size;
- int vs_total_size, gs_total_size;
-
- vs_entry_size = (vec->vs) ?
- ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_OUTPUT_COUNT) : 0;
+ const bool gs_active = (vec->gs || (vec->vs &&
+ ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)));
- /*
- * As indicated by 2e712e41db0c0676e9f30fc73172c0e8de8d84d4, VF and VS
- * share VUE handles. The VUE allocation size must be large enough to
- * store either VF outputs (number of VERTEX_ELEMENTs) and VS outputs.
- *
- * I am not sure if the PRM explicitly states that VF and VS share VUE
- * handles. But here is a citation that implies so:
- *
- * From the Sandy Bridge PRM, volume 2 part 1, page 44:
- *
- * "Once a FF stage that spawn threads has sufficient input to
- * initiate a thread, it must guarantee that it is safe to request
- * the thread initiation. For all these FF stages, this check is
- * based on :
- *
- * - The availability of output URB entries:
- * - VS: As the input URB entries are overwritten with the
- * VS-generated output data, output URB availability isn't a
- * factor."
- */
- if (vs_entry_size < vec->ve->count + vec->ve->prepend_nosrc_cso)
- vs_entry_size = vec->ve->count + vec->ve->prepend_nosrc_cso;
-
- gs_entry_size = (vec->gs) ?
- ilo_shader_get_kernel_param(vec->gs, ILO_KERNEL_OUTPUT_COUNT) :
- (gs_active) ? vs_entry_size : 0;
-
- /* in bytes */
- vs_entry_size *= sizeof(float) * 4;
- gs_entry_size *= sizeof(float) * 4;
- vs_total_size = r->dev->urb_size;
-
- if (gs_active) {
- vs_total_size /= 2;
- gs_total_size = vs_total_size;
- }
- else {
- gs_total_size = 0;
- }
-
- gen6_3DSTATE_URB(r->builder, vs_total_size, gs_total_size,
- vs_entry_size, gs_entry_size);
+ /* 3DSTATE_URB */
+ if (session->urb_delta.dirty & (ILO_STATE_URB_3DSTATE_URB_VS |
+ ILO_STATE_URB_3DSTATE_URB_GS)) {
+ gen6_3DSTATE_URB(r->builder, &vec->urb);
if (r->state.gs.active && !gs_active)
gen6_wa_post_3dstate_urb_no_gs(r);
-
- r->state.gs.active = gs_active;
}
+
+ r->state.gs.active = gs_active;
}
static void
@@ -459,33 +414,30 @@ gen6_draw_vf(struct ilo_render *r,
{
if (ilo_dev_gen(r->dev) >= ILO_GEN(7.5)) {
/* 3DSTATE_INDEX_BUFFER */
- if (DIRTY(IB) || r->batch_bo_changed) {
- gen6_3DSTATE_INDEX_BUFFER(r->builder,
- &vec->ib, false);
- }
+ if ((session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_INDEX_BUFFER) ||
+ DIRTY(IB) || r->batch_bo_changed)
+ gen6_3DSTATE_INDEX_BUFFER(r->builder, &vec->ve->vf, &vec->ib.ib);
/* 3DSTATE_VF */
- if (session->primitive_restart_changed) {
- gen75_3DSTATE_VF(r->builder, vec->draw->primitive_restart,
- vec->draw->restart_index);
- }
- }
- else {
+ if (session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VF)
+ gen75_3DSTATE_VF(r->builder, &vec->ve->vf);
+ } else {
/* 3DSTATE_INDEX_BUFFER */
- if (DIRTY(IB) || session->primitive_restart_changed ||
- r->batch_bo_changed) {
- gen6_3DSTATE_INDEX_BUFFER(r->builder,
- &vec->ib, vec->draw->primitive_restart);
- }
+ if ((session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_INDEX_BUFFER) ||
+ DIRTY(IB) || r->batch_bo_changed)
+ gen6_3DSTATE_INDEX_BUFFER(r->builder, &vec->ve->vf, &vec->ib.ib);
}
/* 3DSTATE_VERTEX_BUFFERS */
- if (DIRTY(VB) || DIRTY(VE) || r->batch_bo_changed)
- gen6_3DSTATE_VERTEX_BUFFERS(r->builder, vec->ve, &vec->vb);
+ if ((session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VERTEX_BUFFERS) ||
+ DIRTY(VB) || DIRTY(VE) || r->batch_bo_changed) {
+ gen6_3DSTATE_VERTEX_BUFFERS(r->builder, &vec->ve->vf,
+ vec->vb.vb, vec->ve->vb_count);
+ }
/* 3DSTATE_VERTEX_ELEMENTS */
- if (DIRTY(VE))
- gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, vec->ve);
+ if (session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VERTEX_ELEMENTS)
+ gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &vec->ve->vf);
}
void
@@ -516,10 +468,17 @@ gen6_draw_vs(struct ilo_render *r,
/* 3DSTATE_VS */
if (DIRTY(VS) || r->instruction_bo_changed) {
+ const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->vs);
+ const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->vs);
+
if (ilo_dev_gen(r->dev) == ILO_GEN(6))
gen6_wa_pre_3dstate_vs_toggle(r);
- gen6_3DSTATE_VS(r->builder, vec->vs);
+ if (ilo_dev_gen(r->dev) == ILO_GEN(6) &&
+ ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO))
+ gen6_3DSTATE_VS(r->builder, &cso->vs_sol.vs, kernel_offset);
+ else
+ gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset);
}
}
@@ -535,14 +494,39 @@ gen6_draw_gs(struct ilo_render *r,
/* 3DSTATE_GS */
if (DIRTY(GS) || DIRTY(VS) ||
session->prim_changed || r->instruction_bo_changed) {
+ const union ilo_shader_cso *cso;
+ uint32_t kernel_offset;
+
if (vec->gs) {
- gen6_3DSTATE_GS(r->builder, vec->gs);
- } else if (vec->vs &&
+ cso = ilo_shader_get_kernel_cso(vec->gs);
+ kernel_offset = ilo_shader_get_kernel_offset(vec->gs);
+
+ gen6_3DSTATE_GS(r->builder, &cso->gs, kernel_offset);
+ } else if (ilo_dev_gen(r->dev) == ILO_GEN(6) &&
ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)) {
- const int verts_per_prim = u_vertices_per_prim(session->reduced_prim);
- gen6_so_3DSTATE_GS(r->builder, vec->vs, verts_per_prim);
+ const int verts_per_prim =
+ u_vertices_per_prim(session->reduced_prim);
+ enum ilo_kernel_param param;
+
+ switch (verts_per_prim) {
+ case 1:
+ param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET;
+ break;
+ case 2:
+ param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET;
+ break;
+ default:
+ param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;
+ break;
+ }
+
+ cso = ilo_shader_get_kernel_cso(vec->vs);
+ kernel_offset = ilo_shader_get_kernel_offset(vec->vs) +
+ ilo_shader_get_kernel_param(vec->vs, param);
+
+ gen6_3DSTATE_GS(r->builder, &cso->vs_sol.sol, kernel_offset);
} else {
- gen6_disable_3DSTATE_GS(r->builder);
+ gen6_3DSTATE_GS(r->builder, &vec->disabled_gs, 0);
}
}
}
@@ -633,30 +617,8 @@ gen6_draw_clip(struct ilo_render *r,
struct ilo_render_draw_session *session)
{
/* 3DSTATE_CLIP */
- if (DIRTY(RASTERIZER) || DIRTY(FS) || DIRTY(VIEWPORT) || DIRTY(FB)) {
- bool enable_guardband = true;
- unsigned i;
-
- /*
- * Gen8+ has viewport extent test. Guard band test can be enabled on
- * prior Gens only when the viewport is larger than the framebuffer,
- * unless we emulate viewport extent test on them.
- */
- if (ilo_dev_gen(r->dev) < ILO_GEN(8)) {
- for (i = 0; i < vec->viewport.count; i++) {
- const struct ilo_viewport_cso *vp = &vec->viewport.cso[i];
-
- if (vp->min_x > 0.0f || vp->max_x < vec->fb.state.width ||
- vp->min_y > 0.0f || vp->max_y < vec->fb.state.height) {
- enable_guardband = false;
- break;
- }
- }
- }
-
- gen6_3DSTATE_CLIP(r->builder, vec->rasterizer,
- vec->fs, enable_guardband, 1);
- }
+ if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_CLIP)
+ gen6_3DSTATE_CLIP(r->builder, &vec->rasterizer->rs);
}
static void
@@ -665,9 +627,9 @@ gen6_draw_sf(struct ilo_render *r,
struct ilo_render_draw_session *session)
{
/* 3DSTATE_SF */
- if (DIRTY(RASTERIZER) || DIRTY(FS) || DIRTY(FB)) {
- gen6_3DSTATE_SF(r->builder, vec->rasterizer, vec->fs,
- vec->fb.num_samples);
+ if ((session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_SF) || DIRTY(FS)) {
+ const struct ilo_state_sbe *sbe = ilo_shader_get_kernel_sbe(vec->fs);
+ gen6_3DSTATE_SF(r->builder, &vec->rasterizer->rs, sbe);
}
}
@@ -700,17 +662,17 @@ gen6_draw_wm(struct ilo_render *r,
}
/* 3DSTATE_WM */
- if (DIRTY(FS) || DIRTY(BLEND) || DIRTY(DSA) ||
- DIRTY(RASTERIZER) || r->instruction_bo_changed) {
- const bool dual_blend = vec->blend->dual_blend;
- const bool cc_may_kill = (vec->dsa->dw_blend_alpha ||
- vec->blend->alpha_to_coverage);
+ if (DIRTY(FS) ||
+ (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_WM) ||
+ r->instruction_bo_changed) {
+ const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->fs);
+ const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->fs);
if (ilo_dev_gen(r->dev) == ILO_GEN(6) && r->hw_ctx_changed)
gen6_wa_pre_3dstate_wm_max_threads(r);
- gen6_3DSTATE_WM(r->builder, vec->fs,
- vec->rasterizer, dual_blend, cc_may_kill);
+ gen6_3DSTATE_WM(r->builder, &vec->rasterizer->rs,
+ &cso->ps, kernel_offset);
}
}
@@ -719,25 +681,23 @@ gen6_draw_wm_multisample(struct ilo_render *r,
const struct ilo_state_vector *vec,
struct ilo_render_draw_session *session)
{
- /* 3DSTATE_MULTISAMPLE and 3DSTATE_SAMPLE_MASK */
- if (DIRTY(SAMPLE_MASK) || DIRTY(FB)) {
- const uint32_t *pattern;
-
- pattern = (vec->fb.num_samples > 1) ?
- &r->sample_pattern_4x : &r->sample_pattern_1x;
+ /* 3DSTATE_MULTISAMPLE */
+ if (DIRTY(FB) || (session->rs_delta.dirty &
+ ILO_STATE_RASTER_3DSTATE_MULTISAMPLE)) {
+ const uint8_t sample_count = (vec->fb.num_samples > 1) ? 4 : 1;
if (ilo_dev_gen(r->dev) == ILO_GEN(6)) {
gen6_wa_pre_non_pipelined(r);
gen6_wa_pre_3dstate_multisample(r);
}
- gen6_3DSTATE_MULTISAMPLE(r->builder,
- vec->fb.num_samples, pattern,
- vec->rasterizer->state.half_pixel_center);
-
- gen6_3DSTATE_SAMPLE_MASK(r->builder,
- (vec->fb.num_samples > 1) ? vec->sample_mask : 0x1);
+ gen6_3DSTATE_MULTISAMPLE(r->builder, &vec->rasterizer->rs,
+ &r->sample_pattern, sample_count);
}
+
+ /* 3DSTATE_SAMPLE_MASK */
+ if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_SAMPLE_MASK)
+ gen6_3DSTATE_SAMPLE_MASK(r->builder, &vec->rasterizer->rs);
}
static void
@@ -747,7 +707,7 @@ gen6_draw_wm_depth(struct ilo_render *r,
{
/* 3DSTATE_DEPTH_BUFFER and 3DSTATE_CLEAR_PARAMS */
if (DIRTY(FB) || r->batch_bo_changed) {
- const struct ilo_zs_surface *zs;
+ const struct ilo_state_zs *zs;
uint32_t clear_params;
if (vec->fb.state.zsbuf) {
@@ -772,7 +732,7 @@ gen6_draw_wm_depth(struct ilo_render *r,
gen6_wa_pre_depth(r);
}
- gen6_3DSTATE_DEPTH_BUFFER(r->builder, zs, false);
+ gen6_3DSTATE_DEPTH_BUFFER(r->builder, zs);
gen6_3DSTATE_HIER_DEPTH_BUFFER(r->builder, zs);
gen6_3DSTATE_STENCIL_BUFFER(r->builder, zs);
gen6_3DSTATE_CLEAR_PARAMS(r->builder, clear_params);
@@ -790,10 +750,8 @@ gen6_draw_wm_raster(struct ilo_render *r,
if (ilo_dev_gen(r->dev) == ILO_GEN(6))
gen6_wa_pre_non_pipelined(r);
- gen6_3DSTATE_POLY_STIPPLE_PATTERN(r->builder,
- &vec->poly_stipple);
-
- gen6_3DSTATE_POLY_STIPPLE_OFFSET(r->builder, 0, 0);
+ gen6_3DSTATE_POLY_STIPPLE_PATTERN(r->builder, &vec->poly_stipple);
+ gen6_3DSTATE_POLY_STIPPLE_OFFSET(r->builder, &vec->poly_stipple);
}
/* 3DSTATE_LINE_STIPPLE */
@@ -801,17 +759,16 @@ gen6_draw_wm_raster(struct ilo_render *r,
if (ilo_dev_gen(r->dev) == ILO_GEN(6))
gen6_wa_pre_non_pipelined(r);
- gen6_3DSTATE_LINE_STIPPLE(r->builder,
- vec->rasterizer->state.line_stipple_pattern,
- vec->rasterizer->state.line_stipple_factor + 1);
+ gen6_3DSTATE_LINE_STIPPLE(r->builder, &vec->line_stipple);
}
/* 3DSTATE_AA_LINE_PARAMETERS */
- if (DIRTY(RASTERIZER) && vec->rasterizer->state.line_smooth) {
+ if (session->rs_delta.dirty &
+ ILO_STATE_RASTER_3DSTATE_AA_LINE_PARAMETERS) {
if (ilo_dev_gen(r->dev) == ILO_GEN(6))
gen6_wa_pre_non_pipelined(r);
- gen6_3DSTATE_AA_LINE_PARAMETERS(r->builder);
+ gen6_3DSTATE_AA_LINE_PARAMETERS(r->builder, &vec->rasterizer->rs);
}
}
@@ -849,7 +806,7 @@ ilo_render_emit_draw_commands_gen6(struct ilo_render *render,
gen6_draw_sf_rect(render, vec, session);
gen6_draw_vf(render, vec, session);
- ilo_render_3dprimitive(render, vec->draw, &vec->ib);
+ ilo_render_3dprimitive(render, &vec->draw_info);
}
static void
@@ -860,40 +817,23 @@ gen6_rectlist_vs_to_sf(struct ilo_render *r,
gen6_wa_post_3dstate_constant_vs(r);
gen6_wa_pre_3dstate_vs_toggle(r);
- gen6_disable_3DSTATE_VS(r->builder);
+ gen6_3DSTATE_VS(r->builder, &blitter->vs, 0);
gen6_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
- gen6_disable_3DSTATE_GS(r->builder);
+ gen6_3DSTATE_GS(r->builder, &blitter->gs, 0);
- gen6_disable_3DSTATE_CLIP(r->builder);
- gen6_3DSTATE_SF(r->builder, NULL, NULL, blitter->fb.num_samples);
+ gen6_3DSTATE_CLIP(r->builder, &blitter->fb.rs);
+ gen6_3DSTATE_SF(r->builder, &blitter->fb.rs, &blitter->sbe);
}
static void
gen6_rectlist_wm(struct ilo_render *r,
const struct ilo_blitter *blitter)
{
- uint32_t hiz_op;
-
- switch (blitter->op) {
- case ILO_BLITTER_RECTLIST_CLEAR_ZS:
- hiz_op = GEN6_WM_DW4_DEPTH_CLEAR;
- break;
- case ILO_BLITTER_RECTLIST_RESOLVE_Z:
- hiz_op = GEN6_WM_DW4_DEPTH_RESOLVE;
- break;
- case ILO_BLITTER_RECTLIST_RESOLVE_HIZ:
- hiz_op = GEN6_WM_DW4_HIZ_RESOLVE;
- break;
- default:
- hiz_op = 0;
- break;
- }
-
gen6_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0);
gen6_wa_pre_3dstate_wm_max_threads(r);
- gen6_hiz_3DSTATE_WM(r->builder, hiz_op);
+ gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps, 0);
}
static void
@@ -903,10 +843,8 @@ gen6_rectlist_wm_depth(struct ilo_render *r,
gen6_wa_pre_depth(r);
if (blitter->uses & (ILO_BLITTER_USE_FB_DEPTH |
- ILO_BLITTER_USE_FB_STENCIL)) {
- gen6_3DSTATE_DEPTH_BUFFER(r->builder,
- &blitter->fb.dst.u.zs, true);
- }
+ ILO_BLITTER_USE_FB_STENCIL))
+ gen6_3DSTATE_DEPTH_BUFFER(r->builder, &blitter->fb.dst.u.zs);
if (blitter->uses & ILO_BLITTER_USE_FB_DEPTH) {
gen6_3DSTATE_HIER_DEPTH_BUFFER(r->builder,
@@ -926,16 +864,12 @@ static void
gen6_rectlist_wm_multisample(struct ilo_render *r,
const struct ilo_blitter *blitter)
{
- const uint32_t *pattern = (blitter->fb.num_samples > 1) ?
- &r->sample_pattern_4x : &r->sample_pattern_1x;
+ const uint8_t sample_count = (blitter->fb.num_samples > 1) ? 4 : 1;
gen6_wa_pre_3dstate_multisample(r);
- gen6_3DSTATE_MULTISAMPLE(r->builder, blitter->fb.num_samples,
- pattern, true);
-
- gen6_3DSTATE_SAMPLE_MASK(r->builder,
- (1 << blitter->fb.num_samples) - 1);
+ gen6_3DSTATE_MULTISAMPLE(r->builder, &blitter->fb.rs, &r->sample_pattern, sample_count);
+ gen6_3DSTATE_SAMPLE_MASK(r->builder, &blitter->fb.rs);
}
int
@@ -964,11 +898,9 @@ ilo_render_emit_rectlist_commands_gen6(struct ilo_render *r,
session->vb_start, session->vb_end,
sizeof(blitter->vertices[0]));
- gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &blitter->ve);
+ gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &blitter->vf);
- gen6_3DSTATE_URB(r->builder, r->dev->urb_size, 0,
- (blitter->ve.count + blitter->ve.prepend_nosrc_cso) * 4 * sizeof(float),
- 0);
+ gen6_3DSTATE_URB(r->builder, &blitter->urb);
if (r->state.gs.active) {
gen6_wa_post_3dstate_urb_no_gs(r);
@@ -994,7 +926,7 @@ ilo_render_emit_rectlist_commands_gen6(struct ilo_render *r,
gen6_3DSTATE_DRAWING_RECTANGLE(r->builder, 0, 0,
blitter->fb.width, blitter->fb.height);
- ilo_render_3dprimitive(r, &blitter->draw, NULL);
+ ilo_render_3dprimitive(r, &blitter->draw_info);
}
int
diff --git a/src/gallium/drivers/ilo/ilo_render_gen7.c b/src/gallium/drivers/ilo/ilo_render_gen7.c
index 07fe7c83536..6623a8bcb43 100644
--- a/src/gallium/drivers/ilo/ilo_render_gen7.c
+++ b/src/gallium/drivers/ilo/ilo_render_gen7.c
@@ -28,9 +28,9 @@
#include "genhw/genhw.h"
#include "core/ilo_builder_3d.h"
#include "core/ilo_builder_render.h"
-#include "util/u_dual_blend.h"
#include "ilo_blitter.h"
+#include "ilo_resource.h"
#include "ilo_shader.h"
#include "ilo_state.h"
#include "ilo_render_gen.h"
@@ -201,40 +201,17 @@ gen7_draw_common_urb(struct ilo_render *r,
struct ilo_render_draw_session *session)
{
/* 3DSTATE_URB_{VS,GS,HS,DS} */
- if (DIRTY(VE) || DIRTY(VS)) {
- /* the first 16KB are reserved for VS and PS PCBs */
- const int offset =
- (ilo_dev_gen(r->dev) >= ILO_GEN(8)) ||
- (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ?
- 32768 : 16384;
- int vs_entry_size, vs_total_size;
-
- vs_entry_size = (vec->vs) ?
- ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_OUTPUT_COUNT) : 0;
-
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 35:
- *
- * "Programming Restriction: As the VS URB entry serves as both the
- * per-vertex input and output of the VS shader, the VS URB
- * Allocation Size must be sized to the maximum of the vertex input
- * and output structures."
- */
- if (vs_entry_size < vec->ve->count + vec->ve->prepend_nosrc_cso)
- vs_entry_size = vec->ve->count + vec->ve->prepend_nosrc_cso;
-
- vs_entry_size *= sizeof(float) * 4;
- vs_total_size = r->dev->urb_size - offset;
-
+ if (session->urb_delta.dirty & (ILO_STATE_URB_3DSTATE_URB_VS |
+ ILO_STATE_URB_3DSTATE_URB_HS |
+ ILO_STATE_URB_3DSTATE_URB_DS |
+ ILO_STATE_URB_3DSTATE_URB_GS)) {
if (ilo_dev_gen(r->dev) == ILO_GEN(7))
gen7_wa_pre_vs(r);
- gen7_3DSTATE_URB_VS(r->builder,
- offset, vs_total_size, vs_entry_size);
-
- gen7_3DSTATE_URB_GS(r->builder, offset, 0, 0);
- gen7_3DSTATE_URB_HS(r->builder, offset, 0, 0);
- gen7_3DSTATE_URB_DS(r->builder, offset, 0, 0);
+ gen7_3DSTATE_URB_VS(r->builder, &vec->urb);
+ gen7_3DSTATE_URB_GS(r->builder, &vec->urb);
+ gen7_3DSTATE_URB_HS(r->builder, &vec->urb);
+ gen7_3DSTATE_URB_DS(r->builder, &vec->urb);
}
}
@@ -244,22 +221,15 @@ gen7_draw_common_pcb_alloc(struct ilo_render *r,
struct ilo_render_draw_session *session)
{
/* 3DSTATE_PUSH_CONSTANT_ALLOC_{VS,PS} */
- if (r->hw_ctx_changed) {
- /*
- * Push constant buffers are only allowed to take up at most the first
- * 16KB of the URB. Split the space evenly for VS and FS.
- */
- const int max_size =
- (ilo_dev_gen(r->dev) >= ILO_GEN(8)) ||
- (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ?
- 32768 : 16384;
- const int size = max_size / 2;
- int offset = 0;
-
- gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, offset, size);
- offset += size;
-
- gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, offset, size);
+ if (session->urb_delta.dirty &
+ (ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_VS |
+ ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_HS |
+ ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_DS |
+ ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_GS |
+ ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_PS)) {
+ gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, &vec->urb);
+ gen7_3DSTATE_PUSH_CONSTANT_ALLOC_GS(r->builder, &vec->urb);
+ gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, &vec->urb);
if (ilo_dev_gen(r->dev) == ILO_GEN(7))
gen7_wa_post_3dstate_push_constant_alloc_ps(r);
@@ -344,14 +314,14 @@ gen7_draw_vs(struct ilo_render *r,
}
/* 3DSTATE_VS */
- if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) {
- if (emit_3dstate_vs || DIRTY(RASTERIZER)) {
- gen8_3DSTATE_VS(r->builder, vec->vs,
- vec->rasterizer->state.clip_plane_enable);
- }
- } else {
- if (emit_3dstate_vs)
- gen6_3DSTATE_VS(r->builder, vec->vs);
+ if (emit_3dstate_vs) {
+ const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->vs);
+ const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->vs);
+
+ if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
+ gen8_3DSTATE_VS(r->builder, &cso->vs, kernel_offset);
+ else
+ gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset);
}
}
@@ -362,8 +332,15 @@ gen7_draw_hs(struct ilo_render *r,
{
/* 3DSTATE_CONSTANT_HS and 3DSTATE_HS */
if (r->hw_ctx_changed) {
+ const struct ilo_state_hs *hs = &vec->disabled_hs;
+ const uint32_t kernel_offset = 0;
+
gen7_3DSTATE_CONSTANT_HS(r->builder, 0, 0, 0);
- gen7_disable_3DSTATE_HS(r->builder);
+
+ if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
+ gen8_3DSTATE_HS(r->builder, hs, kernel_offset);
+ else
+ gen7_3DSTATE_HS(r->builder, hs, kernel_offset);
}
/* 3DSTATE_BINDING_TABLE_POINTERS_HS */
@@ -377,8 +354,10 @@ gen7_draw_te(struct ilo_render *r,
struct ilo_render_draw_session *session)
{
/* 3DSTATE_TE */
- if (r->hw_ctx_changed)
- gen7_3DSTATE_TE(r->builder);
+ if (r->hw_ctx_changed) {
+ const struct ilo_state_ds *ds = &vec->disabled_ds;
+ gen7_3DSTATE_TE(r->builder, ds);
+ }
}
void
@@ -388,8 +367,15 @@ gen7_draw_ds(struct ilo_render *r,
{
/* 3DSTATE_CONSTANT_DS and 3DSTATE_DS */
if (r->hw_ctx_changed) {
+ const struct ilo_state_ds *ds = &vec->disabled_ds;
+ const uint32_t kernel_offset = 0;
+
gen7_3DSTATE_CONSTANT_DS(r->builder, 0, 0, 0);
- gen7_disable_3DSTATE_DS(r->builder);
+
+ if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
+ gen8_3DSTATE_DS(r->builder, ds, kernel_offset);
+ else
+ gen7_3DSTATE_DS(r->builder, ds, kernel_offset);
}
/* 3DSTATE_BINDING_TABLE_POINTERS_DS */
@@ -405,8 +391,15 @@ gen7_draw_gs(struct ilo_render *r,
{
/* 3DSTATE_CONSTANT_GS and 3DSTATE_GS */
if (r->hw_ctx_changed) {
+ const struct ilo_state_gs *gs = &vec->disabled_gs;
+ const uint32_t kernel_offset = 0;
+
gen7_3DSTATE_CONSTANT_GS(r->builder, 0, 0, 0);
- gen7_disable_3DSTATE_GS(r->builder);
+
+ if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
+ gen8_3DSTATE_GS(r->builder, gs, kernel_offset);
+ else
+ gen7_3DSTATE_GS(r->builder, gs, kernel_offset);
}
/* 3DSTATE_BINDING_TABLE_POINTERS_GS */
@@ -421,7 +414,7 @@ gen7_draw_sol(struct ilo_render *r,
const struct ilo_state_vector *vec,
struct ilo_render_draw_session *session)
{
- const struct pipe_stream_output_info *so_info;
+ const struct ilo_state_sol *sol;
const struct ilo_shader_state *shader;
bool dirty_sh = false;
@@ -434,41 +427,54 @@ gen7_draw_sol(struct ilo_render *r,
dirty_sh = DIRTY(VS);
}
- so_info = ilo_shader_get_kernel_so_info(shader);
+ sol = ilo_shader_get_kernel_sol(shader);
/* 3DSTATE_SO_BUFFER */
if ((DIRTY(SO) || dirty_sh || r->batch_bo_changed) &&
vec->so.enabled) {
int i;
- for (i = 0; i < vec->so.count; i++) {
- const int stride = so_info->stride[i] * 4; /* in bytes */
-
- gen7_3DSTATE_SO_BUFFER(r->builder, i, stride, vec->so.states[i]);
+ for (i = 0; i < ILO_STATE_SOL_MAX_BUFFER_COUNT; i++) {
+ const struct pipe_stream_output_target *target =
+ (i < vec->so.count && vec->so.states[i]) ?
+ vec->so.states[i] : NULL;
+ const struct ilo_state_sol_buffer *sb = (target) ?
+ &((const struct ilo_stream_output_target *) target)->sb :
+ &vec->so.dummy_sb;
+
+ if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
+ gen8_3DSTATE_SO_BUFFER(r->builder, sol, sb, i);
+ else
+ gen7_3DSTATE_SO_BUFFER(r->builder, sol, sb, i);
}
-
- for (; i < 4; i++)
- gen7_disable_3DSTATE_SO_BUFFER(r->builder, i);
}
/* 3DSTATE_SO_DECL_LIST */
if (dirty_sh && vec->so.enabled)
- gen7_3DSTATE_SO_DECL_LIST(r->builder, so_info);
-
- /* 3DSTATE_STREAMOUT */
- if (DIRTY(SO) || DIRTY(RASTERIZER) || dirty_sh) {
- const int output_count = ilo_shader_get_kernel_param(shader,
- ILO_KERNEL_OUTPUT_COUNT);
- int buf_strides[4] = { 0, 0, 0, 0 };
- int i;
+ gen7_3DSTATE_SO_DECL_LIST(r->builder, sol);
- for (i = 0; i < vec->so.count; i++)
- buf_strides[i] = so_info->stride[i] * 4;
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 196-197:
+ *
+ * "Anytime the SOL unit MMIO registers or non-pipeline state are
+ * written, the SOL unit needs to receive a pipeline state update with
+ * SOL unit dirty state for information programmed in MMIO/NP to get
+ * loaded into the SOL unit.
+ *
+ * The SOL unit incorrectly double buffers MMIO/NP registers and only
+ * moves them into the design for usage when control topology is
+ * received with the SOL unit dirty state.
+ *
+ * If the state does not change, need to resend the same state.
+ *
+ * Because of corruption, software must flush the whole fixed function
+ * pipeline when 3DSTATE_STREAMOUT changes state."
+ *
+ * The first and fourth paragraphs are gone on Gen7.5+.
+ */
- gen7_3DSTATE_STREAMOUT(r->builder, 0,
- vec->rasterizer->state.rasterizer_discard,
- output_count, buf_strides);
- }
+ /* 3DSTATE_STREAMOUT */
+ gen7_3DSTATE_STREAMOUT(r->builder, sol);
}
static void
@@ -477,22 +483,17 @@ gen7_draw_sf(struct ilo_render *r,
struct ilo_render_draw_session *session)
{
/* 3DSTATE_SBE */
- if (DIRTY(RASTERIZER) || DIRTY(FS)) {
- gen7_3DSTATE_SBE(r->builder, vec->fs, (vec->rasterizer) ?
- vec->rasterizer->state.sprite_coord_mode : 0);
+ if (DIRTY(FS)) {
+ const struct ilo_state_sbe *sbe = ilo_shader_get_kernel_sbe(vec->fs);
+ gen7_3DSTATE_SBE(r->builder, sbe);
}
/* 3DSTATE_SF */
- if (DIRTY(RASTERIZER) || DIRTY(FB)) {
- struct pipe_surface *zs = vec->fb.state.zsbuf;
-
+ if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_SF) {
if (ilo_dev_gen(r->dev) == ILO_GEN(7))
gen7_wa_pre_3dstate_sf_depth_bias(r);
- gen7_3DSTATE_SF(r->builder,
- (vec->rasterizer) ? &vec->rasterizer->sf : NULL,
- (zs) ? zs->format : PIPE_FORMAT_NONE,
- vec->fb.num_samples);
+ gen7_3DSTATE_SF(r->builder, &vec->rasterizer->rs);
}
}
@@ -501,13 +502,12 @@ gen7_draw_wm(struct ilo_render *r,
const struct ilo_state_vector *vec,
struct ilo_render_draw_session *session)
{
- /* 3DSTATE_WM */
- if (DIRTY(FS) || DIRTY(BLEND) || DIRTY(DSA) || DIRTY(RASTERIZER)) {
- const bool cc_may_kill = (vec->dsa->dw_blend_alpha ||
- vec->blend->alpha_to_coverage);
+ const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->fs);
+ const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->fs);
- gen7_3DSTATE_WM(r->builder, vec->fs, vec->rasterizer, cc_may_kill);
- }
+ /* 3DSTATE_WM */
+ if (DIRTY(FS) || (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_WM))
+ gen7_3DSTATE_WM(r->builder, &vec->rasterizer->rs, &cso->ps);
/* 3DSTATE_BINDING_TABLE_POINTERS_PS */
if (session->binding_table_fs_changed) {
@@ -530,13 +530,11 @@ gen7_draw_wm(struct ilo_render *r,
}
/* 3DSTATE_PS */
- if (DIRTY(FS) || DIRTY(BLEND) || r->instruction_bo_changed) {
- const bool dual_blend = vec->blend->dual_blend;
-
+ if (DIRTY(FS) || r->instruction_bo_changed) {
if (r->hw_ctx_changed)
gen7_wa_pre_3dstate_ps_max_threads(r);
- gen7_3DSTATE_PS(r->builder, vec->fs, dual_blend);
+ gen7_3DSTATE_PS(r->builder, &cso->ps, kernel_offset);
}
/* 3DSTATE_SCISSOR_STATE_POINTERS */
@@ -569,7 +567,7 @@ gen7_draw_wm(struct ilo_render *r,
/* 3DSTATE_DEPTH_BUFFER and 3DSTATE_CLEAR_PARAMS */
if (DIRTY(FB) || r->batch_bo_changed) {
- const struct ilo_zs_surface *zs;
+ const struct ilo_state_zs *zs;
uint32_t clear_params;
if (vec->fb.state.zsbuf) {
@@ -588,7 +586,7 @@ gen7_draw_wm(struct ilo_render *r,
clear_params = 0;
}
- gen6_3DSTATE_DEPTH_BUFFER(r->builder, zs, false);
+ gen6_3DSTATE_DEPTH_BUFFER(r->builder, zs);
gen6_3DSTATE_HIER_DEPTH_BUFFER(r->builder, zs);
gen6_3DSTATE_STENCIL_BUFFER(r->builder, zs);
gen7_3DSTATE_CLEAR_PARAMS(r->builder, clear_params);
@@ -600,24 +598,21 @@ gen7_draw_wm_multisample(struct ilo_render *r,
const struct ilo_state_vector *vec,
struct ilo_render_draw_session *session)
{
- /* 3DSTATE_MULTISAMPLE and 3DSTATE_SAMPLE_MASK */
- if (DIRTY(SAMPLE_MASK) || DIRTY(FB)) {
- const uint32_t *pattern;
+ /* 3DSTATE_MULTISAMPLE */
+ if (DIRTY(FB) || (session->rs_delta.dirty &
+ ILO_STATE_RASTER_3DSTATE_MULTISAMPLE)) {
+ const uint8_t sample_count = (vec->fb.num_samples > 4) ? 8 :
+ (vec->fb.num_samples > 1) ? 4 : 1;
gen7_wa_pre_3dstate_multisample(r);
- pattern = (vec->fb.num_samples > 4) ? r->sample_pattern_8x :
- (vec->fb.num_samples > 1) ? &r->sample_pattern_4x :
- &r->sample_pattern_1x;
-
- gen6_3DSTATE_MULTISAMPLE(r->builder,
- vec->fb.num_samples, pattern,
- vec->rasterizer->state.half_pixel_center);
-
- gen7_3DSTATE_SAMPLE_MASK(r->builder,
- (vec->fb.num_samples > 1) ? vec->sample_mask : 0x1,
- vec->fb.num_samples);
+ gen6_3DSTATE_MULTISAMPLE(r->builder, &vec->rasterizer->rs,
+ &r->sample_pattern, sample_count);
}
+
+ /* 3DSTATE_SAMPLE_MASK */
+ if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_SAMPLE_MASK)
+ gen6_3DSTATE_SAMPLE_MASK(r->builder, &vec->rasterizer->rs);
}
void
@@ -654,28 +649,15 @@ ilo_render_emit_draw_commands_gen7(struct ilo_render *render,
gen6_draw_sf_rect(render, vec, session);
gen6_draw_vf(render, vec, session);
- ilo_render_3dprimitive(render, vec->draw, &vec->ib);
+ ilo_render_3dprimitive(render, &vec->draw_info);
}
static void
gen7_rectlist_pcb_alloc(struct ilo_render *r,
const struct ilo_blitter *blitter)
{
- /*
- * Push constant buffers are only allowed to take up at most the first
- * 16KB of the URB. Split the space evenly for VS and FS.
- */
- const int max_size =
- (ilo_dev_gen(r->dev) >= ILO_GEN(8)) ||
- (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ?
- 32768 : 16384;
- const int size = max_size / 2;
- int offset = 0;
-
- gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, offset, size);
- offset += size;
-
- gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, offset, size);
+ gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, &blitter->urb);
+ gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, &blitter->urb);
if (ilo_dev_gen(r->dev) == ILO_GEN(7))
gen7_wa_post_3dstate_push_constant_alloc_ps(r);
@@ -685,19 +667,10 @@ static void
gen7_rectlist_urb(struct ilo_render *r,
const struct ilo_blitter *blitter)
{
- /* the first 16KB are reserved for VS and PS PCBs */
- const int offset =
- (ilo_dev_gen(r->dev) >= ILO_GEN(8)) ||
- (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ?
- 32768 : 16384;
-
- gen7_3DSTATE_URB_VS(r->builder, offset, r->dev->urb_size - offset,
- (blitter->ve.count + blitter->ve.prepend_nosrc_cso) *
- 4 * sizeof(float));
-
- gen7_3DSTATE_URB_GS(r->builder, offset, 0, 0);
- gen7_3DSTATE_URB_HS(r->builder, offset, 0, 0);
- gen7_3DSTATE_URB_DS(r->builder, offset, 0, 0);
+ gen7_3DSTATE_URB_VS(r->builder, &blitter->urb);
+ gen7_3DSTATE_URB_GS(r->builder, &blitter->urb);
+ gen7_3DSTATE_URB_HS(r->builder, &blitter->urb);
+ gen7_3DSTATE_URB_DS(r->builder, &blitter->urb);
}
static void
@@ -705,58 +678,40 @@ gen7_rectlist_vs_to_sf(struct ilo_render *r,
const struct ilo_blitter *blitter)
{
gen7_3DSTATE_CONSTANT_VS(r->builder, NULL, NULL, 0);
- gen6_disable_3DSTATE_VS(r->builder);
+ gen6_3DSTATE_VS(r->builder, &blitter->vs, 0);
gen7_3DSTATE_CONSTANT_HS(r->builder, NULL, NULL, 0);
- gen7_disable_3DSTATE_HS(r->builder);
+ gen7_3DSTATE_HS(r->builder, &blitter->hs, 0);
- gen7_3DSTATE_TE(r->builder);
+ gen7_3DSTATE_TE(r->builder, &blitter->ds);
gen7_3DSTATE_CONSTANT_DS(r->builder, NULL, NULL, 0);
- gen7_disable_3DSTATE_DS(r->builder);
+ gen7_3DSTATE_DS(r->builder, &blitter->ds, 0);
gen7_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
- gen7_disable_3DSTATE_GS(r->builder);
+ gen7_3DSTATE_GS(r->builder, &blitter->gs, 0);
- gen7_3DSTATE_STREAMOUT(r->builder, 0, false, 0x0, 0);
+ gen7_3DSTATE_STREAMOUT(r->builder, &blitter->sol);
- gen6_disable_3DSTATE_CLIP(r->builder);
+ gen6_3DSTATE_CLIP(r->builder, &blitter->fb.rs);
if (ilo_dev_gen(r->dev) == ILO_GEN(7))
gen7_wa_pre_3dstate_sf_depth_bias(r);
- gen7_3DSTATE_SF(r->builder, NULL, blitter->fb.dst.base.format,
- blitter->fb.num_samples);
- gen7_3DSTATE_SBE(r->builder, NULL, 0);
+ gen7_3DSTATE_SF(r->builder, &blitter->fb.rs);
+ gen7_3DSTATE_SBE(r->builder, &blitter->sbe);
}
static void
gen7_rectlist_wm(struct ilo_render *r,
const struct ilo_blitter *blitter)
{
- uint32_t hiz_op;
-
- switch (blitter->op) {
- case ILO_BLITTER_RECTLIST_CLEAR_ZS:
- hiz_op = GEN7_WM_DW1_DEPTH_CLEAR;
- break;
- case ILO_BLITTER_RECTLIST_RESOLVE_Z:
- hiz_op = GEN7_WM_DW1_DEPTH_RESOLVE;
- break;
- case ILO_BLITTER_RECTLIST_RESOLVE_HIZ:
- hiz_op = GEN7_WM_DW1_HIZ_RESOLVE;
- break;
- default:
- hiz_op = 0;
- break;
- }
-
- gen7_hiz_3DSTATE_WM(r->builder, hiz_op);
+ gen7_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps);
gen7_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0);
gen7_wa_pre_3dstate_ps_max_threads(r);
- gen7_disable_3DSTATE_PS(r->builder);
+ gen7_3DSTATE_PS(r->builder, &blitter->ps, 0);
}
static void
@@ -766,10 +721,8 @@ gen7_rectlist_wm_depth(struct ilo_render *r,
gen7_wa_pre_depth(r);
if (blitter->uses & (ILO_BLITTER_USE_FB_DEPTH |
- ILO_BLITTER_USE_FB_STENCIL)) {
- gen6_3DSTATE_DEPTH_BUFFER(r->builder,
- &blitter->fb.dst.u.zs, true);
- }
+ ILO_BLITTER_USE_FB_STENCIL))
+ gen6_3DSTATE_DEPTH_BUFFER(r->builder, &blitter->fb.dst.u.zs);
if (blitter->uses & ILO_BLITTER_USE_FB_DEPTH) {
gen6_3DSTATE_HIER_DEPTH_BUFFER(r->builder,
@@ -789,18 +742,15 @@ static void
gen7_rectlist_wm_multisample(struct ilo_render *r,
const struct ilo_blitter *blitter)
{
- const uint32_t *pattern =
- (blitter->fb.num_samples > 4) ? r->sample_pattern_8x :
- (blitter->fb.num_samples > 1) ? &r->sample_pattern_4x :
- &r->sample_pattern_1x;
+ const uint8_t sample_count = (blitter->fb.num_samples > 4) ? 8 :
+ (blitter->fb.num_samples > 1) ? 4 : 1;
gen7_wa_pre_3dstate_multisample(r);
- gen6_3DSTATE_MULTISAMPLE(r->builder, blitter->fb.num_samples,
- pattern, true);
+ gen6_3DSTATE_MULTISAMPLE(r->builder, &blitter->fb.rs,
+ &r->sample_pattern, sample_count);
- gen7_3DSTATE_SAMPLE_MASK(r->builder,
- (1 << blitter->fb.num_samples) - 1, blitter->fb.num_samples);
+ gen6_3DSTATE_SAMPLE_MASK(r->builder, &blitter->fb.rs);
}
void
@@ -818,7 +768,7 @@ ilo_render_emit_rectlist_commands_gen7(struct ilo_render *r,
session->vb_start, session->vb_end,
sizeof(blitter->vertices[0]));
- gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &blitter->ve);
+ gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &blitter->vf);
gen7_rectlist_pcb_alloc(r, blitter);
@@ -854,7 +804,7 @@ ilo_render_emit_rectlist_commands_gen7(struct ilo_render *r,
if (ilo_dev_gen(r->dev) == ILO_GEN(7))
gen7_wa_post_ps_and_later(r);
- ilo_render_3dprimitive(r, &blitter->draw, NULL);
+ ilo_render_3dprimitive(r, &blitter->draw_info);
}
int
diff --git a/src/gallium/drivers/ilo/ilo_render_gen8.c b/src/gallium/drivers/ilo/ilo_render_gen8.c
index 715b93611f1..65494b4058a 100644
--- a/src/gallium/drivers/ilo/ilo_render_gen8.c
+++ b/src/gallium/drivers/ilo/ilo_render_gen8.c
@@ -28,9 +28,9 @@
#include "genhw/genhw.h"
#include "core/ilo_builder_3d.h"
#include "core/ilo_builder_render.h"
-#include "util/u_dual_blend.h"
#include "ilo_blitter.h"
+#include "ilo_resource.h"
#include "ilo_shader.h"
#include "ilo_state.h"
#include "ilo_render_gen.h"
@@ -66,26 +66,20 @@ gen8_draw_sf(struct ilo_render *r,
struct ilo_render_draw_session *session)
{
/* 3DSTATE_RASTER */
- if (DIRTY(RASTERIZER)) {
- gen8_3DSTATE_RASTER(r->builder, (vec->rasterizer) ?
- &vec->rasterizer->sf : NULL);
- }
+ if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_RASTER)
+ gen8_3DSTATE_RASTER(r->builder, &vec->rasterizer->rs);
- /* 3DSTATE_SBE */
- if (DIRTY(RASTERIZER) || DIRTY(FS)) {
- gen8_3DSTATE_SBE(r->builder, vec->fs, (vec->rasterizer) ?
- vec->rasterizer->state.sprite_coord_mode : 0);
- }
+ /* 3DSTATE_SBE and 3DSTATE_SBE_SWIZ */
+ if (DIRTY(FS)) {
+ const struct ilo_state_sbe *sbe = ilo_shader_get_kernel_sbe(vec->fs);
- /* 3DSTATE_SBE_SWIZ */
- if (DIRTY(FS))
- gen8_3DSTATE_SBE_SWIZ(r->builder, vec->fs);
+ gen8_3DSTATE_SBE(r->builder, sbe);
+ gen8_3DSTATE_SBE_SWIZ(r->builder, sbe);
+ }
/* 3DSTATE_SF */
- if (DIRTY(RASTERIZER)) {
- gen8_3DSTATE_SF(r->builder, (vec->rasterizer) ?
- &vec->rasterizer->sf : NULL);
- }
+ if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_SF)
+ gen7_3DSTATE_SF(r->builder, &vec->rasterizer->rs);
}
static void
@@ -93,12 +87,15 @@ gen8_draw_wm(struct ilo_render *r,
const struct ilo_state_vector *vec,
struct ilo_render_draw_session *session)
{
+ const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->fs);
+ const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->fs);
+
/* 3DSTATE_WM */
- if (DIRTY(FS) || DIRTY(RASTERIZER))
- gen8_3DSTATE_WM(r->builder, vec->fs, vec->rasterizer);
+ if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_WM)
+ gen8_3DSTATE_WM(r->builder, &vec->rasterizer->rs);
- if (DIRTY(DSA))
- gen8_3DSTATE_WM_DEPTH_STENCIL(r->builder, vec->dsa);
+ if (session->cc_delta.dirty & ILO_STATE_CC_3DSTATE_WM_DEPTH_STENCIL)
+ gen8_3DSTATE_WM_DEPTH_STENCIL(r->builder, &vec->blend->cc);
/* 3DSTATE_WM_HZ_OP and 3DSTATE_WM_CHROMAKEY */
if (r->hw_ctx_changed) {
@@ -128,18 +125,15 @@ gen8_draw_wm(struct ilo_render *r,
/* 3DSTATE_PS */
if (DIRTY(FS) || r->instruction_bo_changed)
- gen8_3DSTATE_PS(r->builder, vec->fs);
+ gen8_3DSTATE_PS(r->builder, &cso->ps, kernel_offset);
/* 3DSTATE_PS_EXTRA */
- if (DIRTY(FS) || DIRTY(DSA) || DIRTY(BLEND)) {
- const bool cc_may_kill = (vec->dsa->dw_blend_alpha ||
- vec->blend->alpha_to_coverage);
- gen8_3DSTATE_PS_EXTRA(r->builder, vec->fs, cc_may_kill, false);
- }
+ if (DIRTY(FS))
+ gen8_3DSTATE_PS_EXTRA(r->builder, &cso->ps);
/* 3DSTATE_PS_BLEND */
- if (DIRTY(BLEND) || DIRTY(FB) || DIRTY(DSA))
- gen8_3DSTATE_PS_BLEND(r->builder, vec->blend, &vec->fb, vec->dsa);
+ if (session->cc_delta.dirty & ILO_STATE_CC_3DSTATE_PS_BLEND)
+ gen8_3DSTATE_PS_BLEND(r->builder, &vec->blend->cc);
/* 3DSTATE_SCISSOR_STATE_POINTERS */
if (session->scissor_changed) {
@@ -149,7 +143,7 @@ gen8_draw_wm(struct ilo_render *r,
/* 3DSTATE_DEPTH_BUFFER and 3DSTATE_CLEAR_PARAMS */
if (DIRTY(FB) || r->batch_bo_changed) {
- const struct ilo_zs_surface *zs;
+ const struct ilo_state_zs *zs;
uint32_t clear_params;
if (vec->fb.state.zsbuf) {
@@ -170,7 +164,7 @@ gen8_draw_wm(struct ilo_render *r,
gen8_wa_pre_depth(r);
- gen6_3DSTATE_DEPTH_BUFFER(r->builder, zs, false);
+ gen6_3DSTATE_DEPTH_BUFFER(r->builder, zs);
gen6_3DSTATE_HIER_DEPTH_BUFFER(r->builder, zs);
gen6_3DSTATE_STENCIL_BUFFER(r->builder, zs);
gen7_3DSTATE_CLEAR_PARAMS(r->builder, clear_params);
@@ -183,14 +177,8 @@ gen8_draw_wm_sample_pattern(struct ilo_render *r,
struct ilo_render_draw_session *session)
{
/* 3DSTATE_SAMPLE_PATTERN */
- if (r->hw_ctx_changed) {
- gen8_3DSTATE_SAMPLE_PATTERN(r->builder,
- &r->sample_pattern_1x,
- &r->sample_pattern_2x,
- &r->sample_pattern_4x,
- r->sample_pattern_8x,
- r->sample_pattern_16x);
- }
+ if (r->hw_ctx_changed)
+ gen8_3DSTATE_SAMPLE_PATTERN(r->builder, &r->sample_pattern);
}
static void
@@ -198,15 +186,13 @@ gen8_draw_wm_multisample(struct ilo_render *r,
const struct ilo_state_vector *vec,
struct ilo_render_draw_session *session)
{
- /* 3DSTATE_MULTISAMPLE and 3DSTATE_SAMPLE_MASK */
- if (DIRTY(SAMPLE_MASK) || DIRTY(FB) || DIRTY(RASTERIZER)) {
- gen8_3DSTATE_MULTISAMPLE(r->builder, vec->fb.num_samples,
- vec->rasterizer->state.half_pixel_center);
-
- gen7_3DSTATE_SAMPLE_MASK(r->builder,
- (vec->fb.num_samples > 1) ? vec->sample_mask : 0x1,
- vec->fb.num_samples);
- }
+ /* 3DSTATE_MULTISAMPLE */
+ if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_MULTISAMPLE)
+ gen8_3DSTATE_MULTISAMPLE(r->builder, &vec->rasterizer->rs);
+
+ /* 3DSTATE_SAMPLE_MASK */
+ if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_SAMPLE_MASK)
+ gen6_3DSTATE_SAMPLE_MASK(r->builder, &vec->rasterizer->rs);
}
static void
@@ -214,36 +200,38 @@ gen8_draw_vf(struct ilo_render *r,
const struct ilo_state_vector *vec,
struct ilo_render_draw_session *session)
{
- int i;
-
/* 3DSTATE_INDEX_BUFFER */
- if (DIRTY(IB) || r->batch_bo_changed)
- gen8_3DSTATE_INDEX_BUFFER(r->builder, &vec->ib);
+ if ((session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_INDEX_BUFFER) ||
+ DIRTY(IB) || r->batch_bo_changed)
+ gen8_3DSTATE_INDEX_BUFFER(r->builder, &vec->ve->vf, &vec->ib.ib);
/* 3DSTATE_VF */
- if (session->primitive_restart_changed) {
- gen75_3DSTATE_VF(r->builder, vec->draw->primitive_restart,
- vec->draw->restart_index);
- }
+ if (session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VF)
+ gen75_3DSTATE_VF(r->builder, &vec->ve->vf);
/* 3DSTATE_VERTEX_BUFFERS */
- if (DIRTY(VB) || DIRTY(VE) || r->batch_bo_changed)
- gen6_3DSTATE_VERTEX_BUFFERS(r->builder, vec->ve, &vec->vb);
+ if ((session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VERTEX_BUFFERS) ||
+ DIRTY(VB) || DIRTY(VE) || r->batch_bo_changed) {
+ gen6_3DSTATE_VERTEX_BUFFERS(r->builder, &vec->ve->vf,
+ vec->vb.vb, vec->ve->vb_count);
+ }
/* 3DSTATE_VERTEX_ELEMENTS */
- if (DIRTY(VE))
- gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, vec->ve);
+ if (session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VERTEX_ELEMENTS)
+ gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &vec->ve->vf);
+
+ gen8_3DSTATE_VF_TOPOLOGY(r->builder, vec->draw_info.topology);
- gen8_3DSTATE_VF_TOPOLOGY(r->builder, vec->draw->mode);
+ if (session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VF_INSTANCING) {
+ const uint8_t attr_count = ilo_state_vf_get_attr_count(&vec->ve->vf);
+ uint8_t i;
- for (i = 0; i < vec->ve->vb_count; i++) {
- gen8_3DSTATE_VF_INSTANCING(r->builder, i,
- vec->ve->instance_divisors[i]);
+ for (i = 0; i < attr_count; i++)
+ gen8_3DSTATE_VF_INSTANCING(r->builder, &vec->ve->vf, i);
}
- gen8_3DSTATE_VF_SGVS(r->builder,
- false, 0, 0,
- false, 0, 0);
+ if (session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VF_SGVS)
+ gen8_3DSTATE_VF_SGVS(r->builder, &vec->ve->vf);
}
void
@@ -281,7 +269,7 @@ ilo_render_emit_draw_commands_gen8(struct ilo_render *render,
gen6_draw_sf_rect(render, vec, session);
gen8_draw_vf(render, vec, session);
- ilo_render_3dprimitive(render, vec->draw, &vec->ib);
+ ilo_render_3dprimitive(render, &vec->draw_info);
}
int
@@ -365,17 +353,13 @@ ilo_render_emit_rectlist_commands_gen8(struct ilo_render *r,
const struct ilo_blitter *blitter,
const struct ilo_render_rectlist_session *session)
{
- uint32_t op;
-
ILO_DEV_ASSERT(r->dev, 8, 8);
gen8_wa_pre_depth(r);
if (blitter->uses & (ILO_BLITTER_USE_FB_DEPTH |
- ILO_BLITTER_USE_FB_STENCIL)) {
- gen6_3DSTATE_DEPTH_BUFFER(r->builder,
- &blitter->fb.dst.u.zs, true);
- }
+ ILO_BLITTER_USE_FB_STENCIL))
+ gen6_3DSTATE_DEPTH_BUFFER(r->builder, &blitter->fb.dst.u.zs);
if (blitter->uses & ILO_BLITTER_USE_FB_DEPTH) {
gen6_3DSTATE_HIER_DEPTH_BUFFER(r->builder,
@@ -393,27 +377,8 @@ ilo_render_emit_rectlist_commands_gen8(struct ilo_render *r,
gen6_3DSTATE_DRAWING_RECTANGLE(r->builder, 0, 0,
blitter->fb.width, blitter->fb.height);
- switch (blitter->op) {
- case ILO_BLITTER_RECTLIST_CLEAR_ZS:
- op = 0;
- if (blitter->uses & ILO_BLITTER_USE_FB_DEPTH)
- op |= GEN8_WM_HZ_DW1_DEPTH_CLEAR;
- if (blitter->uses & ILO_BLITTER_USE_FB_STENCIL)
- op |= GEN8_WM_HZ_DW1_STENCIL_CLEAR;
- break;
- case ILO_BLITTER_RECTLIST_RESOLVE_Z:
- op = GEN8_WM_HZ_DW1_DEPTH_RESOLVE;
- break;
- case ILO_BLITTER_RECTLIST_RESOLVE_HIZ:
- op = GEN8_WM_HZ_DW1_HIZ_RESOLVE;
- break;
- default:
- op = 0;
- break;
- }
-
- gen8_3DSTATE_WM_HZ_OP(r->builder, op, blitter->fb.width,
- blitter->fb.height, blitter->fb.num_samples);
+ gen8_3DSTATE_WM_HZ_OP(r->builder, &blitter->fb.rs,
+ blitter->fb.width, blitter->fb.height);
ilo_render_pipe_control(r, GEN6_PIPE_CONTROL_WRITE_IMM);
diff --git a/src/gallium/drivers/ilo/ilo_render_media.c b/src/gallium/drivers/ilo/ilo_render_media.c
index 387920a912c..a0de0024d61 100644
--- a/src/gallium/drivers/ilo/ilo_render_media.c
+++ b/src/gallium/drivers/ilo/ilo_render_media.c
@@ -30,6 +30,7 @@
#include "core/ilo_builder_mi.h"
#include "core/ilo_builder_render.h"
+#include "ilo_shader.h"
#include "ilo_state.h"
#include "ilo_render_gen.h"
@@ -206,7 +207,7 @@ ilo_render_emit_launch_grid_commands(struct ilo_render *render,
gen6_state_base_address(render->builder, true);
- gen6_MEDIA_VFE_STATE(render->builder, pcb_size, use_slm);
+ gen6_MEDIA_VFE_STATE(render->builder, &session->compute);
if (pcb_size)
gen6_MEDIA_CURBE_LOAD(render->builder, pcb, pcb_size);
diff --git a/src/gallium/drivers/ilo/ilo_render_surface.c b/src/gallium/drivers/ilo/ilo_render_surface.c
index b345dfb4fc4..ad053564294 100644
--- a/src/gallium/drivers/ilo/ilo_render_surface.c
+++ b/src/gallium/drivers/ilo/ilo_render_surface.c
@@ -29,11 +29,65 @@
#include "ilo_common.h"
#include "ilo_blitter.h"
+#include "ilo_resource.h"
+#include "ilo_shader.h"
#include "ilo_state.h"
#include "ilo_render_gen.h"
#define DIRTY(state) (session->pipe_dirty & ILO_DIRTY_ ## state)
+static inline uint32_t
+gen6_so_SURFACE_STATE(struct ilo_builder *builder,
+ const struct pipe_stream_output_target *so,
+ const struct pipe_stream_output_info *so_info,
+ int so_index)
+{
+ struct ilo_buffer *buf = ilo_buffer(so->buffer);
+ struct ilo_state_surface_buffer_info info;
+ struct ilo_state_surface surf;
+
+ ILO_DEV_ASSERT(builder->dev, 6, 6);
+
+ memset(&info, 0, sizeof(info));
+ info.buf = buf;
+ info.access = ILO_STATE_SURFACE_ACCESS_DP_SVB;
+
+ switch (so_info->output[so_index].num_components) {
+ case 1:
+ info.format = GEN6_FORMAT_R32_FLOAT;
+ info.format_size = 4;
+ break;
+ case 2:
+ info.format = GEN6_FORMAT_R32G32_FLOAT;
+ info.format_size = 8;
+ break;
+ case 3:
+ info.format = GEN6_FORMAT_R32G32B32_FLOAT;
+ info.format_size = 12;
+ break;
+ case 4:
+ info.format = GEN6_FORMAT_R32G32B32A32_FLOAT;
+ info.format_size = 16;
+ break;
+ default:
+ assert(!"unexpected SO components length");
+ info.format = GEN6_FORMAT_R32_FLOAT;
+ info.format_size = 4;
+ break;
+ }
+
+ info.struct_size =
+ so_info->stride[so_info->output[so_index].output_buffer] * 4;
+ info.offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
+ info.size = so->buffer_size - so_info->output[so_index].dst_offset * 4;
+
+ memset(&surf, 0, sizeof(surf));
+ ilo_state_surface_init_for_buffer(&surf, builder->dev, &info);
+ surf.bo = info.buf->bo;
+
+ return gen6_SURFACE_STATE(builder, &surf);
+}
+
static void
gen6_emit_draw_surface_rt(struct ilo_render *r,
const struct ilo_state_vector *vec,
@@ -64,11 +118,9 @@ gen6_emit_draw_surface_rt(struct ilo_render *r,
(const struct ilo_surface_cso *) fb->state.cbufs[i];
assert(surface->is_rt);
- surface_state[i] =
- gen6_SURFACE_STATE(r->builder, &surface->u.rt, true);
+ surface_state[i] = gen6_SURFACE_STATE(r->builder, &surface->u.rt);
} else {
- surface_state[i] =
- gen6_SURFACE_STATE(r->builder, &fb->null_rt, true);
+ surface_state[i] = gen6_SURFACE_STATE(r->builder, &fb->null_rt);
}
}
}
@@ -173,8 +225,7 @@ gen6_emit_draw_surface_view(struct ilo_render *r,
const struct ilo_view_cso *cso =
(const struct ilo_view_cso *) view->states[i];
- surface_state[i] =
- gen6_SURFACE_STATE(r->builder, &cso->surface, false);
+ surface_state[i] = gen6_SURFACE_STATE(r->builder, &cso->surface);
} else {
surface_state[i] = 0;
}
@@ -228,12 +279,10 @@ gen6_emit_draw_surface_const(struct ilo_render *r,
for (i = 0; i < count; i++) {
const struct ilo_cbuf_cso *cso = &cbuf->cso[i];
- if (cso->resource) {
- surface_state[i] = gen6_SURFACE_STATE(r->builder,
- &cso->surface, false);
- } else {
+ if (cso->resource)
+ surface_state[i] = gen6_SURFACE_STATE(r->builder, &cso->surface);
+ else
surface_state[i] = 0;
- }
}
}
@@ -406,8 +455,7 @@ gen6_emit_launch_grid_surface_view(struct ilo_render *r,
const struct ilo_view_cso *cso =
(const struct ilo_view_cso *) view->states[i];
- surface_state[i] =
- gen6_SURFACE_STATE(r->builder, &cso->surface, false);
+ surface_state[i] = gen6_SURFACE_STATE(r->builder, &cso->surface);
} else {
surface_state[i] = 0;
}
@@ -421,7 +469,8 @@ gen6_emit_launch_grid_surface_const(struct ilo_render *r,
{
const struct ilo_shader_state *cs = vec->cs;
uint32_t *surface_state = r->state.cs.SURFACE_STATE;
- struct ilo_view_surface view;
+ struct ilo_state_surface_buffer_info info;
+ struct ilo_state_surface surf;
int base, count;
ILO_DEV_ASSERT(r->dev, 7, 7.5);
@@ -432,15 +481,22 @@ gen6_emit_launch_grid_surface_const(struct ilo_render *r,
if (!count)
return;
- ilo_gpe_init_view_surface_for_buffer(r->dev,
- ilo_buffer(session->input->buffer),
- session->input->buffer_offset,
- session->input->buffer_size,
- 1, PIPE_FORMAT_NONE,
- false, false, &view);
+ memset(&info, 0, sizeof(info));
+ info.buf = ilo_buffer(session->input->buffer);
+ info.access = ILO_STATE_SURFACE_ACCESS_DP_UNTYPED;
+ info.format = GEN6_FORMAT_RAW;
+ info.format_size = 1;
+ info.struct_size = 1;
+ info.readonly = true;
+ info.offset = session->input->buffer_offset;
+ info.size = session->input->buffer_size;
+
+ memset(&surf, 0, sizeof(surf));
+ ilo_state_surface_init_for_buffer(&surf, r->dev, &info);
+ surf.bo = info.buf->bo;
assert(count == 1 && session->input->buffer);
- surface_state[base] = gen6_SURFACE_STATE(r->builder, &view, false);
+ surface_state[base] = gen6_SURFACE_STATE(r->builder, &surf);
}
static void
@@ -483,14 +539,24 @@ gen6_emit_launch_grid_surface_global(struct ilo_render *r,
for (i = 0; i < count; i++) {
if (i < vec->global_binding.count && bindings[i].resource) {
const struct ilo_buffer *buf = ilo_buffer(bindings[i].resource);
- struct ilo_view_surface view;
+ struct ilo_state_surface_buffer_info info;
+ struct ilo_state_surface surf;
assert(bindings[i].resource->target == PIPE_BUFFER);
- ilo_gpe_init_view_surface_for_buffer(r->dev, buf, 0, buf->bo_size,
- 1, PIPE_FORMAT_NONE, true, true, &view);
- surface_state[i] =
- gen6_SURFACE_STATE(r->builder, &view, true);
+ memset(&info, 0, sizeof(info));
+ info.buf = buf;
+ info.access = ILO_STATE_SURFACE_ACCESS_DP_UNTYPED;
+ info.format = GEN6_FORMAT_RAW;
+ info.format_size = 1;
+ info.struct_size = 1;
+ info.size = buf->bo_size;
+
+ memset(&surf, 0, sizeof(surf));
+ ilo_state_surface_init_for_buffer(&surf, r->dev, &info);
+ surf.bo = info.buf->bo;
+
+ surface_state[i] = gen6_SURFACE_STATE(r->builder, &surf);
} else {
surface_state[i] = 0;
}
diff --git a/src/gallium/drivers/ilo/ilo_resource.c b/src/gallium/drivers/ilo/ilo_resource.c
index ad4852278d0..be9fd10a84c 100644
--- a/src/gallium/drivers/ilo/ilo_resource.c
+++ b/src/gallium/drivers/ilo/ilo_resource.c
@@ -178,8 +178,8 @@ tex_create_bo(struct ilo_texture *tex)
if (!bo)
return false;
- ilo_image_set_bo(&tex->image, bo);
- intel_bo_unref(bo);
+ intel_bo_unref(tex->image.bo);
+ tex->image.bo = bo;
return true;
}
@@ -223,7 +223,7 @@ tex_create_hiz(struct ilo_texture *tex)
if (!bo)
return false;
- ilo_image_set_aux_bo(&tex->image, bo);
+ tex->image.aux.bo = bo;
if (tex->imported) {
unsigned lv;
@@ -256,7 +256,7 @@ tex_create_mcs(struct ilo_texture *tex)
if (!bo)
return false;
- ilo_image_set_aux_bo(&tex->image, bo);
+ tex->image.aux.bo = bo;
return true;
}
@@ -267,7 +267,8 @@ tex_destroy(struct ilo_texture *tex)
if (tex->separate_s8)
tex_destroy(tex->separate_s8);
- ilo_image_cleanup(&tex->image);
+ intel_bo_unref(tex->image.bo);
+ intel_bo_unref(tex->image.aux.bo);
tex_free_slices(tex);
FREE(tex);
@@ -287,15 +288,13 @@ tex_alloc_bos(struct ilo_texture *tex)
switch (tex->image.aux.type) {
case ILO_IMAGE_AUX_HIZ:
- if (!tex_create_hiz(tex)) {
- /* Separate Stencil Buffer requires HiZ to be enabled */
- if (ilo_dev_gen(&is->dev) == ILO_GEN(6) &&
- tex->image.separate_stencil)
- return false;
- }
+ if (!tex_create_hiz(tex) &&
+ !ilo_image_disable_aux(&tex->image, &is->dev))
+ return false;
break;
case ILO_IMAGE_AUX_MCS:
- if (!tex_create_mcs(tex))
+ if (!tex_create_mcs(tex) &&
+ !ilo_image_disable_aux(&tex->image, &is->dev))
return false;
break;
default:
@@ -328,8 +327,7 @@ tex_import_handle(struct ilo_texture *tex,
return false;
}
- ilo_image_set_bo(&tex->image, bo);
- intel_bo_unref(bo);
+ tex->image.bo = bo;
tex->imported = true;
@@ -427,8 +425,8 @@ buf_create_bo(struct ilo_buffer_resource *buf)
if (!bo)
return false;
- ilo_buffer_set_bo(&buf->buffer, bo);
- intel_bo_unref(bo);
+ intel_bo_unref(buf->buffer.bo);
+ buf->buffer.bo = bo;
return true;
}
@@ -436,7 +434,7 @@ buf_create_bo(struct ilo_buffer_resource *buf)
static void
buf_destroy(struct ilo_buffer_resource *buf)
{
- ilo_buffer_cleanup(&buf->buffer);
+ intel_bo_unref(buf->buffer.bo);
FREE(buf);
}
@@ -445,6 +443,7 @@ buf_create(struct pipe_screen *screen, const struct pipe_resource *templ)
{
const struct ilo_screen *is = ilo_screen(screen);
struct ilo_buffer_resource *buf;
+ unsigned size;
buf = CALLOC_STRUCT(ilo_buffer_resource);
if (!buf)
@@ -454,8 +453,25 @@ buf_create(struct pipe_screen *screen, const struct pipe_resource *templ)
buf->base.screen = screen;
pipe_reference_init(&buf->base.reference, 1);
- ilo_buffer_init(&buf->buffer, &is->dev,
- templ->width0, templ->bind, templ->flags);
+ size = templ->width0;
+
+ /*
+ * As noted in ilo_format_translate(), we treat some 3-component formats as
+ * 4-component formats to work around hardware limitations. Imagine the
+ * case where the vertex buffer holds a single PIPE_FORMAT_R16G16B16_FLOAT
+ * vertex, and buf->bo_size is 6. The hardware would fail to fetch it at
+ * boundary check because the vertex buffer is expected to hold a
+ * PIPE_FORMAT_R16G16B16A16_FLOAT vertex and that takes at least 8 bytes.
+ *
+ * For the workaround to work, we should add 2 to the bo size. But that
+ * would waste a page when the bo size is already page aligned. Let's
+ * round it to page size for now and revisit this when needed.
+ */
+ if ((templ->bind & PIPE_BIND_VERTEX_BUFFER) &&
+ ilo_dev_gen(&is->dev) < ILO_GEN(7.5))
+ size = align(size, 4096);
+
+ ilo_buffer_init(&buf->buffer, &is->dev, size, templ->bind, templ->flags);
if (buf->buffer.bo_size < templ->width0 ||
buf->buffer.bo_size > ilo_max_resource_size ||
diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c
index 918af0820de..94105559b80 100644
--- a/src/gallium/drivers/ilo/ilo_screen.c
+++ b/src/gallium/drivers/ilo/ilo_screen.c
@@ -31,11 +31,10 @@
#include "vl/vl_decoder.h"
#include "vl/vl_video_buffer.h"
#include "genhw/genhw.h" /* for GEN6_REG_TIMESTAMP */
-#include "core/ilo_fence.h"
-#include "core/ilo_format.h"
#include "core/intel_winsys.h"
#include "ilo_context.h"
+#include "ilo_format.h"
#include "ilo_resource.h"
#include "ilo_transfer.h" /* for ILO_TRANSFER_MAP_BUFFER_ALIGNMENT */
#include "ilo_public.h"
@@ -43,8 +42,7 @@
struct pipe_fence_handle {
struct pipe_reference reference;
-
- struct ilo_fence fence;
+ struct intel_bo *seqno_bo;
};
static float
@@ -347,7 +345,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_INDEP_BLEND_FUNC:
return true;
case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
- return (ilo_dev_gen(&is->dev) >= ILO_GEN(7)) ? 2048 : 512;
+ return (ilo_dev_gen(&is->dev) >= ILO_GEN(7.5)) ? 2048 : 512;
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
@@ -458,6 +456,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_SAMPLER_VIEW_TARGET:
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
+ case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
return 0;
case PIPE_CAP_VENDOR_ID:
@@ -641,7 +640,7 @@ ilo_screen_fence_reference(struct pipe_screen *screen,
STATIC_ASSERT(&((struct pipe_fence_handle *) NULL)->reference == NULL);
if (pipe_reference(&old->reference, &fence->reference)) {
- ilo_fence_cleanup(&old->fence);
+ intel_bo_unref(old->seqno_bo);
FREE(old);
}
}
@@ -654,10 +653,14 @@ ilo_screen_fence_finish(struct pipe_screen *screen,
const int64_t wait_timeout = (timeout > INT64_MAX) ? -1 : timeout;
bool signaled;
- signaled = ilo_fence_wait(&fence->fence, wait_timeout);
+ signaled = (!fence->seqno_bo ||
+ intel_bo_wait(fence->seqno_bo, wait_timeout) == 0);
+
/* XXX not thread safe */
- if (signaled)
- ilo_fence_set_seq_bo(&fence->fence, NULL);
+ if (signaled && fence->seqno_bo) {
+ intel_bo_unref(fence->seqno_bo);
+ fence->seqno_bo = NULL;
+ }
return signaled;
}
@@ -676,7 +679,6 @@ ilo_screen_fence_signalled(struct pipe_screen *screen,
struct pipe_fence_handle *
ilo_screen_fence_create(struct pipe_screen *screen, struct intel_bo *bo)
{
- struct ilo_screen *is = ilo_screen(screen);
struct pipe_fence_handle *fence;
fence = CALLOC_STRUCT(pipe_fence_handle);
@@ -685,8 +687,7 @@ ilo_screen_fence_create(struct pipe_screen *screen, struct intel_bo *bo)
pipe_reference_init(&fence->reference, 1);
- ilo_fence_init(&fence->fence, &is->dev);
- ilo_fence_set_seq_bo(&fence->fence, bo);
+ fence->seqno_bo = intel_bo_ref(bo);
return fence;
}
@@ -696,7 +697,7 @@ ilo_screen_destroy(struct pipe_screen *screen)
{
struct ilo_screen *is = ilo_screen(screen);
- ilo_dev_cleanup(&is->dev);
+ intel_winsys_destroy(is->dev.winsys);
FREE(is);
}
diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c
index 799db2cbfcb..5f2b01017e2 100644
--- a/src/gallium/drivers/ilo/ilo_shader.c
+++ b/src/gallium/drivers/ilo/ilo_shader.c
@@ -27,7 +27,6 @@
#include "genhw/genhw.h" /* for SBE setup */
#include "core/ilo_builder.h"
-#include "core/ilo_state_3d.h"
#include "core/intel_winsys.h"
#include "shader/ilo_shader_internal.h"
#include "tgsi/tgsi_parse.h"
@@ -557,39 +556,255 @@ ilo_shader_state_search_variant(struct ilo_shader_state *state,
}
static void
-copy_so_info(struct ilo_shader *sh,
- const struct pipe_stream_output_info *so_info)
+init_shader_urb(const struct ilo_shader *kernel,
+ const struct ilo_shader_state *state,
+ struct ilo_state_shader_urb_info *urb)
{
- unsigned i, attr;
+ urb->cv_input_attr_count = kernel->in.count;
+ urb->read_base = 0;
+ urb->read_count = kernel->in.count;
- if (!so_info->num_outputs)
+ urb->output_attr_count = kernel->out.count;
+ urb->user_cull_enables = 0x0;
+ urb->user_clip_enables = 0x0;
+}
+
+static void
+init_shader_kernel(const struct ilo_shader *kernel,
+ const struct ilo_shader_state *state,
+ struct ilo_state_shader_kernel_info *kern)
+{
+ kern->offset = 0;
+ kern->grf_start = kernel->in.start_grf;
+ kern->pcb_attr_count =
+ (kernel->pcb.cbuf0_size + kernel->pcb.clip_state_size + 15) / 16;
+ kern->scratch_size = 0;
+}
+
+static void
+init_shader_resource(const struct ilo_shader *kernel,
+ const struct ilo_shader_state *state,
+ struct ilo_state_shader_resource_info *resource)
+{
+ resource->sampler_count = state->info.num_samplers;
+ resource->surface_count = 0;
+ resource->has_uav = false;
+}
+
+static void
+init_vs(struct ilo_shader *kernel,
+ const struct ilo_shader_state *state)
+{
+ struct ilo_state_vs_info info;
+
+ memset(&info, 0, sizeof(info));
+
+ init_shader_urb(kernel, state, &info.urb);
+ init_shader_kernel(kernel, state, &info.kernel);
+ init_shader_resource(kernel, state, &info.resource);
+ info.dispatch_enable = true;
+ info.stats_enable = true;
+
+ if (ilo_dev_gen(state->info.dev) == ILO_GEN(6) && kernel->stream_output) {
+ struct ilo_state_gs_info gs_info;
+
+ memset(&gs_info, 0, sizeof(gs_info));
+
+ gs_info.urb.cv_input_attr_count = kernel->out.count;
+ gs_info.urb.read_count = kernel->out.count;
+ gs_info.kernel.grf_start = kernel->gs_start_grf;
+ gs_info.sol.sol_enable = true;
+ gs_info.sol.stats_enable = true;
+ gs_info.sol.render_disable = kernel->variant.u.vs.rasterizer_discard;
+ gs_info.sol.svbi_post_inc = kernel->svbi_post_inc;
+ gs_info.sol.tristrip_reorder = GEN7_REORDER_LEADING;
+ gs_info.dispatch_enable = true;
+ gs_info.stats_enable = true;
+
+ ilo_state_vs_init(&kernel->cso.vs_sol.vs, state->info.dev, &info);
+ ilo_state_gs_init(&kernel->cso.vs_sol.sol, state->info.dev, &gs_info);
+ } else {
+ ilo_state_vs_init(&kernel->cso.vs, state->info.dev, &info);
+ }
+}
+
+static void
+init_gs(struct ilo_shader *kernel,
+ const struct ilo_shader_state *state)
+{
+ const struct pipe_stream_output_info *so_info = &state->info.stream_output;
+ struct ilo_state_gs_info info;
+
+ memset(&info, 0, sizeof(info));
+
+ init_shader_urb(kernel, state, &info.urb);
+ init_shader_kernel(kernel, state, &info.kernel);
+ init_shader_resource(kernel, state, &info.resource);
+ info.dispatch_enable = true;
+ info.stats_enable = true;
+
+ if (so_info->num_outputs > 0) {
+ info.sol.sol_enable = true;
+ info.sol.stats_enable = true;
+ info.sol.render_disable = kernel->variant.u.gs.rasterizer_discard;
+ info.sol.tristrip_reorder = GEN7_REORDER_LEADING;
+ }
+
+ ilo_state_gs_init(&kernel->cso.gs, state->info.dev, &info);
+}
+
+static void
+init_ps(struct ilo_shader *kernel,
+ const struct ilo_shader_state *state)
+{
+ struct ilo_state_ps_info info;
+
+ memset(&info, 0, sizeof(info));
+
+ init_shader_kernel(kernel, state, &info.kernel_8);
+ init_shader_resource(kernel, state, &info.resource);
+
+ info.io.has_rt_write = true;
+ info.io.posoffset = GEN6_POSOFFSET_NONE;
+ info.io.attr_count = kernel->in.count;
+ info.io.use_z = kernel->in.has_pos;
+ info.io.use_w = kernel->in.has_pos;
+ info.io.use_coverage_mask = false;
+ info.io.pscdepth = (kernel->out.has_pos) ?
+ GEN7_PSCDEPTH_ON : GEN7_PSCDEPTH_OFF;
+ info.io.write_pixel_mask = kernel->has_kill;
+ info.io.write_omask = false;
+
+ info.params.sample_mask = 0x1;
+ info.params.earlyz_control_psexec = false;
+ info.params.alpha_may_kill = false;
+ info.params.dual_source_blending = false;
+ info.params.has_writeable_rt = true;
+
+ info.valid_kernels = GEN6_PS_DISPATCH_8;
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 284:
+ *
+ * "(MSDISPMODE_PERSAMPLE) This is the high-quality multisample mode
+ * where (over and above PERPIXEL mode) the PS is run for each covered
+ * sample. This mode is also used for "normal" non-multisample
+ * rendering (aka 1X), given Number of Multisamples is programmed to
+ * NUMSAMPLES_1."
+ */
+ info.per_sample_dispatch = true;
+
+ info.rt_clear_enable = false;
+ info.rt_resolve_enable = false;
+ info.cv_per_sample_interp = false;
+ info.cv_has_earlyz_op = false;
+ info.sample_count_one = true;
+ info.cv_has_depth_buffer = true;
+
+ ilo_state_ps_init(&kernel->cso.ps, state->info.dev, &info);
+
+ /* remember current parameters */
+ kernel->ps_params = info.params;
+}
+
+static void
+init_sol(struct ilo_shader *kernel,
+ const struct ilo_dev *dev,
+ const struct pipe_stream_output_info *so_info,
+ bool rasterizer_discard)
+{
+ struct ilo_state_sol_decl_info decls[4][PIPE_MAX_SO_OUTPUTS];
+ unsigned buf_offsets[PIPE_MAX_SO_BUFFERS];
+ struct ilo_state_sol_info info;
+ unsigned i;
+
+ if (!so_info->num_outputs) {
+ ilo_state_sol_init_disabled(&kernel->sol, dev, rasterizer_discard);
return;
+ }
+
+ memset(&info, 0, sizeof(info));
+ info.data = kernel->sol_data;
+ info.data_size = sizeof(kernel->sol_data);
+ info.sol_enable = true;
+ info.stats_enable = true;
+ info.tristrip_reorder = GEN7_REORDER_TRAILING;
+ info.render_disable = rasterizer_discard;
+ info.render_stream = 0;
+
+ for (i = 0; i < 4; i++) {
+ info.buffer_strides[i] = so_info->stride[i] * 4;
- sh->so_info = *so_info;
+ info.streams[i].cv_vue_attr_count = kernel->out.count;
+ info.streams[i].decls = decls[i];
+ }
+ memset(decls, 0, sizeof(decls));
+ memset(buf_offsets, 0, sizeof(buf_offsets));
for (i = 0; i < so_info->num_outputs; i++) {
+ const unsigned stream = so_info->output[i].stream;
+ const unsigned buffer = so_info->output[i].output_buffer;
+ struct ilo_state_sol_decl_info *decl;
+ unsigned attr;
+
/* figure out which attribute is sourced */
- for (attr = 0; attr < sh->out.count; attr++) {
- const int reg_idx = sh->out.register_indices[attr];
+ for (attr = 0; attr < kernel->out.count; attr++) {
+ const int reg_idx = kernel->out.register_indices[attr];
if (reg_idx == so_info->output[i].register_index)
break;
}
-
- if (attr < sh->out.count) {
- sh->so_info.output[i].register_index = attr;
- }
- else {
+ if (attr >= kernel->out.count) {
assert(!"stream output an undefined register");
- sh->so_info.output[i].register_index = 0;
+ attr = 0;
}
+ if (info.streams[stream].vue_read_count < attr + 1)
+ info.streams[stream].vue_read_count = attr + 1;
+
+ /* pad with holes first */
+ while (buf_offsets[buffer] < so_info->output[i].dst_offset) {
+ int num_dwords;
+
+ num_dwords = so_info->output[i].dst_offset - buf_offsets[buffer];
+ if (num_dwords > 4)
+ num_dwords = 4;
+
+ assert(info.streams[stream].decl_count < ARRAY_SIZE(decls[stream]));
+ decl = &decls[stream][info.streams[stream].decl_count];
+
+ decl->attr = 0;
+ decl->is_hole = true;
+ decl->component_base = 0;
+ decl->component_count = num_dwords;
+ decl->buffer = buffer;
+
+ info.streams[stream].decl_count++;
+ buf_offsets[buffer] += num_dwords;
+ }
+ assert(buf_offsets[buffer] == so_info->output[i].dst_offset);
+
+ assert(info.streams[stream].decl_count < ARRAY_SIZE(decls[stream]));
+ decl = &decls[stream][info.streams[stream].decl_count];
+
+ decl->attr = attr;
+ decl->is_hole = false;
/* PSIZE is at W channel */
- if (sh->out.semantic_names[attr] == TGSI_SEMANTIC_PSIZE) {
+ if (kernel->out.semantic_names[attr] == TGSI_SEMANTIC_PSIZE) {
assert(so_info->output[i].start_component == 0);
assert(so_info->output[i].num_components == 1);
- sh->so_info.output[i].start_component = 3;
+ decl->component_base = 3;
+ decl->component_count = 1;
+ } else {
+ decl->component_base = so_info->output[i].start_component;
+ decl->component_count = so_info->output[i].num_components;
}
+ decl->buffer = buffer;
+
+ info.streams[stream].decl_count++;
+ buf_offsets[buffer] += so_info->output[i].num_components;
}
+
+ ilo_state_sol_init(&kernel->sol, dev, &info);
}
/**
@@ -599,17 +814,20 @@ static struct ilo_shader *
ilo_shader_state_add_variant(struct ilo_shader_state *state,
const struct ilo_shader_variant *variant)
{
+ bool rasterizer_discard = false;
struct ilo_shader *sh;
switch (state->info.type) {
case PIPE_SHADER_VERTEX:
sh = ilo_shader_compile_vs(state, variant);
+ rasterizer_discard = variant->u.vs.rasterizer_discard;
break;
case PIPE_SHADER_FRAGMENT:
sh = ilo_shader_compile_fs(state, variant);
break;
case PIPE_SHADER_GEOMETRY:
sh = ilo_shader_compile_gs(state, variant);
+ rasterizer_discard = variant->u.gs.rasterizer_discard;
break;
case PIPE_SHADER_COMPUTE:
sh = ilo_shader_compile_cs(state, variant);
@@ -625,7 +843,8 @@ ilo_shader_state_add_variant(struct ilo_shader_state *state,
sh->variant = *variant;
- copy_so_info(sh, &state->info.stream_output);
+ init_sol(sh, state->info.dev, &state->info.stream_output,
+ rasterizer_discard);
ilo_shader_state_add_shader(state, sh);
@@ -665,13 +884,13 @@ ilo_shader_state_use_variant(struct ilo_shader_state *state,
if (construct_cso) {
switch (state->info.type) {
case PIPE_SHADER_VERTEX:
- ilo_gpe_init_vs_cso(state->info.dev, state, &sh->cso);
+ init_vs(sh, state);
break;
case PIPE_SHADER_GEOMETRY:
- ilo_gpe_init_gs_cso(state->info.dev, state, &sh->cso);
+ init_gs(sh, state);
break;
case PIPE_SHADER_FRAGMENT:
- ilo_gpe_init_fs_cso(state->info.dev, state, &sh->cso);
+ init_ps(sh, state);
break;
default:
break;
@@ -789,16 +1008,33 @@ ilo_shader_select_kernel(struct ilo_shader_state *shader,
const struct ilo_state_vector *vec,
uint32_t dirty)
{
- const struct ilo_shader * const cur = shader->shader;
struct ilo_shader_variant variant;
+ bool changed = false;
- if (!(shader->info.non_orthogonal_states & dirty))
- return false;
+ if (shader->info.non_orthogonal_states & dirty) {
+ const struct ilo_shader * const old = shader->shader;
+
+ ilo_shader_variant_init(&variant, &shader->info, vec);
+ ilo_shader_state_use_variant(shader, &variant);
+ changed = (shader->shader != old);
+ }
- ilo_shader_variant_init(&variant, &shader->info, vec);
- ilo_shader_state_use_variant(shader, &variant);
+ if (shader->info.type == PIPE_SHADER_FRAGMENT) {
+ struct ilo_shader *kernel = shader->shader;
- return (shader->shader != cur);
+ if (kernel->ps_params.sample_mask != vec->sample_mask ||
+ kernel->ps_params.alpha_may_kill != vec->blend->alpha_may_kill) {
+ kernel->ps_params.sample_mask = vec->sample_mask;
+ kernel->ps_params.alpha_may_kill = vec->blend->alpha_may_kill;
+
+ ilo_state_ps_set_params(&kernel->cso.ps, shader->info.dev,
+ &kernel->ps_params);
+
+ changed = true;
+ }
+ }
+
+ return changed;
}
static int
@@ -829,82 +1065,104 @@ route_attr(const int *semantics, const int *indices, int len,
* \return true if a different routing is selected
*/
bool
-ilo_shader_select_kernel_routing(struct ilo_shader_state *shader,
- const struct ilo_shader_state *source,
- const struct ilo_rasterizer_state *rasterizer)
+ilo_shader_select_kernel_sbe(struct ilo_shader_state *shader,
+ const struct ilo_shader_state *source,
+ const struct ilo_rasterizer_state *rasterizer)
{
- const uint32_t sprite_coord_enable = rasterizer->state.sprite_coord_enable;
+ const bool is_point = true;
const bool light_twoside = rasterizer->state.light_twoside;
+ const uint32_t sprite_coord_enable = rasterizer->state.sprite_coord_enable;
+ const int sprite_coord_mode = rasterizer->state.sprite_coord_mode;
struct ilo_shader *kernel = shader->shader;
struct ilo_kernel_routing *routing = &kernel->routing;
+ struct ilo_state_sbe_swizzle_info swizzles[ILO_STATE_SBE_MAX_SWIZZLE_COUNT];
+ struct ilo_state_sbe_info info;
const int *src_semantics, *src_indices;
- int src_len, max_src_slot;
+ int src_skip, src_len, src_slot;
int dst_len, dst_slot;
- /* we are constructing 3DSTATE_SBE here */
- ILO_DEV_ASSERT(shader->info.dev, 6, 8);
-
assert(kernel);
if (source) {
assert(source->shader);
+
src_semantics = source->shader->out.semantic_names;
src_indices = source->shader->out.semantic_indices;
src_len = source->shader->out.count;
- }
- else {
+ src_skip = 0;
+
+ assert(src_len >= 2 &&
+ src_semantics[0] == TGSI_SEMANTIC_PSIZE &&
+ src_semantics[1] == TGSI_SEMANTIC_POSITION);
+
+ /*
+ * skip PSIZE and POSITION (how about the optional CLIPDISTs?), unless
+ * they are all the source shader has and FS needs to read some
+ * attributes.
+ */
+ if (src_len > 2 || !kernel->in.count) {
+ src_semantics += 2;
+ src_indices += 2;
+ src_len -= 2;
+ src_skip = 2;
+ }
+ } else {
src_semantics = kernel->in.semantic_names;
src_indices = kernel->in.semantic_indices;
src_len = kernel->in.count;
+ src_skip = 0;
}
/* no change */
- if (kernel->routing_initialized &&
- routing->source_skip + routing->source_len <= src_len &&
- kernel->routing_sprite_coord_enable == sprite_coord_enable &&
- !memcmp(kernel->routing_src_semantics,
- &src_semantics[routing->source_skip],
- sizeof(kernel->routing_src_semantics[0]) * routing->source_len) &&
- !memcmp(kernel->routing_src_indices,
- &src_indices[routing->source_skip],
- sizeof(kernel->routing_src_indices[0]) * routing->source_len))
+ if (routing->initialized &&
+ routing->is_point == is_point &&
+ routing->light_twoside == light_twoside &&
+ routing->sprite_coord_enable == sprite_coord_enable &&
+ routing->sprite_coord_mode == sprite_coord_mode &&
+ routing->src_len <= src_len &&
+ !memcmp(routing->src_semantics, src_semantics,
+ sizeof(src_semantics[0]) * routing->src_len) &&
+ !memcmp(routing->src_indices, src_indices,
+ sizeof(src_indices[0]) * routing->src_len))
return false;
- if (source) {
- /* skip PSIZE and POSITION (how about the optional CLIPDISTs?) */
- assert(src_semantics[0] == TGSI_SEMANTIC_PSIZE);
- assert(src_semantics[1] == TGSI_SEMANTIC_POSITION);
- routing->source_skip = 2;
-
- routing->source_len = src_len - routing->source_skip;
- src_semantics += routing->source_skip;
- src_indices += routing->source_skip;
- }
- else {
- routing->source_skip = 0;
- routing->source_len = src_len;
- }
-
- routing->const_interp_enable = kernel->in.const_interp_enable;
- routing->point_sprite_enable = 0;
- routing->swizzle_enable = false;
-
- assert(kernel->in.count <= Elements(routing->swizzles));
- dst_len = MIN2(kernel->in.count, Elements(routing->swizzles));
- max_src_slot = -1;
+ routing->is_point = is_point;
+ routing->light_twoside = light_twoside;
+ routing->sprite_coord_enable = sprite_coord_enable;
+ routing->sprite_coord_mode = sprite_coord_mode;
+
+ assert(kernel->in.count <= Elements(swizzles));
+ dst_len = MIN2(kernel->in.count, Elements(swizzles));
+
+ memset(&swizzles, 0, sizeof(swizzles));
+ memset(&info, 0, sizeof(info));
+
+ info.attr_count = dst_len;
+ info.cv_vue_attr_count = src_skip + src_len;
+ info.vue_read_base = src_skip;
+ info.vue_read_count = 0;
+ info.has_min_read_count = true;
+ info.swizzle_enable = false;
+ info.swizzle_16_31 = false;
+ info.swizzle_count = 0;
+ info.swizzles = swizzles;
+ info.const_interp_enables = kernel->in.const_interp_enable;
+ info.point_sprite_enables = 0x0;
+ info.point_sprite_origin_lower_left =
+ (sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT);
+ info.cv_is_point = is_point;
for (dst_slot = 0; dst_slot < dst_len; dst_slot++) {
const int semantic = kernel->in.semantic_names[dst_slot];
const int index = kernel->in.semantic_indices[dst_slot];
- int src_slot;
if (semantic == TGSI_SEMANTIC_GENERIC &&
(sprite_coord_enable & (1 << index)))
- routing->point_sprite_enable |= 1 << dst_slot;
+ info.point_sprite_enables |= 1 << dst_slot;
if (source) {
- src_slot = route_attr(src_semantics, src_indices,
- routing->source_len, semantic, index);
+ src_slot = route_attr(src_semantics, src_indices, src_len,
+ semantic, index);
/*
* The source shader stage does not output this attribute. The value
@@ -918,58 +1176,47 @@ ilo_shader_select_kernel_routing(struct ilo_shader_state *shader,
*/
if (src_slot < 0)
src_slot = 0;
- }
- else {
+ } else {
src_slot = dst_slot;
}
- routing->swizzles[dst_slot] = src_slot;
-
/* use the following slot for two-sided lighting */
if (semantic == TGSI_SEMANTIC_COLOR && light_twoside &&
- src_slot + 1 < routing->source_len &&
+ src_slot + 1 < src_len &&
src_semantics[src_slot + 1] == TGSI_SEMANTIC_BCOLOR &&
src_indices[src_slot + 1] == index) {
- routing->swizzles[dst_slot] |= GEN8_SBE_SWIZ_INPUTATTR_FACING;
+ swizzles[dst_slot].attr_select = GEN6_INPUTATTR_FACING;
+ swizzles[dst_slot].attr = src_slot;
+ info.swizzle_enable = true;
src_slot++;
+ } else {
+ swizzles[dst_slot].attr_select = GEN6_INPUTATTR_NORMAL;
+ swizzles[dst_slot].attr = src_slot;
+ if (src_slot != dst_slot)
+ info.swizzle_enable = true;
}
- if (routing->swizzles[dst_slot] != dst_slot)
- routing->swizzle_enable = true;
+ swizzles[dst_slot].force_zeros = false;
- if (max_src_slot < src_slot)
- max_src_slot = src_slot;
+ if (info.vue_read_count < src_slot + 1)
+ info.vue_read_count = src_slot + 1;
}
- memset(&routing->swizzles[dst_slot], 0, sizeof(routing->swizzles) -
- sizeof(routing->swizzles[0]) * dst_slot);
+ if (info.swizzle_enable)
+ info.swizzle_count = dst_len;
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 248:
- *
- * "It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
- * 0 indicating no Vertex URB data to be read.
- *
- * This field should be set to the minimum length required to read the
- * maximum source attribute. The maximum source attribute is indicated
- * by the maximum value of the enabled Attribute # Source Attribute if
- * Attribute Swizzle Enable is set, Number of Output Attributes-1 if
- * enable is not set.
- *
- * read_length = ceiling((max_source_attr+1)/2)
- *
- * [errata] Corruption/Hang possible if length programmed larger than
- * recommended"
- */
- routing->source_len = max_src_slot + 1;
+ if (routing->initialized)
+ ilo_state_sbe_set_info(&routing->sbe, shader->info.dev, &info);
+ else
+ ilo_state_sbe_init(&routing->sbe, shader->info.dev, &info);
+
+ routing->src_len = info.vue_read_count;
+ memcpy(routing->src_semantics, src_semantics,
+ sizeof(src_semantics[0]) * routing->src_len);
+ memcpy(routing->src_indices, src_indices,
+ sizeof(src_indices[0]) * routing->src_len);
- /* remember the states of the source */
- kernel->routing_initialized = true;
- kernel->routing_sprite_coord_enable = sprite_coord_enable;
- memcpy(kernel->routing_src_semantics, src_semantics,
- sizeof(kernel->routing_src_semantics[0]) * routing->source_len);
- memcpy(kernel->routing_src_indices, src_indices,
- sizeof(kernel->routing_src_indices[0]) * routing->source_len);
+ routing->initialized = true;
return true;
}
@@ -1147,7 +1394,7 @@ ilo_shader_get_kernel_param(const struct ilo_shader_state *shader,
/**
* Return the CSO of the selected kernel.
*/
-const struct ilo_shader_cso *
+const union ilo_shader_cso *
ilo_shader_get_kernel_cso(const struct ilo_shader_state *shader)
{
const struct ilo_shader *kernel = shader->shader;
@@ -1163,22 +1410,28 @@ ilo_shader_get_kernel_cso(const struct ilo_shader_state *shader)
const struct pipe_stream_output_info *
ilo_shader_get_kernel_so_info(const struct ilo_shader_state *shader)
{
+ return &shader->info.stream_output;
+}
+
+const struct ilo_state_sol *
+ilo_shader_get_kernel_sol(const struct ilo_shader_state *shader)
+{
const struct ilo_shader *kernel = shader->shader;
assert(kernel);
- return &kernel->so_info;
+ return &kernel->sol;
}
/**
* Return the routing info of the selected kernel.
*/
-const struct ilo_kernel_routing *
-ilo_shader_get_kernel_routing(const struct ilo_shader_state *shader)
+const struct ilo_state_sbe *
+ilo_shader_get_kernel_sbe(const struct ilo_shader_state *shader)
{
const struct ilo_shader *kernel = shader->shader;
assert(kernel);
- return &kernel->routing;
+ return &kernel->routing.sbe;
}
diff --git a/src/gallium/drivers/ilo/ilo_shader.h b/src/gallium/drivers/ilo/ilo_shader.h
index 8a359001bb8..d9f02a4746a 100644
--- a/src/gallium/drivers/ilo/ilo_shader.h
+++ b/src/gallium/drivers/ilo/ilo_shader.h
@@ -28,6 +28,8 @@
#ifndef ILO_SHADER_H
#define ILO_SHADER_H
+#include "core/ilo_state_shader.h"
+
#include "ilo_common.h"
enum ilo_kernel_param {
@@ -81,23 +83,28 @@ enum ilo_kernel_param {
ILO_KERNEL_PARAM_COUNT,
};
-struct ilo_kernel_routing {
- uint32_t const_interp_enable;
- uint32_t point_sprite_enable;
- unsigned source_skip, source_len;
-
- bool swizzle_enable;
- uint16_t swizzles[16];
-};
-
struct intel_bo;
struct ilo_builder;
struct ilo_rasterizer_state;
struct ilo_shader_cache;
struct ilo_shader_state;
-struct ilo_shader_cso;
+struct ilo_state_sbe;
+struct ilo_state_sol;
struct ilo_state_vector;
+union ilo_shader_cso {
+ struct ilo_state_vs vs;
+ struct ilo_state_hs hs;
+ struct ilo_state_ds ds;
+ struct ilo_state_gs gs;
+ struct ilo_state_ps ps;
+
+ struct {
+ struct ilo_state_vs vs;
+ struct ilo_state_gs sol;
+ } vs_sol;
+};
+
struct ilo_shader_cache *
ilo_shader_cache_create(void);
@@ -151,9 +158,9 @@ ilo_shader_select_kernel(struct ilo_shader_state *shader,
uint32_t dirty);
bool
-ilo_shader_select_kernel_routing(struct ilo_shader_state *shader,
- const struct ilo_shader_state *source,
- const struct ilo_rasterizer_state *rasterizer);
+ilo_shader_select_kernel_sbe(struct ilo_shader_state *shader,
+ const struct ilo_shader_state *source,
+ const struct ilo_rasterizer_state *rasterizer);
uint32_t
ilo_shader_get_kernel_offset(const struct ilo_shader_state *shader);
@@ -162,13 +169,16 @@ int
ilo_shader_get_kernel_param(const struct ilo_shader_state *shader,
enum ilo_kernel_param param);
-const struct ilo_shader_cso *
+const union ilo_shader_cso *
ilo_shader_get_kernel_cso(const struct ilo_shader_state *shader);
const struct pipe_stream_output_info *
ilo_shader_get_kernel_so_info(const struct ilo_shader_state *shader);
-const struct ilo_kernel_routing *
-ilo_shader_get_kernel_routing(const struct ilo_shader_state *shader);
+const struct ilo_state_sol *
+ilo_shader_get_kernel_sol(const struct ilo_shader_state *shader);
+
+const struct ilo_state_sbe *
+ilo_shader_get_kernel_sbe(const struct ilo_shader_state *shader);
#endif /* ILO_SHADER_H */
diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c
index b1bd49a0b6c..63534f33fa7 100644
--- a/src/gallium/drivers/ilo/ilo_state.c
+++ b/src/gallium/drivers/ilo/ilo_state.c
@@ -25,16 +25,288 @@
* Chia-I Wu <[email protected]>
*/
-#include "core/ilo_state_3d.h"
+#include "util/u_dual_blend.h"
#include "util/u_dynarray.h"
+#include "util/u_framebuffer.h"
#include "util/u_helpers.h"
+#include "util/u_resource.h"
#include "util/u_upload_mgr.h"
#include "ilo_context.h"
+#include "ilo_format.h"
#include "ilo_resource.h"
#include "ilo_shader.h"
#include "ilo_state.h"
+/**
+ * Translate a pipe primitive type to the matching hardware primitive type.
+ */
+static enum gen_3dprim_type
+ilo_translate_draw_mode(unsigned mode)
+{
+ static const enum gen_3dprim_type prim_mapping[PIPE_PRIM_MAX] = {
+ [PIPE_PRIM_POINTS] = GEN6_3DPRIM_POINTLIST,
+ [PIPE_PRIM_LINES] = GEN6_3DPRIM_LINELIST,
+ [PIPE_PRIM_LINE_LOOP] = GEN6_3DPRIM_LINELOOP,
+ [PIPE_PRIM_LINE_STRIP] = GEN6_3DPRIM_LINESTRIP,
+ [PIPE_PRIM_TRIANGLES] = GEN6_3DPRIM_TRILIST,
+ [PIPE_PRIM_TRIANGLE_STRIP] = GEN6_3DPRIM_TRISTRIP,
+ [PIPE_PRIM_TRIANGLE_FAN] = GEN6_3DPRIM_TRIFAN,
+ [PIPE_PRIM_QUADS] = GEN6_3DPRIM_QUADLIST,
+ [PIPE_PRIM_QUAD_STRIP] = GEN6_3DPRIM_QUADSTRIP,
+ [PIPE_PRIM_POLYGON] = GEN6_3DPRIM_POLYGON,
+ [PIPE_PRIM_LINES_ADJACENCY] = GEN6_3DPRIM_LINELIST_ADJ,
+ [PIPE_PRIM_LINE_STRIP_ADJACENCY] = GEN6_3DPRIM_LINESTRIP_ADJ,
+ [PIPE_PRIM_TRIANGLES_ADJACENCY] = GEN6_3DPRIM_TRILIST_ADJ,
+ [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = GEN6_3DPRIM_TRISTRIP_ADJ,
+ };
+
+ assert(prim_mapping[mode]);
+
+ return prim_mapping[mode];
+}
+
+static enum gen_index_format
+ilo_translate_index_size(unsigned index_size)
+{
+ switch (index_size) {
+ case 1: return GEN6_INDEX_BYTE;
+ case 2: return GEN6_INDEX_WORD;
+ case 4: return GEN6_INDEX_DWORD;
+ default:
+ assert(!"unknown index size");
+ return GEN6_INDEX_BYTE;
+ }
+}
+
+static enum gen_mip_filter
+ilo_translate_mip_filter(unsigned filter)
+{
+ switch (filter) {
+ case PIPE_TEX_MIPFILTER_NEAREST: return GEN6_MIPFILTER_NEAREST;
+ case PIPE_TEX_MIPFILTER_LINEAR: return GEN6_MIPFILTER_LINEAR;
+ case PIPE_TEX_MIPFILTER_NONE: return GEN6_MIPFILTER_NONE;
+ default:
+ assert(!"unknown mipfilter");
+ return GEN6_MIPFILTER_NONE;
+ }
+}
+
+static int
+ilo_translate_img_filter(unsigned filter)
+{
+ switch (filter) {
+ case PIPE_TEX_FILTER_NEAREST: return GEN6_MAPFILTER_NEAREST;
+ case PIPE_TEX_FILTER_LINEAR: return GEN6_MAPFILTER_LINEAR;
+ default:
+ assert(!"unknown sampler filter");
+ return GEN6_MAPFILTER_NEAREST;
+ }
+}
+
+static enum gen_texcoord_mode
+ilo_translate_address_wrap(unsigned wrap)
+{
+ switch (wrap) {
+ case PIPE_TEX_WRAP_CLAMP: return GEN8_TEXCOORDMODE_HALF_BORDER;
+ case PIPE_TEX_WRAP_REPEAT: return GEN6_TEXCOORDMODE_WRAP;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return GEN6_TEXCOORDMODE_CLAMP;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return GEN6_TEXCOORDMODE_CLAMP_BORDER;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT: return GEN6_TEXCOORDMODE_MIRROR;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ default:
+ assert(!"unknown sampler wrap mode");
+ return GEN6_TEXCOORDMODE_WRAP;
+ }
+}
+
+static enum gen_aniso_ratio
+ilo_translate_max_anisotropy(unsigned max_anisotropy)
+{
+ switch (max_anisotropy) {
+ case 0: case 1: case 2: return GEN6_ANISORATIO_2;
+ case 3: case 4: return GEN6_ANISORATIO_4;
+ case 5: case 6: return GEN6_ANISORATIO_6;
+ case 7: case 8: return GEN6_ANISORATIO_8;
+ case 9: case 10: return GEN6_ANISORATIO_10;
+ case 11: case 12: return GEN6_ANISORATIO_12;
+ case 13: case 14: return GEN6_ANISORATIO_14;
+ default: return GEN6_ANISORATIO_16;
+ }
+}
+
+static enum gen_prefilter_op
+ilo_translate_shadow_func(unsigned func)
+{
+ /*
+ * For PIPE_FUNC_x, the reference value is on the left-hand side of the
+ * comparison, and 1.0 is returned when the comparison is true.
+ *
+ * For GEN6_PREFILTEROP_x, the reference value is on the right-hand side of
+ * the comparison, and 0.0 is returned when the comparison is true.
+ */
+ switch (func) {
+ case PIPE_FUNC_NEVER: return GEN6_PREFILTEROP_ALWAYS;
+ case PIPE_FUNC_LESS: return GEN6_PREFILTEROP_LEQUAL;
+ case PIPE_FUNC_EQUAL: return GEN6_PREFILTEROP_NOTEQUAL;
+ case PIPE_FUNC_LEQUAL: return GEN6_PREFILTEROP_LESS;
+ case PIPE_FUNC_GREATER: return GEN6_PREFILTEROP_GEQUAL;
+ case PIPE_FUNC_NOTEQUAL: return GEN6_PREFILTEROP_EQUAL;
+ case PIPE_FUNC_GEQUAL: return GEN6_PREFILTEROP_GREATER;
+ case PIPE_FUNC_ALWAYS: return GEN6_PREFILTEROP_NEVER;
+ default:
+ assert(!"unknown shadow compare function");
+ return GEN6_PREFILTEROP_NEVER;
+ }
+}
+
+static enum gen_front_winding
+ilo_translate_front_ccw(unsigned front_ccw)
+{
+ return (front_ccw) ? GEN6_FRONTWINDING_CCW : GEN6_FRONTWINDING_CW;
+}
+
+static enum gen_cull_mode
+ilo_translate_cull_face(unsigned cull_face)
+{
+ switch (cull_face) {
+ case PIPE_FACE_NONE: return GEN6_CULLMODE_NONE;
+ case PIPE_FACE_FRONT: return GEN6_CULLMODE_FRONT;
+ case PIPE_FACE_BACK: return GEN6_CULLMODE_BACK;
+ case PIPE_FACE_FRONT_AND_BACK: return GEN6_CULLMODE_BOTH;
+ default:
+ assert(!"unknown face culling");
+ return GEN6_CULLMODE_NONE;
+ }
+}
+
+static enum gen_fill_mode
+ilo_translate_poly_mode(unsigned poly_mode)
+{
+ switch (poly_mode) {
+ case PIPE_POLYGON_MODE_FILL: return GEN6_FILLMODE_SOLID;
+ case PIPE_POLYGON_MODE_LINE: return GEN6_FILLMODE_WIREFRAME;
+ case PIPE_POLYGON_MODE_POINT: return GEN6_FILLMODE_POINT;
+ default:
+ assert(!"unknown polygon mode");
+ return GEN6_FILLMODE_SOLID;
+ }
+}
+
+static enum gen_pixel_location
+ilo_translate_half_pixel_center(bool half_pixel_center)
+{
+ return (half_pixel_center) ? GEN6_PIXLOC_CENTER : GEN6_PIXLOC_UL_CORNER;
+}
+
+static enum gen_compare_function
+ilo_translate_compare_func(unsigned func)
+{
+ switch (func) {
+ case PIPE_FUNC_NEVER: return GEN6_COMPAREFUNCTION_NEVER;
+ case PIPE_FUNC_LESS: return GEN6_COMPAREFUNCTION_LESS;
+ case PIPE_FUNC_EQUAL: return GEN6_COMPAREFUNCTION_EQUAL;
+ case PIPE_FUNC_LEQUAL: return GEN6_COMPAREFUNCTION_LEQUAL;
+ case PIPE_FUNC_GREATER: return GEN6_COMPAREFUNCTION_GREATER;
+ case PIPE_FUNC_NOTEQUAL: return GEN6_COMPAREFUNCTION_NOTEQUAL;
+ case PIPE_FUNC_GEQUAL: return GEN6_COMPAREFUNCTION_GEQUAL;
+ case PIPE_FUNC_ALWAYS: return GEN6_COMPAREFUNCTION_ALWAYS;
+ default:
+ assert(!"unknown compare function");
+ return GEN6_COMPAREFUNCTION_NEVER;
+ }
+}
+
+static enum gen_stencil_op
+ilo_translate_stencil_op(unsigned stencil_op)
+{
+ switch (stencil_op) {
+ case PIPE_STENCIL_OP_KEEP: return GEN6_STENCILOP_KEEP;
+ case PIPE_STENCIL_OP_ZERO: return GEN6_STENCILOP_ZERO;
+ case PIPE_STENCIL_OP_REPLACE: return GEN6_STENCILOP_REPLACE;
+ case PIPE_STENCIL_OP_INCR: return GEN6_STENCILOP_INCRSAT;
+ case PIPE_STENCIL_OP_DECR: return GEN6_STENCILOP_DECRSAT;
+ case PIPE_STENCIL_OP_INCR_WRAP: return GEN6_STENCILOP_INCR;
+ case PIPE_STENCIL_OP_DECR_WRAP: return GEN6_STENCILOP_DECR;
+ case PIPE_STENCIL_OP_INVERT: return GEN6_STENCILOP_INVERT;
+ default:
+ assert(!"unknown stencil op");
+ return GEN6_STENCILOP_KEEP;
+ }
+}
+
+static enum gen_logic_op
+ilo_translate_logicop(unsigned logicop)
+{
+ switch (logicop) {
+ case PIPE_LOGICOP_CLEAR: return GEN6_LOGICOP_CLEAR;
+ case PIPE_LOGICOP_NOR: return GEN6_LOGICOP_NOR;
+ case PIPE_LOGICOP_AND_INVERTED: return GEN6_LOGICOP_AND_INVERTED;
+ case PIPE_LOGICOP_COPY_INVERTED: return GEN6_LOGICOP_COPY_INVERTED;
+ case PIPE_LOGICOP_AND_REVERSE: return GEN6_LOGICOP_AND_REVERSE;
+ case PIPE_LOGICOP_INVERT: return GEN6_LOGICOP_INVERT;
+ case PIPE_LOGICOP_XOR: return GEN6_LOGICOP_XOR;
+ case PIPE_LOGICOP_NAND: return GEN6_LOGICOP_NAND;
+ case PIPE_LOGICOP_AND: return GEN6_LOGICOP_AND;
+ case PIPE_LOGICOP_EQUIV: return GEN6_LOGICOP_EQUIV;
+ case PIPE_LOGICOP_NOOP: return GEN6_LOGICOP_NOOP;
+ case PIPE_LOGICOP_OR_INVERTED: return GEN6_LOGICOP_OR_INVERTED;
+ case PIPE_LOGICOP_COPY: return GEN6_LOGICOP_COPY;
+ case PIPE_LOGICOP_OR_REVERSE: return GEN6_LOGICOP_OR_REVERSE;
+ case PIPE_LOGICOP_OR: return GEN6_LOGICOP_OR;
+ case PIPE_LOGICOP_SET: return GEN6_LOGICOP_SET;
+ default:
+ assert(!"unknown logicop function");
+ return GEN6_LOGICOP_CLEAR;
+ }
+}
+
+static int
+ilo_translate_blend_func(unsigned blend)
+{
+ switch (blend) {
+ case PIPE_BLEND_ADD: return GEN6_BLENDFUNCTION_ADD;
+ case PIPE_BLEND_SUBTRACT: return GEN6_BLENDFUNCTION_SUBTRACT;
+ case PIPE_BLEND_REVERSE_SUBTRACT: return GEN6_BLENDFUNCTION_REVERSE_SUBTRACT;
+ case PIPE_BLEND_MIN: return GEN6_BLENDFUNCTION_MIN;
+ case PIPE_BLEND_MAX: return GEN6_BLENDFUNCTION_MAX;
+ default:
+ assert(!"unknown blend function");
+ return GEN6_BLENDFUNCTION_ADD;
+ }
+}
+
+static int
+ilo_translate_blend_factor(unsigned factor)
+{
+ switch (factor) {
+ case PIPE_BLENDFACTOR_ONE: return GEN6_BLENDFACTOR_ONE;
+ case PIPE_BLENDFACTOR_SRC_COLOR: return GEN6_BLENDFACTOR_SRC_COLOR;
+ case PIPE_BLENDFACTOR_SRC_ALPHA: return GEN6_BLENDFACTOR_SRC_ALPHA;
+ case PIPE_BLENDFACTOR_DST_ALPHA: return GEN6_BLENDFACTOR_DST_ALPHA;
+ case PIPE_BLENDFACTOR_DST_COLOR: return GEN6_BLENDFACTOR_DST_COLOR;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE;
+ case PIPE_BLENDFACTOR_CONST_COLOR: return GEN6_BLENDFACTOR_CONST_COLOR;
+ case PIPE_BLENDFACTOR_CONST_ALPHA: return GEN6_BLENDFACTOR_CONST_ALPHA;
+ case PIPE_BLENDFACTOR_SRC1_COLOR: return GEN6_BLENDFACTOR_SRC1_COLOR;
+ case PIPE_BLENDFACTOR_SRC1_ALPHA: return GEN6_BLENDFACTOR_SRC1_ALPHA;
+ case PIPE_BLENDFACTOR_ZERO: return GEN6_BLENDFACTOR_ZERO;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR: return GEN6_BLENDFACTOR_INV_SRC_COLOR;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return GEN6_BLENDFACTOR_INV_SRC_ALPHA;
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA: return GEN6_BLENDFACTOR_INV_DST_ALPHA;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR: return GEN6_BLENDFACTOR_INV_DST_COLOR;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR: return GEN6_BLENDFACTOR_INV_CONST_COLOR;
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return GEN6_BLENDFACTOR_INV_CONST_ALPHA;
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR: return GEN6_BLENDFACTOR_INV_SRC1_COLOR;
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: return GEN6_BLENDFACTOR_INV_SRC1_ALPHA;
+ default:
+ assert(!"unknown blend factor");
+ return GEN6_BLENDFACTOR_ONE;
+ }
+}
+
static void
finalize_shader_states(struct ilo_state_vector *vec)
{
@@ -78,7 +350,7 @@ finalize_shader_states(struct ilo_state_vector *vec)
/* need to setup SBE for FS */
if (type == PIPE_SHADER_FRAGMENT && vec->dirty &
(state | ILO_DIRTY_GS | ILO_DIRTY_VS | ILO_DIRTY_RASTERIZER)) {
- if (ilo_shader_select_kernel_routing(shader,
+ if (ilo_shader_select_kernel_sbe(shader,
(vec->gs) ? vec->gs : vec->vs, vec->rasterizer))
vec->dirty |= state;
}
@@ -97,7 +369,6 @@ finalize_cbuf_state(struct ilo_context *ilo,
~ilo_shader_get_kernel_param(sh, ILO_KERNEL_SKIP_CBUF0_UPLOAD);
while (upload_mask) {
- const enum pipe_format elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
unsigned offset, i;
i = u_bit_scan(&upload_mask);
@@ -105,14 +376,16 @@ finalize_cbuf_state(struct ilo_context *ilo,
if (cbuf->cso[i].resource)
continue;
- u_upload_data(ilo->uploader, 0, cbuf->cso[i].user_buffer_size,
+ u_upload_data(ilo->uploader, 0, cbuf->cso[i].info.size,
cbuf->cso[i].user_buffer, &offset, &cbuf->cso[i].resource);
- ilo_gpe_init_view_surface_for_buffer(ilo->dev,
- ilo_buffer(cbuf->cso[i].resource),
- offset, cbuf->cso[i].user_buffer_size,
- util_format_get_blocksize(elem_format), elem_format,
- false, false, &cbuf->cso[i].surface);
+ cbuf->cso[i].info.buf = ilo_buffer(cbuf->cso[i].resource);
+ cbuf->cso[i].info.offset = offset;
+
+ memset(&cbuf->cso[i].surface, 0, sizeof(cbuf->cso[i].surface));
+ ilo_state_surface_init_for_buffer(&cbuf->cso[i].surface,
+ ilo->dev, &cbuf->cso[i].info);
+ cbuf->cso[i].surface.bo = cbuf->cso[i].info.buf->bo;
ilo->state_vector.dirty |= ILO_DIRTY_CBUF;
}
@@ -133,114 +406,380 @@ finalize_constant_buffers(struct ilo_context *ilo)
static void
finalize_index_buffer(struct ilo_context *ilo)
{
+ const struct ilo_dev *dev = ilo->dev;
struct ilo_state_vector *vec = &ilo->state_vector;
const bool need_upload = (vec->draw->indexed &&
- (vec->ib.user_buffer || vec->ib.offset % vec->ib.index_size));
+ (vec->ib.state.user_buffer ||
+ vec->ib.state.offset % vec->ib.state.index_size));
struct pipe_resource *current_hw_res = NULL;
+ struct ilo_state_index_buffer_info info;
+ int64_t vertex_start_bias = 0;
if (!(vec->dirty & ILO_DIRTY_IB) && !need_upload)
return;
+ /* make sure vec->ib.hw_resource changes when reallocated */
pipe_resource_reference(&current_hw_res, vec->ib.hw_resource);
if (need_upload) {
- const unsigned offset = vec->ib.index_size * vec->draw->start;
- const unsigned size = vec->ib.index_size * vec->draw->count;
+ const unsigned offset = vec->ib.state.index_size * vec->draw->start;
+ const unsigned size = vec->ib.state.index_size * vec->draw->count;
unsigned hw_offset;
- if (vec->ib.user_buffer) {
+ if (vec->ib.state.user_buffer) {
u_upload_data(ilo->uploader, 0, size,
- vec->ib.user_buffer + offset, &hw_offset, &vec->ib.hw_resource);
- }
- else {
- u_upload_buffer(ilo->uploader, 0, vec->ib.offset + offset, size,
- vec->ib.buffer, &hw_offset, &vec->ib.hw_resource);
+ vec->ib.state.user_buffer + offset,
+ &hw_offset, &vec->ib.hw_resource);
+ } else {
+ u_upload_buffer(ilo->uploader, 0,
+ vec->ib.state.offset + offset, size, vec->ib.state.buffer,
+ &hw_offset, &vec->ib.hw_resource);
}
/* the HW offset should be aligned */
- assert(hw_offset % vec->ib.index_size == 0);
- vec->ib.draw_start_offset = hw_offset / vec->ib.index_size;
+ assert(hw_offset % vec->ib.state.index_size == 0);
+ vertex_start_bias = hw_offset / vec->ib.state.index_size;
/*
* INDEX[vec->draw->start] in the original buffer is INDEX[0] in the HW
* resource
*/
- vec->ib.draw_start_offset -= vec->draw->start;
- }
- else {
- pipe_resource_reference(&vec->ib.hw_resource, vec->ib.buffer);
+ vertex_start_bias -= vec->draw->start;
+ } else {
+ pipe_resource_reference(&vec->ib.hw_resource, vec->ib.state.buffer);
/* note that index size may be zero when the draw is not indexed */
if (vec->draw->indexed)
- vec->ib.draw_start_offset = vec->ib.offset / vec->ib.index_size;
- else
- vec->ib.draw_start_offset = 0;
+ vertex_start_bias = vec->ib.state.offset / vec->ib.state.index_size;
}
+ vec->draw_info.vertex_start += vertex_start_bias;
+
/* treat the IB as clean if the HW states do not change */
if (vec->ib.hw_resource == current_hw_res &&
- vec->ib.hw_index_size == vec->ib.index_size)
+ vec->ib.hw_index_size == vec->ib.state.index_size)
vec->dirty &= ~ILO_DIRTY_IB;
else
- vec->ib.hw_index_size = vec->ib.index_size;
+ vec->ib.hw_index_size = vec->ib.state.index_size;
pipe_resource_reference(&current_hw_res, NULL);
+
+ memset(&info, 0, sizeof(info));
+ if (vec->ib.hw_resource) {
+ info.buf = ilo_buffer(vec->ib.hw_resource);
+ info.size = info.buf->bo_size;
+ info.format = ilo_translate_index_size(vec->ib.hw_index_size);
+
+ vec->ib.ib.bo = info.buf->bo;
+ }
+
+ ilo_state_index_buffer_set_info(&vec->ib.ib, dev, &info);
}
static void
finalize_vertex_elements(struct ilo_context *ilo)
{
+ const struct ilo_dev *dev = ilo->dev;
+ struct ilo_state_vector *vec = &ilo->state_vector;
+ struct ilo_ve_state *ve = vec->ve;
+ const bool last_element_edge_flag = (vec->vs &&
+ ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_INPUT_EDGEFLAG));
+ const bool prepend_vertexid = (vec->vs &&
+ ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_INPUT_VERTEXID));
+ const bool prepend_instanceid = (vec->vs &&
+ ilo_shader_get_kernel_param(vec->vs,
+ ILO_KERNEL_VS_INPUT_INSTANCEID));
+ const enum gen_index_format index_format = (vec->draw->indexed) ?
+ ilo_translate_index_size(vec->ib.state.index_size) : GEN6_INDEX_DWORD;
+
+ /* check for non-orthogonal states */
+ if (ve->vf_params.cv_topology != vec->draw_info.topology ||
+ ve->vf_params.prepend_vertexid != prepend_vertexid ||
+ ve->vf_params.prepend_instanceid != prepend_instanceid ||
+ ve->vf_params.last_element_edge_flag != last_element_edge_flag ||
+ ve->vf_params.cv_index_format != index_format ||
+ ve->vf_params.cut_index_enable != vec->draw->primitive_restart ||
+ ve->vf_params.cut_index != vec->draw->restart_index) {
+ ve->vf_params.cv_topology = vec->draw_info.topology;
+ ve->vf_params.prepend_vertexid = prepend_vertexid;
+ ve->vf_params.prepend_instanceid = prepend_instanceid;
+ ve->vf_params.last_element_edge_flag = last_element_edge_flag;
+ ve->vf_params.cv_index_format = index_format;
+ ve->vf_params.cut_index_enable = vec->draw->primitive_restart;
+ ve->vf_params.cut_index = vec->draw->restart_index;
+
+ ilo_state_vf_set_params(&ve->vf, dev, &ve->vf_params);
+
+ vec->dirty |= ILO_DIRTY_VE;
+ }
+}
+
+static void
+finalize_vertex_buffers(struct ilo_context *ilo)
+{
+ const struct ilo_dev *dev = ilo->dev;
struct ilo_state_vector *vec = &ilo->state_vector;
+ struct ilo_state_vertex_buffer_info info;
+ unsigned i;
- if (!(vec->dirty & (ILO_DIRTY_VE | ILO_DIRTY_VS)))
+ if (!(vec->dirty & (ILO_DIRTY_VE | ILO_DIRTY_VB)))
return;
- vec->dirty |= ILO_DIRTY_VE;
+ memset(&info, 0, sizeof(info));
+
+ for (i = 0; i < vec->ve->vb_count; i++) {
+ const unsigned pipe_idx = vec->ve->vb_mapping[i];
+ const struct pipe_vertex_buffer *cso = &vec->vb.states[pipe_idx];
+
+ if (cso->buffer) {
+ info.buf = ilo_buffer(cso->buffer);
+ info.offset = cso->buffer_offset;
+ info.size = info.buf->bo_size;
+
+ info.stride = cso->stride;
+
+ vec->vb.vb[i].bo = info.buf->bo;
+ } else {
+ memset(&info, 0, sizeof(info));
+ }
+
+ ilo_state_vertex_buffer_set_info(&vec->vb.vb[i], dev, &info);
+ }
+}
+
+static void
+finalize_urb(struct ilo_context *ilo)
+{
+ const uint16_t attr_size = sizeof(uint32_t) * 4;
+ const struct ilo_dev *dev = ilo->dev;
+ struct ilo_state_vector *vec = &ilo->state_vector;
+ struct ilo_state_urb_info info;
+
+ if (!(vec->dirty & (ILO_DIRTY_VE | ILO_DIRTY_VS |
+ ILO_DIRTY_GS | ILO_DIRTY_FS)))
+ return;
+
+ memset(&info, 0, sizeof(info));
+
+ info.ve_entry_size = attr_size * ilo_state_vf_get_attr_count(&vec->ve->vf);
+
+ if (vec->vs) {
+ info.vs_const_data = (bool)
+ (ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_PCB_CBUF0_SIZE) +
+ ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_PCB_UCP_SIZE));
+ info.vs_entry_size = attr_size *
+ ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_OUTPUT_COUNT);
+ }
+
+ if (vec->gs) {
+ info.gs_const_data = (bool)
+ ilo_shader_get_kernel_param(vec->gs, ILO_KERNEL_PCB_CBUF0_SIZE);
- vec->ve->last_cso_edgeflag = false;
- if (vec->ve->count && vec->vs &&
- ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_INPUT_EDGEFLAG)) {
- vec->ve->edgeflag_cso = vec->ve->cso[vec->ve->count - 1];
- ilo_gpe_set_ve_edgeflag(ilo->dev, &vec->ve->edgeflag_cso);
- vec->ve->last_cso_edgeflag = true;
- }
-
- vec->ve->prepend_nosrc_cso = false;
- if (vec->vs &&
- (ilo_shader_get_kernel_param(vec->vs,
- ILO_KERNEL_VS_INPUT_INSTANCEID) ||
- ilo_shader_get_kernel_param(vec->vs,
- ILO_KERNEL_VS_INPUT_VERTEXID))) {
- ilo_gpe_init_ve_nosrc(ilo->dev,
- GEN6_VFCOMP_STORE_VID,
- GEN6_VFCOMP_STORE_IID,
- GEN6_VFCOMP_NOSTORE,
- GEN6_VFCOMP_NOSTORE,
- &vec->ve->nosrc_cso);
- vec->ve->prepend_nosrc_cso = true;
- } else if (!vec->vs) {
- /* generate VUE header */
- ilo_gpe_init_ve_nosrc(ilo->dev,
- GEN6_VFCOMP_STORE_0, /* Reserved */
- GEN6_VFCOMP_STORE_0, /* Render Target Array Index */
- GEN6_VFCOMP_STORE_0, /* Viewport Index */
- GEN6_VFCOMP_STORE_0, /* Point Width */
- &vec->ve->nosrc_cso);
- vec->ve->prepend_nosrc_cso = true;
- } else if (!vec->ve->count) {
/*
- * From the Sandy Bridge PRM, volume 2 part 1, page 92:
+ * From the Ivy Bridge PRM, volume 2 part 1, page 189:
+ *
+ * "All outputs of a GS thread will be stored in the single GS
+ * thread output URB entry."
*
- * "SW must ensure that at least one vertex element is defined prior
- * to issuing a 3DPRIMTIVE command, or operation is UNDEFINED."
+ * TODO
*/
- ilo_gpe_init_ve_nosrc(ilo->dev,
- GEN6_VFCOMP_STORE_0,
- GEN6_VFCOMP_STORE_0,
- GEN6_VFCOMP_STORE_0,
- GEN6_VFCOMP_STORE_1_FP,
- &vec->ve->nosrc_cso);
- vec->ve->prepend_nosrc_cso = true;
+ info.gs_entry_size = attr_size *
+ ilo_shader_get_kernel_param(vec->gs, ILO_KERNEL_OUTPUT_COUNT);
+ }
+
+ if (vec->fs) {
+ info.ps_const_data = (bool)
+ ilo_shader_get_kernel_param(vec->fs, ILO_KERNEL_PCB_CBUF0_SIZE);
+ }
+
+ ilo_state_urb_set_info(&vec->urb, dev, &info);
+}
+
+static void
+finalize_viewport(struct ilo_context *ilo)
+{
+ const struct ilo_dev *dev = ilo->dev;
+ struct ilo_state_vector *vec = &ilo->state_vector;
+
+ if (vec->dirty & ILO_DIRTY_VIEWPORT) {
+ ilo_state_viewport_set_params(&vec->viewport.vp,
+ dev, &vec->viewport.params, false);
+ } else if (vec->dirty & ILO_DIRTY_SCISSOR) {
+ ilo_state_viewport_set_params(&vec->viewport.vp,
+ dev, &vec->viewport.params, true);
+ vec->dirty |= ILO_DIRTY_VIEWPORT;
+ }
+}
+
+static bool
+can_enable_gb_test(const struct ilo_rasterizer_state *rasterizer,
+ const struct ilo_viewport_state *viewport,
+ const struct ilo_fb_state *fb)
+{
+ unsigned i;
+
+ /*
+ * There are several reasons that guard band test should be disabled
+ *
+ * - GL wide points (to avoid partially visible object)
+ * - GL wide or AA lines (to avoid partially visible object)
+ * - missing 2D clipping
+ */
+ if (rasterizer->state.point_size_per_vertex ||
+ rasterizer->state.point_size > 1.0f ||
+ rasterizer->state.line_width > 1.0f ||
+ rasterizer->state.line_smooth)
+ return false;
+
+ for (i = 0; i < viewport->params.count; i++) {
+ const struct ilo_state_viewport_matrix_info *mat =
+ &viewport->matrices[i];
+ float min_x, max_x, min_y, max_y;
+
+ min_x = -1.0f * fabsf(mat->scale[0]) + mat->translate[0];
+ max_x = 1.0f * fabsf(mat->scale[0]) + mat->translate[0];
+ min_y = -1.0f * fabsf(mat->scale[1]) + mat->translate[1];
+ max_y = 1.0f * fabsf(mat->scale[1]) + mat->translate[1];
+
+ if (min_x > 0.0f || max_x < fb->state.width ||
+ min_y > 0.0f || max_y < fb->state.height)
+ return false;
+ }
+
+ return true;
+}
+
+static void
+finalize_rasterizer(struct ilo_context *ilo)
+{
+ const struct ilo_dev *dev = ilo->dev;
+ struct ilo_state_vector *vec = &ilo->state_vector;
+ struct ilo_rasterizer_state *rasterizer = vec->rasterizer;
+ struct ilo_state_raster_info *info = &vec->rasterizer->info;
+ const bool gb_test_enable =
+ can_enable_gb_test(rasterizer, &vec->viewport, &vec->fb);
+ const bool multisample =
+ (rasterizer->state.multisample && vec->fb.num_samples > 1);
+ const uint8_t barycentric_interps = ilo_shader_get_kernel_param(vec->fs,
+ ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);
+
+ /* check for non-orthogonal states */
+ if (info->clip.viewport_count != vec->viewport.params.count ||
+ info->clip.gb_test_enable != gb_test_enable ||
+ info->setup.msaa_enable != multisample ||
+ info->setup.line_msaa_enable != multisample ||
+ info->tri.depth_offset_format != vec->fb.depth_offset_format ||
+ info->scan.sample_count != vec->fb.num_samples ||
+ info->scan.sample_mask != vec->sample_mask ||
+ info->scan.barycentric_interps != barycentric_interps ||
+ info->params.any_integer_rt != vec->fb.has_integer_rt ||
+ info->params.hiz_enable != vec->fb.has_hiz) {
+ info->clip.viewport_count = vec->viewport.params.count;
+ info->clip.gb_test_enable = gb_test_enable;
+ info->setup.msaa_enable = multisample;
+ info->setup.line_msaa_enable = multisample;
+ info->tri.depth_offset_format = vec->fb.depth_offset_format;
+ info->scan.sample_count = vec->fb.num_samples;
+ info->scan.sample_mask = vec->sample_mask;
+ info->scan.barycentric_interps = barycentric_interps;
+ info->params.any_integer_rt = vec->fb.has_integer_rt;
+ info->params.hiz_enable = vec->fb.has_hiz;
+
+ ilo_state_raster_set_info(&rasterizer->rs, dev, &rasterizer->info);
+
+ vec->dirty |= ILO_DIRTY_RASTERIZER;
+ }
+}
+
+static bool
+finalize_blend_rt(struct ilo_context *ilo)
+{
+ struct ilo_state_vector *vec = &ilo->state_vector;
+ const struct ilo_fb_state *fb = &vec->fb;
+ struct ilo_blend_state *blend = vec->blend;
+ struct ilo_state_cc_blend_info *info = &vec->blend->info.blend;
+ bool changed = false;
+ unsigned i;
+
+ if (!(vec->dirty & (ILO_DIRTY_FB | ILO_DIRTY_BLEND)))
+ return false;
+
+ /* set up one for dummy RT writes */
+ if (!fb->state.nr_cbufs) {
+ if (info->rt != &blend->dummy_rt) {
+ info->rt = &blend->dummy_rt;
+ info->rt_count = 1;
+ changed = true;
+ }
+
+ return changed;
+ }
+
+ if (info->rt != blend->effective_rt ||
+ info->rt_count != fb->state.nr_cbufs) {
+ info->rt = blend->effective_rt;
+ info->rt_count = fb->state.nr_cbufs;
+ changed = true;
+ }
+
+ for (i = 0; i < fb->state.nr_cbufs; i++) {
+ const struct ilo_fb_blend_caps *caps = &fb->blend_caps[i];
+ struct ilo_state_cc_blend_rt_info *rt = &blend->effective_rt[i];
+ /* ignore logicop when not UNORM */
+ const bool logicop_enable =
+ (blend->rt[i].logicop_enable && caps->is_unorm);
+
+ if (rt->cv_is_unorm != caps->is_unorm ||
+ rt->cv_is_integer != caps->is_integer ||
+ rt->logicop_enable != logicop_enable ||
+ rt->force_dst_alpha_one != caps->force_dst_alpha_one) {
+ rt->cv_is_unorm = caps->is_unorm;
+ rt->cv_is_integer = caps->is_integer;
+ rt->logicop_enable = logicop_enable;
+ rt->force_dst_alpha_one = caps->force_dst_alpha_one;
+
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+static void
+finalize_blend(struct ilo_context *ilo)
+{
+ const struct ilo_dev *dev = ilo->dev;
+ struct ilo_state_vector *vec = &ilo->state_vector;
+ struct ilo_blend_state *blend = vec->blend;
+ struct ilo_state_cc_info *info = &blend->info;
+ const bool sample_count_one = (vec->fb.num_samples <= 1);
+ const bool float_source0_alpha =
+ (!vec->fb.state.nr_cbufs || !vec->fb.state.cbufs[0] ||
+ !util_format_is_pure_integer(vec->fb.state.cbufs[0]->format));
+
+ /* check for non-orthogonal states */
+ if (finalize_blend_rt(ilo) ||
+ info->alpha.cv_sample_count_one != sample_count_one ||
+ info->alpha.cv_float_source0_alpha != float_source0_alpha ||
+ info->alpha.test_enable != vec->dsa->alpha_test ||
+ info->alpha.test_func != vec->dsa->alpha_func ||
+ memcmp(&info->stencil, &vec->dsa->stencil, sizeof(info->stencil)) ||
+ memcmp(&info->depth, &vec->dsa->depth, sizeof(info->depth)) ||
+ memcmp(&info->params, &vec->cc_params, sizeof(info->params))) {
+ info->alpha.cv_sample_count_one = sample_count_one;
+ info->alpha.cv_float_source0_alpha = float_source0_alpha;
+ info->alpha.test_enable = vec->dsa->alpha_test;
+ info->alpha.test_func = vec->dsa->alpha_func;
+ info->stencil = vec->dsa->stencil;
+ info->depth = vec->dsa->depth;
+ info->params = vec->cc_params;
+
+ ilo_state_cc_set_info(&blend->cc, dev, info);
+
+ blend->alpha_may_kill = (info->alpha.alpha_to_coverage ||
+ info->alpha.test_enable);
+
+ vec->dirty |= ILO_DIRTY_BLEND;
}
}
@@ -254,10 +793,24 @@ ilo_finalize_3d_states(struct ilo_context *ilo,
{
ilo->state_vector.draw = draw;
+ ilo->state_vector.draw_info.topology = ilo_translate_draw_mode(draw->mode);
+ ilo->state_vector.draw_info.indexed = draw->indexed;
+ ilo->state_vector.draw_info.vertex_count = draw->count;
+ ilo->state_vector.draw_info.vertex_start = draw->start;
+ ilo->state_vector.draw_info.instance_count = draw->instance_count;
+ ilo->state_vector.draw_info.instance_start = draw->start_instance;
+ ilo->state_vector.draw_info.vertex_base = draw->index_bias;
+
+ finalize_blend(ilo);
finalize_shader_states(&ilo->state_vector);
finalize_constant_buffers(ilo);
finalize_index_buffer(ilo);
finalize_vertex_elements(ilo);
+ finalize_vertex_buffers(ilo);
+
+ finalize_urb(ilo);
+ finalize_rasterizer(ilo);
+ finalize_viewport(ilo);
u_upload_unmap(ilo->uploader);
}
@@ -301,12 +854,79 @@ ilo_create_blend_state(struct pipe_context *pipe,
const struct pipe_blend_state *state)
{
const struct ilo_dev *dev = ilo_context(pipe)->dev;
+ struct ilo_state_cc_info *info;
struct ilo_blend_state *blend;
+ int i;
- blend = MALLOC_STRUCT(ilo_blend_state);
+ blend = CALLOC_STRUCT(ilo_blend_state);
assert(blend);
- ilo_gpe_init_blend(dev, state, blend);
+ info = &blend->info;
+
+ info->alpha.cv_float_source0_alpha = true;
+ info->alpha.cv_sample_count_one = true;
+ info->alpha.alpha_to_one = state->alpha_to_one;
+ info->alpha.alpha_to_coverage = state->alpha_to_coverage;
+ info->alpha.test_enable = false;
+ info->alpha.test_func = GEN6_COMPAREFUNCTION_ALWAYS;
+
+ info->stencil.cv_has_buffer = true;
+ info->depth.cv_has_buffer= true;
+
+ info->blend.rt = blend->effective_rt;
+ info->blend.rt_count = 1;
+ info->blend.dither_enable = state->dither;
+
+ for (i = 0; i < ARRAY_SIZE(blend->rt); i++) {
+ const struct pipe_rt_blend_state *rt = &state->rt[i];
+ struct ilo_state_cc_blend_rt_info *rt_info = &blend->rt[i];
+
+ rt_info->cv_has_buffer = true;
+ rt_info->cv_is_unorm = true;
+ rt_info->cv_is_integer = false;
+
+ /* logic op takes precedence over blending */
+ if (state->logicop_enable) {
+ rt_info->logicop_enable = true;
+ rt_info->logicop_func = ilo_translate_logicop(state->logicop_func);
+ } else if (rt->blend_enable) {
+ rt_info->blend_enable = true;
+
+ rt_info->rgb_src = ilo_translate_blend_factor(rt->rgb_src_factor);
+ rt_info->rgb_dst = ilo_translate_blend_factor(rt->rgb_dst_factor);
+ rt_info->rgb_func = ilo_translate_blend_func(rt->rgb_func);
+
+ rt_info->a_src = ilo_translate_blend_factor(rt->alpha_src_factor);
+ rt_info->a_dst = ilo_translate_blend_factor(rt->alpha_dst_factor);
+ rt_info->a_func = ilo_translate_blend_func(rt->alpha_func);
+ }
+
+ if (!(rt->colormask & PIPE_MASK_A))
+ rt_info->argb_write_disables |= (1 << 3);
+ if (!(rt->colormask & PIPE_MASK_R))
+ rt_info->argb_write_disables |= (1 << 2);
+ if (!(rt->colormask & PIPE_MASK_G))
+ rt_info->argb_write_disables |= (1 << 1);
+ if (!(rt->colormask & PIPE_MASK_B))
+ rt_info->argb_write_disables |= (1 << 0);
+
+ if (!state->independent_blend_enable) {
+ for (i = 1; i < ARRAY_SIZE(blend->rt); i++)
+ blend->rt[i] = *rt_info;
+ break;
+ }
+ }
+
+ memcpy(blend->effective_rt, blend->rt, sizeof(blend->rt));
+
+ blend->dummy_rt.argb_write_disables = 0xf;
+
+ if (!ilo_state_cc_init(&blend->cc, dev, &blend->info)) {
+ FREE(blend);
+ return NULL;
+ }
+
+ blend->dual_blend = util_blend_state_is_dual(state, 0);
return blend;
}
@@ -333,11 +953,105 @@ ilo_create_sampler_state(struct pipe_context *pipe,
{
const struct ilo_dev *dev = ilo_context(pipe)->dev;
struct ilo_sampler_cso *sampler;
+ struct ilo_state_sampler_info info;
+ struct ilo_state_sampler_border_info border;
- sampler = MALLOC_STRUCT(ilo_sampler_cso);
+ sampler = CALLOC_STRUCT(ilo_sampler_cso);
assert(sampler);
- ilo_gpe_init_sampler_cso(dev, state, sampler);
+ memset(&info, 0, sizeof(info));
+
+ info.non_normalized = !state->normalized_coords;
+ if (state->normalized_coords) {
+ info.lod_bias = state->lod_bias;
+ info.min_lod = state->min_lod;
+ info.max_lod = state->max_lod;
+
+ info.mip_filter = ilo_translate_mip_filter(state->min_mip_filter);
+ } else {
+ /* work around a bug in util_blitter */
+ info.mip_filter = GEN6_MIPFILTER_NONE;
+ }
+
+ if (state->max_anisotropy) {
+ info.min_filter = GEN6_MAPFILTER_ANISOTROPIC;
+ info.mag_filter = GEN6_MAPFILTER_ANISOTROPIC;
+ } else {
+ info.min_filter = ilo_translate_img_filter(state->min_img_filter);
+ info.mag_filter = ilo_translate_img_filter(state->mag_img_filter);
+ }
+
+ info.max_anisotropy = ilo_translate_max_anisotropy(state->max_anisotropy);
+
+ /* use LOD 0 when no mipmapping (see sampler_set_gen6_SAMPLER_STATE()) */
+ if (info.mip_filter == GEN6_MIPFILTER_NONE && info.min_lod > 0.0f) {
+ info.min_lod = 0.0f;
+ info.mag_filter = info.min_filter;
+ }
+
+ if (state->seamless_cube_map) {
+ if (state->min_img_filter == PIPE_TEX_FILTER_NEAREST ||
+ state->mag_img_filter == PIPE_TEX_FILTER_NEAREST) {
+ info.tcx_ctrl = GEN6_TEXCOORDMODE_CLAMP;
+ info.tcy_ctrl = GEN6_TEXCOORDMODE_CLAMP;
+ info.tcz_ctrl = GEN6_TEXCOORDMODE_CLAMP;
+ } else {
+ info.tcx_ctrl = GEN6_TEXCOORDMODE_CUBE;
+ info.tcy_ctrl = GEN6_TEXCOORDMODE_CUBE;
+ info.tcz_ctrl = GEN6_TEXCOORDMODE_CUBE;
+ }
+ } else {
+ info.tcx_ctrl = ilo_translate_address_wrap(state->wrap_s);
+ info.tcy_ctrl = ilo_translate_address_wrap(state->wrap_t);
+ info.tcz_ctrl = ilo_translate_address_wrap(state->wrap_r);
+
+ if (ilo_dev_gen(dev) < ILO_GEN(8)) {
+ /*
+ * For nearest filtering, PIPE_TEX_WRAP_CLAMP means
+ * PIPE_TEX_WRAP_CLAMP_TO_EDGE; for linear filtering,
+ * PIPE_TEX_WRAP_CLAMP means PIPE_TEX_WRAP_CLAMP_TO_BORDER while
+ * additionally clamping the texture coordinates to [0.0, 1.0].
+ *
+ * PIPE_TEX_WRAP_CLAMP is not supported natively until Gen8. The
+ * clamping has to be taken care of in the shaders. There are two
+ * filters here, but let the minification one have a say.
+ */
+ const bool clamp_is_to_edge =
+ (state->min_img_filter == PIPE_TEX_FILTER_NEAREST);
+
+ if (clamp_is_to_edge) {
+ if (info.tcx_ctrl == GEN8_TEXCOORDMODE_HALF_BORDER)
+ info.tcx_ctrl = GEN6_TEXCOORDMODE_CLAMP;
+ if (info.tcy_ctrl == GEN8_TEXCOORDMODE_HALF_BORDER)
+ info.tcy_ctrl = GEN6_TEXCOORDMODE_CLAMP;
+ if (info.tcz_ctrl == GEN8_TEXCOORDMODE_HALF_BORDER)
+ info.tcz_ctrl = GEN6_TEXCOORDMODE_CLAMP;
+ } else {
+ if (info.tcx_ctrl == GEN8_TEXCOORDMODE_HALF_BORDER) {
+ info.tcx_ctrl = GEN6_TEXCOORDMODE_CLAMP_BORDER;
+ sampler->saturate_s = true;
+ }
+ if (info.tcy_ctrl == GEN8_TEXCOORDMODE_HALF_BORDER) {
+ info.tcy_ctrl = GEN6_TEXCOORDMODE_CLAMP_BORDER;
+ sampler->saturate_t = true;
+ }
+ if (info.tcz_ctrl == GEN8_TEXCOORDMODE_HALF_BORDER) {
+ info.tcz_ctrl = GEN6_TEXCOORDMODE_CLAMP_BORDER;
+ sampler->saturate_r = true;
+ }
+ }
+ }
+ }
+
+ if (state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)
+ info.shadow_func = ilo_translate_shadow_func(state->compare_func);
+
+ ilo_state_sampler_init(&sampler->sampler, dev, &info);
+
+ memset(&border, 0, sizeof(border));
+ memcpy(border.rgba.f, state->border_color.f, sizeof(border.rgba.f));
+
+ ilo_state_sampler_border_init(&sampler->border, dev, &border);
return sampler;
}
@@ -403,12 +1117,74 @@ ilo_create_rasterizer_state(struct pipe_context *pipe,
{
const struct ilo_dev *dev = ilo_context(pipe)->dev;
struct ilo_rasterizer_state *rast;
+ struct ilo_state_raster_info *info;
- rast = MALLOC_STRUCT(ilo_rasterizer_state);
+ rast = CALLOC_STRUCT(ilo_rasterizer_state);
assert(rast);
rast->state = *state;
- ilo_gpe_init_rasterizer(dev, state, rast);
+
+ info = &rast->info;
+
+ info->clip.clip_enable = true;
+ info->clip.stats_enable = true;
+ info->clip.viewport_count = 1;
+ info->clip.force_rtaindex_zero = true;
+ info->clip.user_clip_enables = state->clip_plane_enable;
+ info->clip.gb_test_enable = true;
+ info->clip.xy_test_enable = true;
+ info->clip.z_far_enable = state->depth_clip;
+ info->clip.z_near_enable = state->depth_clip;
+ info->clip.z_near_zero = state->clip_halfz;
+
+ info->setup.first_vertex_provoking = state->flatshade_first;
+ info->setup.viewport_transform = true;
+ info->setup.scissor_enable = state->scissor;
+ info->setup.msaa_enable = false;
+ info->setup.line_msaa_enable = false;
+ info->point.aa_enable = state->point_smooth;
+ info->point.programmable_width = state->point_size_per_vertex;
+ info->line.aa_enable = state->line_smooth;
+ info->line.stipple_enable = state->line_stipple_enable;
+ info->line.giq_enable = true;
+ info->line.giq_last_pixel = state->line_last_pixel;
+ info->tri.front_winding = ilo_translate_front_ccw(state->front_ccw);
+ info->tri.cull_mode = ilo_translate_cull_face(state->cull_face);
+ info->tri.fill_mode_front = ilo_translate_poly_mode(state->fill_front);
+ info->tri.fill_mode_back = ilo_translate_poly_mode(state->fill_back);
+ info->tri.depth_offset_format = GEN6_ZFORMAT_D24_UNORM_X8_UINT;
+ info->tri.depth_offset_solid = state->offset_tri;
+ info->tri.depth_offset_wireframe = state->offset_line;
+ info->tri.depth_offset_point = state->offset_point;
+ info->tri.poly_stipple_enable = state->poly_stipple_enable;
+
+ info->scan.stats_enable = true;
+ info->scan.sample_count = 1;
+ info->scan.pixloc =
+ ilo_translate_half_pixel_center(state->half_pixel_center);
+ info->scan.sample_mask = ~0u;
+ info->scan.zw_interp = GEN6_ZW_INTERP_PIXEL;
+ info->scan.barycentric_interps = GEN6_INTERP_PERSPECTIVE_PIXEL;
+ info->scan.earlyz_control = GEN7_EDSC_NORMAL;
+ info->scan.earlyz_op = ILO_STATE_RASTER_EARLYZ_NORMAL;
+ info->scan.earlyz_stencil_clear = false;
+
+ info->params.any_integer_rt = false;
+ info->params.hiz_enable = true;
+ info->params.point_width =
+ (state->point_size == 0.0f) ? 1.0f : state->point_size;
+ info->params.line_width =
+ (state->line_width == 0.0f) ? 1.0f : state->line_width;
+
+ info->params.depth_offset_scale = state->offset_scale;
+ /*
+ * Scale the constant term. The minimum representable value used by the HW
+ * is not large enough to be the minimum resolvable difference.
+ */
+ info->params.depth_offset_const = state->offset_units * 2.0f;
+ info->params.depth_offset_clamp = state->offset_clamp;
+
+ ilo_state_raster_init(&rast->rs, dev, info);
return rast;
}
@@ -416,10 +1192,20 @@ ilo_create_rasterizer_state(struct pipe_context *pipe,
static void
ilo_bind_rasterizer_state(struct pipe_context *pipe, void *state)
{
+ const struct ilo_dev *dev = ilo_context(pipe)->dev;
struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
vec->rasterizer = state;
+ if (vec->rasterizer) {
+ struct ilo_state_line_stipple_info info;
+
+ info.pattern = vec->rasterizer->state.line_stipple_pattern;
+ info.repeat_count = vec->rasterizer->state.line_stipple_factor + 1;
+
+ ilo_state_line_stipple_set_info(&vec->line_stipple, dev, &info);
+ }
+
vec->dirty |= ILO_DIRTY_RASTERIZER;
}
@@ -433,13 +1219,48 @@ static void *
ilo_create_depth_stencil_alpha_state(struct pipe_context *pipe,
const struct pipe_depth_stencil_alpha_state *state)
{
- const struct ilo_dev *dev = ilo_context(pipe)->dev;
struct ilo_dsa_state *dsa;
+ int i;
- dsa = MALLOC_STRUCT(ilo_dsa_state);
+ dsa = CALLOC_STRUCT(ilo_dsa_state);
assert(dsa);
- ilo_gpe_init_dsa(dev, state, dsa);
+ dsa->depth.cv_has_buffer = true;
+ dsa->depth.test_enable = state->depth.enabled;
+ dsa->depth.write_enable = state->depth.writemask;
+ dsa->depth.test_func = ilo_translate_compare_func(state->depth.func);
+
+ dsa->stencil.cv_has_buffer = true;
+ for (i = 0; i < ARRAY_SIZE(state->stencil); i++) {
+ const struct pipe_stencil_state *stencil = &state->stencil[i];
+ struct ilo_state_cc_stencil_op_info *op;
+
+ if (!stencil->enabled)
+ break;
+
+ if (i == 0) {
+ dsa->stencil.test_enable = true;
+ dsa->stencil_front.test_mask = stencil->valuemask;
+ dsa->stencil_front.write_mask = stencil->writemask;
+
+ op = &dsa->stencil.front;
+ } else {
+ dsa->stencil.twosided_enable = true;
+ dsa->stencil_back.test_mask = stencil->valuemask;
+ dsa->stencil_back.write_mask = stencil->writemask;
+
+ op = &dsa->stencil.back;
+ }
+
+ op->test_func = ilo_translate_compare_func(stencil->func);
+ op->fail_op = ilo_translate_stencil_op(stencil->fail_op);
+ op->zfail_op = ilo_translate_stencil_op(stencil->zfail_op);
+ op->zpass_op = ilo_translate_stencil_op(stencil->zpass_op);
+ }
+
+ dsa->alpha_test = state->alpha.enabled;
+ dsa->alpha_ref = state->alpha.ref_value;
+ dsa->alpha_func = ilo_translate_compare_func(state->alpha.func);
return dsa;
}
@@ -450,6 +1271,17 @@ ilo_bind_depth_stencil_alpha_state(struct pipe_context *pipe, void *state)
struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
vec->dsa = state;
+ if (vec->dsa) {
+ vec->cc_params.alpha_ref = vec->dsa->alpha_ref;
+ vec->cc_params.stencil_front.test_mask =
+ vec->dsa->stencil_front.test_mask;
+ vec->cc_params.stencil_front.write_mask =
+ vec->dsa->stencil_front.write_mask;
+ vec->cc_params.stencil_back.test_mask =
+ vec->dsa->stencil_back.test_mask;
+ vec->cc_params.stencil_back.write_mask =
+ vec->dsa->stencil_back.write_mask;
+ }
vec->dirty |= ILO_DIRTY_DSA;
}
@@ -575,12 +1407,60 @@ ilo_create_vertex_elements_state(struct pipe_context *pipe,
const struct pipe_vertex_element *elements)
{
const struct ilo_dev *dev = ilo_context(pipe)->dev;
+ struct ilo_state_vf_element_info vf_elements[PIPE_MAX_ATTRIBS];
+ unsigned instance_divisors[PIPE_MAX_ATTRIBS];
+ struct ilo_state_vf_info vf_info;
struct ilo_ve_state *ve;
+ unsigned i;
- ve = MALLOC_STRUCT(ilo_ve_state);
+ ve = CALLOC_STRUCT(ilo_ve_state);
assert(ve);
- ilo_gpe_init_ve(dev, num_elements, elements, ve);
+ for (i = 0; i < num_elements; i++) {
+ const struct pipe_vertex_element *elem = &elements[i];
+ struct ilo_state_vf_element_info *attr = &vf_elements[i];
+ unsigned hw_idx;
+
+ /*
+ * map the pipe vb to the hardware vb, which has a fixed instance
+ * divisor
+ */
+ for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
+ if (ve->vb_mapping[hw_idx] == elem->vertex_buffer_index &&
+ instance_divisors[hw_idx] == elem->instance_divisor)
+ break;
+ }
+
+ /* create one if there is no matching hardware vb */
+ if (hw_idx >= ve->vb_count) {
+ hw_idx = ve->vb_count++;
+
+ ve->vb_mapping[hw_idx] = elem->vertex_buffer_index;
+ instance_divisors[hw_idx] = elem->instance_divisor;
+ }
+
+ attr->buffer = hw_idx;
+ attr->vertex_offset = elem->src_offset;
+ attr->format = ilo_format_translate_vertex(dev, elem->src_format);
+ attr->format_size = util_format_get_blocksize(elem->src_format);
+ attr->component_count = util_format_get_nr_components(elem->src_format);
+ attr->is_integer = util_format_is_pure_integer(elem->src_format);
+
+ attr->instancing_enable = (elem->instance_divisor != 0);
+ attr->instancing_step_rate = elem->instance_divisor;
+ }
+
+ memset(&vf_info, 0, sizeof(vf_info));
+ vf_info.data = ve->vf_data;
+ vf_info.data_size = sizeof(ve->vf_data);
+ vf_info.elements = vf_elements;
+ vf_info.element_count = num_elements;
+ /* vf_info.params and ve->vf_params are both zeroed */
+
+ if (!ilo_state_vf_init(&ve->vf, dev, &vf_info)) {
+ FREE(ve);
+ return NULL;
+ }
return ve;
}
@@ -609,7 +1489,7 @@ ilo_set_blend_color(struct pipe_context *pipe,
{
struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
- vec->blend_color = *state;
+ memcpy(vec->cc_params.blend_rgba, state->color, sizeof(state->color));
vec->dirty |= ILO_DIRTY_BLEND_COLOR;
}
@@ -626,6 +1506,9 @@ ilo_set_stencil_ref(struct pipe_context *pipe,
vec->stencil_ref = *state;
+ vec->cc_params.stencil_front.test_ref = state->ref_value[0];
+ vec->cc_params.stencil_back.test_ref = state->ref_value[1];
+
vec->dirty |= ILO_DIRTY_STENCIL_REF;
}
@@ -675,47 +1558,47 @@ ilo_set_constant_buffer(struct pipe_context *pipe,
pipe_resource_reference(&cso->resource, buf[i].buffer);
+ cso->info.access = ILO_STATE_SURFACE_ACCESS_DP_DATA;
+ cso->info.format = GEN6_FORMAT_R32G32B32A32_FLOAT;
+ cso->info.format_size = 16;
+ cso->info.struct_size = 16;
+ cso->info.readonly = true;
+ cso->info.size = buf[i].buffer_size;
+
if (buf[i].buffer) {
- const enum pipe_format elem_format =
- PIPE_FORMAT_R32G32B32A32_FLOAT;
+ cso->info.buf = ilo_buffer(buf[i].buffer);
+ cso->info.offset = buf[i].buffer_offset;
- ilo_gpe_init_view_surface_for_buffer(dev,
- ilo_buffer(buf[i].buffer),
- buf[i].buffer_offset, buf[i].buffer_size,
- util_format_get_blocksize(elem_format), elem_format,
- false, false, &cso->surface);
+ memset(&cso->surface, 0, sizeof(cso->surface));
+ ilo_state_surface_init_for_buffer(&cso->surface, dev, &cso->info);
+ cso->surface.bo = cso->info.buf->bo;
cso->user_buffer = NULL;
- cso->user_buffer_size = 0;
cbuf->enabled_mask |= 1 << (index + i);
- }
- else if (buf[i].user_buffer) {
- cso->surface.bo = NULL;
-
+ } else if (buf[i].user_buffer) {
+ cso->info.buf = NULL;
/* buffer_offset does not apply for user buffer */
cso->user_buffer = buf[i].user_buffer;
- cso->user_buffer_size = buf[i].buffer_size;
cbuf->enabled_mask |= 1 << (index + i);
- }
- else {
- cso->surface.bo = NULL;
+ } else {
+ cso->info.buf = NULL;
+ cso->info.size = 0;
cso->user_buffer = NULL;
- cso->user_buffer_size = 0;
cbuf->enabled_mask &= ~(1 << (index + i));
}
}
- }
- else {
+ } else {
for (i = 0; i < count; i++) {
struct ilo_cbuf_cso *cso = &cbuf->cso[index + i];
pipe_resource_reference(&cso->resource, NULL);
- cso->surface.bo = NULL;
+
+ cso->info.buf = NULL;
+ cso->info.size = 0;
cso->user_buffer = NULL;
- cso->user_buffer_size = 0;
cbuf->enabled_mask &= ~(1 << (index + i));
}
@@ -725,13 +1608,116 @@ ilo_set_constant_buffer(struct pipe_context *pipe,
}
static void
+fb_set_blend_caps(const struct ilo_dev *dev,
+ enum pipe_format format,
+ struct ilo_fb_blend_caps *caps)
+{
+ const struct util_format_description *desc =
+ util_format_description(format);
+ const int ch = util_format_get_first_non_void_channel(format);
+
+ memset(caps, 0, sizeof(*caps));
+
+ if (format == PIPE_FORMAT_NONE || desc->is_mixed)
+ return;
+
+ caps->is_unorm = (ch >= 0 && desc->channel[ch].normalized &&
+ desc->channel[ch].type == UTIL_FORMAT_TYPE_UNSIGNED &&
+ desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB);
+ caps->is_integer = util_format_is_pure_integer(format);
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 365:
+ *
+ * "Logic Ops are only supported on *_UNORM surfaces (excluding _SRGB
+ * variants), otherwise Logic Ops must be DISABLED."
+ *
+ * According to the classic driver, this is lifted on Gen8+.
+ */
+ caps->can_logicop = (ilo_dev_gen(dev) >= ILO_GEN(8) || caps->is_unorm);
+
+ /* no blending for pure integer formats */
+ caps->can_blend = !caps->is_integer;
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 382:
+ *
+ * "Alpha Test can only be enabled if Pixel Shader outputs a float
+ * alpha value."
+ */
+ caps->can_alpha_test = !caps->is_integer;
+
+ caps->force_dst_alpha_one =
+ (ilo_format_translate_render(dev, format) !=
+ ilo_format_translate_color(dev, format));
+
+ /* sanity check */
+ if (caps->force_dst_alpha_one) {
+ enum pipe_format render_format;
+
+ switch (format) {
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
+ render_format = PIPE_FORMAT_B8G8R8A8_UNORM;
+ break;
+ default:
+ render_format = PIPE_FORMAT_NONE;
+ break;
+ }
+
+ assert(ilo_format_translate_render(dev, format) ==
+ ilo_format_translate_color(dev, render_format));
+ }
+}
+
+static void
ilo_set_framebuffer_state(struct pipe_context *pipe,
const struct pipe_framebuffer_state *state)
{
const struct ilo_dev *dev = ilo_context(pipe)->dev;
struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
+ struct ilo_fb_state *fb = &vec->fb;
+ const struct pipe_surface *first_surf = NULL;
+ int i;
+
+ util_copy_framebuffer_state(&fb->state, state);
+
+ fb->has_integer_rt = false;
+ for (i = 0; i < state->nr_cbufs; i++) {
+ if (state->cbufs[i]) {
+ fb_set_blend_caps(dev, state->cbufs[i]->format, &fb->blend_caps[i]);
- ilo_gpe_set_fb(dev, state, &vec->fb);
+ fb->has_integer_rt |= fb->blend_caps[i].is_integer;
+
+ if (!first_surf)
+ first_surf = state->cbufs[i];
+ } else {
+ fb_set_blend_caps(dev, PIPE_FORMAT_NONE, &fb->blend_caps[i]);
+ }
+ }
+
+ if (!first_surf && state->zsbuf)
+ first_surf = state->zsbuf;
+
+ fb->num_samples = (first_surf) ? first_surf->texture->nr_samples : 1;
+ if (!fb->num_samples)
+ fb->num_samples = 1;
+
+ if (state->zsbuf) {
+ const struct ilo_surface_cso *cso =
+ (const struct ilo_surface_cso *) state->zsbuf;
+
+ fb->has_hiz = cso->u.zs.hiz_bo;
+ fb->depth_offset_format =
+ ilo_state_zs_get_depth_format(&cso->u.zs, dev);
+ } else {
+ fb->has_hiz = false;
+ fb->depth_offset_format = GEN6_ZFORMAT_D32_FLOAT;
+ }
+
+ /*
+ * The PRMs list several restrictions when the framebuffer has more than
+ * one surface. It seems they are actually lifted on GEN6+.
+ */
vec->dirty |= ILO_DIRTY_FB;
}
@@ -740,9 +1726,15 @@ static void
ilo_set_polygon_stipple(struct pipe_context *pipe,
const struct pipe_poly_stipple *state)
{
+ const struct ilo_dev *dev = ilo_context(pipe)->dev;
struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
+ struct ilo_state_poly_stipple_info info;
+ int i;
+
+ for (i = 0; i < 32; i++)
+ info.pattern[i] = state->stipple[i];
- vec->poly_stipple = *state;
+ ilo_state_poly_stipple_set_info(&vec->poly_stipple, dev, &info);
vec->dirty |= ILO_DIRTY_POLY_STIPPLE;
}
@@ -753,11 +1745,26 @@ ilo_set_scissor_states(struct pipe_context *pipe,
unsigned num_scissors,
const struct pipe_scissor_state *scissors)
{
- const struct ilo_dev *dev = ilo_context(pipe)->dev;
struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
+ unsigned i;
+
+ for (i = 0; i < num_scissors; i++) {
+ struct ilo_state_viewport_scissor_info *info =
+ &vec->viewport.scissors[start_slot + i];
- ilo_gpe_set_scissor(dev, start_slot, num_scissors,
- scissors, &vec->scissor);
+ if (scissors[i].minx < scissors[i].maxx &&
+ scissors[i].miny < scissors[i].maxy) {
+ info->min_x = scissors[i].minx;
+ info->min_y = scissors[i].miny;
+ info->max_x = scissors[i].maxx - 1;
+ info->max_y = scissors[i].maxy - 1;
+ } else {
+ info->min_x = 1;
+ info->min_y = 1;
+ info->max_x = 0;
+ info->max_y = 0;
+ }
+ }
vec->dirty |= ILO_DIRTY_SCISSOR;
}
@@ -768,28 +1775,31 @@ ilo_set_viewport_states(struct pipe_context *pipe,
unsigned num_viewports,
const struct pipe_viewport_state *viewports)
{
- const struct ilo_dev *dev = ilo_context(pipe)->dev;
struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
if (viewports) {
unsigned i;
for (i = 0; i < num_viewports; i++) {
- ilo_gpe_set_viewport_cso(dev, &viewports[i],
- &vec->viewport.cso[start_slot + i]);
+ struct ilo_state_viewport_matrix_info *info =
+ &vec->viewport.matrices[start_slot + i];
+
+ memcpy(info->scale, viewports[i].scale, sizeof(info->scale));
+ memcpy(info->translate, viewports[i].translate,
+ sizeof(info->translate));
}
- if (vec->viewport.count < start_slot + num_viewports)
- vec->viewport.count = start_slot + num_viewports;
+ if (vec->viewport.params.count < start_slot + num_viewports)
+ vec->viewport.params.count = start_slot + num_viewports;
/* need to save viewport 0 for util_blitter */
if (!start_slot && num_viewports)
vec->viewport.viewport0 = viewports[0];
}
else {
- if (vec->viewport.count <= start_slot + num_viewports &&
- vec->viewport.count > start_slot)
- vec->viewport.count = start_slot;
+ if (vec->viewport.params.count <= start_slot + num_viewports &&
+ vec->viewport.params.count > start_slot)
+ vec->viewport.params.count = start_slot;
}
vec->dirty |= ILO_DIRTY_VIEWPORT;
@@ -905,16 +1915,11 @@ ilo_set_index_buffer(struct pipe_context *pipe,
struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
if (state) {
- pipe_resource_reference(&vec->ib.buffer, state->buffer);
- vec->ib.user_buffer = state->user_buffer;
- vec->ib.offset = state->offset;
- vec->ib.index_size = state->index_size;
- }
- else {
- pipe_resource_reference(&vec->ib.buffer, NULL);
- vec->ib.user_buffer = NULL;
- vec->ib.offset = 0;
- vec->ib.index_size = 0;
+ pipe_resource_reference(&vec->ib.state.buffer, state->buffer);
+ vec->ib.state = *state;
+ } else {
+ pipe_resource_reference(&vec->ib.state.buffer, NULL);
+ memset(&vec->ib.state, 0, sizeof(vec->ib.state));
}
vec->dirty |= ILO_DIRTY_IB;
@@ -926,19 +1931,28 @@ ilo_create_stream_output_target(struct pipe_context *pipe,
unsigned buffer_offset,
unsigned buffer_size)
{
- struct pipe_stream_output_target *target;
+ const struct ilo_dev *dev = ilo_context(pipe)->dev;
+ struct ilo_stream_output_target *target;
+ struct ilo_state_sol_buffer_info info;
- target = MALLOC_STRUCT(pipe_stream_output_target);
+ target = CALLOC_STRUCT(ilo_stream_output_target);
assert(target);
- pipe_reference_init(&target->reference, 1);
- target->buffer = NULL;
- pipe_resource_reference(&target->buffer, res);
- target->context = pipe;
- target->buffer_offset = buffer_offset;
- target->buffer_size = buffer_size;
+ pipe_reference_init(&target->base.reference, 1);
+ pipe_resource_reference(&target->base.buffer, res);
+ target->base.context = pipe;
+ target->base.buffer_offset = buffer_offset;
+ target->base.buffer_size = buffer_size;
+
+ memset(&info, 0, sizeof(info));
+ info.buf = ilo_buffer(res);
+ info.offset = buffer_offset;
+ info.size = buffer_size;
- return target;
+ ilo_state_sol_buffer_init(&target->sb, dev, &info);
+ target->sb.bo = info.buf->bo;
+
+ return &target->base;
}
static void
@@ -991,7 +2005,7 @@ ilo_create_sampler_view(struct pipe_context *pipe,
const struct ilo_dev *dev = ilo_context(pipe)->dev;
struct ilo_view_cso *view;
- view = MALLOC_STRUCT(ilo_view_cso);
+ view = CALLOC_STRUCT(ilo_view_cso);
assert(view);
view->base = *templ;
@@ -1001,16 +2015,24 @@ ilo_create_sampler_view(struct pipe_context *pipe,
view->base.context = pipe;
if (res->target == PIPE_BUFFER) {
- const unsigned elem_size = util_format_get_blocksize(templ->format);
- const unsigned first_elem = templ->u.buf.first_element;
- const unsigned num_elems = templ->u.buf.last_element - first_elem + 1;
-
- ilo_gpe_init_view_surface_for_buffer(dev, ilo_buffer(res),
- first_elem * elem_size, num_elems * elem_size,
- elem_size, templ->format, false, false, &view->surface);
- }
- else {
+ struct ilo_state_surface_buffer_info info;
+
+ memset(&info, 0, sizeof(info));
+ info.buf = ilo_buffer(res);
+ info.access = ILO_STATE_SURFACE_ACCESS_SAMPLER;
+ info.format = ilo_format_translate_color(dev, templ->format);
+ info.format_size = util_format_get_blocksize(templ->format);
+ info.struct_size = info.format_size;
+ info.readonly = true;
+ info.offset = templ->u.buf.first_element * info.struct_size;
+ info.size = (templ->u.buf.last_element -
+ templ->u.buf.first_element + 1) * info.struct_size;
+
+ ilo_state_surface_init_for_buffer(&view->surface, dev, &info);
+ view->surface.bo = info.buf->bo;
+ } else {
struct ilo_texture *tex = ilo_texture(res);
+ struct ilo_state_surface_image_info info;
/* warn about degraded performance because of a missing binding flag */
if (tex->image.tiling == GEN6_TILING_NONE &&
@@ -1019,13 +2041,33 @@ ilo_create_sampler_view(struct pipe_context *pipe,
"not created for sampling\n");
}
- ilo_gpe_init_view_surface_for_image(dev, &tex->image,
- tex->base.target, templ->format,
- templ->u.tex.first_level,
- templ->u.tex.last_level - templ->u.tex.first_level + 1,
- templ->u.tex.first_layer,
- templ->u.tex.last_layer - templ->u.tex.first_layer + 1,
- false, &view->surface);
+ memset(&info, 0, sizeof(info));
+ info.img = &tex->image;
+
+ info.access = ILO_STATE_SURFACE_ACCESS_SAMPLER;
+
+ if (templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
+ tex->image.separate_stencil) {
+ info.format = ilo_format_translate_texture(dev,
+ PIPE_FORMAT_Z32_FLOAT);
+ } else {
+ info.format = ilo_format_translate_texture(dev, templ->format);
+ }
+
+ info.is_cube_map = (tex->image.target == PIPE_TEXTURE_CUBE ||
+ tex->image.target == PIPE_TEXTURE_CUBE_ARRAY);
+ info.is_array = util_resource_is_array_texture(&tex->base);
+ info.readonly = true;
+
+ info.level_base = templ->u.tex.first_level;
+ info.level_count = templ->u.tex.last_level -
+ templ->u.tex.first_level + 1;
+ info.slice_base = templ->u.tex.first_layer;
+ info.slice_count = templ->u.tex.last_layer -
+ templ->u.tex.first_layer + 1;
+
+ ilo_state_surface_init_for_image(&view->surface, dev, &info);
+ view->surface.bo = info.img->bo;
}
return &view->base;
@@ -1048,7 +2090,7 @@ ilo_create_surface(struct pipe_context *pipe,
struct ilo_texture *tex = ilo_texture(res);
struct ilo_surface_cso *surf;
- surf = MALLOC_STRUCT(ilo_surface_cso);
+ surf = CALLOC_STRUCT(ilo_surface_cso);
assert(surf);
surf->base = *templ;
@@ -1063,28 +2105,56 @@ ilo_create_surface(struct pipe_context *pipe,
surf->is_rt = !util_format_is_depth_or_stencil(templ->format);
if (surf->is_rt) {
+ struct ilo_state_surface_image_info info;
+
/* relax this? */
assert(tex->base.target != PIPE_BUFFER);
- /*
- * classic i965 sets render_cache_rw for constant buffers and sol
- * surfaces but not render buffers. Why?
- */
- ilo_gpe_init_view_surface_for_image(dev,
- &tex->image, tex->base.target,
- templ->format, templ->u.tex.level, 1,
- templ->u.tex.first_layer,
- templ->u.tex.last_layer - templ->u.tex.first_layer + 1,
- true, &surf->u.rt);
+ memset(&info, 0, sizeof(info));
+ info.img = &tex->image;
+ info.access = ILO_STATE_SURFACE_ACCESS_DP_RENDER;
+ info.format = ilo_format_translate_render(dev, templ->format);
+ info.is_array = util_resource_is_array_texture(&tex->base);
+ info.level_base = templ->u.tex.level;
+ info.level_count = 1;
+ info.slice_base = templ->u.tex.first_layer;
+ info.slice_count = templ->u.tex.last_layer -
+ templ->u.tex.first_layer + 1;
+
+ ilo_state_surface_init_for_image(&surf->u.rt, dev, &info);
+ surf->u.rt.bo = info.img->bo;
} else {
+ struct ilo_state_zs_info info;
+
assert(res->target != PIPE_BUFFER);
- ilo_gpe_init_zs_surface(dev, &tex->image,
- (tex->separate_s8) ? &tex->separate_s8->image : NULL,
- tex->base.target, templ->format,
- templ->u.tex.level, templ->u.tex.first_layer,
- templ->u.tex.last_layer - templ->u.tex.first_layer + 1,
- &surf->u.zs);
+ memset(&info, 0, sizeof(info));
+
+ if (templ->format == PIPE_FORMAT_S8_UINT) {
+ info.s_img = &tex->image;
+ } else {
+ info.z_img = &tex->image;
+ info.s_img = (tex->separate_s8) ? &tex->separate_s8->image : NULL;
+
+ info.hiz_enable =
+ ilo_image_can_enable_aux(&tex->image, templ->u.tex.level);
+ }
+
+ info.level = templ->u.tex.level;
+ info.slice_base = templ->u.tex.first_layer;
+ info.slice_count = templ->u.tex.last_layer -
+ templ->u.tex.first_layer + 1;
+
+ ilo_state_zs_init(&surf->u.zs, dev, &info);
+
+ if (info.z_img) {
+ surf->u.zs.depth_bo = info.z_img->bo;
+ if (info.hiz_enable)
+ surf->u.zs.hiz_bo = info.z_img->aux.bo;
+ }
+
+ if (info.s_img)
+ surf->u.zs.stencil_bo = info.s_img->bo;
}
return &surf->base;
@@ -1294,10 +2364,30 @@ void
ilo_state_vector_init(const struct ilo_dev *dev,
struct ilo_state_vector *vec)
{
- ilo_gpe_set_scissor_null(dev, &vec->scissor);
+ struct ilo_state_urb_info urb_info;
- ilo_gpe_init_zs_surface(dev, NULL, NULL, PIPE_TEXTURE_2D,
- PIPE_FORMAT_NONE, 0, 0, 1, &vec->fb.null_zs);
+ vec->sample_mask = ~0u;
+
+ ilo_state_viewport_init_data_only(&vec->viewport.vp, dev,
+ vec->viewport.vp_data, sizeof(vec->viewport.vp_data));
+ assert(vec->viewport.vp.array_size >= ILO_MAX_VIEWPORTS);
+
+ vec->viewport.params.matrices = vec->viewport.matrices;
+ vec->viewport.params.scissors = vec->viewport.scissors;
+
+ ilo_state_hs_init_disabled(&vec->disabled_hs, dev);
+ ilo_state_ds_init_disabled(&vec->disabled_ds, dev);
+ ilo_state_gs_init_disabled(&vec->disabled_gs, dev);
+
+ ilo_state_sol_buffer_init_disabled(&vec->so.dummy_sb, dev);
+
+ ilo_state_surface_init_for_null(&vec->fb.null_rt, dev);
+ ilo_state_zs_init_for_null(&vec->fb.null_zs, dev);
+
+ ilo_state_sampler_init_disabled(&vec->disabled_sampler, dev);
+
+ memset(&urb_info, 0, sizeof(urb_info));
+ ilo_state_urb_init(&vec->urb, dev, &urb_info);
util_dynarray_init(&vec->global_binding.bindings);
@@ -1314,7 +2404,7 @@ ilo_state_vector_cleanup(struct ilo_state_vector *vec)
pipe_resource_reference(&vec->vb.states[i].buffer, NULL);
}
- pipe_resource_reference(&vec->ib.buffer, NULL);
+ pipe_resource_reference(&vec->ib.state.buffer, NULL);
pipe_resource_reference(&vec->ib.hw_resource, NULL);
for (i = 0; i < vec->so.count; i++)
@@ -1377,7 +2467,7 @@ ilo_state_vector_resource_renamed(struct ilo_state_vector *vec,
}
}
- if (vec->ib.buffer == res) {
+ if (vec->ib.state.buffer == res) {
states |= ILO_DIRTY_IB;
/*
@@ -1392,6 +2482,10 @@ ilo_state_vector_resource_renamed(struct ilo_state_vector *vec,
for (i = 0; i < vec->so.count; i++) {
if (vec->so.states[i]->buffer == res) {
+ struct ilo_stream_output_target *target =
+ (struct ilo_stream_output_target *) vec->so.states[i];
+
+ target->sb.bo = ilo_buffer(res)->bo;
states |= ILO_DIRTY_SO;
break;
}
@@ -1456,7 +2550,8 @@ ilo_state_vector_resource_renamed(struct ilo_state_vector *vec,
struct ilo_surface_cso *cso =
(struct ilo_surface_cso *) vec->fb.state.zsbuf;
- cso->u.rt.bo = bo;
+ cso->u.zs.depth_bo = bo;
+
states |= ILO_DIRTY_FB;
}
}
diff --git a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h
index fd0a3156ebc..3e6fd8a2554 100644
--- a/src/gallium/drivers/ilo/ilo_state.h
+++ b/src/gallium/drivers/ilo/ilo_state.h
@@ -28,13 +28,38 @@
#ifndef ILO_STATE_H
#define ILO_STATE_H
-#include "core/ilo_state_3d.h"
+#include "core/ilo_builder_3d.h" /* for gen6_3dprimitive_info */
+#include "core/ilo_state_cc.h"
+#include "core/ilo_state_compute.h"
+#include "core/ilo_state_raster.h"
+#include "core/ilo_state_sampler.h"
+#include "core/ilo_state_sbe.h"
+#include "core/ilo_state_shader.h"
+#include "core/ilo_state_sol.h"
+#include "core/ilo_state_surface.h"
+#include "core/ilo_state_urb.h"
+#include "core/ilo_state_vf.h"
+#include "core/ilo_state_viewport.h"
+#include "core/ilo_state_zs.h"
#include "pipe/p_state.h"
#include "util/u_dynarray.h"
#include "ilo_common.h"
/**
+ * \see brw_context.h
+ */
+#define ILO_MAX_DRAW_BUFFERS 8
+#define ILO_MAX_CONST_BUFFERS (1 + 12)
+#define ILO_MAX_SAMPLER_VIEWS 16
+#define ILO_MAX_SAMPLERS 16
+#define ILO_MAX_SO_BINDINGS 64
+#define ILO_MAX_SO_BUFFERS 4
+#define ILO_MAX_VIEWPORTS 1
+
+#define ILO_MAX_SURFACES 256
+
+/**
* States that we track.
*
* XXX Do we want to count each sampler or vertex buffer as a state? If that
@@ -120,6 +145,172 @@ enum ilo_dirty_flags {
};
struct ilo_context;
+struct ilo_shader_state;
+
+struct ilo_ve_state {
+ unsigned vb_mapping[PIPE_MAX_ATTRIBS];
+ unsigned vb_count;
+
+ /* these are not valid until the state is finalized */
+ uint32_t vf_data[PIPE_MAX_ATTRIBS][4];
+ struct ilo_state_vf_params_info vf_params;
+ struct ilo_state_vf vf;
+};
+
+struct ilo_vb_state {
+ struct pipe_vertex_buffer states[PIPE_MAX_ATTRIBS];
+ struct ilo_state_vertex_buffer vb[PIPE_MAX_ATTRIBS];
+ uint32_t enabled_mask;
+};
+
+struct ilo_ib_state {
+ struct pipe_index_buffer state;
+
+ /* these are not valid until the state is finalized */
+ struct pipe_resource *hw_resource;
+ unsigned hw_index_size;
+ struct ilo_state_index_buffer ib;
+};
+
+struct ilo_cbuf_cso {
+ struct pipe_resource *resource;
+ struct ilo_state_surface_buffer_info info;
+ struct ilo_state_surface surface;
+
+ /*
+ * this CSO is not so constant because user buffer needs to be uploaded in
+ * finalize_constant_buffers()
+ */
+ const void *user_buffer;
+};
+
+struct ilo_sampler_cso {
+ struct ilo_state_sampler sampler;
+ struct ilo_state_sampler_border border;
+ bool saturate_s;
+ bool saturate_t;
+ bool saturate_r;
+};
+
+struct ilo_sampler_state {
+ const struct ilo_sampler_cso *cso[ILO_MAX_SAMPLERS];
+};
+
+struct ilo_cbuf_state {
+ struct ilo_cbuf_cso cso[ILO_MAX_CONST_BUFFERS];
+ uint32_t enabled_mask;
+};
+
+struct ilo_resource_state {
+ struct pipe_surface *states[PIPE_MAX_SHADER_RESOURCES];
+ unsigned count;
+};
+
+struct ilo_view_cso {
+ struct pipe_sampler_view base;
+
+ struct ilo_state_surface surface;
+};
+
+struct ilo_view_state {
+ struct pipe_sampler_view *states[ILO_MAX_SAMPLER_VIEWS];
+ unsigned count;
+};
+
+struct ilo_stream_output_target {
+ struct pipe_stream_output_target base;
+
+ struct ilo_state_sol_buffer sb;
+};
+
+struct ilo_so_state {
+ struct pipe_stream_output_target *states[ILO_MAX_SO_BUFFERS];
+ unsigned count;
+ unsigned append_bitmask;
+
+ struct ilo_state_sol_buffer dummy_sb;
+
+ bool enabled;
+};
+
+struct ilo_rasterizer_state {
+ struct pipe_rasterizer_state state;
+
+ /* these are invalid until finalize_rasterizer() */
+ struct ilo_state_raster_info info;
+ struct ilo_state_raster rs;
+};
+
+struct ilo_viewport_state {
+ struct ilo_state_viewport_matrix_info matrices[ILO_MAX_VIEWPORTS];
+ struct ilo_state_viewport_scissor_info scissors[ILO_MAX_VIEWPORTS];
+ struct ilo_state_viewport_params_info params;
+
+ struct pipe_viewport_state viewport0;
+ struct pipe_scissor_state scissor0;
+
+ struct ilo_state_viewport vp;
+ uint32_t vp_data[20 * ILO_MAX_VIEWPORTS];
+};
+
+struct ilo_surface_cso {
+ struct pipe_surface base;
+
+ bool is_rt;
+ union {
+ struct ilo_state_surface rt;
+ struct ilo_state_zs zs;
+ } u;
+};
+
+struct ilo_fb_state {
+ struct pipe_framebuffer_state state;
+
+ struct ilo_state_surface null_rt;
+ struct ilo_state_zs null_zs;
+
+ struct ilo_fb_blend_caps {
+ bool is_unorm;
+ bool is_integer;
+ bool force_dst_alpha_one;
+
+ bool can_logicop;
+ bool can_blend;
+ bool can_alpha_test;
+ } blend_caps[PIPE_MAX_COLOR_BUFS];
+
+ unsigned num_samples;
+
+ bool has_integer_rt;
+ bool has_hiz;
+ enum gen_depth_format depth_offset_format;
+};
+
+struct ilo_dsa_state {
+ struct ilo_state_cc_depth_info depth;
+
+ struct ilo_state_cc_stencil_info stencil;
+ struct {
+ uint8_t test_mask;
+ uint8_t write_mask;
+ } stencil_front, stencil_back;
+
+ bool alpha_test;
+ float alpha_ref;
+ enum gen_compare_function alpha_func;
+};
+
+struct ilo_blend_state {
+ struct ilo_state_cc_blend_rt_info rt[PIPE_MAX_COLOR_BUFS];
+ struct ilo_state_cc_blend_rt_info dummy_rt;
+ bool dual_blend;
+
+ /* these are invalid until finalize_blend() */
+ struct ilo_state_cc_blend_rt_info effective_rt[PIPE_MAX_COLOR_BUFS];
+ struct ilo_state_cc_info info;
+ struct ilo_state_cc cc;
+ bool alpha_may_kill;
+};
struct ilo_global_binding_cso {
struct pipe_resource *resource;
@@ -147,6 +338,7 @@ struct ilo_global_binding {
struct ilo_state_vector {
const struct pipe_draw_info *draw;
+ struct gen6_3dprimitive_info draw_info;
uint32_t dirty;
@@ -157,30 +349,41 @@ struct ilo_state_vector {
struct ilo_shader_state *vs;
struct ilo_shader_state *gs;
+ struct ilo_state_hs disabled_hs;
+ struct ilo_state_ds disabled_ds;
+ struct ilo_state_gs disabled_gs;
+
struct ilo_so_state so;
struct pipe_clip_state clip;
+
struct ilo_viewport_state viewport;
- struct ilo_scissor_state scissor;
- const struct ilo_rasterizer_state *rasterizer;
- struct pipe_poly_stipple poly_stipple;
+ struct ilo_rasterizer_state *rasterizer;
+
+ struct ilo_state_line_stipple line_stipple;
+ struct ilo_state_poly_stipple poly_stipple;
unsigned sample_mask;
struct ilo_shader_state *fs;
- const struct ilo_dsa_state *dsa;
+ struct ilo_state_cc_params_info cc_params;
struct pipe_stencil_ref stencil_ref;
- const struct ilo_blend_state *blend;
- struct pipe_blend_color blend_color;
+ const struct ilo_dsa_state *dsa;
+ struct ilo_blend_state *blend;
+
struct ilo_fb_state fb;
+ struct ilo_state_urb urb;
+
/* shader resources */
struct ilo_sampler_state sampler[PIPE_SHADER_TYPES];
struct ilo_view_state view[PIPE_SHADER_TYPES];
struct ilo_cbuf_state cbuf[PIPE_SHADER_TYPES];
struct ilo_resource_state resource;
+ struct ilo_state_sampler disabled_sampler;
+
/* GPGPU */
struct ilo_shader_state *cs;
struct ilo_resource_state cs_resource;
diff --git a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h
index d2dc2f5b5b4..01c86675202 100644
--- a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h
+++ b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h
@@ -28,6 +28,9 @@
#ifndef ILO_SHADER_INTERNAL_H
#define ILO_SHADER_INTERNAL_H
+#include "core/ilo_state_sbe.h"
+#include "core/ilo_state_sol.h"
+
#include "ilo_common.h"
#include "ilo_state.h"
#include "ilo_shader.h"
@@ -72,13 +75,27 @@ struct ilo_shader_variant {
uint32_t saturate_tex_coords[3];
};
+struct ilo_kernel_routing {
+ bool initialized;
+
+ bool is_point;
+ bool light_twoside;
+ uint32_t sprite_coord_enable;
+ int sprite_coord_mode;
+ int src_len;
+ int src_semantics[PIPE_MAX_SHADER_OUTPUTS];
+ int src_indices[PIPE_MAX_SHADER_OUTPUTS];
+
+ struct ilo_state_sbe sbe;
+};
+
/**
* A compiled shader.
*/
struct ilo_shader {
struct ilo_shader_variant variant;
- struct ilo_shader_cso cso;
+ union ilo_shader_cso cso;
struct {
int semantic_names[PIPE_MAX_SHADER_INPUTS];
@@ -111,7 +128,9 @@ struct ilo_shader {
bool stream_output;
int svbi_post_inc;
- struct pipe_stream_output_info so_info;
+
+ uint32_t sol_data[PIPE_MAX_SO_OUTPUTS][2];
+ struct ilo_state_sol sol;
/* for VS stream output / rasterizer discard */
int gs_offsets[3];
@@ -121,11 +140,8 @@ struct ilo_shader {
void *kernel;
int kernel_size;
- bool routing_initialized;
- int routing_src_semantics[PIPE_MAX_SHADER_OUTPUTS];
- int routing_src_indices[PIPE_MAX_SHADER_OUTPUTS];
- uint32_t routing_sprite_coord_enable;
struct ilo_kernel_routing routing;
+ struct ilo_state_ps_params_info ps_params;
/* what does the push constant buffer consist of? */
struct {
diff --git a/src/gallium/drivers/ilo/shader/toy_tgsi.c b/src/gallium/drivers/ilo/shader/toy_tgsi.c
index 65e47bf3a4a..d38585f1475 100644
--- a/src/gallium/drivers/ilo/shader/toy_tgsi.c
+++ b/src/gallium/drivers/ilo/shader/toy_tgsi.c
@@ -2036,9 +2036,6 @@ parse_instruction(struct toy_tgsi *tgsi,
if (!dst_is_scratch[i])
continue;
- if (tgsi_inst->Instruction.Saturate == TGSI_SAT_MINUS_PLUS_ONE)
- tc_fail(tgsi->tc, "TGSI_SAT_MINUS_PLUS_ONE unhandled");
-
tgsi->tc->templ.saturate = tgsi_inst->Instruction.Saturate;
/* emit indirect store */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index b6c32ffb979..b25e0413750 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -975,10 +975,6 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
s_bld.int_vec_type, "");
}
- /* convert scalar stencil refs into vectors */
- stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]);
- stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]);
-
s_pass_mask = lp_build_stencil_test(&s_bld, stencil,
stencil_refs, stencil_vals,
front_facing);
diff --git a/src/gallium/drivers/llvmpipe/lp_public.h b/src/gallium/drivers/llvmpipe/lp_public.h
index ec6b660b48e..27ab1baefbb 100644
--- a/src/gallium/drivers/llvmpipe/lp_public.h
+++ b/src/gallium/drivers/llvmpipe/lp_public.h
@@ -1,10 +1,18 @@
#ifndef LP_PUBLIC_H
#define LP_PUBLIC_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
struct pipe_screen;
struct sw_winsys;
struct pipe_screen *
llvmpipe_create_screen(struct sw_winsys *winsys);
+#ifdef __cplusplus
+}
+#endif
+
#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c
index 4f8bab62e7b..fc593670671 100644
--- a/src/gallium/drivers/llvmpipe/lp_query.c
+++ b/src/gallium/drivers/llvmpipe/lp_query.c
@@ -315,7 +315,7 @@ llvmpipe_check_render_cond(struct llvmpipe_context *lp)
b = pipe->get_query_result(pipe, lp->render_cond_query, wait, (void*)&result);
if (b)
- return (!result == lp->render_cond_cond);
+ return ((!result) == lp->render_cond_cond);
else
return TRUE;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index f4ba596f358..47f1897c732 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -165,7 +165,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_DEPTH_CLIP_DISABLE:
return 1;
case PIPE_CAP_SHADER_STENCIL_EXPORT:
- return 0;
+ return 1;
case PIPE_CAP_TGSI_INSTANCEID:
case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
case PIPE_CAP_START_INSTANCE:
@@ -258,8 +258,9 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
return 1;
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
- case PIPE_CAP_SAMPLER_VIEW_TARGET:
return 0;
+ case PIPE_CAP_SAMPLER_VIEW_TARGET:
+ return 1;
case PIPE_CAP_FAKE_SW_MSAA:
return 1;
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
@@ -290,6 +291,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
return 1;
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
+ case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
return 0;
}
/* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index 96cc77c250c..4c8167a9e7d 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -854,9 +854,10 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
jit_tex->img_stride[j] = lp_tex->img_stride[j];
}
- if (res->target == PIPE_TEXTURE_1D_ARRAY ||
- res->target == PIPE_TEXTURE_2D_ARRAY ||
- res->target == PIPE_TEXTURE_CUBE_ARRAY) {
+ if (view->target == PIPE_TEXTURE_1D_ARRAY ||
+ view->target == PIPE_TEXTURE_2D_ARRAY ||
+ view->target == PIPE_TEXTURE_CUBE ||
+ view->target == PIPE_TEXTURE_CUBE_ARRAY) {
/*
* For array textures, we don't have first_layer, instead
* adjust last_layer (stored as depth) plus the mip level offsets
@@ -868,7 +869,8 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
jit_tex->mip_offsets[j] += view->u.tex.first_layer *
lp_tex->img_stride[j];
}
- if (res->target == PIPE_TEXTURE_CUBE_ARRAY) {
+ if (view->target == PIPE_TEXTURE_CUBE ||
+ view->target == PIPE_TEXTURE_CUBE_ARRAY) {
assert(jit_tex->depth % 6 == 0);
}
assert(view->u.tex.first_layer <= view->u.tex.last_layer);
@@ -1067,10 +1069,13 @@ try_update_scene_state( struct lp_setup_context *setup )
if (setup->dirty & LP_SETUP_NEW_CONSTANTS) {
for (i = 0; i < Elements(setup->constants); ++i) {
struct pipe_resource *buffer = setup->constants[i].current.buffer;
- const unsigned current_size = setup->constants[i].current.buffer_size;
+ const unsigned current_size = MIN2(setup->constants[i].current.buffer_size,
+ LP_MAX_TGSI_CONST_BUFFER_SIZE);
const ubyte *current_data = NULL;
int num_constants;
+ STATIC_ASSERT(DATA_BLOCK_SIZE >= LP_MAX_TGSI_CONST_BUFFER_SIZE);
+
if (buffer) {
/* resource buffer */
current_data = (ubyte *) llvmpipe_resource_data(buffer);
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 35fe7b20181..b5ce8683f1a 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -260,7 +260,8 @@ generate_fs_loop(struct gallivm_state *gallivm,
{
const struct util_format_description *zs_format_desc = NULL;
const struct tgsi_token *tokens = shader->base.tokens;
- LLVMTypeRef vec_type;
+ struct lp_type int_type = lp_int_type(type);
+ LLVMTypeRef vec_type, int_vec_type;
LLVMValueRef mask_ptr, mask_val;
LLVMValueRef consts_ptr, num_consts_ptr;
LLVMValueRef z;
@@ -295,7 +296,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
zs_format_desc = util_format_description(key->zsbuf_format);
assert(zs_format_desc);
- if (!shader->info.base.writes_z) {
+ if (!shader->info.base.writes_z && !shader->info.base.writes_stencil) {
if (key->alpha.enabled ||
key->blend.alpha_to_coverage ||
shader->info.base.uses_kill) {
@@ -329,11 +330,14 @@ generate_fs_loop(struct gallivm_state *gallivm,
depth_mode = 0;
}
+ vec_type = lp_build_vec_type(gallivm, type);
+ int_vec_type = lp_build_vec_type(gallivm, int_type);
stencil_refs[0] = lp_jit_context_stencil_ref_front_value(gallivm, context_ptr);
stencil_refs[1] = lp_jit_context_stencil_ref_back_value(gallivm, context_ptr);
-
- vec_type = lp_build_vec_type(gallivm, type);
+ /* convert scalar stencil refs into vectors */
+ stencil_refs[0] = lp_build_broadcast(gallivm, int_vec_type, stencil_refs[0]);
+ stencil_refs[1] = lp_build_broadcast(gallivm, int_vec_type, stencil_refs[1]);
consts_ptr = lp_jit_context_constants(gallivm, context_ptr);
num_consts_ptr = lp_jit_context_num_constants(gallivm, context_ptr);
@@ -462,7 +466,9 @@ generate_fs_loop(struct gallivm_state *gallivm,
int pos0 = find_output_by_semantic(&shader->info.base,
TGSI_SEMANTIC_POSITION,
0);
-
+ int s_out = find_output_by_semantic(&shader->info.base,
+ TGSI_SEMANTIC_STENCIL,
+ 0);
if (pos0 != -1 && outputs[pos0][2]) {
z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z");
@@ -512,6 +518,15 @@ generate_fs_loop(struct gallivm_state *gallivm,
}
}
+ if (s_out != -1 && outputs[s_out][1]) {
+ /* there's only one value, and spec says to discard additional bits */
+ LLVMValueRef s_max_mask = lp_build_const_int_vec(gallivm, int_type, 255);
+ stencil_refs[0] = LLVMBuildLoad(builder, outputs[s_out][1], "output.s");
+ stencil_refs[0] = LLVMBuildBitCast(builder, stencil_refs[0], int_vec_type, "");
+ stencil_refs[0] = LLVMBuildAnd(builder, stencil_refs[0], s_max_mask, "");
+ stencil_refs[1] = stencil_refs[0];
+ }
+
lp_build_depth_stencil_load_swizzled(gallivm, type,
zs_format_desc, key->resource_1d,
depth_ptr, depth_stride,
diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
index 21da6290574..b205f02fdba 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
@@ -170,6 +170,36 @@ llvmpipe_create_sampler_view(struct pipe_context *pipe,
view->texture = NULL;
pipe_resource_reference(&view->texture, texture);
view->context = pipe;
+
+#ifdef DEBUG
+ /*
+ * This is possibly too lenient, but the primary reason is just
+ * to catch state trackers which forget to initialize this, so
+ * it only catches clearly impossible view targets.
+ */
+ if (view->target != texture->target) {
+ if (view->target == PIPE_TEXTURE_1D)
+ assert(texture->target == PIPE_TEXTURE_1D_ARRAY);
+ else if (view->target == PIPE_TEXTURE_1D_ARRAY)
+ assert(texture->target == PIPE_TEXTURE_1D);
+ else if (view->target == PIPE_TEXTURE_2D)
+ assert(texture->target == PIPE_TEXTURE_2D_ARRAY ||
+ texture->target == PIPE_TEXTURE_CUBE ||
+ texture->target == PIPE_TEXTURE_CUBE_ARRAY);
+ else if (view->target == PIPE_TEXTURE_2D_ARRAY)
+ assert(texture->target == PIPE_TEXTURE_2D ||
+ texture->target == PIPE_TEXTURE_CUBE ||
+ texture->target == PIPE_TEXTURE_CUBE_ARRAY);
+ else if (view->target == PIPE_TEXTURE_CUBE)
+ assert(texture->target == PIPE_TEXTURE_CUBE_ARRAY ||
+ texture->target == PIPE_TEXTURE_2D_ARRAY);
+ else if (view->target == PIPE_TEXTURE_CUBE_ARRAY)
+ assert(texture->target == PIPE_TEXTURE_CUBE ||
+ texture->target == PIPE_TEXTURE_2D_ARRAY);
+ else
+ assert(0);
+ }
+#endif
}
return view;
@@ -245,15 +275,17 @@ prepare_shader_sampling(
row_stride[j] = lp_tex->row_stride[j];
img_stride[j] = lp_tex->img_stride[j];
}
- if (res->target == PIPE_TEXTURE_1D_ARRAY ||
- res->target == PIPE_TEXTURE_2D_ARRAY ||
- res->target == PIPE_TEXTURE_CUBE_ARRAY) {
+ if (view->target == PIPE_TEXTURE_1D_ARRAY ||
+ view->target == PIPE_TEXTURE_2D_ARRAY ||
+ view->target == PIPE_TEXTURE_CUBE ||
+ view->target == PIPE_TEXTURE_CUBE_ARRAY) {
num_layers = view->u.tex.last_layer - view->u.tex.first_layer + 1;
for (j = first_level; j <= last_level; j++) {
mip_offsets[j] += view->u.tex.first_layer *
lp_tex->img_stride[j];
}
- if (res->target == PIPE_TEXTURE_CUBE_ARRAY) {
+ if (view->target == PIPE_TEXTURE_CUBE ||
+ view->target == PIPE_TEXTURE_CUBE_ARRAY) {
assert(num_layers % 6 == 0);
}
assert(view->u.tex.first_layer <= view->u.tex.last_layer);
diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c
index 08f968f7f0a..96f8ed82cd8 100644
--- a/src/gallium/drivers/llvmpipe/lp_surface.c
+++ b/src/gallium/drivers/llvmpipe/lp_surface.c
@@ -42,13 +42,6 @@ lp_resource_copy(struct pipe_context *pipe,
struct pipe_resource *src, unsigned src_level,
const struct pipe_box *src_box)
{
- struct llvmpipe_resource *src_tex = llvmpipe_resource(src);
- struct llvmpipe_resource *dst_tex = llvmpipe_resource(dst);
- const enum pipe_format format = src_tex->base.format;
- unsigned width = src_box->width;
- unsigned height = src_box->height;
- unsigned depth = src_box->depth;
-
llvmpipe_flush_resource(pipe,
dst, dst_level,
FALSE, /* read_only */
@@ -63,58 +56,8 @@ lp_resource_copy(struct pipe_context *pipe,
FALSE, /* do_not_block */
"blit src");
- /* Fallback for buffers. */
- if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
- util_resource_copy_region(pipe, dst, dst_level, dstx, dsty, dstz,
- src, src_level, src_box);
- return;
- }
-
- /*
- printf("surface copy from %u lvl %u to %u lvl %u: %u,%u,%u to %u,%u,%u %u x %u x %u\n",
- src_tex->id, src_level, dst_tex->id, dst_level,
- src_box->x, src_box->y, src_box->z, dstx, dsty, dstz,
- src_box->width, src_box->height, src_box->depth);
- */
-
- /* make sure display target resources (which cannot have levels/layers) are mapped */
- if (src_tex->dt)
- (void) llvmpipe_resource_map(src, src_level, 0, LP_TEX_USAGE_READ);
- if (dst_tex->dt)
- /*
- * Could set this to WRITE_ALL if complete dst is covered but it gets
- * ignored anyway.
- */
- (void) llvmpipe_resource_map(dst, dst_level, 0, LP_TEX_USAGE_READ_WRITE);
-
-
- /* copy */
- {
- const ubyte *src_linear_ptr
- = llvmpipe_get_texture_image_address(src_tex, src_box->z,
- src_level);
- ubyte *dst_linear_ptr
- = llvmpipe_get_texture_image_address(dst_tex, dstz,
- dst_level);
-
- if (dst_linear_ptr && src_linear_ptr) {
- util_copy_box(dst_linear_ptr, format,
- llvmpipe_resource_stride(&dst_tex->base, dst_level),
- dst_tex->img_stride[dst_level],
- dstx, dsty, 0,
- width, height, depth,
- src_linear_ptr,
- llvmpipe_resource_stride(&src_tex->base, src_level),
- src_tex->img_stride[src_level],
- src_box->x, src_box->y, 0);
- }
- }
-
- if (src_tex->dt)
- llvmpipe_resource_unmap(src, 0, 0);
- if (dst_tex->dt)
- llvmpipe_resource_unmap(dst, 0, 0);
-
+ util_resource_copy_region(pipe, dst, dst_level, dstx, dsty, dstz,
+ src, src_level, src_box);
}
@@ -139,11 +82,6 @@ static void lp_blit(struct pipe_context *pipe,
return; /* done */
}
- if (info.mask & PIPE_MASK_S) {
- debug_printf("llvmpipe: cannot blit stencil, skipping\n");
- info.mask &= ~PIPE_MASK_S;
- }
-
if (!util_blitter_is_blit_supported(lp->blitter, &info)) {
debug_printf("llvmpipe: blit unsupported %s -> %s\n",
util_format_short_name(info.src.resource->format),
diff --git a/src/gallium/drivers/nouveau/Android.mk b/src/gallium/drivers/nouveau/Android.mk
index 420c8e5734c..daf3abd1bb3 100644
--- a/src/gallium/drivers/nouveau/Android.mk
+++ b/src/gallium/drivers/nouveau/Android.mk
@@ -39,6 +39,10 @@ LOCAL_SRC_FILES := \
LOCAL_SHARED_LIBRARIES := libdrm libdrm_nouveau
LOCAL_MODULE := libmesa_pipe_nouveau
+ifeq ($(MESA_LOLLIPOP_BUILD),true)
+LOCAL_C_INCLUDES := external/libcxx/include
+else
include external/stlport/libstlport.mk
+endif
include $(GALLIUM_COMMON_MK)
include $(BUILD_STATIC_LIBRARY)
diff --git a/src/gallium/drivers/nouveau/Makefile.am b/src/gallium/drivers/nouveau/Makefile.am
index 0aefc031210..d05f0a17ab4 100644
--- a/src/gallium/drivers/nouveau/Makefile.am
+++ b/src/gallium/drivers/nouveau/Makefile.am
@@ -48,7 +48,7 @@ nouveau_compiler_SOURCES = \
nouveau_compiler_LDADD = \
libnouveau.la \
- ../../auxiliary/libgallium.la \
+ $(top_builddir)/src/gallium/auxiliary/libgallium.la \
$(top_builddir)/src/util/libmesautil.la \
$(GALLIUM_COMMON_LIB_DEPS)
diff --git a/src/gallium/drivers/nouveau/codegen/lib/gk110.asm b/src/gallium/drivers/nouveau/codegen/lib/gk110.asm
index be17871edd4..b9c05a04b9a 100644
--- a/src/gallium/drivers/nouveau/codegen/lib/gk110.asm
+++ b/src/gallium/drivers/nouveau/codegen/lib/gk110.asm
@@ -11,7 +11,7 @@
// SIZE: 22 / 14 * 8 bytes
//
gk110_div_u32:
- sched 0x28282804280428
+ sched 0x28 0x04 0x28 0x04 0x28 0x28 0x28
bfind u32 $r2 $r1
xor b32 $r2 $r2 0x1f
mov b32 $r3 0x1
@@ -19,7 +19,7 @@ gk110_div_u32:
cvt u32 $r1 neg u32 $r1
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
- sched 0x28282828282828
+ sched 0x28 0x28 0x28 0x28 0x28 0x28 0x28
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
@@ -27,7 +27,7 @@ gk110_div_u32:
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
- sched 0x042c2828042804
+ sched 0x04 0x28 0x04 0x28 0x28 0x2c 0x04
add $r2 (mul high u32 $r2 u32 $r3) $r2
mov b32 $r3 $r0
mul high $r0 u32 $r0 u32 $r2
@@ -35,7 +35,7 @@ gk110_div_u32:
add $r1 (mul u32 $r1 u32 $r0) $r3
set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
- sched 0x20282e20042c28
+ sched 0x28 0x2c 0x04 0x20 0x2e 0x28 0x20
$p0 add b32 $r0 $r0 0x1
$p0 set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
@@ -51,7 +51,7 @@ gk110_div_u32:
gk110_div_s32:
set $p2 0x1 lt s32 $r0 0x0
set $p3 0x1 lt s32 $r1 0x0 xor $p2
- sched 0x28042804282820
+ sched 0x20 0x28 0x28 0x04 0x28 0x04 0x28
cvt s32 $r0 abs s32 $r0
cvt s32 $r1 abs s32 $r1
bfind u32 $r2 $r1
@@ -59,7 +59,7 @@ gk110_div_s32:
mov b32 $r3 0x1
shl b32 $r2 $r3 clamp $r2
cvt u32 $r1 neg u32 $r1
- sched 0x28282828282828
+ sched 0x28 0x28 0x28 0x28 0x28 0x28 0x28
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
@@ -67,7 +67,7 @@ gk110_div_s32:
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
- sched 0x28280428042828
+ sched 0x28 0x28 0x04 0x28 0x04 0x28 0x28
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
@@ -75,7 +75,7 @@ gk110_div_s32:
mul high $r0 u32 $r0 u32 $r2
cvt u32 $r2 neg u32 $r1
add $r1 (mul u32 $r1 u32 $r0) $r3
- sched 0x2028042c28042c
+ sched 0x2c 0x04 0x28 0x2c 0x04 0x28 0x20
set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
$p0 add b32 $r0 $r0 0x1
@@ -83,7 +83,7 @@ gk110_div_s32:
$p0 sub b32 $r1 $r1 $r2
$p0 add b32 $r0 $r0 0x1
$p3 cvt s32 $r0 neg s32 $r0
- sched 0x2c200428042e04
+ sched 0x04 0x2e 0x04 0x28 0x04 0x20 0x2c
$p2 cvt s32 $r1 neg s32 $r1
ret
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index 6bb9620d5f7..ab8bf2e5504 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -967,8 +967,8 @@ CodeEmitterGK110::emitSET(const CmpInstruction *i)
code[0] = (code[0] & ~0xfc) | ((code[0] << 3) & 0xe0);
if (i->defExists(1))
defId(i->def(1), 2);
- else
- code[0] |= 0x1c;
+ else
+ code[0] |= 0x1c;
} else {
switch (i->sType) {
case TYPE_F32: op2 = 0x000; op1 = 0x800; break;
@@ -990,8 +990,12 @@ CodeEmitterGK110::emitSET(const CmpInstruction *i)
}
FTZ_(3a);
- if (i->dType == TYPE_F32)
- code[1] |= 1 << 23;
+ if (i->dType == TYPE_F32) {
+ if (isFloatType(i->sType))
+ code[1] |= 1 << 23;
+ else
+ code[1] |= 1 << 15;
+ }
}
if (i->sType == TYPE_S32)
code[1] |= 1 << 19;
@@ -1316,6 +1320,8 @@ CodeEmitterGK110::emitFlow(const Instruction *i)
} else
if (mask & 2) {
int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
+ if (writeIssueDelays && !(f->target.bb->binPos & 0x3f))
+ pcRel += 8;
// currently we don't want absolute branches
assert(!f->absolute);
code[0] |= (pcRel & 0x1ff) << 23;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index 22db368b371..399a6f1db13 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -509,10 +509,13 @@ CodeEmitterGM107::emitBRA()
emitCond5(0x00, CC_TR);
if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
+ int32_t pos = insn->target.bb->binPos;
+ if (writeIssueDelays && !(pos & 0x1f))
+ pos += 8;
if (!insn->absolute)
- emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
+ emitField(0x14, 24, pos - (codeSize + 8));
else
- emitField(0x14, 32, insn->target.bb->binPos);
+ emitField(0x14, 32, pos);
} else {
emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0));
emitField(0x05, 1, 1);
@@ -1827,6 +1830,7 @@ CodeEmitterGM107::emitISET()
emitCond3(0x31, insn->setCond);
emitField(0x30, 1, isSignedType(insn->sType));
emitCC (0x2f);
+ emitField(0x2c, 1, insn->dType == TYPE_F32);
emitX (0x2b);
emitGPR (0x08, insn->src(0));
emitGPR (0x00, insn->def(0));
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index d9aed34a0ce..472e3a84119 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -1078,8 +1078,14 @@ CodeEmitterNVC0::emitSET(const CmpInstruction *i)
if (!isFloatType(i->sType))
lo = 0x3;
- if (isFloatType(i->dType) || isSignedIntType(i->sType))
+ if (isSignedIntType(i->sType))
lo |= 0x20;
+ if (isFloatType(i->dType)) {
+ if (isFloatType(i->sType))
+ lo |= 0x20;
+ else
+ lo |= 0x80;
+ }
switch (i->op) {
case OP_SET_AND: hi = 0x10000000; break;
@@ -1406,6 +1412,8 @@ CodeEmitterNVC0::emitFlow(const Instruction *i)
} else
if (mask & 2) {
int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
+ if (writeIssueDelays && !(f->target.bb->binPos & 0x3f))
+ pcRel += 8;
// currently we don't want absolute branches
assert(!f->absolute);
code[0] |= (pcRel & 0x3f) << 26;
@@ -2712,7 +2720,6 @@ private:
RegScores *score; // for current BB
std::vector<RegScores> scoreBoards;
- int cycle;
int prevData;
operation prevOp;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 254629f907a..ecd115f9807 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -1316,7 +1316,7 @@ private:
};
private:
- const struct tgsi::Source *code;
+ const tgsi::Source *code;
const struct nv50_ir_prog_info *info;
struct {
@@ -1356,18 +1356,20 @@ Converter::srcToSym(tgsi::Instruction::SrcRegister src, int c)
{
const int swz = src.getSwizzle(c);
+ /* TODO: Use Array ID when it's available for the index */
return makeSym(src.getFile(),
src.is2D() ? src.getIndex(1) : 0,
- src.isIndirect(0) ? -1 : src.getIndex(0), swz,
+ src.getIndex(0), swz,
src.getIndex(0) * 16 + swz * 4);
}
Symbol *
Converter::dstToSym(tgsi::Instruction::DstRegister dst, int c)
{
+ /* TODO: Use Array ID when it's available for the index */
return makeSym(dst.getFile(),
dst.is2D() ? dst.getIndex(1) : 0,
- dst.isIndirect(0) ? -1 : dst.getIndex(0), c,
+ dst.getIndex(0), c,
dst.getIndex(0) * 16 + c * 4);
}
@@ -1604,19 +1606,8 @@ Converter::storeDst(int d, int c, Value *val)
{
const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
- switch (tgsi.getSaturate()) {
- case TGSI_SAT_NONE:
- break;
- case TGSI_SAT_ZERO_ONE:
+ if (tgsi.getSaturate()) {
mkOp1(OP_SAT, dstTy, val, val);
- break;
- case TGSI_SAT_MINUS_PLUS_ONE:
- mkOp2(OP_MAX, dstTy, val, val, mkImm(-1.0f));
- mkOp2(OP_MIN, dstTy, val, val, mkImm(+1.0f));
- break;
- default:
- assert(!"invalid saturation mode");
- break;
}
Value *ptr = NULL;
@@ -1955,13 +1946,13 @@ isResourceSpecial(const int r)
}
static inline bool
-isResourceRaw(const struct tgsi::Source *code, const int r)
+isResourceRaw(const tgsi::Source *code, const int r)
{
return isResourceSpecial(r) || code->resources[r].raw;
}
static inline nv50_ir::TexTarget
-getResourceTarget(const struct tgsi::Source *code, int r)
+getResourceTarget(const tgsi::Source *code, int r)
{
if (isResourceSpecial(r))
return nv50_ir::TEX_TARGET_BUFFER;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
index 64989ac8846..596ac95d489 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
@@ -240,6 +240,7 @@ GM107LoweringPass::visit(Instruction *i)
Value *ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
i->getIndirect(0, 0), bld.mkImm(4));
i->setIndirect(0, 0, ptr);
+ i->op = OP_VFETCH;
} else {
i->op = OP_VFETCH;
assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
index 1ad086094dc..2c7f7e326b2 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -887,7 +887,7 @@ NV50LoweringPreSSA::handleTXL(TexInstruction *i)
}
}
bld.setPosition(joinBB, false);
- bld.mkOp(OP_JOIN, TYPE_NONE, NULL);
+ bld.mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
return true;
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index b61f3c49bb9..7a5d1ce0299 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -100,8 +100,7 @@ void
NVC0LegalizeSSA::handleFTZ(Instruction *i)
{
// Only want to flush float inputs
- if (i->sType != TYPE_F32)
- return;
+ assert(i->sType == TYPE_F32);
// If we're already flushing denorms (and NaN's) to zero, no need for this.
if (i->dnz)
@@ -129,7 +128,7 @@ NVC0LegalizeSSA::visit(BasicBlock *bb)
Instruction *next;
for (Instruction *i = bb->getEntry(); i; i = next) {
next = i->next;
- if (i->dType == TYPE_F32) {
+ if (i->sType == TYPE_F32) {
if (prog->getType() != Program::TYPE_COMPUTE)
handleFTZ(i);
continue;
@@ -169,7 +168,7 @@ NVC0LegalizePostRA::insnDominatedBy(const Instruction *later,
void
NVC0LegalizePostRA::addTexUse(std::list<TexUse> &uses,
- Instruction *usei, const Instruction *insn)
+ Instruction *usei, const Instruction *texi)
{
bool add = true;
for (std::list<TexUse>::iterator it = uses.begin();
@@ -184,7 +183,7 @@ NVC0LegalizePostRA::addTexUse(std::list<TexUse> &uses,
++it;
}
if (add)
- uses.push_back(TexUse(usei, insn));
+ uses.push_back(TexUse(usei, texi));
}
void
@@ -196,7 +195,8 @@ NVC0LegalizePostRA::findOverwritingDefs(const Instruction *texi,
while (insn->op == OP_MOV && insn->getDef(0)->equals(insn->getSrc(0)))
insn = insn->getSrc(0)->getUniqueInsn();
- if (!insn->bb->reachableBy(texi->bb, term))
+ // NOTE: the tex itself is, of course, not an overwriting definition
+ if (insn == texi || !insn->bb->reachableBy(texi->bb, term))
return;
switch (insn->op) {
@@ -244,7 +244,12 @@ NVC0LegalizePostRA::findFirstUses(
visited.insert(usei);
if (usei->op == OP_PHI || usei->op == OP_UNION) {
- // need a barrier before WAW cases
+ // need a barrier before WAW cases, like:
+ // %r0 = tex
+ // if ...
+ // texbar <- is required or tex might replace x again
+ // %r1 = x <- overwriting def
+ // %r2 = phi %r0, %r1
for (int s = 0; usei->srcExists(s); ++s) {
Instruction *defi = usei->getSrc(s)->getUniqueInsn();
if (defi && &usei->src(s) != *u)
@@ -263,7 +268,7 @@ NVC0LegalizePostRA::findFirstUses(
usei->subOp != NV50_IR_SUBOP_MOV_FINAL) {
findFirstUses(texi, usei, uses, visited);
} else {
- addTexUse(uses, usei, insn);
+ addTexUse(uses, usei, texi);
}
}
}
@@ -1751,6 +1756,7 @@ NVC0LoweringPass::visit(Instruction *i)
Value *ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
i->getIndirect(0, 0), bld.mkImm(4));
i->setIndirect(0, 0, ptr);
+ i->op = OP_VFETCH;
} else {
i->op = OP_VFETCH;
assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 14446b6b53f..ae739eeda83 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -236,6 +236,9 @@ LoadPropagation::visit(BasicBlock *bb)
if (i->op == OP_CALL) // calls have args as sources, they must be in regs
continue;
+ if (i->op == OP_PFETCH) // pfetch expects arg1 to be a reg
+ continue;
+
if (i->srcExists(1))
checkSwapSrc01(i);
@@ -278,7 +281,6 @@ private:
void tryCollapseChainedMULs(Instruction *, const int s, ImmediateValue&);
- // TGSI 'true' is converted to -1 by F2I(NEG(SET)), track back to SET
CmpInstruction *findOriginForTestWithZero(Value *);
unsigned int foldCount;
@@ -337,25 +339,33 @@ ConstantFolding::findOriginForTestWithZero(Value *value)
return NULL;
Instruction *insn = value->getInsn();
- while (insn && insn->op != OP_SET) {
- Instruction *next = NULL;
- switch (insn->op) {
- case OP_NEG:
- case OP_ABS:
- case OP_CVT:
- next = insn->getSrc(0)->getInsn();
- if (insn->sType != next->dType)
+ if (insn->asCmp() && insn->op != OP_SLCT)
+ return insn->asCmp();
+
+ /* Sometimes mov's will sneak in as a result of other folding. This gets
+ * cleaned up later.
+ */
+ if (insn->op == OP_MOV)
+ return findOriginForTestWithZero(insn->getSrc(0));
+
+ /* Deal with AND 1.0 here since nv50 can't fold into boolean float */
+ if (insn->op == OP_AND) {
+ int s = 0;
+ ImmediateValue imm;
+ if (!insn->src(s).getImmediate(imm)) {
+ s = 1;
+ if (!insn->src(s).getImmediate(imm))
return NULL;
- break;
- case OP_MOV:
- next = insn->getSrc(0)->getInsn();
- break;
- default:
- return NULL;
}
- insn = next;
+ if (imm.reg.data.f32 != 1.0f)
+ return NULL;
+ /* TODO: Come up with a way to handle the condition being inverted */
+ if (insn->src(!s).mod != Modifier(0))
+ return NULL;
+ return findOriginForTestWithZero(insn->getSrc(!s));
}
- return insn ? insn->asCmp() : NULL;
+
+ return NULL;
}
void
@@ -574,6 +584,11 @@ ConstantFolding::expr(Instruction *i,
case OP_POPCNT:
res.data.u32 = util_bitcount(a->data.u32 & b->data.u32);
break;
+ case OP_PFETCH:
+ // The two arguments to pfetch are logically added together. Normally
+ // the second argument will not be constant, but that can happen.
+ res.data.u32 = a->data.u32 + b->data.u32;
+ break;
default:
return;
}
@@ -588,7 +603,9 @@ ConstantFolding::expr(Instruction *i,
i->getSrc(0)->reg.data = res.data;
- if (i->op == OP_MAD || i->op == OP_FMA) {
+ switch (i->op) {
+ case OP_MAD:
+ case OP_FMA: {
i->op = OP_ADD;
i->setSrc(1, i->getSrc(0));
@@ -603,8 +620,14 @@ ConstantFolding::expr(Instruction *i,
bld.setPosition(i, false);
i->setSrc(1, bld.loadImm(NULL, res.data.u32));
}
- } else {
+ break;
+ }
+ case OP_PFETCH:
+ // Leave PFETCH alone... we just folded its 2 args into 1.
+ break;
+ default:
i->op = i->saturate ? OP_SAT : OP_MOV; /* SAT handled by unary() */
+ break;
}
i->subOp = 0;
}
@@ -946,33 +969,82 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case OP_SET: // TODO: SET_AND,OR,XOR
{
+ /* This optimizes the case where the output of a set is being compared
+ * to zero. Since the set can only produce 0/-1 (int) or 0/1 (float), we
+ * can be a lot cleverer in our comparison.
+ */
CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t));
CondCode cc, ccZ;
- if (i->src(t).mod != Modifier(0))
- return;
- if (imm0.reg.data.u32 != 0 || !si || si->op != OP_SET)
+ if (imm0.reg.data.u32 != 0 || !si)
return;
cc = si->setCond;
ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U);
+ // We do everything assuming var (cmp) 0, reverse the condition if 0 is
+ // first.
if (s == 0)
ccZ = reverseCondCode(ccZ);
+ // If there is a negative modifier, we need to undo that, by flipping
+ // the comparison to zero.
+ if (i->src(t).mod.neg())
+ ccZ = reverseCondCode(ccZ);
+ // If this is a signed comparison, we expect the input to be a regular
+ // boolean, i.e. 0/-1. However the rest of the logic assumes that true
+ // is positive, so just flip the sign.
+ if (i->sType == TYPE_S32) {
+ assert(!isFloatType(si->dType));
+ ccZ = reverseCondCode(ccZ);
+ }
switch (ccZ) {
- case CC_LT: cc = CC_FL; break;
- case CC_GE: cc = CC_TR; break;
- case CC_EQ: cc = inverseCondCode(cc); break;
- case CC_LE: cc = inverseCondCode(cc); break;
- case CC_GT: break;
- case CC_NE: break;
+ case CC_LT: cc = CC_FL; break; // bool < 0 -- this is never true
+ case CC_GE: cc = CC_TR; break; // bool >= 0 -- this is always true
+ case CC_EQ: cc = inverseCondCode(cc); break; // bool == 0 -- !bool
+ case CC_LE: cc = inverseCondCode(cc); break; // bool <= 0 -- !bool
+ case CC_GT: break; // bool > 0 -- bool
+ case CC_NE: break; // bool != 0 -- bool
default:
return;
}
+
+ // Update the condition of this SET to be identical to the origin set,
+ // but with the updated condition code. The original SET should get
+ // DCE'd, ideally.
+ i->op = si->op;
i->asCmp()->setCond = cc;
i->setSrc(0, si->src(0));
i->setSrc(1, si->src(1));
+ if (si->srcExists(2))
+ i->setSrc(2, si->src(2));
i->sType = si->sType;
}
break;
+ case OP_AND:
+ {
+ CmpInstruction *cmp = i->getSrc(t)->getInsn()->asCmp();
+ if (!cmp || cmp->op == OP_SLCT || cmp->getDef(0)->refCount() > 1)
+ return;
+ if (!prog->getTarget()->isOpSupported(cmp->op, TYPE_F32))
+ return;
+ if (imm0.reg.data.f32 != 1.0)
+ return;
+ if (i->getSrc(t)->getInsn()->dType != TYPE_U32)
+ return;
+
+ i->getSrc(t)->getInsn()->dType = TYPE_F32;
+ if (i->src(t).mod != Modifier(0)) {
+ assert(i->src(t).mod == Modifier(NV50_IR_MOD_NOT));
+ i->src(t).mod = Modifier(0);
+ cmp->setCond = inverseCondCode(cmp->setCond);
+ }
+ i->op = OP_MOV;
+ i->setSrc(s, NULL);
+ if (t) {
+ i->setSrc(0, i->getSrc(t));
+ i->setSrc(t, NULL);
+ }
+ }
+ break;
+
case OP_SHL:
{
if (s != 1 || i->src(0).mod != Modifier(0))
@@ -2216,7 +2288,7 @@ FlatteningPass::visit(BasicBlock *bb)
insn->op != OP_LINTERP && // probably just nve4
insn->op != OP_PINTERP && // probably just nve4
((insn->op != OP_LOAD && insn->op != OP_STORE) ||
- typeSizeof(insn->dType) <= 4) &&
+ (typeSizeof(insn->dType) <= 4 && !insn->src(0).isIndirect(0))) &&
!insn->isNop()) {
insn->join = 1;
bb->remove(bb->getExit());
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
index 178a1671c3f..ca545a6024a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
@@ -84,7 +84,7 @@ static const struct opProperties _initProps[] =
// neg abs not sat c[] s[], a[], imm
{ OP_ADD, 0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 },
{ OP_SUB, 0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 },
- { OP_MUL, 0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 },
+ { OP_MUL, 0x3, 0x0, 0x0, 0x0, 0x2, 0x1, 0x1, 0x2 },
{ OP_MAX, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 },
{ OP_MIN, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 },
{ OP_MAD, 0x7, 0x0, 0x0, 0x8, 0x6, 0x1, 0x1, 0x0 }, // special constraint
@@ -188,6 +188,9 @@ void TargetNV50::initOpInfo()
if (prop->mSat & 8)
opInfo[prop->op].dstMods = NV50_IR_MOD_SAT;
}
+
+ if (chipset >= 0xa0)
+ opInfo[OP_MUL].dstMods = NV50_IR_MOD_SAT;
}
unsigned int
@@ -413,6 +416,8 @@ TargetNV50::isOpSupported(operation op, DataType ty) const
return false;
case OP_SAD:
return ty == TYPE_S32;
+ case OP_SET:
+ return !isFloatType(ty);
default:
return true;
}
diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c
index 32fa65c8a51..09cdbb53ecb 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -658,13 +658,13 @@ nouveau_buffer_create(struct pipe_screen *pscreen,
switch (buffer->base.usage) {
case PIPE_USAGE_DEFAULT:
case PIPE_USAGE_IMMUTABLE:
- buffer->domain = NOUVEAU_BO_VRAM;
+ buffer->domain = NV_VRAM_DOMAIN(screen);
break;
case PIPE_USAGE_DYNAMIC:
/* For most apps, we'd have to do staging transfers to avoid sync
* with this usage, and GART -> GART copies would be suboptimal.
*/
- buffer->domain = NOUVEAU_BO_VRAM;
+ buffer->domain = NV_VRAM_DOMAIN(screen);
break;
case PIPE_USAGE_STAGING:
case PIPE_USAGE_STREAM:
@@ -676,7 +676,7 @@ nouveau_buffer_create(struct pipe_screen *pscreen,
}
} else {
if (buffer->base.bind & screen->vidmem_bindings)
- buffer->domain = NOUVEAU_BO_VRAM;
+ buffer->domain = NV_VRAM_DOMAIN(screen);
else
if (buffer->base.bind & screen->sysmem_bindings)
buffer->domain = NOUVEAU_BO_GART;
diff --git a/src/gallium/drivers/nouveau/nouveau_heap.h b/src/gallium/drivers/nouveau/nouveau_heap.h
index d0b22844ad0..a3d64a65623 100644
--- a/src/gallium/drivers/nouveau/nouveau_heap.h
+++ b/src/gallium/drivers/nouveau/nouveau_heap.h
@@ -23,6 +23,26 @@
#ifndef __NOUVEAU_HEAP_H__
#define __NOUVEAU_HEAP_H__
+/* This datastructure represents a memory allocation heap. Fundamentally, this
+ * is a doubly-linked list with a few properties, and a usage convention.
+ *
+ * On initial allocation, there is a single node with the full size that's
+ * marked as not in-use. As allocations are made, blocks are taken off the end
+ * of that first node, and inserted right after it. If the first node doesn't
+ * have enough free space, we look for free space down in the rest of the
+ * list. This can happen if an allocation is made and then freed.
+ *
+ * The first node will remain with in_use == 0 even if the whole heap is
+ * exhausted. Another invariant is that there will never be two sequential
+ * in_use == 0 nodes. If a node is freed and it has one (or both) adjacent
+ * free nodes, they are merged into one, and the relevant heap entries are
+ * freed.
+ *
+ * The pattern to free the whole heap is to start with the first node and then
+ * just free the "next" node, until there is no next node. This should assure
+ * that at the end the first (and only) node is not in use and contains the
+ * full size of the heap.
+ */
struct nouveau_heap {
struct nouveau_heap *prev;
struct nouveau_heap *next;
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index b4f1413fd8b..c6e5074db19 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -164,6 +164,16 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
size = sizeof(nvc0_data);
}
+ /*
+ * Set default VRAM domain if not overridden
+ */
+ if (!screen->vram_domain) {
+ if (dev->vram_size > 0)
+ screen->vram_domain = NOUVEAU_BO_VRAM;
+ else
+ screen->vram_domain = NOUVEAU_BO_GART;
+ }
+
ret = nouveau_object_new(&dev->object, 0, NOUVEAU_FIFO_CHANNEL_CLASS,
data, size, &screen->channel);
if (ret)
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h
index cf06f7e88aa..30041b271c9 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.h
+++ b/src/gallium/drivers/nouveau/nouveau_screen.h
@@ -51,6 +51,8 @@ struct nouveau_screen {
boolean hint_buf_keep_sysmem_copy;
+ unsigned vram_domain;
+
struct {
unsigned profiles_checked;
unsigned profiles_present;
@@ -94,6 +96,8 @@ struct nouveau_screen {
#endif
};
+#define NV_VRAM_DOMAIN(screen) ((screen)->vram_domain)
+
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
# define NOUVEAU_DRV_STAT(s, n, v) do { \
(s)->stats.named.n += (v); \
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_clear.c b/src/gallium/drivers/nouveau/nv30/nv30_clear.c
index 1ab8929cc38..83fd1fa38dd 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_clear.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_clear.c
@@ -58,7 +58,7 @@ nv30_clear(struct pipe_context *pipe, unsigned buffers,
struct pipe_framebuffer_state *fb = &nv30->framebuffer;
uint32_t colr = 0, zeta = 0, mode = 0;
- if (!nv30_state_validate(nv30, TRUE))
+ if (!nv30_state_validate(nv30, NV30_NEW_FRAMEBUFFER | NV30_NEW_SCISSOR, TRUE))
return;
if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_context.h b/src/gallium/drivers/nouveau/nv30/nv30_context.h
index 7b32aaee936..592cdbe24f9 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_context.h
+++ b/src/gallium/drivers/nouveau/nv30/nv30_context.h
@@ -204,7 +204,7 @@ void
nv30_render_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info);
boolean
-nv30_state_validate(struct nv30_context *nv30, boolean hwtnl);
+nv30_state_validate(struct nv30_context *nv30, uint32_t mask, boolean hwtnl);
void
nv30_state_release(struct nv30_context *nv30);
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_draw.c b/src/gallium/drivers/nouveau/nv30/nv30_draw.c
index 3575c3d29fa..c1665b7ad2f 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_draw.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_draw.c
@@ -71,12 +71,12 @@ nv30_render_allocate_vertices(struct vbuf_render *render,
struct nv30_render *r = nv30_render(render);
struct nv30_context *nv30 = r->nv30;
- r->length = vertex_size * nr_vertices;
+ r->length = (uint32_t)vertex_size * (uint32_t)nr_vertices;
if (r->offset + r->length >= render->max_vertex_buffer_bytes) {
pipe_resource_reference(&r->buffer, NULL);
r->buffer = pipe_buffer_create(&nv30->screen->base.base,
- PIPE_BIND_VERTEX_BUFFER, 0,
+ PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STREAM,
render->max_vertex_buffer_bytes);
if (!r->buffer)
return FALSE;
@@ -91,10 +91,14 @@ static void *
nv30_render_map_vertices(struct vbuf_render *render)
{
struct nv30_render *r = nv30_render(render);
- char *map = pipe_buffer_map(&r->nv30->base.pipe, r->buffer,
- PIPE_TRANSFER_WRITE |
- PIPE_TRANSFER_UNSYNCHRONIZED, &r->transfer);
- return map + r->offset;
+ char *map = pipe_buffer_map_range(
+ &r->nv30->base.pipe, r->buffer,
+ r->offset, r->length,
+ PIPE_TRANSFER_WRITE |
+ PIPE_TRANSFER_DISCARD_RANGE,
+ &r->transfer);
+ assert(map);
+ return map;
}
static void
@@ -103,6 +107,7 @@ nv30_render_unmap_vertices(struct vbuf_render *render,
{
struct nv30_render *r = nv30_render(render);
pipe_buffer_unmap(&r->nv30->base.pipe, r->transfer);
+ r->transfer = NULL;
}
static void
@@ -126,10 +131,10 @@ nv30_render_draw_elements(struct vbuf_render *render,
for (i = 0; i < r->vertex_info.num_attribs; i++) {
PUSH_RESRC(push, NV30_3D(VTXBUF(i)), BUFCTX_VTXTMP,
nv04_resource(r->buffer), r->offset + r->vtxptr[i],
- NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, 0);
+ NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, NV30_3D_VTXBUF_DMA1);
}
- if (!nv30_state_validate(nv30, FALSE))
+ if (!nv30_state_validate(nv30, ~0, FALSE))
return;
BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
@@ -171,10 +176,10 @@ nv30_render_draw_arrays(struct vbuf_render *render, unsigned start, uint nr)
for (i = 0; i < r->vertex_info.num_attribs; i++) {
PUSH_RESRC(push, NV30_3D(VTXBUF(i)), BUFCTX_VTXTMP,
nv04_resource(r->buffer), r->offset + r->vtxptr[i],
- NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, 0);
+ NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, NV30_3D_VTXBUF_DMA1);
}
- if (!nv30_state_validate(nv30, FALSE))
+ if (!nv30_state_validate(nv30, ~0, FALSE))
return;
BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
@@ -213,22 +218,24 @@ static const struct {
[TGSI_SEMANTIC_BCOLOR ] = { EMIT_4F, INTERP_LINEAR , 1, 3, 0x00000004 },
[TGSI_SEMANTIC_FOG ] = { EMIT_4F, INTERP_PERSPECTIVE, 5, 5, 0x00000010 },
[TGSI_SEMANTIC_PSIZE ] = { EMIT_1F_PSIZE, INTERP_POS , 6, 6, 0x00000020 },
- [TGSI_SEMANTIC_GENERIC ] = { EMIT_4F, INTERP_PERSPECTIVE, 8, 7, 0x00004000 }
+ [TGSI_SEMANTIC_TEXCOORD] = { EMIT_4F, INTERP_PERSPECTIVE, 8, 7, 0x00004000 },
};
static boolean
vroute_add(struct nv30_render *r, uint attrib, uint sem, uint *idx)
{
- struct pipe_screen *pscreen = &r->nv30->screen->base.base;
+ struct nv30_screen *screen = r->nv30->screen;
struct nv30_fragprog *fp = r->nv30->fragprog.program;
struct vertex_info *vinfo = &r->vertex_info;
enum pipe_format format;
uint emit = EMIT_OMIT;
uint result = *idx;
- if (sem == TGSI_SEMANTIC_GENERIC && result >= 8) {
- for (result = 0; result < 8; result++) {
- if (fp->texcoord[result] == *idx) {
+ if (sem == TGSI_SEMANTIC_GENERIC) {
+ uint num_texcoords = (screen->eng3d->oclass < NV40_3D_CLASS) ? 8 : 10;
+ for (result = 0; result < num_texcoords; result++) {
+ if (fp->texcoord[result] == *idx + 8) {
+ sem = TGSI_SEMANTIC_TEXCOORD;
emit = vroute[sem].emit;
break;
}
@@ -243,11 +250,11 @@ vroute_add(struct nv30_render *r, uint attrib, uint sem, uint *idx)
draw_emit_vertex_attr(vinfo, emit, vroute[sem].interp, attrib);
format = draw_translate_vinfo_format(emit);
- r->vtxfmt[attrib] = nv30_vtxfmt(pscreen, format)->hw;
- r->vtxptr[attrib] = vinfo->size | NV30_3D_VTXBUF_DMA1;
+ r->vtxfmt[attrib] = nv30_vtxfmt(&screen->base.base, format)->hw;
+ r->vtxptr[attrib] = vinfo->size;
vinfo->size += draw_translate_vinfo_size(emit);
- if (nv30_screen(pscreen)->eng3d->oclass < NV40_3D_CLASS) {
+ if (screen->eng3d->oclass < NV40_3D_CLASS) {
r->vtxprog[attrib][0] = 0x001f38d8;
r->vtxprog[attrib][1] = 0x0080001b | (attrib << 9);
r->vtxprog[attrib][2] = 0x0836106c;
@@ -259,7 +266,12 @@ vroute_add(struct nv30_render *r, uint attrib, uint sem, uint *idx)
r->vtxprog[attrib][3] = 0x6041ff80 | (result + vroute[sem].vp40) << 2;
}
- *idx = vroute[sem].ow40 << result;
+ if (result < 8)
+ *idx = vroute[sem].ow40 << result;
+ else {
+ assert(sem == TGSI_SEMANTIC_TEXCOORD);
+ *idx = 0x00001000 << (result - 8);
+ }
return TRUE;
}
@@ -313,7 +325,7 @@ nv30_render_validate(struct nv30_context *nv30)
while (pntc && attrib < 16) {
uint index = ffs(pntc) - 1; pntc &= ~(1 << index);
- if (vroute_add(r, attrib, TGSI_SEMANTIC_GENERIC, &index)) {
+ if (vroute_add(r, attrib, TGSI_SEMANTIC_TEXCOORD, &index)) {
vp_attribs |= (1 << attrib++);
vp_results |= index;
}
@@ -398,17 +410,17 @@ nv30_render_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (nv30->vertprog.constbuf) {
void *map = nv04_resource(nv30->vertprog.constbuf)->data;
draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0,
- map, nv30->vertprog.constbuf_nr);
+ map, nv30->vertprog.constbuf_nr * 16);
+ } else {
+ draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, NULL, 0);
}
}
for (i = 0; i < nv30->num_vtxbufs; i++) {
const void *map = nv30->vtxbuf[i].user_buffer;
if (!map) {
- if (!nv30->vtxbuf[i].buffer) {
- continue;
- }
- map = pipe_buffer_map(pipe, nv30->vtxbuf[i].buffer,
+ if (nv30->vtxbuf[i].buffer)
+ map = pipe_buffer_map(pipe, nv30->vtxbuf[i].buffer,
PIPE_TRANSFER_UNSYNCHRONIZED |
PIPE_TRANSFER_READ, &transfer[i]);
}
@@ -418,9 +430,9 @@ nv30_render_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (info->indexed) {
const void *map = nv30->idxbuf.user_buffer;
if (!map)
- pipe_buffer_map(pipe, nv30->idxbuf.buffer,
- PIPE_TRANSFER_UNSYNCHRONIZED |
- PIPE_TRANSFER_READ, &transferi);
+ map = pipe_buffer_map(pipe, nv30->idxbuf.buffer,
+ PIPE_TRANSFER_UNSYNCHRONIZED |
+ PIPE_TRANSFER_READ, &transferi);
draw_set_indexes(draw,
(ubyte *) map + nv30->idxbuf.offset,
nv30->idxbuf.index_size, ~0);
@@ -444,6 +456,12 @@ nv30_render_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
static void
nv30_render_destroy(struct vbuf_render *render)
{
+ struct nv30_render *r = nv30_render(render);
+
+ if (r->transfer)
+ pipe_buffer_unmap(&r->nv30->base.pipe, r->transfer);
+ pipe_resource_reference(&r->buffer, NULL);
+ nouveau_heap_free(&r->vertprog);
FREE(render);
}
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c b/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c
index a05bfe10ee9..7f227868f73 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c
@@ -23,6 +23,7 @@
*
*/
+#include "draw/draw_context.h"
#include "tgsi/tgsi_parse.h"
#include "nv_object.xml.h"
@@ -147,8 +148,12 @@ nv30_fp_state_delete(struct pipe_context *pipe, void *hwcso)
pipe_resource_reference(&fp->buffer, NULL);
+ if (fp->draw)
+ draw_delete_fragment_shader(nv30_context(pipe)->draw, fp->draw);
+
FREE((void *)fp->pipe.tokens);
FREE(fp->insn);
+ FREE(fp->consts);
FREE(fp);
}
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index eeb714864e2..2e38a1978ae 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -161,6 +161,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
+ case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
return 0;
case PIPE_CAP_VENDOR_ID:
@@ -251,6 +252,7 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0;
default:
debug_printf("unknown vertex shader param %d\n", param);
@@ -291,6 +293,7 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0;
default:
debug_printf("unknown fragment shader param %d\n", param);
@@ -523,7 +526,7 @@ nv30_screen_create(struct nouveau_device *dev)
ret = nouveau_bo_wrap(screen->base.device, fifo->notify, &screen->notify);
if (ret == 0)
- nouveau_bo_map(screen->notify, 0, screen->base.client);
+ ret = nouveau_bo_map(screen->notify, 0, screen->base.client);
if (ret)
FAIL_SCREEN_INIT("error mapping notifier memory: %d\n", ret);
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c b/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c
index 0f9d19dd68e..a954dcce562 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c
@@ -272,15 +272,13 @@ nv30_validate_clip(struct nv30_context *nv30)
uint32_t clpd_enable = 0;
for (i = 0; i < 6; i++) {
- if (nv30->rast->pipe.clip_plane_enable & (1 << i)) {
- if (nv30->dirty & NV30_NEW_CLIP) {
- BEGIN_NV04(push, NV30_3D(VP_UPLOAD_CONST_ID), 5);
- PUSH_DATA (push, i);
- PUSH_DATAp(push, nv30->clip.ucp[i], 4);
- }
-
- clpd_enable |= 1 << (1 + 4*i);
+ if (nv30->dirty & NV30_NEW_CLIP) {
+ BEGIN_NV04(push, NV30_3D(VP_UPLOAD_CONST_ID), 5);
+ PUSH_DATA (push, i);
+ PUSH_DATAp(push, nv30->clip.ucp[i], 4);
}
+ if (nv30->rast->pipe.clip_plane_enable & (1 << i))
+ clpd_enable |= 2 << (4*i);
}
BEGIN_NV04(push, NV30_3D(VP_CLIP_PLANES_ENABLE), 1);
@@ -389,7 +387,7 @@ static struct state_validate hwtnl_validate_list[] = {
{ nv30_validate_stipple, NV30_NEW_STIPPLE },
{ nv30_validate_scissor, NV30_NEW_SCISSOR | NV30_NEW_RASTERIZER },
{ nv30_validate_viewport, NV30_NEW_VIEWPORT },
- { nv30_validate_clip, NV30_NEW_CLIP },
+ { nv30_validate_clip, NV30_NEW_CLIP | NV30_NEW_RASTERIZER },
{ nv30_fragprog_validate, NV30_NEW_FRAGPROG | NV30_NEW_FRAGCONST },
{ nv30_vertprog_validate, NV30_NEW_VERTPROG | NV30_NEW_VERTCONST |
NV30_NEW_FRAGPROG | NV30_NEW_RASTERIZER },
@@ -456,7 +454,7 @@ nv30_state_context_switch(struct nv30_context *nv30)
}
boolean
-nv30_state_validate(struct nv30_context *nv30, boolean hwtnl)
+nv30_state_validate(struct nv30_context *nv30, uint32_t mask, boolean hwtnl)
{
struct nouveau_screen *screen = &nv30->screen->base;
struct nouveau_pushbuf *push = nv30->base.pushbuf;
@@ -481,14 +479,16 @@ nv30_state_validate(struct nv30_context *nv30, boolean hwtnl)
else
validate = swtnl_validate_list;
- if (nv30->dirty) {
+ mask &= nv30->dirty;
+
+ if (mask) {
while (validate->func) {
- if (nv30->dirty & validate->mask)
+ if (mask & validate->mask)
validate->func(nv30);
validate++;
}
- nv30->dirty = 0;
+ nv30->dirty &= ~mask;
}
nouveau_pushbuf_bufctx(push, bctx);
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_vbo.c b/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
index 67ab8295218..d4e384b21d2 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
@@ -564,7 +564,7 @@ nv30_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (nv30->vbo_user && !(nv30->dirty & (NV30_NEW_VERTEX | NV30_NEW_ARRAYS)))
nv30_update_user_vbufs(nv30);
- nv30_state_validate(nv30, TRUE);
+ nv30_state_validate(nv30, ~0, TRUE);
if (nv30->draw_flags) {
nv30_render_vbo(pipe, info);
return;
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_vertprog.c b/src/gallium/drivers/nouveau/nv30/nv30_vertprog.c
index 3c1b7e714ea..4d4145d10b5 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_vertprog.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_vertprog.c
@@ -23,6 +23,7 @@
*
*/
+#include "draw/draw_context.h"
#include "util/u_dynarray.h"
#include "tgsi/tgsi_parse.h"
@@ -237,6 +238,10 @@ nv30_vp_state_delete(struct pipe_context *pipe, void *hwcso)
if (vp->translated)
nv30_vertprog_destroy(vp);
+
+ if (vp->draw)
+ draw_delete_vertex_shader(nv30_context(pipe)->draw, vp->draw);
+
FREE((void *)vp->pipe.tokens);
FREE(vp);
}
diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c b/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c
index bbdca8102f0..9ef16965f39 100644
--- a/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c
+++ b/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c
@@ -327,6 +327,8 @@ nv40_fp_rep(struct nvfx_fpc *fpc, unsigned count, unsigned target)
//util_dynarray_append(&fpc->loop_stack, unsigned, target);
}
+#if 0
+/* documentation only */
/* warning: this only works forward, and probably only if not inside any IF */
static void
nv40_fp_bra(struct nvfx_fpc *fpc, unsigned target)
@@ -352,6 +354,7 @@ nv40_fp_bra(struct nvfx_fpc *fpc, unsigned target)
reloc.location = fpc->inst_offset + 3;
util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
}
+#endif
static void
nv40_fp_brk(struct nvfx_fpc *fpc)
@@ -528,7 +531,7 @@ nvfx_fragprog_parse_instruction(struct nvfx_fpc *fpc,
dst = tgsi_dst(fpc, &finst->Dst[0]);
mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
- sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
+ sat = finst->Instruction.Saturate;
switch (finst->Instruction.Opcode) {
case TGSI_OPCODE_ABS:
@@ -1201,17 +1204,3 @@ out_err:
tgsi_dump(fp->pipe.tokens, 0);
goto out;
}
-
-static inline void
-nvfx_fp_memcpy(void* dst, const void* src, size_t len)
-{
-#ifndef PIPE_ARCH_BIG_ENDIAN
- memcpy(dst, src, len);
-#else
- size_t i;
- for(i = 0; i < len; i += 4) {
- uint32_t v = *(uint32_t*)((char*)src + i);
- *(uint32_t*)((char*)dst + i) = (v >> 16) | (v << 16);
- }
-#endif
-}
diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c b/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c
index 29d506b6e9b..1ce0589be71 100644
--- a/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c
+++ b/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c
@@ -539,7 +539,7 @@ nvfx_vertprog_parse_instruction(struct nvfx_vpc *vpc,
final_dst = dst = tgsi_dst(vpc, &finst->Dst[0]);
mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
- if(finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) {
+ if(finst->Instruction.Saturate) {
assert(finst->Instruction.Opcode != TGSI_OPCODE_ARL);
if (vpc->is_nv4x)
sat = TRUE;
@@ -796,7 +796,7 @@ nvfx_vertprog_parse_instruction(struct nvfx_vpc *vpc,
return FALSE;
}
- if(finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE && !vpc->is_nv4x) {
+ if(finst->Instruction.Saturate && !vpc->is_nv4x) {
if (!vpc->r_0_1.type)
vpc->r_0_1 = constant(vpc, -1, 0, 1, 0, 0);
nvfx_vp_emit(vpc, arith(0, VEC, MAX, dst, mask, nvfx_src(dst), swz(nvfx_src(vpc->r_0_1), X, X, X, X), none));
@@ -872,9 +872,8 @@ nvfx_vertprog_parse_decl_output(struct nvfx_vpc *vpc,
}
break;
case TGSI_SEMANTIC_EDGEFLAG:
- /* not really an error just a fallback */
- NOUVEAU_ERR("cannot handle edgeflag output\n");
- return FALSE;
+ vpc->r_result[idx] = nvfx_reg(NVFXSR_NONE, 0);
+ return TRUE;
default:
NOUVEAU_ERR("bad output semantic\n");
return FALSE;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c
index 2cfd5db5ea0..5b5d3912c20 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -138,8 +138,11 @@ nv50_destroy(struct pipe_context *pipe)
{
struct nv50_context *nv50 = nv50_context(pipe);
- if (nv50_context_screen(nv50)->cur_ctx == nv50)
- nv50_context_screen(nv50)->cur_ctx = NULL;
+ if (nv50->screen->cur_ctx == nv50) {
+ nv50->screen->cur_ctx = NULL;
+ /* Save off the state in case another context gets created */
+ nv50->screen->save_state = nv50->state;
+ }
nouveau_pushbuf_bufctx(nv50->base.pushbuf, NULL);
nouveau_pushbuf_kick(nv50->base.pushbuf, nv50->base.pushbuf->channel);
@@ -290,6 +293,10 @@ nv50_create(struct pipe_screen *pscreen, void *priv)
pipe->get_sample_position = nv50_context_get_sample_position;
if (!screen->cur_ctx) {
+ /* Restore the last context's state here, normally handled during
+ * context switch
+ */
+ nv50->state = screen->save_state;
screen->cur_ctx = nv50;
nouveau_pushbuf_bufctx(screen->base.pushbuf, nv50->bufctx);
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h
index 45eb554eb4f..1f123ef7e92 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -104,28 +104,7 @@ struct nv50_context {
uint32_t dirty;
boolean cb_dirty;
- struct {
- uint32_t instance_elts; /* bitmask of per-instance elements */
- uint32_t instance_base;
- uint32_t interpolant_ctrl;
- uint32_t semantic_color;
- uint32_t semantic_psize;
- int32_t index_bias;
- boolean uniform_buffer_bound[3];
- boolean prim_restart;
- boolean point_sprite;
- boolean rt_serialize;
- boolean flushed;
- boolean rasterizer_discard;
- uint8_t tls_required;
- boolean new_tls_space;
- uint8_t num_vtxbufs;
- uint8_t num_vtxelts;
- uint8_t num_textures[3];
- uint8_t num_samplers[3];
- uint8_t prim_size;
- uint16_t scissor;
- } state;
+ struct nv50_graph_state state;
struct nv50_blend_stateobj *blend;
struct nv50_rasterizer_stateobj *rast;
@@ -191,12 +170,6 @@ nv50_context(struct pipe_context *pipe)
return (struct nv50_context *)pipe;
}
-static INLINE struct nv50_screen *
-nv50_context_screen(struct nv50_context *nv50)
-{
- return nv50_screen(&nv50->base.screen->base);
-}
-
/* return index used in nv50_context arrays for a specific shader type */
static INLINE unsigned
nv50_context_shader_stage(unsigned pipe)
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
index 744a3a5bf8b..f15d8f3ecb6 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
@@ -377,7 +377,7 @@ nv50_miptree_create(struct pipe_screen *pscreen,
if (!bo_config.nv50.memtype && (pt->bind & PIPE_BIND_SHARED))
mt->base.domain = NOUVEAU_BO_GART;
else
- mt->base.domain = NOUVEAU_BO_VRAM;
+ mt->base.domain = NV_VRAM_DOMAIN(nouveau_screen(pscreen));
bo_flags = mt->base.domain | NOUVEAU_BO_NOSNOOP;
if (mt->base.base.bind & (PIPE_BIND_CURSOR | PIPE_BIND_DISPLAY_TARGET))
@@ -419,7 +419,7 @@ nv50_miptree_from_handle(struct pipe_screen *pscreen,
FREE(mt);
return NULL;
}
- mt->base.domain = NOUVEAU_BO_VRAM;
+ mt->base.domain = mt->base.bo->flags & NOUVEAU_BO_APER;
mt->base.address = mt->base.bo->offset;
mt->base.base = *templ;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index 6690aa282eb..81f7474e36b 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -27,6 +27,11 @@
#include "nv50/nv50_context.h"
#include "nv_object.xml.h"
+#define NV50_QUERY_STATE_READY 0
+#define NV50_QUERY_STATE_ACTIVE 1
+#define NV50_QUERY_STATE_ENDED 2
+#define NV50_QUERY_STATE_FLUSHED 3
+
/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
* (since we use only a single GPU channel per screen) will not work properly.
*
@@ -42,10 +47,10 @@ struct nv50_query {
struct nouveau_bo *bo;
uint32_t base;
uint32_t offset; /* base + i * 32 */
- boolean ready;
- boolean flushed;
+ uint8_t state;
boolean is64bit;
struct nouveau_mm_allocation *mm;
+ struct nouveau_fence *fence;
};
#define NV50_QUERY_ALLOC_SPACE 256
@@ -65,7 +70,7 @@ nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size)
if (q->bo) {
nouveau_bo_ref(NULL, &q->bo);
if (q->mm) {
- if (q->ready)
+ if (q->state == NV50_QUERY_STATE_READY)
nouveau_mm_free(q->mm);
else
nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work,
@@ -92,6 +97,7 @@ static void
nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
{
nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0);
+ nouveau_fence_ref(NULL, &nv50_query(pq)->fence);
FREE(nv50_query(pq));
}
@@ -112,7 +118,8 @@ nv50_query_create(struct pipe_context *pipe, unsigned type, unsigned index)
q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED ||
type == PIPE_QUERY_PRIMITIVES_EMITTED ||
- type == PIPE_QUERY_SO_STATISTICS);
+ type == PIPE_QUERY_SO_STATISTICS ||
+ type == PIPE_QUERY_PIPELINE_STATISTICS);
q->type = type;
if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
@@ -200,7 +207,7 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
default:
break;
}
- q->ready = FALSE;
+ q->state = NV50_QUERY_STATE_ACTIVE;
return true;
}
@@ -211,6 +218,8 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nv50_query *q = nv50_query(pq);
+ q->state = NV50_QUERY_STATE_ENDED;
+
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
nv50_query_get(push, q, 0, 0x0100f002);
@@ -253,19 +262,27 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
break;
case PIPE_QUERY_TIMESTAMP_DISJOINT:
/* This query is not issued on GPU because disjoint is forced to FALSE */
- q->ready = TRUE;
+ q->state = NV50_QUERY_STATE_READY;
break;
default:
assert(0);
break;
}
- q->ready = q->flushed = FALSE;
+
+ if (q->is64bit)
+ nouveau_fence_ref(nv50->screen->base.fence.current, &q->fence);
}
-static INLINE boolean
-nv50_query_ready(struct nv50_query *q)
+static INLINE void
+nv50_query_update(struct nv50_query *q)
{
- return q->ready || (!q->is64bit && (q->data[0] == q->sequence));
+ if (q->is64bit) {
+ if (nouveau_fence_signalled(q->fence))
+ q->state = NV50_QUERY_STATE_READY;
+ } else {
+ if (q->data[0] == q->sequence)
+ q->state = NV50_QUERY_STATE_READY;
+ }
}
static boolean
@@ -280,13 +297,14 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
uint64_t *data64 = (uint64_t *)q->data;
int i;
- if (!q->ready) /* update ? */
- q->ready = nv50_query_ready(q);
- if (!q->ready) {
+ if (q->state != NV50_QUERY_STATE_READY)
+ nv50_query_update(q);
+
+ if (q->state != NV50_QUERY_STATE_READY) {
if (!wait) {
/* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */
- if (!q->flushed) {
- q->flushed = TRUE;
+ if (q->state != NV50_QUERY_STATE_FLUSHED) {
+ q->state = NV50_QUERY_STATE_FLUSHED;
PUSH_KICK(nv50->base.pushbuf);
}
return FALSE;
@@ -294,7 +312,7 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nv50->screen->base.client))
return FALSE;
}
- q->ready = TRUE;
+ q->state = NV50_QUERY_STATE_READY;
switch (q->type) {
case PIPE_QUERY_GPU_FINISHED:
@@ -434,6 +452,7 @@ nv50_query_pushbuf_submit(struct nouveau_pushbuf *push,
/* XXX: does this exist ? */
#define NV50_IB_ENTRY_1_NO_PREFETCH (0 << (31 - 8))
+ PUSH_REFN(push, q->bo, NOUVEAU_BO_RD | NOUVEAU_BO_GART);
nouveau_pushbuf_space(push, 0, 0, 1);
nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 |
NV50_IB_ENTRY_1_NO_PREFETCH);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 829dfbc13fa..6583a353578 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -209,6 +209,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: /* potentially supported on some hw */
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
+ case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
return 0;
case PIPE_CAP_VENDOR_ID:
@@ -290,6 +291,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0;
default:
NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
index f8ce365135a..881051b1862 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -25,10 +25,34 @@ struct nv50_context;
struct nv50_blitter;
+struct nv50_graph_state {
+ uint32_t instance_elts; /* bitmask of per-instance elements */
+ uint32_t instance_base;
+ uint32_t interpolant_ctrl;
+ uint32_t semantic_color;
+ uint32_t semantic_psize;
+ int32_t index_bias;
+ boolean uniform_buffer_bound[3];
+ boolean prim_restart;
+ boolean point_sprite;
+ boolean rt_serialize;
+ boolean flushed;
+ boolean rasterizer_discard;
+ uint8_t tls_required;
+ boolean new_tls_space;
+ uint8_t num_vtxbufs;
+ uint8_t num_vtxelts;
+ uint8_t num_textures[3];
+ uint8_t num_samplers[3];
+ uint8_t prim_size;
+ uint16_t scissor;
+};
+
struct nv50_screen {
struct nouveau_screen base;
struct nv50_context *cur_ctx;
+ struct nv50_graph_state save_state;
struct nouveau_bo *code;
struct nouveau_bo *uniforms;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c
index 290750459cf..d4d41af3c61 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -811,12 +811,12 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
nv50->constbuf[s][i].user = (cb && cb->user_buffer) ? TRUE : FALSE;
if (nv50->constbuf[s][i].user) {
nv50->constbuf[s][i].u.data = cb->user_buffer;
- nv50->constbuf[s][i].size = cb->buffer_size;
+ nv50->constbuf[s][i].size = MIN2(cb->buffer_size, 0x10000);
nv50->constbuf_valid[s] |= 1 << i;
} else
if (res) {
nv50->constbuf[s][i].offset = cb->buffer_offset;
- nv50->constbuf[s][i].size = align(cb->buffer_size, 0x100);
+ nv50->constbuf[s][i].size = MIN2(align(cb->buffer_size, 0x100), 0x10000);
nv50->constbuf_valid[s] |= 1 << i;
} else {
nv50->constbuf_valid[s] &= ~(1 << i);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
index 85e19b4c623..116bf4bba7c 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -394,6 +394,8 @@ nv50_switch_pipe_context(struct nv50_context *ctx_to)
if (ctx_from)
ctx_to->state = ctx_from->state;
+ else
+ ctx_to->state = ctx_to->screen->save_state;
ctx_to->dirty = ~0;
ctx_to->viewports_dirty = ~0;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index c1590eefe9f..1fd33b8aa59 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -628,6 +628,7 @@ nv50_draw_elements(struct nv50_context *nv50, boolean shorten,
BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
PUSH_DATA (push, prim);
+ PUSH_REFN(push, buf->bo, NOUVEAU_BO_RD | buf->domain);
nouveau_pushbuf_space(push, 8, 0, 1);
switch (index_size) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
index ad287a2af6b..56fc83d3679 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
@@ -57,7 +57,7 @@ nvc0_screen_compute_setup(struct nvc0_screen *screen,
return ret;
}
- ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 1 << 12, NULL,
+ ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0, 1 << 12, NULL,
&screen->parm);
if (ret)
return ret;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
index 7662fb50f61..a35c3f66142 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -139,8 +139,12 @@ nvc0_destroy(struct pipe_context *pipe)
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
- if (nvc0->screen->cur_ctx == nvc0)
+ if (nvc0->screen->cur_ctx == nvc0) {
nvc0->screen->cur_ctx = NULL;
+ nvc0->screen->save_state = nvc0->state;
+ nvc0->screen->save_state.tfb = NULL;
+ }
+
/* Unset bufctx, we don't want to revalidate any resources after the flush.
* Other contexts will always set their bufctx again on action calls.
*/
@@ -303,6 +307,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
pipe->get_sample_position = nvc0_context_get_sample_position;
if (!screen->cur_ctx) {
+ nvc0->state = screen->save_state;
screen->cur_ctx = nvc0;
nouveau_pushbuf_bufctx(screen->base.pushbuf, nvc0->bufctx);
}
@@ -324,7 +329,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
/* add permanently resident buffers to bufctxts */
- flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
+ flags = NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD;
BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->text);
BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->uniform_bo);
@@ -335,7 +340,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->parm);
}
- flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR;
+ flags = NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RDWR;
if (screen->poly_cache)
BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->poly_cache);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index ef251f35a1b..a8d7593b398 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -113,29 +113,7 @@ struct nvc0_context {
uint32_t dirty;
uint32_t dirty_cp; /* dirty flags for compute state */
- struct {
- boolean flushed;
- boolean rasterizer_discard;
- boolean early_z_forced;
- boolean prim_restart;
- uint32_t instance_elts; /* bitmask of per-instance elements */
- uint32_t instance_base;
- uint32_t constant_vbos;
- uint32_t constant_elts;
- int32_t index_bias;
- uint16_t scissor;
- uint8_t vbo_mode; /* 0 = normal, 1 = translate, 3 = translate, forced */
- uint8_t num_vtxbufs;
- uint8_t num_vtxelts;
- uint8_t num_textures[6];
- uint8_t num_samplers[6];
- uint8_t tls_required; /* bitmask of shader types using l[] */
- uint8_t c14_bound; /* whether immediate array constbuf is bound */
- uint8_t clip_enable;
- uint32_t clip_mode;
- uint32_t uniform_buffer_bound[5];
- struct nvc0_transform_feedback_state *tfb;
- } state;
+ struct nvc0_graph_state state;
struct nvc0_blend_stateobj *blend;
struct nvc0_rasterizer_stateobj *rast;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
index fc75fc6a4a1..3875bbf4ca4 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
@@ -302,7 +302,7 @@ nvc0_miptree_create(struct pipe_screen *pscreen,
if (!bo_config.nvc0.memtype && (pt->usage == PIPE_USAGE_STAGING || pt->bind & PIPE_BIND_SHARED))
mt->base.domain = NOUVEAU_BO_GART;
else
- mt->base.domain = NOUVEAU_BO_VRAM;
+ mt->base.domain = NV_VRAM_DOMAIN(nouveau_screen(pscreen));
bo_flags = mt->base.domain | NOUVEAU_BO_NOSNOOP;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index c156e918dc5..e1f5a8c4416 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -392,7 +392,7 @@ nvc0_gp_gen_header(struct nvc0_program *gp, struct nv50_ir_prog_info *info)
break;
}
- gp->hdr[4] = info->prop.gp.maxVertices & 0x1ff;
+ gp->hdr[4] = MIN2(info->prop.gp.maxVertices, 1024);
return nvc0_vtgp_gen_header(gp, info);
}
@@ -683,11 +683,12 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
ret = nouveau_heap_alloc(screen->text_heap, size, prog, &prog->mem);
if (ret) {
struct nouveau_heap *heap = screen->text_heap;
- struct nouveau_heap *iter;
- for (iter = heap; iter && iter->next != heap; iter = iter->next) {
- struct nvc0_program *evict = iter->priv;
- if (evict)
- nouveau_heap_free(&evict->mem);
+ /* Note that the code library, which is allocated before anything else,
+ * does not have a priv pointer. We can stop once we hit it.
+ */
+ while (heap->next && heap->next->priv) {
+ struct nvc0_program *evict = heap->next->priv;
+ nouveau_heap_free(&evict->mem);
}
debug_printf("WARNING: out of code space, evicting all shaders.\n");
ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
@@ -734,12 +735,12 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
if (!is_cp)
nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base,
- NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr);
+ NV_VRAM_DOMAIN(&screen->base), NVC0_SHADER_HEADER_SIZE, prog->hdr);
nvc0->base.push_data(&nvc0->base, screen->text, code_pos,
- NOUVEAU_BO_VRAM, prog->code_size, prog->code);
+ NV_VRAM_DOMAIN(&screen->base), prog->code_size, prog->code);
if (prog->immd_size)
nvc0->base.push_data(&nvc0->base,
- screen->text, prog->immd_base, NOUVEAU_BO_VRAM,
+ screen->text, prog->immd_base, NV_VRAM_DOMAIN(&screen->base),
prog->immd_size, prog->immd_data);
BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(MEM_BARRIER), 1);
@@ -770,7 +771,7 @@ nvc0_program_library_upload(struct nvc0_context *nvc0)
return;
nvc0->base.push_data(&nvc0->base,
- screen->text, screen->lib_code->start, NOUVEAU_BO_VRAM,
+ screen->text, screen->lib_code->start, NV_VRAM_DOMAIN(&screen->base),
size, code);
/* no need for a memory barrier, will be emitted with first program */
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index 52032eb6f83..aea6cbda02d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -617,6 +617,7 @@ nvc0_query_pushbuf_submit(struct nouveau_pushbuf *push,
#define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8))
+ PUSH_REFN(push, q->bo, NOUVEAU_BO_RD | NOUVEAU_BO_GART);
nouveau_pushbuf_space(push, 0, 0, 1);
nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 |
NVC0_IB_ENTRY_1_NO_PREFETCH);
@@ -1407,11 +1408,14 @@ nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen,
count += NVC0_QUERY_DRV_STAT_COUNT;
if (screen->base.device->drm_version >= 0x01000101) {
- if (screen->base.class_3d >= NVE4_3D_CLASS) {
- count += NVE4_PM_QUERY_COUNT;
- } else
if (screen->compute) {
- count += NVC0_PM_QUERY_COUNT; /* NVC0_COMPUTE is not always enabled */
+ if (screen->base.class_3d == NVE4_3D_CLASS) {
+ count += NVE4_PM_QUERY_COUNT;
+ } else
+ if (screen->base.class_3d < NVE4_3D_CLASS) {
+ /* NVC0_COMPUTE is not always enabled */
+ count += NVC0_PM_QUERY_COUNT;
+ }
}
}
@@ -1437,19 +1441,21 @@ nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen,
} else
#endif
if (id < count) {
- if (screen->base.class_3d >= NVE4_3D_CLASS) {
- info->name = nve4_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
- info->query_type = NVE4_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
- info->max_value.u64 =
- (id < NVE4_PM_QUERY_METRIC_MP_OCCUPANCY) ? 0 : 100;
- info->group_id = NVC0_QUERY_MP_COUNTER_GROUP;
- return 1;
- } else
if (screen->compute) {
- info->name = nvc0_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
- info->query_type = NVC0_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
- info->group_id = NVC0_QUERY_MP_COUNTER_GROUP;
- return 1;
+ if (screen->base.class_3d == NVE4_3D_CLASS) {
+ info->name = nve4_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
+ info->query_type = NVE4_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
+ info->max_value.u64 =
+ (id < NVE4_PM_QUERY_METRIC_MP_OCCUPANCY) ? 0 : 100;
+ info->group_id = NVC0_QUERY_MP_COUNTER_GROUP;
+ return 1;
+ } else
+ if (screen->base.class_3d < NVE4_3D_CLASS) {
+ info->name = nvc0_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
+ info->query_type = NVC0_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
+ info->group_id = NVC0_QUERY_MP_COUNTER_GROUP;
+ return 1;
+ }
}
}
/* user asked for info about non-existing query */
@@ -1469,10 +1475,13 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
#endif
if (screen->base.device->drm_version >= 0x01000101) {
- if (screen->base.class_3d >= NVE4_3D_CLASS) {
- count++;
- } else if (screen->compute) {
- count++; /* NVC0_COMPUTE is not always enabled */
+ if (screen->compute) {
+ if (screen->base.class_3d == NVE4_3D_CLASS) {
+ count++;
+ } else
+ if (screen->base.class_3d < NVE4_3D_CLASS) {
+ count++; /* NVC0_COMPUTE is not always enabled */
+ }
}
}
@@ -1480,25 +1489,28 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
return count;
if (id == NVC0_QUERY_MP_COUNTER_GROUP) {
- info->name = "MP counters";
- info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
-
- if (screen->base.class_3d >= NVE4_3D_CLASS) {
- info->num_queries = NVE4_PM_QUERY_COUNT;
-
- /* On NVE4+, each multiprocessor have 8 hardware counters separated
- * in two distinct domains, but we allow only one active query
- * simultaneously because some of them use more than one hardware
- * counter and this will result in an undefined behaviour. */
- info->max_active_queries = 1; /* TODO: handle multiple hw counters */
- return 1;
- } else if (screen->compute) {
- info->num_queries = NVC0_PM_QUERY_COUNT;
-
- /* On NVC0:NVE4, each multiprocessor have 8 hardware counters
- * in a single domain. */
- info->max_active_queries = 8;
- return 1;
+ if (screen->compute) {
+ info->name = "MP counters";
+ info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
+
+ if (screen->base.class_3d == NVE4_3D_CLASS) {
+ info->num_queries = NVE4_PM_QUERY_COUNT;
+
+ /* On NVE4+, each multiprocessor have 8 hardware counters separated
+ * in two distinct domains, but we allow only one active query
+ * simultaneously because some of them use more than one hardware
+ * counter and this will result in an undefined behaviour. */
+ info->max_active_queries = 1; /* TODO: handle multiple hw counters */
+ return 1;
+ } else
+ if (screen->base.class_3d < NVE4_3D_CLASS) {
+ info->num_queries = NVC0_PM_QUERY_COUNT;
+
+ /* On NVC0:NVE4, each multiprocessor have 8 hardware counters
+ * in a single domain. */
+ info->max_active_queries = 8;
+ return 1;
+ }
}
}
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 748c9e7c8b9..56c230e42fc 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -193,6 +193,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
+ case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
return 0;
case PIPE_CAP_VENDOR_ID:
@@ -296,6 +297,7 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
return 1;
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
return 16; /* would be 32 in linked (OpenGL-style) mode */
@@ -581,7 +583,7 @@ nvc0_screen_resize_tls_area(struct nvc0_screen *screen,
size = align(size, 1 << 17);
- ret = nouveau_bo_new(screen->base.device, NOUVEAU_BO_VRAM, 1 << 17, size,
+ ret = nouveau_bo_new(screen->base.device, NV_VRAM_DOMAIN(&screen->base), 1 << 17, size,
NULL, &bo);
if (ret) {
NOUVEAU_ERR("failed to allocate TLS area, size: 0x%"PRIx64"\n", size);
@@ -644,6 +646,11 @@ nvc0_screen_create(struct nouveau_device *dev)
screen->base.sysmem_bindings |=
PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;
+ if (screen->base.vram_domain & NOUVEAU_BO_GART) {
+ screen->base.sysmem_bindings |= screen->base.vidmem_bindings;
+ screen->base.vidmem_bindings = 0;
+ }
+
pscreen->destroy = nvc0_screen_destroy;
pscreen->context_create = nvc0_create;
pscreen->is_format_supported = nvc0_screen_is_format_supported;
@@ -822,7 +829,7 @@ nvc0_screen_create(struct nouveau_device *dev)
nvc0_magic_3d_init(push, screen->eng3d->oclass);
- ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, NULL,
+ ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 17, 1 << 20, NULL,
&screen->text);
if (ret)
goto fail;
@@ -832,12 +839,12 @@ nvc0_screen_create(struct nouveau_device *dev)
*/
nouveau_heap_init(&screen->text_heap, 0, (1 << 20) - 0x100);
- ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 12, 6 << 16, NULL,
+ ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 12, 6 << 16, NULL,
&screen->uniform_bo);
if (ret)
goto fail;
- PUSH_REFN (push, screen->uniform_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ PUSH_REFN (push, screen->uniform_bo, NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_WR);
for (i = 0; i < 5; ++i) {
/* TIC and TSC entries for each unit (nve4+ only) */
@@ -908,7 +915,7 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATA (push, 0);
if (screen->eng3d->oclass < GM107_3D_CLASS) {
- ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, NULL,
+ ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 17, 1 << 20, NULL,
&screen->poly_cache);
if (ret)
goto fail;
@@ -919,7 +926,7 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATA (push, 3);
}
- ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 17, NULL,
+ ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 17, 1 << 17, NULL,
&screen->txc);
if (ret)
goto fail;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
index 1a7d5027a7c..ef2bd43f006 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
@@ -27,10 +27,35 @@ struct nvc0_context;
struct nvc0_blitter;
+struct nvc0_graph_state {
+ boolean flushed;
+ boolean rasterizer_discard;
+ boolean early_z_forced;
+ boolean prim_restart;
+ uint32_t instance_elts; /* bitmask of per-instance elements */
+ uint32_t instance_base;
+ uint32_t constant_vbos;
+ uint32_t constant_elts;
+ int32_t index_bias;
+ uint16_t scissor;
+ uint8_t vbo_mode; /* 0 = normal, 1 = translate, 3 = translate, forced */
+ uint8_t num_vtxbufs;
+ uint8_t num_vtxelts;
+ uint8_t num_textures[6];
+ uint8_t num_samplers[6];
+ uint8_t tls_required; /* bitmask of shader types using l[] */
+ uint8_t c14_bound; /* whether immediate array constbuf is bound */
+ uint8_t clip_enable;
+ uint32_t clip_mode;
+ uint32_t uniform_buffer_bound[5];
+ struct nvc0_transform_feedback_state *tfb;
+};
+
struct nvc0_screen {
struct nouveau_screen base;
struct nvc0_context *cur_ctx;
+ struct nvc0_graph_state save_state;
int num_occlusion_queries_active;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
index 516b33b76d5..e0842784a88 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
@@ -34,7 +34,7 @@ nvc0_program_update_context_state(struct nvc0_context *nvc0,
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
if (prog && prog->need_tls) {
- const uint32_t flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR;
+ const uint32_t flags = NV_VRAM_DOMAIN(&nvc0->screen->base) | NOUVEAU_BO_RDWR;
if (!nvc0->state.tls_required)
BCTX_REFN_bo(nvc0->bufctx_3d, TLS, flags, nvc0->screen->tls);
nvc0->state.tls_required |= 1 << stage;
@@ -262,11 +262,13 @@ nvc0_tfb_validate(struct nvc0_context *nvc0)
if (tfb)
targ->stride = tfb->stride[b];
+ buf = nv04_resource(targ->pipe.buffer);
+
+ BCTX_REFN(nvc0->bufctx_3d, TFB, buf, WR);
+
if (!(nvc0->tfbbuf_dirty & (1 << b)))
continue;
- buf = nv04_resource(targ->pipe.buffer);
-
if (!targ->clean)
nvc0_query_fifo_wait(push, targ->pq);
BEGIN_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 5);
@@ -280,7 +282,6 @@ nvc0_tfb_validate(struct nvc0_context *nvc0)
PUSH_DATA(push, 0); /* TFB_BUFFER_OFFSET */
targ->clean = FALSE;
}
- BCTX_REFN(nvc0->bufctx_3d, TFB, buf, WR);
}
for (; b < 4; ++b)
IMMED_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 0);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index dca06f4cddb..6b7a211e71b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -413,24 +413,6 @@ nvc0_zsa_state_delete(struct pipe_context *pipe, void *hwcso)
#define NV50_TSC_WRAP_CASE(n) \
case PIPE_TEX_WRAP_##n: return NV50_TSC_WRAP_##n
-static INLINE unsigned
-nv50_tsc_wrap_mode(unsigned wrap)
-{
- switch (wrap) {
- NV50_TSC_WRAP_CASE(REPEAT);
- NV50_TSC_WRAP_CASE(MIRROR_REPEAT);
- NV50_TSC_WRAP_CASE(CLAMP_TO_EDGE);
- NV50_TSC_WRAP_CASE(CLAMP_TO_BORDER);
- NV50_TSC_WRAP_CASE(CLAMP);
- NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_EDGE);
- NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_BORDER);
- NV50_TSC_WRAP_CASE(MIRROR_CLAMP);
- default:
- NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
- return NV50_TSC_WRAP_REPEAT;
- }
-}
-
static void
nvc0_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
{
@@ -811,12 +793,12 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
nvc0->constbuf[s][i].user = (cb && cb->user_buffer) ? TRUE : FALSE;
if (nvc0->constbuf[s][i].user) {
nvc0->constbuf[s][i].u.data = cb->user_buffer;
- nvc0->constbuf[s][i].size = cb->buffer_size;
+ nvc0->constbuf[s][i].size = MIN2(cb->buffer_size, 0x10000);
nvc0->constbuf_valid[s] |= 1 << i;
} else
if (cb) {
nvc0->constbuf[s][i].offset = cb->buffer_offset;
- nvc0->constbuf[s][i].size = align(cb->buffer_size, 0x100);
+ nvc0->constbuf[s][i].size = MIN2(align(cb->buffer_size, 0x100), 0x10000);
nvc0->constbuf_valid[s] |= 1 << i;
}
else {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index 6051f128f66..c52399ab312 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -439,7 +439,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
PUSH_DATA (push, (0 << 4) | 1);
}
- nvc0_cb_push(&nvc0->base, bo, NOUVEAU_BO_VRAM,
+ nvc0_cb_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
base, nvc0->state.uniform_buffer_bound[s],
0, (size + 3) / 4,
nvc0->constbuf[s][0].u.data);
@@ -543,6 +543,8 @@ nvc0_switch_pipe_context(struct nvc0_context *ctx_to)
if (ctx_from)
ctx_to->state = ctx_from->state;
+ else
+ ctx_to->state = ctx_to->screen->save_state;
ctx_to->dirty = ~0;
ctx_to->viewports_dirty = ~0;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index 4404d8c1a74..a820de7259a 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -1152,6 +1152,12 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 |
NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST);
}
+ if (nvc0->state.instance_elts) {
+ nvc0->state.instance_elts = 0;
+ BEGIN_NVC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_PER_INSTANCE), 2);
+ PUSH_DATA (push, n);
+ PUSH_DATA (push, 0);
+ }
nvc0->state.num_vtxelts = 2;
for (i = 0; i < info->dst.box.depth; ++i, z += dz) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
index 457f27c8311..ddc0409ca86 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -396,7 +396,7 @@ nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->txc,
- 65536 + tsc->id * 32, NOUVEAU_BO_VRAM,
+ 65536 + tsc->id * 32, NV_VRAM_DOMAIN(&nvc0->screen->base),
32, tsc->tsc);
need_flush = TRUE;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
index 657b8c0fe82..8cf2584b0ce 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -829,6 +829,7 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
}
PUSH_DATA(push, nvc0_prim_gl(info->mode));
#define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8))
+ PUSH_REFN(push, buf->bo, NOUVEAU_BO_RD | buf->domain);
nouveau_pushbuf_space(push, 0, 0, 1);
nouveau_pushbuf_data(push,
buf->bo, offset, NVC0_IB_ENTRY_1_NO_PREFETCH | size);
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
index f243316b899..fce02a7cc57 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -63,7 +63,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
return ret;
}
- ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, NVE4_CP_PARAM_SIZE, NULL,
+ ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0, NVE4_CP_PARAM_SIZE, NULL,
&screen->parm);
if (ret)
return ret;
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index a7b59d8bfbb..a7bca915f57 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -190,6 +190,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
+ case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
return 0;
/* SWTCL-only features. */
@@ -273,6 +274,7 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
return (is_r500 ? 256 : 32) * sizeof(float[4]);
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+ case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 1;
case PIPE_SHADER_CAP_MAX_TEMPS:
return is_r500 ? 128 : is_r400 ? 64 : 32;
@@ -332,6 +334,7 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
case PIPE_SHADER_CAP_MAX_PREDS:
return 0; /* unused */
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+ case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 1;
case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 69afb4caeaa..23ed2cf2532 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -133,13 +133,7 @@ static unsigned translate_opcode(unsigned opcode)
static unsigned translate_saturate(unsigned saturate)
{
- switch(saturate) {
- default:
- fprintf(stderr, "Unknown saturate mode: %i\n", saturate);
- /* fall-through */
- case TGSI_SAT_NONE: return RC_SATURATE_NONE;
- case TGSI_SAT_ZERO_ONE: return RC_SATURATE_ZERO_ONE;
- }
+ return saturate ? RC_SATURATE_ZERO_ONE : RC_SATURATE_NONE;
}
static unsigned translate_register_file(unsigned file)
diff --git a/src/gallium/drivers/r600/Android.mk b/src/gallium/drivers/r600/Android.mk
index e9357597a9b..bfe39873089 100644
--- a/src/gallium/drivers/r600/Android.mk
+++ b/src/gallium/drivers/r600/Android.mk
@@ -33,6 +33,10 @@ LOCAL_SRC_FILES := $(C_SOURCES) $(CXX_SOURCES)
LOCAL_SHARED_LIBRARIES := libdrm libdrm_radeon
LOCAL_MODULE := libmesa_pipe_r600
+ifeq ($(MESA_LOLLIPOP_BUILD),true)
+LOCAL_C_INCLUDES := external/libcxx/include
+else
include external/stlport/libstlport.mk
+endif
include $(GALLIUM_COMMON_MK)
include $(BUILD_STATIC_LIBRARY)
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 21e5d42adc3..e122b607b86 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -332,6 +332,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_SAMPLER_VIEW_TARGET:
case PIPE_CAP_VERTEXID_NOBASE:
+ case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
return 0;
/* Stream output. */
@@ -475,6 +476,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
case PIPE_SHADER_CAP_SUBROUTINES:
return 0;
case PIPE_SHADER_CAP_INTEGERS:
+ case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 1;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 87b6e6e06ec..af7622e9b34 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -617,98 +617,100 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
switch (d->Declaration.File) {
case TGSI_FILE_INPUT:
- i = ctx->shader->ninput;
- assert(i < Elements(ctx->shader->input));
- ctx->shader->ninput += count;
- ctx->shader->input[i].name = d->Semantic.Name;
- ctx->shader->input[i].sid = d->Semantic.Index;
- ctx->shader->input[i].interpolate = d->Interp.Interpolate;
- ctx->shader->input[i].interpolate_location = d->Interp.Location;
- ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First;
- if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
- ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
- switch (ctx->shader->input[i].name) {
- case TGSI_SEMANTIC_FACE:
- if (ctx->face_gpr != -1)
- ctx->shader->input[i].gpr = ctx->face_gpr; /* already allocated by allocate_system_value_inputs */
- else
- ctx->face_gpr = ctx->shader->input[i].gpr;
- break;
- case TGSI_SEMANTIC_COLOR:
- ctx->colors_used++;
- break;
- case TGSI_SEMANTIC_POSITION:
- ctx->fragcoord_input = i;
- break;
- case TGSI_SEMANTIC_PRIMID:
- /* set this for now */
- ctx->shader->gs_prim_id_input = true;
- ctx->shader->ps_prim_id_input = i;
- break;
- }
- if (ctx->bc->chip_class >= EVERGREEN) {
- if ((r = evergreen_interp_input(ctx, i)))
- return r;
+ for (j = 0; j < count; j++) {
+ i = ctx->shader->ninput + j;
+ assert(i < Elements(ctx->shader->input));
+ ctx->shader->input[i].name = d->Semantic.Name;
+ ctx->shader->input[i].sid = d->Semantic.Index + j;
+ ctx->shader->input[i].interpolate = d->Interp.Interpolate;
+ ctx->shader->input[i].interpolate_location = d->Interp.Location;
+ ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First + j;
+ if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
+ ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
+ switch (ctx->shader->input[i].name) {
+ case TGSI_SEMANTIC_FACE:
+ if (ctx->face_gpr != -1)
+ ctx->shader->input[i].gpr = ctx->face_gpr; /* already allocated by allocate_system_value_inputs */
+ else
+ ctx->face_gpr = ctx->shader->input[i].gpr;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ ctx->colors_used++;
+ break;
+ case TGSI_SEMANTIC_POSITION:
+ ctx->fragcoord_input = i;
+ break;
+ case TGSI_SEMANTIC_PRIMID:
+ /* set this for now */
+ ctx->shader->gs_prim_id_input = true;
+ ctx->shader->ps_prim_id_input = i;
+ break;
+ }
+ if (ctx->bc->chip_class >= EVERGREEN) {
+ if ((r = evergreen_interp_input(ctx, i)))
+ return r;
+ }
+ } else if (ctx->type == TGSI_PROCESSOR_GEOMETRY) {
+ /* FIXME probably skip inputs if they aren't passed in the ring */
+ ctx->shader->input[i].ring_offset = ctx->next_ring_offset;
+ ctx->next_ring_offset += 16;
+ if (ctx->shader->input[i].name == TGSI_SEMANTIC_PRIMID)
+ ctx->shader->gs_prim_id_input = true;
}
- } else if (ctx->type == TGSI_PROCESSOR_GEOMETRY) {
- /* FIXME probably skip inputs if they aren't passed in the ring */
- ctx->shader->input[i].ring_offset = ctx->next_ring_offset;
- ctx->next_ring_offset += 16;
- if (ctx->shader->input[i].name == TGSI_SEMANTIC_PRIMID)
- ctx->shader->gs_prim_id_input = true;
- }
- for (j = 1; j < count; ++j) {
- ctx->shader->input[i + j] = ctx->shader->input[i];
- ctx->shader->input[i + j].gpr += j;
}
+ ctx->shader->ninput += count;
break;
case TGSI_FILE_OUTPUT:
- i = ctx->shader->noutput++;
- assert(i < Elements(ctx->shader->output));
- ctx->shader->output[i].name = d->Semantic.Name;
- ctx->shader->output[i].sid = d->Semantic.Index;
- ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First;
- ctx->shader->output[i].interpolate = d->Interp.Interpolate;
- ctx->shader->output[i].write_mask = d->Declaration.UsageMask;
- if (ctx->type == TGSI_PROCESSOR_VERTEX ||
- ctx->type == TGSI_PROCESSOR_GEOMETRY) {
- ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
- switch (d->Semantic.Name) {
- case TGSI_SEMANTIC_CLIPDIST:
- ctx->shader->clip_dist_write |= d->Declaration.UsageMask << (d->Semantic.Index << 2);
- break;
- case TGSI_SEMANTIC_PSIZE:
- ctx->shader->vs_out_misc_write = 1;
- ctx->shader->vs_out_point_size = 1;
- break;
- case TGSI_SEMANTIC_EDGEFLAG:
- ctx->shader->vs_out_misc_write = 1;
- ctx->shader->vs_out_edgeflag = 1;
- ctx->edgeflag_output = i;
- break;
- case TGSI_SEMANTIC_VIEWPORT_INDEX:
- ctx->shader->vs_out_misc_write = 1;
- ctx->shader->vs_out_viewport = 1;
- break;
- case TGSI_SEMANTIC_LAYER:
- ctx->shader->vs_out_misc_write = 1;
- ctx->shader->vs_out_layer = 1;
- break;
- case TGSI_SEMANTIC_CLIPVERTEX:
- ctx->clip_vertex_write = TRUE;
- ctx->cv_output = i;
- break;
- }
- if (ctx->type == TGSI_PROCESSOR_GEOMETRY) {
- ctx->gs_out_ring_offset += 16;
- }
- } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
- switch (d->Semantic.Name) {
- case TGSI_SEMANTIC_COLOR:
- ctx->shader->nr_ps_max_color_exports++;
- break;
+ for (j = 0; j < count; j++) {
+ i = ctx->shader->noutput + j;
+ assert(i < Elements(ctx->shader->output));
+ ctx->shader->output[i].name = d->Semantic.Name;
+ ctx->shader->output[i].sid = d->Semantic.Index + j;
+ ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First + j;
+ ctx->shader->output[i].interpolate = d->Interp.Interpolate;
+ ctx->shader->output[i].write_mask = d->Declaration.UsageMask;
+ if (ctx->type == TGSI_PROCESSOR_VERTEX ||
+ ctx->type == TGSI_PROCESSOR_GEOMETRY) {
+ ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
+ switch (d->Semantic.Name) {
+ case TGSI_SEMANTIC_CLIPDIST:
+ ctx->shader->clip_dist_write |= d->Declaration.UsageMask <<
+ ((d->Semantic.Index + j) << 2);
+ break;
+ case TGSI_SEMANTIC_PSIZE:
+ ctx->shader->vs_out_misc_write = 1;
+ ctx->shader->vs_out_point_size = 1;
+ break;
+ case TGSI_SEMANTIC_EDGEFLAG:
+ ctx->shader->vs_out_misc_write = 1;
+ ctx->shader->vs_out_edgeflag = 1;
+ ctx->edgeflag_output = i;
+ break;
+ case TGSI_SEMANTIC_VIEWPORT_INDEX:
+ ctx->shader->vs_out_misc_write = 1;
+ ctx->shader->vs_out_viewport = 1;
+ break;
+ case TGSI_SEMANTIC_LAYER:
+ ctx->shader->vs_out_misc_write = 1;
+ ctx->shader->vs_out_layer = 1;
+ break;
+ case TGSI_SEMANTIC_CLIPVERTEX:
+ ctx->clip_vertex_write = TRUE;
+ ctx->cv_output = i;
+ break;
+ }
+ if (ctx->type == TGSI_PROCESSOR_GEOMETRY) {
+ ctx->gs_out_ring_offset += 16;
+ }
+ } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
+ switch (d->Semantic.Name) {
+ case TGSI_SEMANTIC_COLOR:
+ ctx->shader->nr_ps_max_color_exports++;
+ break;
+ }
}
}
+ ctx->shader->noutput += count;
break;
case TGSI_FILE_TEMPORARY:
if (ctx->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
@@ -723,6 +725,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
case TGSI_FILE_CONSTANT:
case TGSI_FILE_SAMPLER:
+ case TGSI_FILE_SAMPLER_VIEW:
case TGSI_FILE_ADDRESS:
break;
@@ -1337,7 +1340,7 @@ static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output
int i, j, r;
/* Sanity checking. */
- if (so->num_outputs > PIPE_MAX_SHADER_OUTPUTS) {
+ if (so->num_outputs > PIPE_MAX_SO_OUTPUTS) {
R600_ERR("Too many stream outputs: %d\n", so->num_outputs);
r = -EINVAL;
goto out_err;
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index c50c7055851..13dc9ee8c10 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -95,22 +95,23 @@ static void r600_texture_barrier(struct pipe_context *ctx)
static unsigned r600_conv_pipe_prim(unsigned prim)
{
static const unsigned prim_conv[] = {
- V_008958_DI_PT_POINTLIST,
- V_008958_DI_PT_LINELIST,
- V_008958_DI_PT_LINELOOP,
- V_008958_DI_PT_LINESTRIP,
- V_008958_DI_PT_TRILIST,
- V_008958_DI_PT_TRISTRIP,
- V_008958_DI_PT_TRIFAN,
- V_008958_DI_PT_QUADLIST,
- V_008958_DI_PT_QUADSTRIP,
- V_008958_DI_PT_POLYGON,
- V_008958_DI_PT_LINELIST_ADJ,
- V_008958_DI_PT_LINESTRIP_ADJ,
- V_008958_DI_PT_TRILIST_ADJ,
- V_008958_DI_PT_TRISTRIP_ADJ,
- V_008958_DI_PT_RECTLIST
+ [PIPE_PRIM_POINTS] = V_008958_DI_PT_POINTLIST,
+ [PIPE_PRIM_LINES] = V_008958_DI_PT_LINELIST,
+ [PIPE_PRIM_LINE_LOOP] = V_008958_DI_PT_LINELOOP,
+ [PIPE_PRIM_LINE_STRIP] = V_008958_DI_PT_LINESTRIP,
+ [PIPE_PRIM_TRIANGLES] = V_008958_DI_PT_TRILIST,
+ [PIPE_PRIM_TRIANGLE_STRIP] = V_008958_DI_PT_TRISTRIP,
+ [PIPE_PRIM_TRIANGLE_FAN] = V_008958_DI_PT_TRIFAN,
+ [PIPE_PRIM_QUADS] = V_008958_DI_PT_QUADLIST,
+ [PIPE_PRIM_QUAD_STRIP] = V_008958_DI_PT_QUADSTRIP,
+ [PIPE_PRIM_POLYGON] = V_008958_DI_PT_POLYGON,
+ [PIPE_PRIM_LINES_ADJACENCY] = V_008958_DI_PT_LINELIST_ADJ,
+ [PIPE_PRIM_LINE_STRIP_ADJACENCY] = V_008958_DI_PT_LINESTRIP_ADJ,
+ [PIPE_PRIM_TRIANGLES_ADJACENCY] = V_008958_DI_PT_TRILIST_ADJ,
+ [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = V_008958_DI_PT_TRISTRIP_ADJ,
+ [R600_PRIM_RECTANGLE_LIST] = V_008958_DI_PT_RECTLIST
};
+ assert(prim < Elements(prim_conv));
return prim_conv[prim];
}
diff --git a/src/gallium/drivers/radeon/Android.mk b/src/gallium/drivers/radeon/Android.mk
index d61579280ea..6997a6d3ec3 100644
--- a/src/gallium/drivers/radeon/Android.mk
+++ b/src/gallium/drivers/radeon/Android.mk
@@ -30,6 +30,10 @@ include $(CLEAR_VARS)
LOCAL_SRC_FILES := $(C_SOURCES)
+ifeq ($(MESA_ENABLE_LLVM),true)
+LOCAL_SRC_FILES += $(LLVM_C_FILES)
+endif
+
LOCAL_SHARED_LIBRARIES := libdrm libdrm_radeon
LOCAL_MODULE := libmesa_pipe_radeon
diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources
index c655fe5787b..f63790c329e 100644
--- a/src/gallium/drivers/radeon/Makefile.sources
+++ b/src/gallium/drivers/radeon/Makefile.sources
@@ -12,6 +12,7 @@ C_SOURCES := \
radeon_uvd.c \
radeon_uvd.h \
radeon_vce_40_2_2.c \
+ radeon_vce_50.c \
radeon_vce.c \
radeon_vce.h \
radeon_video.c \
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 42e681dc7d2..3def4446882 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -107,11 +107,10 @@ void r600_draw_rectangle(struct blitter_context *blitter,
void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw)
{
- /* The number of dwords we already used in the DMA so far. */
- num_dw += ctx->rings.dma.cs->cdw;
/* Flush if there's not enough space. */
- if (num_dw > RADEON_MAX_CMDBUF_DWORDS) {
+ if ((num_dw + ctx->rings.dma.cs->cdw) > RADEON_MAX_CMDBUF_DWORDS) {
ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+ assert((num_dw + ctx->rings.dma.cs->cdw) <= RADEON_MAX_CMDBUF_DWORDS);
}
}
diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
index 8612ef8daf7..6a9557b0b73 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -33,7 +33,6 @@
#define RADEON_LLVM_MAX_INPUTS 32 * 4
#define RADEON_LLVM_MAX_OUTPUTS 32 * 4
-#define RADEON_LLVM_MAX_ARRAYS 16
#define RADEON_LLVM_INITIAL_CF_DEPTH 4
@@ -130,8 +129,7 @@ struct radeon_llvm_context {
unsigned loop_depth;
unsigned loop_depth_max;
- struct tgsi_declaration_range arrays[RADEON_LLVM_MAX_ARRAYS];
- unsigned num_arrays;
+ struct tgsi_declaration_range *arrays;
LLVMValueRef main_fn;
diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c b/src/gallium/drivers/radeon/radeon_llvm_emit.c
index 624077c7465..25580b6bd4c 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
@@ -86,10 +86,18 @@ static void init_r600_target()
{
static unsigned initialized = 0;
if (!initialized) {
+#if HAVE_LLVM < 0x0307
LLVMInitializeR600TargetInfo();
LLVMInitializeR600Target();
LLVMInitializeR600TargetMC();
LLVMInitializeR600AsmPrinter();
+#else
+ LLVMInitializeAMDGPUTargetInfo();
+ LLVMInitializeAMDGPUTarget();
+ LLVMInitializeAMDGPUTargetMC();
+ LLVMInitializeAMDGPUAsmPrinter();
+
+#endif
initialized = 1;
}
}
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 20e506b7c5e..c8c980d9d32 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -85,8 +85,9 @@ get_array_range(struct lp_build_tgsi_context *bld_base,
unsigned File, const struct tgsi_ind_register *reg)
{
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
+
if (File != TGSI_FILE_TEMPORARY || reg->ArrayID == 0 ||
- reg->ArrayID > RADEON_LLVM_MAX_ARRAYS) {
+ reg->ArrayID > bld_base->info->array_max[TGSI_FILE_TEMPORARY]) {
struct tgsi_declaration_range range;
range.First = 0;
range.Last = bld_base->info->file_max[File];
@@ -252,8 +253,14 @@ static void emit_declaration(
}
case TGSI_FILE_TEMPORARY:
- if (decl->Declaration.Array && decl->Array.ArrayID <= RADEON_LLVM_MAX_ARRAYS)
+ if (decl->Declaration.Array) {
+ if (!ctx->arrays) {
+ int size = bld_base->info->array_max[TGSI_FILE_TEMPORARY];
+ ctx->arrays = MALLOC(sizeof(ctx->arrays[0]) * size);
+ }
+
ctx->arrays[decl->Array.ArrayID - 1] = decl->Range;
+ }
if (uses_temp_indirect_addressing(bld_base)) {
lp_emit_declaration_soa(bld_base, decl);
break;
@@ -314,6 +321,21 @@ static void emit_declaration(
}
}
+static LLVMValueRef radeon_llvm_saturate(struct lp_build_tgsi_context *bld_base,
+ LLVMValueRef value)
+{
+ struct lp_build_emit_data clamp_emit_data;
+
+ memset(&clamp_emit_data, 0, sizeof(clamp_emit_data));
+ clamp_emit_data.arg_count = 3;
+ clamp_emit_data.args[0] = value;
+ clamp_emit_data.args[2] = bld_base->base.one;
+ clamp_emit_data.args[1] = bld_base->base.zero;
+
+ return lp_build_emit_llvm(bld_base, TGSI_OPCODE_CLAMP,
+ &clamp_emit_data);
+}
+
static void
emit_store(
struct lp_build_tgsi_context * bld_base,
@@ -324,7 +346,6 @@ emit_store(
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
- struct lp_build_context base = bld->bld_base.base;
const struct tgsi_full_dst_register *reg = &inst->Dst[0];
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
LLVMValueRef temp_ptr;
@@ -350,28 +371,8 @@ emit_store(
TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
LLVMValueRef value = dst[chan_index];
- if (inst->Instruction.Saturate != TGSI_SAT_NONE) {
- struct lp_build_emit_data clamp_emit_data;
-
- memset(&clamp_emit_data, 0, sizeof(clamp_emit_data));
- clamp_emit_data.arg_count = 3;
- clamp_emit_data.args[0] = value;
- clamp_emit_data.args[2] = base.one;
-
- switch(inst->Instruction.Saturate) {
- case TGSI_SAT_ZERO_ONE:
- clamp_emit_data.args[1] = base.zero;
- break;
- case TGSI_SAT_MINUS_PLUS_ONE:
- clamp_emit_data.args[1] = LLVMConstReal(
- base.elem_type, -1.0f);
- break;
- default:
- assert(0);
- }
- value = lp_build_emit_llvm(bld_base, TGSI_OPCODE_CLAMP,
- &clamp_emit_data);
- }
+ if (inst->Instruction.Saturate)
+ value = radeon_llvm_saturate(bld_base, value);
if (reg->Register.File == TGSI_FILE_ADDRESS) {
temp_ptr = bld->addr[reg->Register.Index][chan_index];
@@ -1438,8 +1439,6 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
/* Allocate outputs */
ctx->soa.outputs = ctx->outputs;
- ctx->num_arrays = 0;
-
/* XXX: Is there a better way to initialize all this ? */
lp_set_default_actions(bld_base);
@@ -1628,8 +1627,11 @@ void radeon_llvm_dispose(struct radeon_llvm_context * ctx)
{
LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module);
LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context);
+ FREE(ctx->arrays);
+ ctx->arrays = NULL;
FREE(ctx->temps);
ctx->temps = NULL;
+ ctx->temps_count = 0;
FREE(ctx->loop);
ctx->loop = NULL;
ctx->loop_depth_max = 0;
diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c
index e220f40165b..a6567379fe3 100644
--- a/src/gallium/drivers/radeon/radeon_vce.c
+++ b/src/gallium/drivers/radeon/radeon_vce.c
@@ -44,6 +44,10 @@
#include "radeon_video.h"
#include "radeon_vce.h"
+#define FW_40_2_2 ((40 << 24) | (2 << 16) | (2 << 8))
+#define FW_50_0_1 ((50 << 24) | (0 << 16) | (1 << 8))
+#define FW_50_1_2 ((50 << 24) | (1 << 16) | (2 << 8))
+
/**
* flush commands to the hardware
*/
@@ -183,6 +187,44 @@ static unsigned get_cpb_num(struct rvce_encoder *enc)
}
/**
+ * Get the slot for the currently encoded frame
+ */
+struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc)
+{
+ return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.prev, list);
+}
+
+/**
+ * Get the slot for L0
+ */
+struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc)
+{
+ return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next, list);
+}
+
+/**
+ * Get the slot for L1
+ */
+struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc)
+{
+ return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next->next, list);
+}
+
+/**
+ * Calculate the offsets into the CPB
+ */
+void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
+ unsigned *luma_offset, unsigned *chroma_offset)
+{
+ unsigned pitch = align(enc->luma->level[0].pitch_bytes, 128);
+ unsigned vpitch = align(enc->luma->npix_y, 16);
+ unsigned fsize = pitch * (vpitch + vpitch / 2);
+
+ *luma_offset = slot->index * fsize;
+ *chroma_offset = *luma_offset + pitch * vpitch;
+}
+
+/**
* destroy this video encoder
*/
static void rvce_destroy(struct pipe_video_codec *encoder)
@@ -406,7 +448,19 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
reset_cpb(enc);
- radeon_vce_40_2_2_init(enc);
+ switch (rscreen->info.vce_fw_version) {
+ case FW_40_2_2:
+ radeon_vce_40_2_2_init(enc);
+ break;
+
+ case FW_50_0_1:
+ case FW_50_1_2:
+ radeon_vce_50_init(enc);
+ break;
+
+ default:
+ goto error;
+ }
return &enc->base;
@@ -426,5 +480,7 @@ error:
*/
bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen)
{
- return rscreen->info.vce_fw_version == ((40 << 24) | (2 << 16) | (2 << 8));
+ return rscreen->info.vce_fw_version == FW_40_2_2 ||
+ rscreen->info.vce_fw_version == FW_50_0_1 ||
+ rscreen->info.vce_fw_version == FW_50_1_2;
}
diff --git a/src/gallium/drivers/radeon/radeon_vce.h b/src/gallium/drivers/radeon/radeon_vce.h
index 1cf018006a8..8319ef48cd5 100644
--- a/src/gallium/drivers/radeon/radeon_vce.h
+++ b/src/gallium/drivers/radeon/radeon_vce.h
@@ -104,6 +104,13 @@ struct rvce_encoder {
bool use_vui;
};
+/* CPB handling functions */
+struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc);
+struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc);
+struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc);
+void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
+ unsigned *luma_offset, unsigned *chroma_offset);
+
struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
const struct pipe_video_codec *templat,
struct radeon_winsys* ws,
@@ -114,4 +121,7 @@ bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen);
/* init vce fw 40.2.2 specific callbacks */
void radeon_vce_40_2_2_init(struct rvce_encoder *enc);
+/* init vce fw 50 specific callbacks */
+void radeon_vce_50_init(struct rvce_encoder *enc);
+
#endif
diff --git a/src/gallium/drivers/radeon/radeon_vce_40_2_2.c b/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
index 09029575547..51b17b5f6a8 100644
--- a/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
+++ b/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
@@ -46,32 +46,6 @@
static const unsigned profiles[7] = { 66, 77, 88, 100, 110, 122, 244 };
-static struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc)
-{
- return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.prev, list);
-}
-
-static struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc)
-{
- return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next, list);
-}
-
-static struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc)
-{
- return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next->next, list);
-}
-
-static void frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
- unsigned *luma_offset, unsigned *chroma_offset)
-{
- unsigned pitch = align(enc->luma->level[0].pitch_bytes, 128);
- unsigned vpitch = align(enc->luma->npix_y, 16);
- unsigned fsize = pitch * (vpitch + vpitch / 2);
-
- *luma_offset = slot->index * fsize;
- *chroma_offset = *luma_offset + pitch * vpitch;
-}
-
static void session(struct rvce_encoder *enc)
{
RVCE_BEGIN(0x00000001); // session cmd
@@ -369,7 +343,7 @@ static void encode(struct rvce_encoder *enc)
if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
struct rvce_cpb_slot *l0 = l0_slot(enc);
- frame_offset(enc, l0, &luma_offset, &chroma_offset);
+ rvce_frame_offset(enc, l0, &luma_offset, &chroma_offset);
RVCE_CS(l0->picture_type); // encPicType
RVCE_CS(l0->frame_num); // frameNumber
RVCE_CS(l0->pic_order_cnt); // pictureOrderCount
@@ -395,7 +369,7 @@ static void encode(struct rvce_encoder *enc)
RVCE_CS(0x00000000); // pictureStructure
if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
struct rvce_cpb_slot *l1 = l1_slot(enc);
- frame_offset(enc, l1, &luma_offset, &chroma_offset);
+ rvce_frame_offset(enc, l1, &luma_offset, &chroma_offset);
RVCE_CS(l1->picture_type); // encPicType
RVCE_CS(l1->frame_num); // frameNumber
RVCE_CS(l1->pic_order_cnt); // pictureOrderCount
@@ -409,7 +383,7 @@ static void encode(struct rvce_encoder *enc)
RVCE_CS(0xffffffff); // chromaOffset
}
- frame_offset(enc, current_slot(enc), &luma_offset, &chroma_offset);
+ rvce_frame_offset(enc, current_slot(enc), &luma_offset, &chroma_offset);
RVCE_CS(luma_offset); // encReconstructedLumaOffset
RVCE_CS(chroma_offset); // encReconstructedChromaOffset
RVCE_CS(0x00000000); // encColocBufferOffset
diff --git a/src/gallium/drivers/radeon/radeon_vce_50.c b/src/gallium/drivers/radeon/radeon_vce_50.c
new file mode 100644
index 00000000000..84a2bfb117e
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_vce_50.c
@@ -0,0 +1,228 @@
+/**************************************************************************
+ *
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Christian König <[email protected]>
+ *
+ */
+
+#include <stdio.h>
+
+#include "pipe/p_video_codec.h"
+
+#include "util/u_video.h"
+#include "util/u_memory.h"
+
+#include "vl/vl_video_buffer.h"
+
+#include "r600_pipe_common.h"
+#include "radeon_video.h"
+#include "radeon_vce.h"
+
+static void task_info(struct rvce_encoder *enc, uint32_t taskOperation)
+{
+ RVCE_BEGIN(0x00000002); // task info
+ RVCE_CS(0xffffffff); // offsetOfNextTaskInfo
+ RVCE_CS(taskOperation); // taskOperation
+ RVCE_CS(0x00000000); // referencePictureDependency
+ RVCE_CS(0x00000000); // collocateFlagDependency
+ RVCE_CS(0x00000000); // feedbackIndex
+ RVCE_CS(0x00000000); // videoBitstreamRingIndex
+ RVCE_END();
+}
+
+static void rate_control(struct rvce_encoder *enc)
+{
+ RVCE_BEGIN(0x04000005); // rate control
+ RVCE_CS(enc->pic.rate_ctrl.rate_ctrl_method); // encRateControlMethod
+ RVCE_CS(enc->pic.rate_ctrl.target_bitrate); // encRateControlTargetBitRate
+ RVCE_CS(enc->pic.rate_ctrl.peak_bitrate); // encRateControlPeakBitRate
+ RVCE_CS(enc->pic.rate_ctrl.frame_rate_num); // encRateControlFrameRateNum
+ RVCE_CS(0x00000000); // encGOPSize
+ RVCE_CS(enc->pic.quant_i_frames); // encQP_I
+ RVCE_CS(enc->pic.quant_p_frames); // encQP_P
+ RVCE_CS(enc->pic.quant_b_frames); // encQP_B
+ RVCE_CS(enc->pic.rate_ctrl.vbv_buffer_size); // encVBVBufferSize
+ RVCE_CS(enc->pic.rate_ctrl.frame_rate_den); // encRateControlFrameRateDen
+ RVCE_CS(0x00000000); // encVBVBufferLevel
+ RVCE_CS(0x00000000); // encMaxAUSize
+ RVCE_CS(0x00000000); // encQPInitialMode
+ RVCE_CS(enc->pic.rate_ctrl.target_bits_picture); // encTargetBitsPerPicture
+ RVCE_CS(enc->pic.rate_ctrl.peak_bits_picture_integer); // encPeakBitsPerPictureInteger
+ RVCE_CS(enc->pic.rate_ctrl.peak_bits_picture_fraction); // encPeakBitsPerPictureFractional
+ RVCE_CS(0x00000000); // encMinQP
+ RVCE_CS(0x00000033); // encMaxQP
+ RVCE_CS(0x00000000); // encSkipFrameEnable
+ RVCE_CS(0x00000000); // encFillerDataEnable
+ RVCE_CS(0x00000000); // encEnforceHRD
+ RVCE_CS(0x00000000); // encBPicsDeltaQP
+ RVCE_CS(0x00000000); // encReferenceBPicsDeltaQP
+ RVCE_CS(0x00000000); // encRateControlReInitDisable
+ RVCE_CS(0x00000000); // encLCVBRInitQPFlag
+ RVCE_CS(0x00000000); // encLCVBRSATDBasedNonlinearBitBudgetFlag
+ RVCE_END();
+}
+
+static void encode(struct rvce_encoder *enc)
+{
+ int i;
+ unsigned luma_offset, chroma_offset;
+
+ task_info(enc, 0x00000003);
+
+ RVCE_BEGIN(0x05000001); // context buffer
+ RVCE_READWRITE(enc->cpb.res->cs_buf, enc->cpb.res->domains); // encodeContextAddressHi
+ RVCE_CS(0x00000000); // encodeContextAddressLo
+ RVCE_END();
+
+ RVCE_BEGIN(0x05000004); // video bitstream buffer
+ RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT); // videoBitstreamRingAddressHi
+ RVCE_CS(0x00000000); // videoBitstreamRingAddressLo
+ RVCE_CS(enc->bs_size); // videoBitstreamRingSize
+ RVCE_END();
+
+ RVCE_BEGIN(0x03000001); // encode
+ RVCE_CS(enc->pic.frame_num ? 0x0 : 0x11); // insertHeaders
+ RVCE_CS(0x00000000); // pictureStructure
+ RVCE_CS(enc->bs_size); // allowedMaxBitstreamSize
+ RVCE_CS(0x00000000); // forceRefreshMap
+ RVCE_CS(0x00000000); // insertAUD
+ RVCE_CS(0x00000000); // endOfSequence
+ RVCE_CS(0x00000000); // endOfStream
+ RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM); // inputPictureLumaAddressHi
+ RVCE_CS(enc->luma->level[0].offset); // inputPictureLumaAddressLo
+ RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM); // inputPictureChromaAddressHi
+ RVCE_CS(enc->chroma->level[0].offset); // inputPictureChromaAddressLo
+ RVCE_CS(align(enc->luma->npix_y, 16)); // encInputFrameYPitch
+ RVCE_CS(enc->luma->level[0].pitch_bytes); // encInputPicLumaPitch
+ RVCE_CS(enc->chroma->level[0].pitch_bytes); // encInputPicChromaPitch
+ RVCE_CS(0x00010000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading)
+ RVCE_CS(0x00000000); // encInputPicTileConfig
+ RVCE_CS(enc->pic.picture_type); // encPicType
+ RVCE_CS(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); // encIdrFlag
+ RVCE_CS(0x00000000); // encIdrPicId
+ RVCE_CS(0x00000000); // encMGSKeyPic
+ RVCE_CS(!enc->pic.not_referenced); // encReferenceFlag
+ RVCE_CS(0x00000000); // encTemporalLayerIndex
+ RVCE_CS(0x00000000); // num_ref_idx_active_override_flag
+ RVCE_CS(0x00000000); // num_ref_idx_l0_active_minus1
+ RVCE_CS(0x00000000); // num_ref_idx_l1_active_minus1
+
+ i = enc->pic.frame_num - enc->pic.ref_idx_l0;
+ if (i > 1 && enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) {
+ RVCE_CS(0x00000001); // encRefListModificationOp
+ RVCE_CS(i - 1); // encRefListModificationNum
+ } else {
+ RVCE_CS(0x00000000); // encRefListModificationOp
+ RVCE_CS(0x00000000); // encRefListModificationNum
+ }
+
+ for (i = 0; i < 3; ++i) {
+ RVCE_CS(0x00000000); // encRefListModificationOp
+ RVCE_CS(0x00000000); // encRefListModificationNum
+ }
+ for (i = 0; i < 4; ++i) {
+ RVCE_CS(0x00000000); // encDecodedPictureMarkingOp
+ RVCE_CS(0x00000000); // encDecodedPictureMarkingNum
+ RVCE_CS(0x00000000); // encDecodedPictureMarkingIdx
+ RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingOp
+ RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingNum
+ }
+
+ // encReferencePictureL0[0]
+ RVCE_CS(0x00000000); // pictureStructure
+ if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
+ enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
+ struct rvce_cpb_slot *l0 = l0_slot(enc);
+ rvce_frame_offset(enc, l0, &luma_offset, &chroma_offset);
+ RVCE_CS(l0->picture_type); // encPicType
+ RVCE_CS(l0->frame_num); // frameNumber
+ RVCE_CS(l0->pic_order_cnt); // pictureOrderCount
+ RVCE_CS(luma_offset); // lumaOffset
+ RVCE_CS(chroma_offset); // chromaOffset
+ } else {
+ RVCE_CS(0x00000000); // encPicType
+ RVCE_CS(0x00000000); // frameNumber
+ RVCE_CS(0x00000000); // pictureOrderCount
+ RVCE_CS(0xffffffff); // lumaOffset
+ RVCE_CS(0xffffffff); // chromaOffset
+ }
+
+ // encReferencePictureL0[1]
+ RVCE_CS(0x00000000); // pictureStructure
+ RVCE_CS(0x00000000); // encPicType
+ RVCE_CS(0x00000000); // frameNumber
+ RVCE_CS(0x00000000); // pictureOrderCount
+ RVCE_CS(0xffffffff); // lumaOffset
+ RVCE_CS(0xffffffff); // chromaOffset
+
+ // encReferencePictureL1[0]
+ RVCE_CS(0x00000000); // pictureStructure
+ if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
+ struct rvce_cpb_slot *l1 = l1_slot(enc);
+ rvce_frame_offset(enc, l1, &luma_offset, &chroma_offset);
+ RVCE_CS(l1->picture_type); // encPicType
+ RVCE_CS(l1->frame_num); // frameNumber
+ RVCE_CS(l1->pic_order_cnt); // pictureOrderCount
+ RVCE_CS(luma_offset); // lumaOffset
+ RVCE_CS(chroma_offset); // chromaOffset
+ } else {
+ RVCE_CS(0x00000000); // encPicType
+ RVCE_CS(0x00000000); // frameNumber
+ RVCE_CS(0x00000000); // pictureOrderCount
+ RVCE_CS(0xffffffff); // lumaOffset
+ RVCE_CS(0xffffffff); // chromaOffset
+ }
+
+ rvce_frame_offset(enc, current_slot(enc), &luma_offset, &chroma_offset);
+ RVCE_CS(luma_offset); // encReconstructedLumaOffset
+ RVCE_CS(chroma_offset); // encReconstructedChromaOffset
+ RVCE_CS(0x00000000); // encColocBufferOffset
+ RVCE_CS(0x00000000); // encReconstructedRefBasePictureLumaOffset
+ RVCE_CS(0x00000000); // encReconstructedRefBasePictureChromaOffset
+ RVCE_CS(0x00000000); // encReferenceRefBasePictureLumaOffset
+ RVCE_CS(0x00000000); // encReferenceRefBasePictureChromaOffset
+ RVCE_CS(0x00000000); // pictureCount
+ RVCE_CS(enc->pic.frame_num); // frameNumber
+ RVCE_CS(enc->pic.pic_order_cnt); // pictureOrderCount
+ RVCE_CS(0x00000000); // numIPicRemainInRCGOP
+ RVCE_CS(0x00000000); // numPPicRemainInRCGOP
+ RVCE_CS(0x00000000); // numBPicRemainInRCGOP
+ RVCE_CS(0x00000000); // numIRPicRemainInRCGOP
+ RVCE_CS(0x00000000); // enableIntraRefresh
+ RVCE_END();
+}
+
+void radeon_vce_50_init(struct rvce_encoder *enc)
+{
+ radeon_vce_40_2_2_init(enc);
+
+ /* only the two below are different */
+ enc->rate_control = rate_control;
+ enc->encode = encode;
+}
diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources
index 774dc2285c0..2876c0ae735 100644
--- a/src/gallium/drivers/radeonsi/Makefile.sources
+++ b/src/gallium/drivers/radeonsi/Makefile.sources
@@ -1,4 +1,5 @@
C_SOURCES := \
+ cik_sdma.c \
si_blit.c \
si_commands.c \
si_compute.c \
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c
new file mode 100644
index 00000000000..86111cb86e8
--- /dev/null
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -0,0 +1,364 @@
+/*
+ * Copyright 2010 Jerome Glisse <[email protected]>
+ * Copyright 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jerome Glisse
+ */
+
+#include "sid.h"
+#include "si_pipe.h"
+#include "../radeon/r600_cs.h"
+
+#include "util/u_format.h"
+
+static uint32_t cik_micro_tile_mode(struct si_screen *sscreen, unsigned tile_mode)
+{
+ if (sscreen->b.info.si_tile_mode_array_valid) {
+ uint32_t gb_tile_mode = sscreen->b.info.si_tile_mode_array[tile_mode];
+
+ return G_009910_MICRO_TILE_MODE_NEW(gb_tile_mode);
+ }
+
+ /* The kernel cannot return the tile mode array. Guess? */
+ return V_009910_ADDR_SURF_THIN_MICRO_TILING;
+}
+
+static void cik_sdma_do_copy_buffer(struct si_context *ctx,
+ struct pipe_resource *dst,
+ struct pipe_resource *src,
+ uint64_t dst_offset,
+ uint64_t src_offset,
+ uint64_t size)
+{
+ struct radeon_winsys_cs *cs = ctx->b.rings.dma.cs;
+ unsigned i, ncopy, csize;
+ struct r600_resource *rdst = (struct r600_resource*)dst;
+ struct r600_resource *rsrc = (struct r600_resource*)src;
+
+ dst_offset += r600_resource(dst)->gpu_address;
+ src_offset += r600_resource(src)->gpu_address;
+
+ ncopy = (size + CIK_SDMA_COPY_MAX_SIZE - 1) / CIK_SDMA_COPY_MAX_SIZE;
+ r600_need_dma_space(&ctx->b, ncopy * 7);
+
+ r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
+ RADEON_PRIO_MIN);
+ r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
+ RADEON_PRIO_MIN);
+
+ for (i = 0; i < ncopy; i++) {
+ csize = size < CIK_SDMA_COPY_MAX_SIZE ? size : CIK_SDMA_COPY_MAX_SIZE;
+ cs->buf[cs->cdw++] = CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
+ CIK_SDMA_COPY_SUB_OPCODE_LINEAR,
+ 0);
+ cs->buf[cs->cdw++] = csize;
+ cs->buf[cs->cdw++] = 0; /* src/dst endian swap */
+ cs->buf[cs->cdw++] = src_offset;
+ cs->buf[cs->cdw++] = src_offset >> 32;
+ cs->buf[cs->cdw++] = dst_offset;
+ cs->buf[cs->cdw++] = dst_offset >> 32;
+ dst_offset += csize;
+ src_offset += csize;
+ size -= csize;
+ }
+}
+
+static void cik_sdma_copy_buffer(struct si_context *ctx,
+ struct pipe_resource *dst,
+ struct pipe_resource *src,
+ uint64_t dst_offset,
+ uint64_t src_offset,
+ uint64_t size)
+{
+ struct r600_resource *rdst = (struct r600_resource*)dst;
+
+ /* Mark the buffer range of destination as valid (initialized),
+ * so that transfer_map knows it should wait for the GPU when mapping
+ * that range. */
+ util_range_add(&rdst->valid_buffer_range, dst_offset,
+ dst_offset + size);
+
+ cik_sdma_do_copy_buffer(ctx, dst, src, dst_offset, src_offset, size);
+}
+
+static void cik_sdma_copy_tile(struct si_context *ctx,
+ struct pipe_resource *dst,
+ unsigned dst_level,
+ struct pipe_resource *src,
+ unsigned src_level,
+ unsigned y,
+ unsigned copy_height,
+ unsigned y_align,
+ unsigned pitch,
+ unsigned bpe)
+{
+ struct radeon_winsys_cs *cs = ctx->b.rings.dma.cs;
+ struct si_screen *sscreen = ctx->screen;
+ struct r600_texture *rsrc = (struct r600_texture*)src;
+ struct r600_texture *rdst = (struct r600_texture*)dst;
+ struct r600_texture *rlinear, *rtiled;
+ unsigned linear_lvl, tiled_lvl;
+ unsigned array_mode, lbpe, pitch_tile_max, slice_tile_max, size;
+ unsigned ncopy, height, cheight, detile, i, src_mode, dst_mode;
+ unsigned sub_op, bank_h, bank_w, mt_aspect, nbanks, tile_split, mt;
+ uint64_t base, addr;
+ unsigned pipe_config, tile_mode_index;
+
+ dst_mode = rdst->surface.level[dst_level].mode;
+ src_mode = rsrc->surface.level[src_level].mode;
+ /* downcast linear aligned to linear to simplify test */
+ src_mode = src_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : src_mode;
+ dst_mode = dst_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : dst_mode;
+ assert(dst_mode != src_mode);
+ assert(src_mode == RADEON_SURF_MODE_LINEAR || dst_mode == RADEON_SURF_MODE_LINEAR);
+
+ sub_op = CIK_SDMA_COPY_SUB_OPCODE_TILED;
+ lbpe = util_logbase2(bpe);
+ pitch_tile_max = ((pitch / bpe) / 8) - 1;
+
+ detile = dst_mode == RADEON_SURF_MODE_LINEAR;
+ rlinear = detile ? rdst : rsrc;
+ rtiled = detile ? rsrc : rdst;
+ linear_lvl = detile ? dst_level : src_level;
+ tiled_lvl = detile ? src_level : dst_level;
+
+ assert(!util_format_is_depth_and_stencil(rtiled->resource.b.b.format));
+
+ array_mode = si_array_mode(rtiled->surface.level[tiled_lvl].mode);
+ slice_tile_max = (rtiled->surface.level[tiled_lvl].nblk_x *
+ rtiled->surface.level[tiled_lvl].nblk_y) / (8*8) - 1;
+ height = rlinear->surface.level[linear_lvl].nblk_y;
+ base = rtiled->surface.level[tiled_lvl].offset;
+ addr = rlinear->surface.level[linear_lvl].offset;
+ bank_h = cik_bank_wh(rtiled->surface.bankh);
+ bank_w = cik_bank_wh(rtiled->surface.bankw);
+ mt_aspect = cik_macro_tile_aspect(rtiled->surface.mtilea);
+ tile_split = cik_tile_split(rtiled->surface.tile_split);
+ tile_mode_index = si_tile_mode_index(rtiled, tiled_lvl, false);
+ nbanks = si_num_banks(sscreen, rtiled);
+ base += rtiled->resource.gpu_address;
+ addr += rlinear->resource.gpu_address;
+
+ pipe_config = cik_db_pipe_config(sscreen, tile_mode_index);
+ mt = cik_micro_tile_mode(sscreen, tile_mode_index);
+
+ size = (copy_height * pitch) / 4;
+ cheight = copy_height;
+ if (((cheight * pitch) / 4) > CIK_SDMA_COPY_MAX_SIZE) {
+ cheight = (CIK_SDMA_COPY_MAX_SIZE * 4) / pitch;
+ cheight &= ~(y_align - 1);
+ }
+ ncopy = (copy_height + cheight - 1) / cheight;
+ r600_need_dma_space(&ctx->b, ncopy * 12);
+
+ r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, &rsrc->resource,
+ RADEON_USAGE_READ, RADEON_PRIO_MIN);
+ r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, &rdst->resource,
+ RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
+
+ copy_height = size * 4 / pitch;
+ for (i = 0; i < ncopy; i++) {
+ cheight = copy_height;
+ if (((cheight * pitch) / 4) > CIK_SDMA_COPY_MAX_SIZE) {
+ cheight = (CIK_SDMA_COPY_MAX_SIZE * 4) / pitch;
+ cheight &= ~(y_align - 1);
+ }
+ size = (cheight * pitch) / 4;
+
+ cs->buf[cs->cdw++] = CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
+ sub_op, detile << 15);
+ cs->buf[cs->cdw++] = base;
+ cs->buf[cs->cdw++] = base >> 32;
+ cs->buf[cs->cdw++] = ((height - 1) << 16) | pitch_tile_max;
+ cs->buf[cs->cdw++] = slice_tile_max;
+ cs->buf[cs->cdw++] = (pipe_config << 26) | (mt_aspect << 24) |
+ (nbanks << 21) | (bank_h << 18) | (bank_w << 15) |
+ (tile_split << 11) | (mt << 8) | (array_mode << 3) |
+ lbpe;
+ cs->buf[cs->cdw++] = y << 16; /* | x */
+ cs->buf[cs->cdw++] = 0; /* z */;
+ cs->buf[cs->cdw++] = addr & 0xfffffffc;
+ cs->buf[cs->cdw++] = addr >> 32;
+ cs->buf[cs->cdw++] = (pitch / bpe) - 1;
+ cs->buf[cs->cdw++] = size;
+
+ copy_height -= cheight;
+ y += cheight;
+ }
+}
+
+void cik_sdma_copy(struct pipe_context *ctx,
+ struct pipe_resource *dst,
+ unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src,
+ unsigned src_level,
+ const struct pipe_box *src_box)
+{
+ struct si_context *sctx = (struct si_context *)ctx;
+ struct r600_texture *rsrc = (struct r600_texture*)src;
+ struct r600_texture *rdst = (struct r600_texture*)dst;
+ unsigned dst_pitch, src_pitch, bpe, dst_mode, src_mode;
+ unsigned src_w, dst_w;
+ unsigned src_x, src_y;
+ unsigned copy_height, y_align;
+ unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz;
+
+ if (sctx->b.rings.dma.cs == NULL) {
+ goto fallback;
+ }
+
+ if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
+ cik_sdma_copy_buffer(sctx, dst, src, dst_x, src_box->x, src_box->width);
+ return;
+ }
+
+ /* Before re-enabling this, please make sure you can hit all newly
+ * enabled paths in your testing, preferably with both piglit (in
+ * particular the streaming-texture-leak test) and real world apps
+ * (e.g. the UE4 Elemental demo).
+ */
+ goto fallback;
+
+ if (src->format != dst->format ||
+ rdst->surface.nsamples > 1 || rsrc->surface.nsamples > 1 ||
+ rdst->dirty_level_mask & (1 << dst_level)) {
+ goto fallback;
+ }
+
+ if (rsrc->dirty_level_mask & (1 << src_level)) {
+ if (rsrc->htile_buffer)
+ goto fallback;
+
+ ctx->flush_resource(ctx, src);
+ }
+
+ src_x = util_format_get_nblocksx(src->format, src_box->x);
+ dst_x = util_format_get_nblocksx(src->format, dst_x);
+ src_y = util_format_get_nblocksy(src->format, src_box->y);
+ dst_y = util_format_get_nblocksy(src->format, dst_y);
+
+ dst_pitch = rdst->surface.level[dst_level].pitch_bytes;
+ src_pitch = rsrc->surface.level[src_level].pitch_bytes;
+ src_w = rsrc->surface.level[src_level].npix_x;
+ dst_w = rdst->surface.level[dst_level].npix_x;
+
+ if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w ||
+ src_box->width != src_w ||
+ rsrc->surface.level[src_level].nblk_y !=
+ rdst->surface.level[dst_level].nblk_y) {
+ /* FIXME CIK can do partial blit */
+ goto fallback;
+ }
+
+ bpe = rdst->surface.bpe;
+ copy_height = src_box->height / rsrc->surface.blk_h;
+ dst_mode = rdst->surface.level[dst_level].mode;
+ src_mode = rsrc->surface.level[src_level].mode;
+ /* downcast linear aligned to linear to simplify test */
+ src_mode = src_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : src_mode;
+ dst_mode = dst_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : dst_mode;
+
+ /* Dimensions must be aligned to (macro)tiles */
+ switch (src_mode == RADEON_SURF_MODE_LINEAR ? dst_mode : src_mode) {
+ case RADEON_SURF_MODE_1D:
+ if ((src_x % 8) || (src_y % 8) || (dst_x % 8) || (dst_y % 8) ||
+ (copy_height % 8))
+ goto fallback;
+ y_align = 8;
+ break;
+ case RADEON_SURF_MODE_2D: {
+ unsigned mtilew, mtileh, num_banks;
+
+ switch (si_num_banks(sctx->screen, rsrc)) {
+ case V_02803C_ADDR_SURF_2_BANK:
+ default:
+ num_banks = 2;
+ break;
+ case V_02803C_ADDR_SURF_4_BANK:
+ num_banks = 4;
+ break;
+ case V_02803C_ADDR_SURF_8_BANK:
+ num_banks = 8;
+ break;
+ case V_02803C_ADDR_SURF_16_BANK:
+ num_banks = 16;
+ break;
+ }
+
+ mtilew = (8 * rsrc->surface.bankw *
+ sctx->screen->b.tiling_info.num_channels) *
+ rsrc->surface.mtilea;
+ assert(!(mtilew & (mtilew - 1)));
+ mtileh = (8 * rsrc->surface.bankh * num_banks) /
+ rsrc->surface.mtilea;
+ assert(!(mtileh & (mtileh - 1)));
+
+ if ((src_x & (mtilew - 1)) || (src_y & (mtileh - 1)) ||
+ (dst_x & (mtilew - 1)) || (dst_y & (mtileh - 1)) ||
+ (copy_height & (mtileh - 1)))
+ goto fallback;
+
+ y_align = mtileh;
+ break;
+ }
+ default:
+ y_align = 1;
+ }
+
+ if (src_mode == dst_mode) {
+ uint64_t dst_offset, src_offset;
+ unsigned src_h, dst_h;
+
+ src_h = rsrc->surface.level[src_level].npix_y;
+ dst_h = rdst->surface.level[dst_level].npix_y;
+
+ if (src_box->depth > 1 &&
+ (src_y || dst_y || src_h != dst_h || src_box->height != src_h))
+ goto fallback;
+
+ /* A simple DMA blit will do. NOTE: the code here assumes
+ * dst_pitch == src_pitch
+ */
+ src_offset= rsrc->surface.level[src_level].offset;
+ src_offset += rsrc->surface.level[src_level].slice_size * src_box->z;
+ src_offset += src_y * src_pitch + src_x * bpe;
+ dst_offset = rdst->surface.level[dst_level].offset;
+ dst_offset += rdst->surface.level[dst_level].slice_size * dst_z;
+ dst_offset += dst_y * dst_pitch + dst_x * bpe;
+ cik_sdma_do_copy_buffer(sctx, dst, src, dst_offset, src_offset,
+ src_box->depth *
+ rsrc->surface.level[src_level].slice_size);
+ } else {
+ if (dst_y != src_y || src_box->depth > 1 || src_box->z || dst_z)
+ goto fallback;
+
+ cik_sdma_copy_tile(sctx, dst, dst_level, src, src_level,
+ src_y, copy_height, y_align, dst_pitch, bpe);
+ }
+ return;
+
+fallback:
+ si_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
+ src, src_level, src_box);
+}
diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c
index db523eef318..7a0076e7aa9 100644
--- a/src/gallium/drivers/radeonsi/si_dma.c
+++ b/src/gallium/drivers/radeonsi/si_dma.c
@@ -30,21 +30,6 @@
#include "util/u_format.h"
-static unsigned si_array_mode(unsigned mode)
-{
- switch (mode) {
- case RADEON_SURF_MODE_LINEAR_ALIGNED:
- return V_009910_ARRAY_LINEAR_ALIGNED;
- case RADEON_SURF_MODE_1D:
- return V_009910_ARRAY_1D_TILED_THIN1;
- case RADEON_SURF_MODE_2D:
- return V_009910_ARRAY_2D_TILED_THIN1;
- default:
- case RADEON_SURF_MODE_LINEAR:
- return V_009910_ARRAY_LINEAR_GENERAL;
- }
-}
-
static uint32_t si_micro_tile_mode(struct si_screen *sscreen, unsigned tile_mode)
{
if (sscreen->b.info.si_tile_mode_array_valid) {
@@ -240,11 +225,6 @@ void si_dma_copy(struct pipe_context *ctx,
goto fallback;
}
- /* TODO: Implement DMA copy for CIK */
- if (sctx->b.chip_class >= CIK) {
- goto fallback;
- }
-
if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
si_dma_copy_buffer(sctx, dst, src, dst_x, src_box->x, src_box->width);
return;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index e68c30e8c7c..53ae71a8c92 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -251,6 +251,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+ case PIPE_CAP_TGSI_TEXCOORD:
return 1;
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
@@ -286,13 +287,13 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
case PIPE_CAP_VERTEX_COLOR_CLAMPED:
case PIPE_CAP_USER_VERTEX_BUFFERS:
- case PIPE_CAP_TGSI_TEXCOORD:
case PIPE_CAP_FAKE_SW_MSAA:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_SAMPLER_VIEW_TARGET:
case PIPE_CAP_VERTEXID_NOBASE:
+ case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
return 0;
case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
@@ -451,6 +452,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
return 0;
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 1;
}
return 0;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index f98c7a83744..2d67342f160 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -237,6 +237,15 @@ struct si_context {
unsigned spi_tmpring_size;
};
+/* cik_sdma.c */
+void cik_sdma_copy(struct pipe_context *ctx,
+ struct pipe_resource *dst,
+ unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src,
+ unsigned src_level,
+ const struct pipe_box *src_box);
+
/* si_blit.c */
void si_init_blit_functions(struct si_context *sctx);
void si_flush_depth_textures(struct si_context *sctx,
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 89f02ab0410..47e5f96cbed 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -128,21 +128,10 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
case TGSI_SEMANTIC_CLIPDIST:
assert(index <= 1);
return 2 + index;
- case TGSI_SEMANTIC_CLIPVERTEX:
- return 4;
- case TGSI_SEMANTIC_COLOR:
- assert(index <= 1);
- return 5 + index;
- case TGSI_SEMANTIC_BCOLOR:
- assert(index <= 1);
- return 7 + index;
- case TGSI_SEMANTIC_FOG:
- return 9;
- case TGSI_SEMANTIC_EDGEFLAG:
- return 10;
case TGSI_SEMANTIC_GENERIC:
- assert(index <= 63-11);
- return 11 + index;
+ assert(index <= 63-4);
+ return 4 + index;
+
default:
assert(0);
return 63;
@@ -1183,6 +1172,7 @@ handle_semantic:
continue;
case TGSI_SEMANTIC_PRIMID:
case TGSI_SEMANTIC_FOG:
+ case TGSI_SEMANTIC_TEXCOORD:
case TGSI_SEMANTIC_GENERIC:
target = V_008DFC_SQ_EXP_PARAM + param_count;
shader->vs_output_param_offset[i] = param_count;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 7f0fdd599dc..6c18836d189 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -44,6 +44,21 @@ static void si_init_atom(struct r600_atom *atom, struct r600_atom **list_elem,
*list_elem = atom;
}
+unsigned si_array_mode(unsigned mode)
+{
+ switch (mode) {
+ case RADEON_SURF_MODE_LINEAR_ALIGNED:
+ return V_009910_ARRAY_LINEAR_ALIGNED;
+ case RADEON_SURF_MODE_1D:
+ return V_009910_ARRAY_1D_TILED_THIN1;
+ case RADEON_SURF_MODE_2D:
+ return V_009910_ARRAY_2D_TILED_THIN1;
+ default:
+ case RADEON_SURF_MODE_LINEAR:
+ return V_009910_ARRAY_LINEAR_GENERAL;
+ }
+}
+
uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex)
{
if (sscreen->b.chip_class == CIK &&
@@ -636,18 +651,14 @@ static void *si_create_rs_state(struct pipe_context *ctx,
rs->offset_units = state->offset_units;
rs->offset_scale = state->offset_scale * 12.0f;
- tmp = S_0286D4_FLAT_SHADE_ENA(1);
- if (state->sprite_coord_enable) {
- tmp |= S_0286D4_PNT_SPRITE_ENA(1) |
- S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
- S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
- S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
- S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1);
- if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) {
- tmp |= S_0286D4_PNT_SPRITE_TOP_1(1);
- }
- }
- si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0, tmp);
+ si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0,
+ S_0286D4_FLAT_SHADE_ENA(1) |
+ S_0286D4_PNT_SPRITE_ENA(1) |
+ S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
+ S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
+ S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
+ S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |
+ S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT));
/* point size 12.4 fixed point */
tmp = (unsigned)(state->point_size * 8.0);
@@ -2910,11 +2921,16 @@ void si_init_state_functions(struct si_context *sctx)
sctx->b.b.set_polygon_stipple = si_set_polygon_stipple;
sctx->b.b.set_min_samples = si_set_min_samples;
- sctx->b.dma_copy = si_dma_copy;
sctx->b.set_occlusion_query_state = si_set_occlusion_query_state;
sctx->b.need_gfx_cs_space = si_need_gfx_cs_space;
sctx->b.b.draw_vbo = si_draw_vbo;
+
+ if (sctx->b.chip_class >= CIK) {
+ sctx->b.dma_copy = cik_sdma_copy;
+ } else {
+ sctx->b.dma_copy = si_dma_copy;
+ }
}
static void
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 2f8a943846a..5e68b162137 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -261,6 +261,7 @@ unsigned cik_bank_wh(unsigned bankwh);
unsigned cik_db_pipe_config(struct si_screen *sscreen, unsigned tile_mode);
unsigned cik_macro_tile_aspect(unsigned macro_tile_aspect);
unsigned cik_tile_split(unsigned tile_split);
+unsigned si_array_mode(unsigned mode);
uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex);
unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 1bbc6b3ca7a..208c8523ef1 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -182,8 +182,13 @@ static void si_shader_vs(struct si_shader *shader)
for (nparams = 0, i = 0 ; i < info->num_outputs; i++) {
switch (info->output_semantic_name[i]) {
case TGSI_SEMANTIC_CLIPVERTEX:
+ case TGSI_SEMANTIC_CLIPDIST:
+ case TGSI_SEMANTIC_CULLDIST:
case TGSI_SEMANTIC_POSITION:
case TGSI_SEMANTIC_PSIZE:
+ case TGSI_SEMANTIC_EDGEFLAG:
+ case TGSI_SEMANTIC_VIEWPORT_INDEX:
+ case TGSI_SEMANTIC_LAYER:
break;
default:
nparams++;
@@ -351,21 +356,25 @@ static INLINE void si_shader_selector_key(struct pipe_context *ctx,
union si_shader_key *key)
{
struct si_context *sctx = (struct si_context *)ctx;
- memset(key, 0, sizeof(*key));
+ unsigned i;
- if (sel->type == PIPE_SHADER_VERTEX) {
- unsigned i;
- if (!sctx->vertex_elements)
- return;
+ memset(key, 0, sizeof(*key));
- for (i = 0; i < sctx->vertex_elements->count; ++i)
- key->vs.instance_divisors[i] = sctx->vertex_elements->elements[i].instance_divisor;
+ switch (sel->type) {
+ case PIPE_SHADER_VERTEX:
+ if (sctx->vertex_elements)
+ for (i = 0; i < sctx->vertex_elements->count; ++i)
+ key->vs.instance_divisors[i] =
+ sctx->vertex_elements->elements[i].instance_divisor;
if (sctx->gs_shader) {
key->vs.as_es = 1;
key->vs.gs_used_inputs = sctx->gs_shader->gs_used_inputs;
}
- } else if (sel->type == PIPE_SHADER_FRAGMENT) {
+ break;
+ case PIPE_SHADER_GEOMETRY:
+ break;
+ case PIPE_SHADER_FRAGMENT: {
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
if (sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
@@ -393,11 +402,14 @@ static INLINE void si_shader_selector_key(struct pipe_context *ctx,
}
key->ps.alpha_func = PIPE_FUNC_ALWAYS;
-
/* Alpha-test should be disabled if colorbuffer 0 is integer. */
if (sctx->queued.named.dsa &&
!sctx->framebuffer.cb0_is_integer)
key->ps.alpha_func = sctx->queued.named.dsa->alpha_func;
+ break;
+ }
+ default:
+ assert(0);
}
}
@@ -580,15 +592,22 @@ static void si_delete_shader_selector(struct pipe_context *ctx,
while (p) {
c = p->next_variant;
- if (sel->type == PIPE_SHADER_GEOMETRY) {
+ switch (sel->type) {
+ case PIPE_SHADER_VERTEX:
+ if (p->key.vs.as_es)
+ si_pm4_delete_state(sctx, es, p->pm4);
+ else
+ si_pm4_delete_state(sctx, vs, p->pm4);
+ break;
+ case PIPE_SHADER_GEOMETRY:
si_pm4_delete_state(sctx, gs, p->pm4);
si_pm4_delete_state(sctx, vs, p->gs_copy_shader->pm4);
- } else if (sel->type == PIPE_SHADER_FRAGMENT)
+ break;
+ case PIPE_SHADER_FRAGMENT:
si_pm4_delete_state(sctx, ps, p->pm4);
- else if (p->key.vs.as_es)
- si_pm4_delete_state(sctx, es, p->pm4);
- else
- si_pm4_delete_state(sctx, vs, p->pm4);
+ break;
+ }
+
si_shader_destroy(ctx, p);
free(p);
p = c;
@@ -661,8 +680,9 @@ bcolor:
(interpolate == TGSI_INTERPOLATE_COLOR && sctx->flatshade))
tmp |= S_028644_FLAT_SHADE(1);
- if (name == TGSI_SEMANTIC_GENERIC &&
- sctx->sprite_coord_enable & (1 << index)) {
+ if (name == TGSI_SEMANTIC_PCOORD ||
+ (name == TGSI_SEMANTIC_TEXCOORD &&
+ sctx->sprite_coord_enable & (1 << index))) {
tmp |= S_028644_PT_SPRITE_TEX(1);
}
@@ -835,8 +855,15 @@ static void si_update_spi_tmpring_size(struct si_context *sctx)
si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);
if (si_update_scratch_buffer(sctx, sctx->gs_shader))
si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
- if (si_update_scratch_buffer(sctx, sctx->vs_shader))
- si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
+
+ /* VS can be bound as ES or VS. */
+ if (sctx->gs_shader) {
+ if (si_update_scratch_buffer(sctx, sctx->vs_shader))
+ si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
+ } else {
+ if (si_update_scratch_buffer(sctx, sctx->vs_shader))
+ si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
+ }
}
/* The LLVM shader backend should be reporting aligned scratch_sizes. */
diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
index afe011b15c7..35d5ee232a0 100644
--- a/src/gallium/drivers/radeonsi/sid.h
+++ b/src/gallium/drivers/radeonsi/sid.h
@@ -4516,6 +4516,13 @@
#define V_009910_ADDR_SURF_8_BANK 0x02
#define V_009910_ADDR_SURF_16_BANK 0x03
/* CIK */
+#define S_009910_MICRO_TILE_MODE_NEW(x) (((x) & 0x07) << 22)
+#define G_009910_MICRO_TILE_MODE_NEW(x) (((x) >> 22) & 0x07)
+#define C_009910_MICRO_TILE_MODE_NEW(x) 0xFE3FFFFF
+#define V_009910_ADDR_SURF_DISPLAY_MICRO_TILING 0x00
+#define V_009910_ADDR_SURF_THIN_MICRO_TILING 0x01
+#define V_009910_ADDR_SURF_DEPTH_MICRO_TILING 0x02
+#define V_009910_ADDR_SURF_ROTATED_MICRO_TILING 0x03
#define R_00B01C_SPI_SHADER_PGM_RSRC3_PS 0x00B01C
#define S_00B01C_CU_EN(x) (((x) & 0xFFFF) << 0)
#define G_00B01C_CU_EN(x) (((x) >> 0) & 0xFFFF)
@@ -8696,5 +8703,29 @@
#define SI_DMA_PACKET_CONSTANT_FILL 0xd
#define SI_DMA_PACKET_NOP 0xf
+/* CIK async DMA packets */
+#define CIK_SDMA_PACKET(op, sub_op, n) ((((n) & 0xFFFF) << 16) | \
+ (((sub_op) & 0xFF) << 8) | \
+ (((op) & 0xFF) << 0))
+/* CIK async DMA packet types */
+#define CIK_SDMA_OPCODE_NOP 0x0
+#define CIK_SDMA_OPCODE_COPY 0x1
+#define CIK_SDMA_COPY_SUB_OPCODE_LINEAR 0x0
+#define CIK_SDMA_COPY_SUB_OPCODE_TILED 0x1
+#define CIK_SDMA_COPY_SUB_OPCODE_SOA 0x3
+#define CIK_SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW 0x4
+#define CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW 0x5
+#define CIK_SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW 0x6
+#define CIK_SDMA_OPCODE_WRITE 0x2
+#define SDMA_WRITE_SUB_OPCODE_LINEAR 0x0
+#define SDMA_WRTIE_SUB_OPCODE_TILED 0x1
+#define CIK_SDMA_OPCODE_INDIRECT_BUFFER 0x4
+#define CIK_SDMA_PACKET_FENCE 0x5
+#define CIK_SDMA_PACKET_TRAP 0x6
+#define CIK_SDMA_PACKET_SEMAPHORE 0x7
+#define CIK_SDMA_PACKET_CONSTANT_FILL 0xb
+#define CIK_SDMA_PACKET_SRBM_WRITE 0xe
+#define CIK_SDMA_COPY_MAX_SIZE 0x1fffff
+
#endif /* _SID_H */
diff --git a/src/gallium/drivers/rbug/rbug_public.h b/src/gallium/drivers/rbug/rbug_public.h
index b66740b49cd..83f9c94e31f 100644
--- a/src/gallium/drivers/rbug/rbug_public.h
+++ b/src/gallium/drivers/rbug/rbug_public.h
@@ -28,6 +28,10 @@
#ifndef RBUG_PUBLIC_H
#define RBUG_PUBLIC_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
struct pipe_screen;
struct pipe_context;
@@ -37,4 +41,8 @@ rbug_screen_create(struct pipe_screen *screen);
boolean
rbug_enabled(void);
+#ifdef __cplusplus
+}
+#endif
+
#endif /* RBUG_PUBLIC_H */
diff --git a/src/gallium/drivers/softpipe/sp_public.h b/src/gallium/drivers/softpipe/sp_public.h
index 62d0903d87a..88a9b5e6643 100644
--- a/src/gallium/drivers/softpipe/sp_public.h
+++ b/src/gallium/drivers/softpipe/sp_public.h
@@ -1,10 +1,18 @@
#ifndef SP_PUBLIC_H
#define SP_PUBLIC_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
struct pipe_screen;
struct sw_winsys;
struct pipe_screen *
softpipe_create_screen(struct sw_winsys *winsys);
+#ifdef __cplusplus
+}
+#endif
+
#endif
diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c
index e77387082bc..76105b4c0ec 100644
--- a/src/gallium/drivers/softpipe/sp_query.c
+++ b/src/gallium/drivers/softpipe/sp_query.c
@@ -277,7 +277,7 @@ softpipe_check_render_cond(struct softpipe_context *sp)
b = pipe->get_query_result(pipe, sp->render_cond_query, wait,
(void*)&result);
if (b)
- return (!result == sp->render_cond_cond);
+ return (!result) == sp->render_cond_cond;
else
return TRUE;
}
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index d289e28a6f8..a688d319bb8 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -191,7 +191,9 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_ENDIANNESS:
return PIPE_ENDIAN_NATIVE;
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
+ return 4;
case PIPE_CAP_TEXTURE_GATHER_SM5:
+ return 1;
case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_SAMPLE_SHADING:
@@ -200,13 +202,15 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
return 1;
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
- case PIPE_CAP_SAMPLER_VIEW_TARGET:
return 0;
+ case PIPE_CAP_SAMPLER_VIEW_TARGET:
+ return 1;
case PIPE_CAP_FAKE_SW_MSAA:
return 1;
case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
+ return -32;
case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
- return 0;
+ return 31;
case PIPE_CAP_DRAW_INDIRECT:
return 1;
@@ -237,6 +241,7 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
return 0;
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
+ case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
return 0;
}
/* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c
index e56fb5b1485..d7a3360713f 100644
--- a/src/gallium/drivers/softpipe/sp_state_sampler.c
+++ b/src/gallium/drivers/softpipe/sp_state_sampler.c
@@ -202,7 +202,7 @@ prepare_shader_sampling(
struct pipe_resource *res = view->texture;
int j;
- if (res->target != PIPE_BUFFER) {
+ if (view->target != PIPE_BUFFER) {
first_level = view->u.tex.first_level;
last_level = view->u.tex.last_level;
assert(first_level <= last_level);
@@ -214,15 +214,17 @@ prepare_shader_sampling(
row_stride[j] = sp_tex->stride[j];
img_stride[j] = sp_tex->img_stride[j];
}
- if (res->target == PIPE_TEXTURE_1D_ARRAY ||
- res->target == PIPE_TEXTURE_2D_ARRAY ||
- res->target == PIPE_TEXTURE_CUBE_ARRAY) {
+ if (view->target == PIPE_TEXTURE_1D_ARRAY ||
+ view->target == PIPE_TEXTURE_2D_ARRAY ||
+ view->target == PIPE_TEXTURE_CUBE ||
+ view->target == PIPE_TEXTURE_CUBE_ARRAY) {
num_layers = view->u.tex.last_layer - view->u.tex.first_layer + 1;
for (j = first_level; j <= last_level; j++) {
mip_offsets[j] += view->u.tex.first_layer *
sp_tex->img_stride[j];
}
- if (res->target == PIPE_TEXTURE_CUBE_ARRAY) {
+ if (view->target == PIPE_TEXTURE_CUBE ||
+ view->target == PIPE_TEXTURE_CUBE_ARRAY) {
assert(num_layers % 6 == 0);
}
assert(view->u.tex.first_layer <= view->u.tex.last_layer);
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 68dcf57240d..1010b63de2c 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -131,68 +131,80 @@ repeat(int coord, unsigned size)
* \param icoord returns the integer texcoords
*/
static void
-wrap_nearest_repeat(float s, unsigned size, int *icoord)
+wrap_nearest_repeat(float s, unsigned size, int offset, int *icoord)
{
/* s limited to [0,1) */
/* i limited to [0,size-1] */
int i = util_ifloor(s * size);
- *icoord = repeat(i, size);
+ *icoord = repeat(i + offset, size);
}
static void
-wrap_nearest_clamp(float s, unsigned size, int *icoord)
+wrap_nearest_clamp(float s, unsigned size, int offset, int *icoord)
{
/* s limited to [0,1] */
/* i limited to [0,size-1] */
+ s *= size;
+ s += offset;
if (s <= 0.0F)
*icoord = 0;
- else if (s >= 1.0F)
+ else if (s >= size)
*icoord = size - 1;
else
- *icoord = util_ifloor(s * size);
+ *icoord = util_ifloor(s);
}
static void
-wrap_nearest_clamp_to_edge(float s, unsigned size, int *icoord)
+wrap_nearest_clamp_to_edge(float s, unsigned size, int offset, int *icoord)
{
/* s limited to [min,max] */
/* i limited to [0, size-1] */
- const float min = 1.0F / (2.0F * size);
- const float max = 1.0F - min;
+ const float min = 0.5F;
+ const float max = (float)size - 0.5F;
+
+ s *= size;
+ s += offset;
+
if (s < min)
*icoord = 0;
else if (s > max)
*icoord = size - 1;
else
- *icoord = util_ifloor(s * size);
+ *icoord = util_ifloor(s);
}
static void
-wrap_nearest_clamp_to_border(float s, unsigned size, int *icoord)
+wrap_nearest_clamp_to_border(float s, unsigned size, int offset, int *icoord)
{
/* s limited to [min,max] */
/* i limited to [-1, size] */
- const float min = -1.0F / (2.0F * size);
- const float max = 1.0F - min;
+ const float min = -0.5F;
+ const float max = size + 0.5F;
+
+ s *= size;
+ s += offset;
if (s <= min)
*icoord = -1;
else if (s >= max)
*icoord = size;
else
- *icoord = util_ifloor(s * size);
+ *icoord = util_ifloor(s);
}
-
static void
-wrap_nearest_mirror_repeat(float s, unsigned size, int *icoord)
+wrap_nearest_mirror_repeat(float s, unsigned size, int offset, int *icoord)
{
const float min = 1.0F / (2.0F * size);
const float max = 1.0F - min;
- const int flr = util_ifloor(s);
- float u = frac(s);
+ int flr;
+ float u;
+
+ s += (float)offset / size;
+ flr = util_ifloor(s);
+ u = frac(s);
if (flr & 1)
u = 1.0F - u;
if (u < min)
@@ -205,51 +217,52 @@ wrap_nearest_mirror_repeat(float s, unsigned size, int *icoord)
static void
-wrap_nearest_mirror_clamp(float s, unsigned size, int *icoord)
+wrap_nearest_mirror_clamp(float s, unsigned size, int offset, int *icoord)
{
/* s limited to [0,1] */
/* i limited to [0,size-1] */
- const float u = fabsf(s);
+ const float u = fabsf(s * size + offset);
if (u <= 0.0F)
*icoord = 0;
- else if (u >= 1.0F)
+ else if (u >= size)
*icoord = size - 1;
else
- *icoord = util_ifloor(u * size);
+ *icoord = util_ifloor(u);
}
static void
-wrap_nearest_mirror_clamp_to_edge(float s, unsigned size, int *icoord)
+wrap_nearest_mirror_clamp_to_edge(float s, unsigned size, int offset, int *icoord)
{
/* s limited to [min,max] */
/* i limited to [0, size-1] */
- const float min = 1.0F / (2.0F * size);
- const float max = 1.0F - min;
- const float u = fabsf(s);
+ const float min = 0.5F;
+ const float max = (float)size - 0.5F;
+ const float u = fabsf(s * size + offset);
+
if (u < min)
*icoord = 0;
else if (u > max)
*icoord = size - 1;
else
- *icoord = util_ifloor(u * size);
+ *icoord = util_ifloor(u);
}
static void
-wrap_nearest_mirror_clamp_to_border(float s, unsigned size, int *icoord)
+wrap_nearest_mirror_clamp_to_border(float s, unsigned size, int offset, int *icoord)
{
- /* s limited to [min,max] */
- /* i limited to [0, size-1] */
- const float min = -1.0F / (2.0F * size);
- const float max = 1.0F - min;
- const float u = fabsf(s);
+ /* u limited to [-0.5, size-0.5] */
+ const float min = -0.5F;
+ const float max = (float)size + 0.5F;
+ const float u = fabsf(s * size + offset);
+
if (u < min)
*icoord = -1;
else if (u > max)
*icoord = size;
else
- *icoord = util_ifloor(u * size);
+ *icoord = util_ifloor(u);
}
@@ -264,22 +277,23 @@ wrap_nearest_mirror_clamp_to_border(float s, unsigned size, int *icoord)
* \param icoord returns the computed integer texture coord
*/
static void
-wrap_linear_repeat(float s, unsigned size,
+wrap_linear_repeat(float s, unsigned size, int offset,
int *icoord0, int *icoord1, float *w)
{
float u = s * size - 0.5F;
- *icoord0 = repeat(util_ifloor(u), size);
+ *icoord0 = repeat(util_ifloor(u) + offset, size);
*icoord1 = repeat(*icoord0 + 1, size);
*w = frac(u);
}
static void
-wrap_linear_clamp(float s, unsigned size,
+wrap_linear_clamp(float s, unsigned size, int offset,
int *icoord0, int *icoord1, float *w)
{
- float u = CLAMP(s, 0.0F, 1.0F);
- u = u * size - 0.5f;
+ float u = CLAMP(s * size + offset, 0.0F, (float)size);
+
+ u = u - 0.5f;
*icoord0 = util_ifloor(u);
*icoord1 = *icoord0 + 1;
*w = frac(u);
@@ -287,11 +301,11 @@ wrap_linear_clamp(float s, unsigned size,
static void
-wrap_linear_clamp_to_edge(float s, unsigned size,
+wrap_linear_clamp_to_edge(float s, unsigned size, int offset,
int *icoord0, int *icoord1, float *w)
{
- float u = CLAMP(s, 0.0F, 1.0F);
- u = u * size - 0.5f;
+ float u = CLAMP(s * size + offset, 0.0F, (float)size);
+ u = u - 0.5f;
*icoord0 = util_ifloor(u);
*icoord1 = *icoord0 + 1;
if (*icoord0 < 0)
@@ -303,13 +317,13 @@ wrap_linear_clamp_to_edge(float s, unsigned size,
static void
-wrap_linear_clamp_to_border(float s, unsigned size,
+wrap_linear_clamp_to_border(float s, unsigned size, int offset,
int *icoord0, int *icoord1, float *w)
{
- const float min = -1.0F / (2.0F * size);
- const float max = 1.0F - min;
- float u = CLAMP(s, min, max);
- u = u * size - 0.5f;
+ const float min = -0.5F;
+ const float max = (float)size + 0.5F;
+ float u = CLAMP(s * size + offset, min, max);
+ u = u - 0.5f;
*icoord0 = util_ifloor(u);
*icoord1 = *icoord0 + 1;
*w = frac(u);
@@ -317,11 +331,15 @@ wrap_linear_clamp_to_border(float s, unsigned size,
static void
-wrap_linear_mirror_repeat(float s, unsigned size,
+wrap_linear_mirror_repeat(float s, unsigned size, int offset,
int *icoord0, int *icoord1, float *w)
{
- const int flr = util_ifloor(s);
- float u = frac(s);
+ int flr;
+ float u;
+
+ s += (float)offset / size;
+ flr = util_ifloor(s);
+ u = frac(s);
if (flr & 1)
u = 1.0F - u;
u = u * size - 0.5F;
@@ -336,14 +354,12 @@ wrap_linear_mirror_repeat(float s, unsigned size,
static void
-wrap_linear_mirror_clamp(float s, unsigned size,
+wrap_linear_mirror_clamp(float s, unsigned size, int offset,
int *icoord0, int *icoord1, float *w)
{
- float u = fabsf(s);
- if (u >= 1.0F)
+ float u = fabsf(s * size + offset);
+ if (u >= size)
u = (float) size;
- else
- u *= size;
u -= 0.5F;
*icoord0 = util_ifloor(u);
*icoord1 = *icoord0 + 1;
@@ -352,14 +368,12 @@ wrap_linear_mirror_clamp(float s, unsigned size,
static void
-wrap_linear_mirror_clamp_to_edge(float s, unsigned size,
+wrap_linear_mirror_clamp_to_edge(float s, unsigned size, int offset,
int *icoord0, int *icoord1, float *w)
{
- float u = fabsf(s);
- if (u >= 1.0F)
+ float u = fabsf(s * size + offset);
+ if (u >= size)
u = (float) size;
- else
- u *= size;
u -= 0.5F;
*icoord0 = util_ifloor(u);
*icoord1 = *icoord0 + 1;
@@ -372,18 +386,16 @@ wrap_linear_mirror_clamp_to_edge(float s, unsigned size,
static void
-wrap_linear_mirror_clamp_to_border(float s, unsigned size,
+wrap_linear_mirror_clamp_to_border(float s, unsigned size, int offset,
int *icoord0, int *icoord1, float *w)
{
- const float min = -1.0F / (2.0F * size);
- const float max = 1.0F - min;
- float u = fabsf(s);
+ const float min = -0.5F;
+ const float max = size + 0.5F;
+ float u = fabsf(s * size + offset);
if (u <= min)
- u = min * size;
+ u = min;
else if (u >= max)
- u = max * size;
- else
- u *= size;
+ u = max;
u -= 0.5F;
*icoord0 = util_ifloor(u);
*icoord1 = *icoord0 + 1;
@@ -395,10 +407,10 @@ wrap_linear_mirror_clamp_to_border(float s, unsigned size,
* PIPE_TEX_WRAP_CLAMP for nearest sampling, unnormalized coords.
*/
static void
-wrap_nearest_unorm_clamp(float s, unsigned size, int *icoord)
+wrap_nearest_unorm_clamp(float s, unsigned size, int offset, int *icoord)
{
int i = util_ifloor(s);
- *icoord = CLAMP(i, 0, (int) size-1);
+ *icoord = CLAMP(i + offset, 0, (int) size-1);
}
@@ -406,9 +418,9 @@ wrap_nearest_unorm_clamp(float s, unsigned size, int *icoord)
* PIPE_TEX_WRAP_CLAMP_TO_BORDER for nearest sampling, unnormalized coords.
*/
static void
-wrap_nearest_unorm_clamp_to_border(float s, unsigned size, int *icoord)
+wrap_nearest_unorm_clamp_to_border(float s, unsigned size, int offset, int *icoord)
{
- *icoord = util_ifloor( CLAMP(s, -0.5F, (float) size + 0.5F) );
+ *icoord = util_ifloor( CLAMP(s + offset, -0.5F, (float) size + 0.5F) );
}
@@ -416,9 +428,9 @@ wrap_nearest_unorm_clamp_to_border(float s, unsigned size, int *icoord)
* PIPE_TEX_WRAP_CLAMP_TO_EDGE for nearest sampling, unnormalized coords.
*/
static void
-wrap_nearest_unorm_clamp_to_edge(float s, unsigned size, int *icoord)
+wrap_nearest_unorm_clamp_to_edge(float s, unsigned size, int offset, int *icoord)
{
- *icoord = util_ifloor( CLAMP(s, 0.5F, (float) size - 0.5F) );
+ *icoord = util_ifloor( CLAMP(s + offset, 0.5F, (float) size - 0.5F) );
}
@@ -426,11 +438,11 @@ wrap_nearest_unorm_clamp_to_edge(float s, unsigned size, int *icoord)
* PIPE_TEX_WRAP_CLAMP for linear sampling, unnormalized coords.
*/
static void
-wrap_linear_unorm_clamp(float s, unsigned size,
+wrap_linear_unorm_clamp(float s, unsigned size, int offset,
int *icoord0, int *icoord1, float *w)
{
/* Not exactly what the spec says, but it matches NVIDIA output */
- float u = CLAMP(s - 0.5F, 0.0f, (float) size - 1.0f);
+ float u = CLAMP(s + offset - 0.5F, 0.0f, (float) size - 1.0f);
*icoord0 = util_ifloor(u);
*icoord1 = *icoord0 + 1;
*w = frac(u);
@@ -441,10 +453,10 @@ wrap_linear_unorm_clamp(float s, unsigned size,
* PIPE_TEX_WRAP_CLAMP_TO_BORDER for linear sampling, unnormalized coords.
*/
static void
-wrap_linear_unorm_clamp_to_border(float s, unsigned size,
+wrap_linear_unorm_clamp_to_border(float s, unsigned size, int offset,
int *icoord0, int *icoord1, float *w)
{
- float u = CLAMP(s, -0.5F, (float) size + 0.5F);
+ float u = CLAMP(s + offset, -0.5F, (float) size + 0.5F);
u -= 0.5F;
*icoord0 = util_ifloor(u);
*icoord1 = *icoord0 + 1;
@@ -458,10 +470,10 @@ wrap_linear_unorm_clamp_to_border(float s, unsigned size,
* PIPE_TEX_WRAP_CLAMP_TO_EDGE for linear sampling, unnormalized coords.
*/
static void
-wrap_linear_unorm_clamp_to_edge(float s, unsigned size,
+wrap_linear_unorm_clamp_to_edge(float s, unsigned size, int offset,
int *icoord0, int *icoord1, float *w)
{
- float u = CLAMP(s, +0.5F, (float) size - 0.5F);
+ float u = CLAMP(s + offset, +0.5F, (float) size - 0.5F);
u -= 0.5F;
*icoord0 = util_ifloor(u);
*icoord1 = *icoord0 + 1;
@@ -474,11 +486,11 @@ wrap_linear_unorm_clamp_to_edge(float s, unsigned size,
/**
* Do coordinate to array index conversion. For array textures.
*/
-static INLINE void
-wrap_array_layer(float coord, unsigned size, int *layer)
+static INLINE int
+coord_to_layer(float coord, unsigned first_layer, unsigned last_layer)
{
int c = util_ifloor(coord + 0.5F);
- *layer = CLAMP(c, 0, (int) size - 1);
+ return CLAMP(c, (int)first_layer, (int)last_layer);
}
@@ -757,61 +769,6 @@ get_next_ycoord(unsigned face, unsigned fall_off_index, int max, int xc, int yc)
}
-static INLINE const float *
-get_texel_cube_seamless(const struct sp_sampler_view *sp_sview,
- union tex_tile_address addr, int x, int y,
- float *corner)
-{
- const struct pipe_resource *texture = sp_sview->base.texture;
- unsigned level = addr.bits.level;
- unsigned face = addr.bits.face;
- int new_x, new_y, max_x;
-
- max_x = (int) u_minify(texture->width0, level);
-
- assert(texture->width0 == texture->height0);
- new_x = x;
- new_y = y;
-
- /* change the face */
- if (x < 0) {
- /*
- * Cheat with corners. They are difficult and I believe because we don't get
- * per-pixel faces we can actually have multiple corner texels per pixel,
- * which screws things up majorly in any case (as the per spec behavior is
- * to average the 3 remaining texels, which we might not have).
- * Hence just make sure that the 2nd coord is clamped, will simply pick the
- * sample which would have fallen off the x coord, but not y coord.
- * So the filter weight of the samples will be wrong, but at least this
- * ensures that only valid texels near the corner are used.
- */
- if (y < 0 || y >= max_x) {
- y = CLAMP(y, 0, max_x - 1);
- }
- new_x = get_next_xcoord(face, 0, max_x -1, x, y);
- new_y = get_next_ycoord(face, 0, max_x -1, x, y);
- face = get_next_face(face, 0);
- } else if (x >= max_x) {
- if (y < 0 || y >= max_x) {
- y = CLAMP(y, 0, max_x - 1);
- }
- new_x = get_next_xcoord(face, 1, max_x -1, x, y);
- new_y = get_next_ycoord(face, 1, max_x -1, x, y);
- face = get_next_face(face, 1);
- } else if (y < 0) {
- new_x = get_next_xcoord(face, 2, max_x -1, x, y);
- new_y = get_next_ycoord(face, 2, max_x -1, x, y);
- face = get_next_face(face, 2);
- } else if (y >= max_x) {
- new_x = get_next_xcoord(face, 3, max_x -1, x, y);
- new_y = get_next_ycoord(face, 3, max_x -1, x, y);
- face = get_next_face(face, 3);
- }
-
- addr.bits.face = face;
- return get_texel_2d_no_border( sp_sview, addr, new_x, new_y );
-}
-
/* Gather a quad of adjacent texels within a tile:
*/
static INLINE void
@@ -948,6 +905,60 @@ get_texel_2d_array(const struct sp_sampler_view *sp_sview,
}
+static INLINE const float *
+get_texel_cube_seamless(const struct sp_sampler_view *sp_sview,
+ union tex_tile_address addr, int x, int y,
+ float *corner, int layer, unsigned face)
+{
+ const struct pipe_resource *texture = sp_sview->base.texture;
+ unsigned level = addr.bits.level;
+ int new_x, new_y, max_x;
+
+ max_x = (int) u_minify(texture->width0, level);
+
+ assert(texture->width0 == texture->height0);
+ new_x = x;
+ new_y = y;
+
+ /* change the face */
+ if (x < 0) {
+ /*
+ * Cheat with corners. They are difficult and I believe because we don't get
+ * per-pixel faces we can actually have multiple corner texels per pixel,
+ * which screws things up majorly in any case (as the per spec behavior is
+ * to average the 3 remaining texels, which we might not have).
+ * Hence just make sure that the 2nd coord is clamped, will simply pick the
+ * sample which would have fallen off the x coord, but not y coord.
+ * So the filter weight of the samples will be wrong, but at least this
+ * ensures that only valid texels near the corner are used.
+ */
+ if (y < 0 || y >= max_x) {
+ y = CLAMP(y, 0, max_x - 1);
+ }
+ new_x = get_next_xcoord(face, 0, max_x -1, x, y);
+ new_y = get_next_ycoord(face, 0, max_x -1, x, y);
+ face = get_next_face(face, 0);
+ } else if (x >= max_x) {
+ if (y < 0 || y >= max_x) {
+ y = CLAMP(y, 0, max_x - 1);
+ }
+ new_x = get_next_xcoord(face, 1, max_x -1, x, y);
+ new_y = get_next_ycoord(face, 1, max_x -1, x, y);
+ face = get_next_face(face, 1);
+ } else if (y < 0) {
+ new_x = get_next_xcoord(face, 2, max_x -1, x, y);
+ new_y = get_next_ycoord(face, 2, max_x -1, x, y);
+ face = get_next_face(face, 2);
+ } else if (y >= max_x) {
+ new_x = get_next_xcoord(face, 3, max_x -1, x, y);
+ new_y = get_next_ycoord(face, 3, max_x -1, x, y);
+ face = get_next_face(face, 3);
+ }
+
+ return get_texel_3d_no_border(sp_sview, addr, new_x, new_y, layer + face);
+}
+
+
/* Get texel pointer for cube array texture */
static INLINE const float *
get_texel_cube_array(const struct sp_sampler_view *sp_sview,
@@ -1008,22 +1019,18 @@ print_sample_4(const char *function, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZ
static INLINE void
img_filter_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview,
struct sp_sampler *sp_samp,
- float s,
- float t,
- float p,
- unsigned level,
- unsigned face_id,
+ const struct img_filter_args *args,
float *rgba)
{
- unsigned xpot = pot_level_size(sp_sview->xpot, level);
- unsigned ypot = pot_level_size(sp_sview->ypot, level);
+ unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
+ unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
int xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */
int ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */
union tex_tile_address addr;
int c;
- float u = s * xpot - 0.5F;
- float v = t * ypot - 0.5F;
+ float u = (args->s * xpot - 0.5F) + args->offset[0];
+ float v = (args->t * ypot - 0.5F) + args->offset[1];
int uflr = util_ifloor(u);
int vflr = util_ifloor(v);
@@ -1037,7 +1044,7 @@ img_filter_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview,
const float *tx[4];
addr.value = 0;
- addr.bits.level = level;
+ addr.bits.level = args->level;
/* Can we fetch all four at once:
*/
@@ -1066,21 +1073,17 @@ img_filter_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview,
static INLINE void
img_filter_2d_nearest_repeat_POT(struct sp_sampler_view *sp_sview,
struct sp_sampler *sp_samp,
- float s,
- float t,
- float p,
- unsigned level,
- unsigned face_id,
+ const struct img_filter_args *args,
float rgba[TGSI_QUAD_SIZE])
{
- unsigned xpot = pot_level_size(sp_sview->xpot, level);
- unsigned ypot = pot_level_size(sp_sview->ypot, level);
+ unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
+ unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
const float *out;
union tex_tile_address addr;
int c;
- float u = s * xpot;
- float v = t * ypot;
+ float u = args->s * xpot + args->offset[0];
+ float v = args->t * ypot + args->offset[1];
int uflr = util_ifloor(u);
int vflr = util_ifloor(v);
@@ -1089,7 +1092,7 @@ img_filter_2d_nearest_repeat_POT(struct sp_sampler_view *sp_sview,
int y0 = vflr & (ypot - 1);
addr.value = 0;
- addr.bits.level = level;
+ addr.bits.level = args->level;
out = get_texel_2d_no_border(sp_sview, addr, x0, y0);
for (c = 0; c < TGSI_QUAD_SIZE; c++)
@@ -1104,26 +1107,22 @@ img_filter_2d_nearest_repeat_POT(struct sp_sampler_view *sp_sview,
static INLINE void
img_filter_2d_nearest_clamp_POT(struct sp_sampler_view *sp_sview,
struct sp_sampler *sp_samp,
- float s,
- float t,
- float p,
- unsigned level,
- unsigned face_id,
+ const struct img_filter_args *args,
float rgba[TGSI_QUAD_SIZE])
{
- unsigned xpot = pot_level_size(sp_sview->xpot, level);
- unsigned ypot = pot_level_size(sp_sview->ypot, level);
+ unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
+ unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
union tex_tile_address addr;
int c;
- float u = s * xpot;
- float v = t * ypot;
+ float u = args->s * xpot + args->offset[0];
+ float v = args->t * ypot + args->offset[1];
int x0, y0;
const float *out;
addr.value = 0;
- addr.bits.level = level;
+ addr.bits.level = args->level;
x0 = util_ifloor(u);
if (x0 < 0)
@@ -1150,11 +1149,7 @@ img_filter_2d_nearest_clamp_POT(struct sp_sampler_view *sp_sview,
static void
img_filter_1d_nearest(struct sp_sampler_view *sp_sview,
struct sp_sampler *sp_samp,
- float s,
- float t,
- float p,
- unsigned level,
- unsigned face_id,
+ const struct img_filter_args *args,
float rgba[TGSI_QUAD_SIZE])
{
const struct pipe_resource *texture = sp_sview->base.texture;
@@ -1164,14 +1159,14 @@ img_filter_1d_nearest(struct sp_sampler_view *sp_sview,
const float *out;
int c;
- width = u_minify(texture->width0, level);
+ width = u_minify(texture->width0, args->level);
assert(width > 0);
addr.value = 0;
- addr.bits.level = level;
+ addr.bits.level = args->level;
- sp_samp->nearest_texcoord_s(s, width, &x);
+ sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
out = get_texel_2d(sp_sview, sp_samp, addr, x, 0);
for (c = 0; c < TGSI_QUAD_SIZE; c++)
@@ -1186,11 +1181,7 @@ img_filter_1d_nearest(struct sp_sampler_view *sp_sview,
static void
img_filter_1d_array_nearest(struct sp_sampler_view *sp_sview,
struct sp_sampler *sp_samp,
- float s,
- float t,
- float p,
- unsigned level,
- unsigned face_id,
+ const struct img_filter_args *args,
float *rgba)
{
const struct pipe_resource *texture = sp_sview->base.texture;
@@ -1200,15 +1191,16 @@ img_filter_1d_array_nearest(struct sp_sampler_view *sp_sview,
const float *out;
int c;
- width = u_minify(texture->width0, level);
+ width = u_minify(texture->width0, args->level);
assert(width > 0);
addr.value = 0;
- addr.bits.level = level;
+ addr.bits.level = args->level;
- sp_samp->nearest_texcoord_s(s, width, &x);
- wrap_array_layer(t, texture->array_size, &layer);
+ sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
+ layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer,
+ sp_sview->base.u.tex.last_layer);
out = get_texel_1d_array(sp_sview, sp_samp, addr, x, layer);
for (c = 0; c < TGSI_QUAD_SIZE; c++)
@@ -1223,11 +1215,7 @@ img_filter_1d_array_nearest(struct sp_sampler_view *sp_sview,
static void
img_filter_2d_nearest(struct sp_sampler_view *sp_sview,
struct sp_sampler *sp_samp,
- float s,
- float t,
- float p,
- unsigned level,
- unsigned face_id,
+ const struct img_filter_args *args,
float *rgba)
{
const struct pipe_resource *texture = sp_sview->base.texture;
@@ -1237,17 +1225,17 @@ img_filter_2d_nearest(struct sp_sampler_view *sp_sview,
const float *out;
int c;
- width = u_minify(texture->width0, level);
- height = u_minify(texture->height0, level);
+ width = u_minify(texture->width0, args->level);
+ height = u_minify(texture->height0, args->level);
assert(width > 0);
assert(height > 0);
addr.value = 0;
- addr.bits.level = level;
+ addr.bits.level = args->level;
- sp_samp->nearest_texcoord_s(s, width, &x);
- sp_samp->nearest_texcoord_t(t, height, &y);
+ sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
+ sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
out = get_texel_2d(sp_sview, sp_samp, addr, x, y);
for (c = 0; c < TGSI_QUAD_SIZE; c++)
@@ -1262,11 +1250,7 @@ img_filter_2d_nearest(struct sp_sampler_view *sp_sview,
static void
img_filter_2d_array_nearest(struct sp_sampler_view *sp_sview,
struct sp_sampler *sp_samp,
- float s,
- float t,
- float p,
- unsigned level,
- unsigned face_id,
+ const struct img_filter_args *args,
float *rgba)
{
const struct pipe_resource *texture = sp_sview->base.texture;
@@ -1276,18 +1260,19 @@ img_filter_2d_array_nearest(struct sp_sampler_view *sp_sview,
const float *out;
int c;
- width = u_minify(texture->width0, level);
- height = u_minify(texture->height0, level);
+ width = u_minify(texture->width0, args->level);
+ height = u_minify(texture->height0, args->level);
assert(width > 0);
assert(height > 0);
addr.value = 0;
- addr.bits.level = level;
+ addr.bits.level = args->level;
- sp_samp->nearest_texcoord_s(s, width, &x);
- sp_samp->nearest_texcoord_t(t, height, &y);
- wrap_array_layer(p, texture->array_size, &layer);
+ sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
+ sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
+ layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer,
+ sp_sview->base.u.tex.last_layer);
out = get_texel_2d_array(sp_sview, sp_samp, addr, x, y, layer);
for (c = 0; c < TGSI_QUAD_SIZE; c++)
@@ -1299,54 +1284,43 @@ img_filter_2d_array_nearest(struct sp_sampler_view *sp_sview,
}
-static INLINE union tex_tile_address
-face(union tex_tile_address addr, unsigned face )
-{
- addr.bits.face = face;
- return addr;
-}
-
-
static void
img_filter_cube_nearest(struct sp_sampler_view *sp_sview,
struct sp_sampler *sp_samp,
- float s,
- float t,
- float p,
- unsigned level,
- unsigned face_id,
+ const struct img_filter_args *args,
float *rgba)
{
const struct pipe_resource *texture = sp_sview->base.texture;
int width, height;
- int x, y;
+ int x, y, layerface;
union tex_tile_address addr;
const float *out;
int c;
- width = u_minify(texture->width0, level);
- height = u_minify(texture->height0, level);
+ width = u_minify(texture->width0, args->level);
+ height = u_minify(texture->height0, args->level);
assert(width > 0);
assert(height > 0);
addr.value = 0;
- addr.bits.level = level;
+ addr.bits.level = args->level;
/*
* If NEAREST filtering is done within a miplevel, always apply wrap
* mode CLAMP_TO_EDGE.
*/
if (sp_samp->base.seamless_cube_map) {
- wrap_nearest_clamp_to_edge(s, width, &x);
- wrap_nearest_clamp_to_edge(t, height, &y);
+ wrap_nearest_clamp_to_edge(args->s, width, args->offset[0], &x);
+ wrap_nearest_clamp_to_edge(args->t, height, args->offset[1], &y);
} else {
/* Would probably make sense to ignore mode and just do edge clamp */
- sp_samp->nearest_texcoord_s(s, width, &x);
- sp_samp->nearest_texcoord_t(t, height, &y);
+ sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
+ sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
}
- out = get_texel_2d(sp_sview, sp_samp, face(addr, face_id), x, y);
+ layerface = args->face_id + sp_sview->base.u.tex.first_layer;
+ out = get_texel_cube_array(sp_sview, sp_samp, addr, x, y, layerface);
for (c = 0; c < TGSI_QUAD_SIZE; c++)
rgba[TGSI_NUM_CHANNELS*c] = out[c];
@@ -1358,34 +1332,32 @@ img_filter_cube_nearest(struct sp_sampler_view *sp_sview,
static void
img_filter_cube_array_nearest(struct sp_sampler_view *sp_sview,
struct sp_sampler *sp_samp,
- float s,
- float t,
- float p,
- unsigned level,
- unsigned face_id,
+ const struct img_filter_args *args,
float *rgba)
{
const struct pipe_resource *texture = sp_sview->base.texture;
int width, height;
- int x, y, layer;
+ int x, y, layerface;
union tex_tile_address addr;
const float *out;
int c;
- width = u_minify(texture->width0, level);
- height = u_minify(texture->height0, level);
+ width = u_minify(texture->width0, args->level);
+ height = u_minify(texture->height0, args->level);
assert(width > 0);
assert(height > 0);
addr.value = 0;
- addr.bits.level = level;
+ addr.bits.level = args->level;
- sp_samp->nearest_texcoord_s(s, width, &x);
- sp_samp->nearest_texcoord_t(t, height, &y);
- wrap_array_layer(p, texture->array_size, &layer);
+ sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
+ sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
+ layerface = coord_to_layer(6 * args->p + sp_sview->base.u.tex.first_layer,
+ sp_sview->base.u.tex.first_layer,
+ sp_sview->base.u.tex.last_layer - 5) + args->face_id;
- out = get_texel_cube_array(sp_sview, sp_samp, addr, x, y, layer * 6 + face_id);
+ out = get_texel_cube_array(sp_sview, sp_samp, addr, x, y, layerface);
for (c = 0; c < TGSI_QUAD_SIZE; c++)
rgba[TGSI_NUM_CHANNELS*c] = out[c];
@@ -1397,11 +1369,7 @@ img_filter_cube_array_nearest(struct sp_sampler_view *sp_sview,
static void
img_filter_3d_nearest(struct sp_sampler_view *sp_sview,
struct sp_sampler *sp_samp,
- float s,
- float t,
- float p,
- unsigned level,
- unsigned face_id,
+ const struct img_filter_args *args,
float *rgba)
{
const struct pipe_resource *texture = sp_sview->base.texture;
@@ -1411,20 +1379,20 @@ img_filter_3d_nearest(struct sp_sampler_view *sp_sview,
const float *out;
int c;
- width = u_minify(texture->width0, level);
- height = u_minify(texture->height0, level);
- depth = u_minify(texture->depth0, level);
+ width = u_minify(texture->width0, args->level);
+ height = u_minify(texture->height0, args->level);
+ depth = u_minify(texture->depth0, args->level);
assert(width > 0);
assert(height > 0);
assert(depth > 0);
- sp_samp->nearest_texcoord_s(s, width, &x);
- sp_samp->nearest_texcoord_t(t, height, &y);
- sp_samp->nearest_texcoord_p(p, depth, &z);
+ sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
+ sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
+ sp_samp->nearest_texcoord_p(args->p, depth, args->offset[2], &z);
addr.value = 0;
- addr.bits.level = level;
+ addr.bits.level = args->level;
out = get_texel_3d(sp_sview, sp_samp, addr, x, y, z);
for (c = 0; c < TGSI_QUAD_SIZE; c++)
@@ -1435,11 +1403,7 @@ img_filter_3d_nearest(struct sp_sampler_view *sp_sview,
static void
img_filter_1d_linear(struct sp_sampler_view *sp_sview,
struct sp_sampler *sp_samp,
- float s,
- float t,
- float p,
- unsigned level,
- unsigned face_id,
+ const struct img_filter_args *args,
float *rgba)
{
const struct pipe_resource *texture = sp_sview->base.texture;
@@ -1450,14 +1414,14 @@ img_filter_1d_linear(struct sp_sampler_view *sp_sview,
const float *tx0, *tx1;
int c;
- width = u_minify(texture->width0, level);
+ width = u_minify(texture->width0, args->level);
assert(width > 0);
addr.value = 0;
- addr.bits.level = level;
+ addr.bits.level = args->level;
- sp_samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
+ sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
tx0 = get_texel_2d(sp_sview, sp_samp, addr, x0, 0);
tx1 = get_texel_2d(sp_sview, sp_samp, addr, x1, 0);
@@ -1471,11 +1435,7 @@ img_filter_1d_linear(struct sp_sampler_view *sp_sview,
static void
img_filter_1d_array_linear(struct sp_sampler_view *sp_sview,
struct sp_sampler *sp_samp,
- float s,
- float t,
- float p,
- unsigned level,
- unsigned face_id,
+ const struct img_filter_args *args,
float *rgba)
{
const struct pipe_resource *texture = sp_sview->base.texture;
@@ -1486,15 +1446,16 @@ img_filter_1d_array_linear(struct sp_sampler_view *sp_sview,
const float *tx0, *tx1;
int c;
- width = u_minify(texture->width0, level);
+ width = u_minify(texture->width0, args->level);
assert(width > 0);
addr.value = 0;
- addr.bits.level = level;
+ addr.bits.level = args->level;
- sp_samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
- wrap_array_layer(t, texture->array_size, &layer);
+ sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
+ layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer,
+ sp_sview->base.u.tex.last_layer);
tx0 = get_texel_1d_array(sp_sview, sp_samp, addr, x0, layer);
tx1 = get_texel_1d_array(sp_sview, sp_samp, addr, x1, layer);
@@ -1504,15 +1465,77 @@ img_filter_1d_array_linear(struct sp_sampler_view *sp_sview,
rgba[TGSI_NUM_CHANNELS*c] = lerp(xw, tx0[c], tx1[c]);
}
+/*
+ * Retrieve the gathered value, need to convert to the
+ * TGSI expected interface, and take component select
+ * and swizzling into account.
+ */
+static float
+get_gather_value(const struct sp_sampler_view *sp_sview,
+ int chan_in, int comp_sel,
+ const float *tx[4])
+{
+ int chan;
+ unsigned swizzle;
+
+ /*
+ * softpipe samples in a different order
+ * to TGSI expects, so we need to swizzle,
+ * the samples into the correct slots.
+ */
+ switch (chan_in) {
+ case 0:
+ chan = 2;
+ break;
+ case 1:
+ chan = 3;
+ break;
+ case 2:
+ chan = 1;
+ break;
+ case 3:
+ chan = 0;
+ break;
+ default:
+ assert(0);
+ return 0.0;
+ }
+
+ /* pick which component to use for the swizzle */
+ switch (comp_sel) {
+ case 0:
+ swizzle = sp_sview->base.swizzle_r;
+ break;
+ case 1:
+ swizzle = sp_sview->base.swizzle_g;
+ break;
+ case 2:
+ swizzle = sp_sview->base.swizzle_b;
+ break;
+ case 3:
+ swizzle = sp_sview->base.swizzle_a;
+ break;
+ default:
+ assert(0);
+ return 0.0;
+ }
+
+ /* get correct result using the channel and swizzle */
+ switch (swizzle) {
+ case PIPE_SWIZZLE_ZERO:
+ return 0.0;
+ case PIPE_SWIZZLE_ONE:
+ return 1.0;
+ default:
+ return tx[chan][swizzle];
+ }
+}
+
static void
img_filter_2d_linear(struct sp_sampler_view *sp_sview,
struct sp_sampler *sp_samp,
- float s,
- float t,
- float p,
- unsigned level,
- unsigned face_id,
+ const struct img_filter_args *args,
float *rgba)
{
const struct pipe_resource *texture = sp_sview->base.texture;
@@ -1520,42 +1543,45 @@ img_filter_2d_linear(struct sp_sampler_view *sp_sview,
int x0, y0, x1, y1;
float xw, yw; /* weights */
union tex_tile_address addr;
- const float *tx0, *tx1, *tx2, *tx3;
+ const float *tx[4];
int c;
- width = u_minify(texture->width0, level);
- height = u_minify(texture->height0, level);
+ width = u_minify(texture->width0, args->level);
+ height = u_minify(texture->height0, args->level);
assert(width > 0);
assert(height > 0);
addr.value = 0;
- addr.bits.level = level;
+ addr.bits.level = args->level;
- sp_samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
- sp_samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
+ sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
+ sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
- tx0 = get_texel_2d(sp_sview, sp_samp, addr, x0, y0);
- tx1 = get_texel_2d(sp_sview, sp_samp, addr, x1, y0);
- tx2 = get_texel_2d(sp_sview, sp_samp, addr, x0, y1);
- tx3 = get_texel_2d(sp_sview, sp_samp, addr, x1, y1);
+ tx[0] = get_texel_2d(sp_sview, sp_samp, addr, x0, y0);
+ tx[1] = get_texel_2d(sp_sview, sp_samp, addr, x1, y0);
+ tx[2] = get_texel_2d(sp_sview, sp_samp, addr, x0, y1);
+ tx[3] = get_texel_2d(sp_sview, sp_samp, addr, x1, y1);
- /* interpolate R, G, B, A */
- for (c = 0; c < TGSI_QUAD_SIZE; c++)
- rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
- tx0[c], tx1[c],
- tx2[c], tx3[c]);
+ if (args->gather_only) {
+ for (c = 0; c < TGSI_QUAD_SIZE; c++)
+ rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c,
+ args->gather_comp,
+ tx);
+ } else {
+ /* interpolate R, G, B, A */
+ for (c = 0; c < TGSI_QUAD_SIZE; c++)
+ rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
+ tx[0][c], tx[1][c],
+ tx[2][c], tx[3][c]);
+ }
}
static void
img_filter_2d_array_linear(struct sp_sampler_view *sp_sview,
struct sp_sampler *sp_samp,
- float s,
- float t,
- float p,
- unsigned level,
- unsigned face_id,
+ const struct img_filter_args *args,
float *rgba)
{
const struct pipe_resource *texture = sp_sview->base.texture;
@@ -1563,63 +1589,67 @@ img_filter_2d_array_linear(struct sp_sampler_view *sp_sview,
int x0, y0, x1, y1, layer;
float xw, yw; /* weights */
union tex_tile_address addr;
- const float *tx0, *tx1, *tx2, *tx3;
+ const float *tx[4];
int c;
- width = u_minify(texture->width0, level);
- height = u_minify(texture->height0, level);
+ width = u_minify(texture->width0, args->level);
+ height = u_minify(texture->height0, args->level);
assert(width > 0);
assert(height > 0);
addr.value = 0;
- addr.bits.level = level;
-
- sp_samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
- sp_samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
- wrap_array_layer(p, texture->array_size, &layer);
-
- tx0 = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y0, layer);
- tx1 = get_texel_2d_array(sp_sview, sp_samp, addr, x1, y0, layer);
- tx2 = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y1, layer);
- tx3 = get_texel_2d_array(sp_sview, sp_samp, addr, x1, y1, layer);
-
- /* interpolate R, G, B, A */
- for (c = 0; c < TGSI_QUAD_SIZE; c++)
- rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
- tx0[c], tx1[c],
- tx2[c], tx3[c]);
+ addr.bits.level = args->level;
+
+ sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
+ sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
+ layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer,
+ sp_sview->base.u.tex.last_layer);
+
+ tx[0] = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y0, layer);
+ tx[1] = get_texel_2d_array(sp_sview, sp_samp, addr, x1, y0, layer);
+ tx[2] = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y1, layer);
+ tx[3] = get_texel_2d_array(sp_sview, sp_samp, addr, x1, y1, layer);
+
+ if (args->gather_only) {
+ for (c = 0; c < TGSI_QUAD_SIZE; c++)
+ rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c,
+ args->gather_comp,
+ tx);
+ } else {
+ /* interpolate R, G, B, A */
+ for (c = 0; c < TGSI_QUAD_SIZE; c++)
+ rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
+ tx[0][c], tx[1][c],
+ tx[2][c], tx[3][c]);
+ }
}
static void
img_filter_cube_linear(struct sp_sampler_view *sp_sview,
struct sp_sampler *sp_samp,
- float s,
- float t,
- float p,
- unsigned level,
- unsigned face_id,
+ const struct img_filter_args *args,
float *rgba)
{
const struct pipe_resource *texture = sp_sview->base.texture;
int width, height;
- int x0, y0, x1, y1;
+ int x0, y0, x1, y1, layer;
float xw, yw; /* weights */
- union tex_tile_address addr, addrj;
- const float *tx0, *tx1, *tx2, *tx3;
+ union tex_tile_address addr;
+ const float *tx[4];
float corner0[TGSI_QUAD_SIZE], corner1[TGSI_QUAD_SIZE],
corner2[TGSI_QUAD_SIZE], corner3[TGSI_QUAD_SIZE];
int c;
- width = u_minify(texture->width0, level);
- height = u_minify(texture->height0, level);
+ width = u_minify(texture->width0, args->level);
+ height = u_minify(texture->height0, args->level);
assert(width > 0);
assert(height > 0);
addr.value = 0;
- addr.bits.level = level;
+ addr.bits.level = args->level;
/*
* For seamless if LINEAR filtering is done within a miplevel,
@@ -1627,43 +1657,47 @@ img_filter_cube_linear(struct sp_sampler_view *sp_sview,
*/
if (sp_samp->base.seamless_cube_map) {
/* Note this is a bit overkill, actual clamping is not required */
- wrap_linear_clamp_to_border(s, width, &x0, &x1, &xw);
- wrap_linear_clamp_to_border(t, height, &y0, &y1, &yw);
+ wrap_linear_clamp_to_border(args->s, width, args->offset[0], &x0, &x1, &xw);
+ wrap_linear_clamp_to_border(args->t, height, args->offset[1], &y0, &y1, &yw);
} else {
/* Would probably make sense to ignore mode and just do edge clamp */
- sp_samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
- sp_samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
+ sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
+ sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
}
- addrj = face(addr, face_id);
+ layer = sp_sview->base.u.tex.first_layer;
if (sp_samp->base.seamless_cube_map) {
- tx0 = get_texel_cube_seamless(sp_sview, addrj, x0, y0, corner0);
- tx1 = get_texel_cube_seamless(sp_sview, addrj, x1, y0, corner1);
- tx2 = get_texel_cube_seamless(sp_sview, addrj, x0, y1, corner2);
- tx3 = get_texel_cube_seamless(sp_sview, addrj, x1, y1, corner3);
+ tx[0] = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, args->face_id);
+ tx[1] = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, args->face_id);
+ tx[2] = get_texel_cube_seamless(sp_sview, addr, x0, y1, corner2, layer, args->face_id);
+ tx[3] = get_texel_cube_seamless(sp_sview, addr, x1, y1, corner3, layer, args->face_id);
} else {
- tx0 = get_texel_2d(sp_sview, sp_samp, addrj, x0, y0);
- tx1 = get_texel_2d(sp_sview, sp_samp, addrj, x1, y0);
- tx2 = get_texel_2d(sp_sview, sp_samp, addrj, x0, y1);
- tx3 = get_texel_2d(sp_sview, sp_samp, addrj, x1, y1);
+ tx[0] = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y0, layer + args->face_id);
+ tx[1] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y0, layer + args->face_id);
+ tx[2] = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y1, layer + args->face_id);
+ tx[3] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + args->face_id);
+ }
+
+ if (args->gather_only) {
+ for (c = 0; c < TGSI_QUAD_SIZE; c++)
+ rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c,
+ args->gather_comp,
+ tx);
+ } else {
+ /* interpolate R, G, B, A */
+ for (c = 0; c < TGSI_QUAD_SIZE; c++)
+ rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
+ tx[0][c], tx[1][c],
+ tx[2][c], tx[3][c]);
}
- /* interpolate R, G, B, A */
- for (c = 0; c < TGSI_QUAD_SIZE; c++)
- rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
- tx0[c], tx1[c],
- tx2[c], tx3[c]);
}
static void
img_filter_cube_array_linear(struct sp_sampler_view *sp_sview,
struct sp_sampler *sp_samp,
- float s,
- float t,
- float p,
- unsigned level,
- unsigned face_id,
+ const struct img_filter_args *args,
float *rgba)
{
const struct pipe_resource *texture = sp_sview->base.texture;
@@ -1671,42 +1705,68 @@ img_filter_cube_array_linear(struct sp_sampler_view *sp_sview,
int x0, y0, x1, y1, layer;
float xw, yw; /* weights */
union tex_tile_address addr;
- const float *tx0, *tx1, *tx2, *tx3;
+ const float *tx[4];
+ float corner0[TGSI_QUAD_SIZE], corner1[TGSI_QUAD_SIZE],
+ corner2[TGSI_QUAD_SIZE], corner3[TGSI_QUAD_SIZE];
int c;
- width = u_minify(texture->width0, level);
- height = u_minify(texture->height0, level);
+ width = u_minify(texture->width0, args->level);
+ height = u_minify(texture->height0, args->level);
assert(width > 0);
assert(height > 0);
addr.value = 0;
- addr.bits.level = level;
+ addr.bits.level = args->level;
- sp_samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
- sp_samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
- wrap_array_layer(p, texture->array_size, &layer);
+ /*
+ * For seamless if LINEAR filtering is done within a miplevel,
+ * always apply wrap mode CLAMP_TO_BORDER.
+ */
+ if (sp_samp->base.seamless_cube_map) {
+ /* Note this is a bit overkill, actual clamping is not required */
+ wrap_linear_clamp_to_border(args->s, width, args->offset[0], &x0, &x1, &xw);
+ wrap_linear_clamp_to_border(args->t, height, args->offset[1], &y0, &y1, &yw);
+ } else {
+ /* Would probably make sense to ignore mode and just do edge clamp */
+ sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
+ sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
+ }
- tx0 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y0, layer * 6 + face_id);
- tx1 = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y0, layer * 6 + face_id);
- tx2 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y1, layer * 6 + face_id);
- tx3 = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer * 6 + face_id);
+ layer = coord_to_layer(6 * args->p + sp_sview->base.u.tex.first_layer,
+ sp_sview->base.u.tex.first_layer,
+ sp_sview->base.u.tex.last_layer - 5);
- /* interpolate R, G, B, A */
- for (c = 0; c < TGSI_QUAD_SIZE; c++)
- rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
- tx0[c], tx1[c],
- tx2[c], tx3[c]);
+ if (sp_samp->base.seamless_cube_map) {
+ tx[0] = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, args->face_id);
+ tx[1] = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, args->face_id);
+ tx[2] = get_texel_cube_seamless(sp_sview, addr, x0, y1, corner2, layer, args->face_id);
+ tx[3] = get_texel_cube_seamless(sp_sview, addr, x1, y1, corner3, layer, args->face_id);
+ } else {
+ tx[0] = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y0, layer + args->face_id);
+ tx[1] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y0, layer + args->face_id);
+ tx[2] = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y1, layer + args->face_id);
+ tx[3] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + args->face_id);
+ }
+
+ if (args->gather_only) {
+ for (c = 0; c < TGSI_QUAD_SIZE; c++)
+ rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c,
+ args->gather_comp,
+ tx);
+ } else {
+ /* interpolate R, G, B, A */
+ for (c = 0; c < TGSI_QUAD_SIZE; c++)
+ rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
+ tx[0][c], tx[1][c],
+ tx[2][c], tx[3][c]);
+ }
}
static void
img_filter_3d_linear(struct sp_sampler_view *sp_sview,
struct sp_sampler *sp_samp,
- float s,
- float t,
- float p,
- unsigned level,
- unsigned face_id,
+ const struct img_filter_args *args,
float *rgba)
{
const struct pipe_resource *texture = sp_sview->base.texture;
@@ -1717,21 +1777,20 @@ img_filter_3d_linear(struct sp_sampler_view *sp_sview,
const float *tx00, *tx01, *tx02, *tx03, *tx10, *tx11, *tx12, *tx13;
int c;
- width = u_minify(texture->width0, level);
- height = u_minify(texture->height0, level);
- depth = u_minify(texture->depth0, level);
+ width = u_minify(texture->width0, args->level);
+ height = u_minify(texture->height0, args->level);
+ depth = u_minify(texture->depth0, args->level);
addr.value = 0;
- addr.bits.level = level;
+ addr.bits.level = args->level;
assert(width > 0);
assert(height > 0);
assert(depth > 0);
- sp_samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
- sp_samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
- sp_samp->linear_texcoord_p(p, depth, &z0, &z1, &zw);
-
+ sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
+ sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
+ sp_samp->linear_texcoord_p(args->p, depth, args->offset[2], &z0, &z1, &zw);
tx00 = get_texel_3d(sp_sview, sp_samp, addr, x0, y0, z0);
tx01 = get_texel_3d(sp_sview, sp_samp, addr, x1, y0, z0);
@@ -1837,6 +1896,7 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview,
}
break;
case tgsi_sampler_lod_zero:
+ case tgsi_sampler_gather:
/* this is all static state in the sampler really need clamp here? */
lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lod_bias, min_lod, max_lod);
break;
@@ -1846,6 +1906,12 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview,
}
}
+static INLINE unsigned
+get_gather_component(const float lod_in[TGSI_QUAD_SIZE])
+{
+ /* gather component is stored in lod_in slot as unsigned */
+ return (*(unsigned int *)lod_in) & 0x3;
+}
static void
mip_filter_linear(struct sp_sampler_view *sp_sview,
@@ -1857,36 +1923,45 @@ mip_filter_linear(struct sp_sampler_view *sp_sview,
const float p[TGSI_QUAD_SIZE],
const float c0[TGSI_QUAD_SIZE],
const float lod_in[TGSI_QUAD_SIZE],
- enum tgsi_sampler_control control,
+ const struct filter_args *filt_args,
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
{
const struct pipe_sampler_view *psview = &sp_sview->base;
int j;
float lod[TGSI_QUAD_SIZE];
+ struct img_filter_args args;
- compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, control, lod);
+ compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
+
+ args.offset = filt_args->offset;
+ args.gather_only = filt_args->control == tgsi_sampler_gather;
+ args.gather_comp = get_gather_component(lod_in);
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
int level0 = psview->u.tex.first_level + (int)lod[j];
- if (lod[j] < 0.0)
- mag_filter(sp_sview, sp_samp, s[j], t[j], p[j],
- psview->u.tex.first_level,
- sp_sview->faces[j], &rgba[0][j]);
-
- else if (level0 >= (int) psview->u.tex.last_level)
- min_filter(sp_sview, sp_samp, s[j], t[j], p[j], psview->u.tex.last_level,
- sp_sview->faces[j], &rgba[0][j]);
+ args.s = s[j];
+ args.t = t[j];
+ args.p = p[j];
+ args.face_id = sp_sview->faces[j];
+ if (lod[j] < 0.0) {
+ args.level = psview->u.tex.first_level;
+ mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
+ }
+ else if (level0 >= (int) psview->u.tex.last_level) {
+ args.level = psview->u.tex.last_level;
+ min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
+ }
else {
float levelBlend = frac(lod[j]);
float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
int c;
- min_filter(sp_sview, sp_samp, s[j], t[j], p[j], level0,
- sp_sview->faces[j], &rgbax[0][0]);
- min_filter(sp_sview, sp_samp, s[j], t[j], p[j], level0+1,
- sp_sview->faces[j], &rgbax[0][1]);
+ args.level = level0;
+ min_filter(sp_sview, sp_samp, &args, &rgbax[0][0]);
+ args.level = level0+1;
+ min_filter(sp_sview, sp_samp, &args, &rgbax[0][1]);
for (c = 0; c < 4; c++) {
rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]);
@@ -1915,25 +1990,33 @@ mip_filter_nearest(struct sp_sampler_view *sp_sview,
const float p[TGSI_QUAD_SIZE],
const float c0[TGSI_QUAD_SIZE],
const float lod_in[TGSI_QUAD_SIZE],
- enum tgsi_sampler_control control,
+ const struct filter_args *filt_args,
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
{
const struct pipe_sampler_view *psview = &sp_sview->base;
float lod[TGSI_QUAD_SIZE];
int j;
+ struct img_filter_args args;
+
+ args.offset = filt_args->offset;
+ args.gather_only = filt_args->control == tgsi_sampler_gather;
+ args.gather_comp = get_gather_component(lod_in);
- compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, control, lod);
+ compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
- if (lod[j] < 0.0)
- mag_filter(sp_sview, sp_samp, s[j], t[j], p[j],
- psview->u.tex.first_level,
- sp_sview->faces[j], &rgba[0][j]);
- else {
+ args.s = s[j];
+ args.t = t[j];
+ args.p = p[j];
+ args.face_id = sp_sview->faces[j];
+
+ if (lod[j] < 0.0) {
+ args.level = psview->u.tex.first_level;
+ mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
+ } else {
int level = psview->u.tex.first_level + (int)(lod[j] + 0.5F);
- level = MIN2(level, (int)psview->u.tex.last_level);
- min_filter(sp_sview, sp_samp, s[j], t[j], p[j],
- level, sp_sview->faces[j], &rgba[0][j]);
+ args.level = MIN2(level, (int)psview->u.tex.last_level);
+ min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
}
}
@@ -1953,24 +2036,29 @@ mip_filter_none(struct sp_sampler_view *sp_sview,
const float p[TGSI_QUAD_SIZE],
const float c0[TGSI_QUAD_SIZE],
const float lod_in[TGSI_QUAD_SIZE],
- enum tgsi_sampler_control control,
+ const struct filter_args *filt_args,
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
{
float lod[TGSI_QUAD_SIZE];
int j;
+ struct img_filter_args args;
+
+ args.level = sp_sview->base.u.tex.first_level;
+ args.offset = filt_args->offset;
+ args.gather_only = filt_args->control == tgsi_sampler_gather;
- compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, control, lod);
+ compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
- if (lod[j] < 0.0) {
- mag_filter(sp_sview, sp_samp, s[j], t[j], p[j],
- sp_sview->base.u.tex.first_level,
- sp_sview->faces[j], &rgba[0][j]);
+ args.s = s[j];
+ args.t = t[j];
+ args.p = p[j];
+ args.face_id = sp_sview->faces[j];
+ if (lod[j] < 0.0) {
+ mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
}
else {
- min_filter(sp_sview, sp_samp, s[j], t[j], p[j],
- sp_sview->base.u.tex.first_level,
- sp_sview->faces[j], &rgba[0][j]);
+ min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
}
}
}
@@ -1986,15 +2074,21 @@ mip_filter_none_no_filter_select(struct sp_sampler_view *sp_sview,
const float p[TGSI_QUAD_SIZE],
const float c0[TGSI_QUAD_SIZE],
const float lod_in[TGSI_QUAD_SIZE],
- enum tgsi_sampler_control control,
+ const struct filter_args *filt_args,
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
{
int j;
-
- for (j = 0; j < TGSI_QUAD_SIZE; j++)
- mag_filter(sp_sview, sp_samp, s[j], t[j], p[j],
- sp_sview->base.u.tex.first_level,
- sp_sview->faces[j], &rgba[0][j]);
+ struct img_filter_args args;
+ args.level = sp_sview->base.u.tex.first_level;
+ args.offset = filt_args->offset;
+ args.gather_only = filt_args->control == tgsi_sampler_gather;
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ args.s = s[j];
+ args.t = t[j];
+ args.p = p[j];
+ args.face_id = sp_sview->faces[j];
+ mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
+ }
}
@@ -2050,7 +2144,7 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
float scaling = 1.0f / (1 << level0);
int width = u_minify(texture->width0, level0);
int height = u_minify(texture->height0, level0);
-
+ struct img_filter_args args;
float ux = dudx * scaling;
float vx = dvdx * scaling;
float uy = dudy * scaling;
@@ -2100,7 +2194,8 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
* full, then the pixel values are read from the image.
*/
ddq = 2 * A;
-
+
+ args.level = level;
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
/* Heckbert MS thesis, p. 59; scan over the bounding box of the ellipse
* and incrementally update the value of Ax^2+Bxy*Cy^2; when this
@@ -2117,6 +2212,8 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
float num[4] = {0.0F, 0.0F, 0.0F, 0.0F};
buffer_next = 0;
den = 0;
+ args.face_id = sp_sview->faces[j];
+
U = u0 - tex_u;
for (v = v0; v <= v1; ++v) {
float V = v - tex_v;
@@ -2148,8 +2245,10 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
* accelerated img_filter_2d_nearest_XXX functions.
*/
for (jj = 0; jj < buffer_next; jj++) {
- min_filter(sp_sview, sp_samp, s_buffer[jj], t_buffer[jj], p[jj],
- level, sp_sview->faces[j], &rgba_temp[0][jj]);
+ args.s = s_buffer[jj];
+ args.t = t_buffer[jj];
+ args.p = p[jj];
+ min_filter(sp_sview, sp_samp, &args, &rgba_temp[0][jj]);
num[0] += weight_buffer[jj] * rgba_temp[0][jj];
num[1] += weight_buffer[jj] * rgba_temp[1][jj];
num[2] += weight_buffer[jj] * rgba_temp[2][jj];
@@ -2176,8 +2275,10 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
* accelerated img_filter_2d_nearest_XXX functions.
*/
for (jj = 0; jj < buffer_next; jj++) {
- min_filter(sp_sview, sp_samp, s_buffer[jj], t_buffer[jj], p[jj],
- level, sp_sview->faces[j], &rgba_temp[0][jj]);
+ args.s = s_buffer[jj];
+ args.t = t_buffer[jj];
+ args.p = p[jj];
+ min_filter(sp_sview, sp_samp, &args, &rgba_temp[0][jj]);
num[0] += weight_buffer[jj] * rgba_temp[0][jj];
num[1] += weight_buffer[jj] * rgba_temp[1][jj];
num[2] += weight_buffer[jj] * rgba_temp[2][jj];
@@ -2196,8 +2297,10 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
rgba[2]=0;
rgba[3]=0;*/
/* not enough pixels in resampling, resort to direct interpolation */
- min_filter(sp_sview, sp_samp, s[j], t[j], p[j], level,
- sp_sview->faces[j], &rgba_temp[0][j]);
+ args.s = s[j];
+ args.t = t[j];
+ args.p = p[j];
+ min_filter(sp_sview, sp_samp, &args, &rgba_temp[0][j]);
den = 1;
num[0] = rgba_temp[0][j];
num[1] = rgba_temp[1][j];
@@ -2226,7 +2329,7 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
const float p[TGSI_QUAD_SIZE],
const float c0[TGSI_QUAD_SIZE],
const float lod_in[TGSI_QUAD_SIZE],
- enum tgsi_sampler_control control,
+ const struct filter_args *filt_args,
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
{
const struct pipe_resource *texture = sp_sview->base.texture;
@@ -2241,11 +2344,12 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
float dudy = (s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]) * s_to_u;
float dvdx = (t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
float dvdy = (t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
-
- if (control == tgsi_sampler_lod_bias ||
- control == tgsi_sampler_lod_none ||
+ struct img_filter_args args;
+
+ if (filt_args->control == tgsi_sampler_lod_bias ||
+ filt_args->control == tgsi_sampler_lod_none ||
/* XXX FIXME */
- control == tgsi_sampler_derivs_explicit) {
+ filt_args->control == tgsi_sampler_derivs_explicit) {
/* note: instead of working with Px and Py, we will use the
* squared length instead, to avoid sqrt.
*/
@@ -2282,12 +2386,12 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
* this since 0.5*log(x) = log(sqrt(x))
*/
lambda = 0.5F * util_fast_log2(Pmin2) + sp_samp->base.lod_bias;
- compute_lod(&sp_samp->base, control, lambda, lod_in, lod);
+ compute_lod(&sp_samp->base, filt_args->control, lambda, lod_in, lod);
}
else {
- assert(control == tgsi_sampler_lod_explicit ||
- control == tgsi_sampler_lod_zero);
- compute_lod(&sp_samp->base, control, sp_samp->base.lod_bias, lod_in, lod);
+ assert(filt_args->control == tgsi_sampler_lod_explicit ||
+ filt_args->control == tgsi_sampler_lod_zero);
+ compute_lod(&sp_samp->base, filt_args->control, sp_samp->base.lod_bias, lod_in, lod);
}
/* XXX: Take into account all lod values.
@@ -2300,9 +2404,14 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
*/
if (level0 >= (int) psview->u.tex.last_level) {
int j;
- for (j = 0; j < TGSI_QUAD_SIZE; j++)
- min_filter(sp_sview, sp_samp, s[j], t[j], p[j], psview->u.tex.last_level,
- sp_sview->faces[j], &rgba[0][j]);
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ args.s = s[j];
+ args.t = t[j];
+ args.p = p[j];
+ args.level = psview->u.tex.last_level;
+ args.face_id = sp_sview->faces[j];
+ min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
+ }
}
else {
/* don't bother interpolating between multiple LODs; it doesn't
@@ -2334,29 +2443,33 @@ mip_filter_linear_2d_linear_repeat_POT(
const float p[TGSI_QUAD_SIZE],
const float c0[TGSI_QUAD_SIZE],
const float lod_in[TGSI_QUAD_SIZE],
- enum tgsi_sampler_control control,
+ const struct filter_args *filt_args,
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
{
const struct pipe_sampler_view *psview = &sp_sview->base;
int j;
float lod[TGSI_QUAD_SIZE];
- compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, control, lod);
+ compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
int level0 = psview->u.tex.first_level + (int)lod[j];
-
+ struct img_filter_args args;
/* Catches both negative and large values of level0:
*/
+ args.s = s[j];
+ args.t = t[j];
+ args.p = p[j];
+ args.face_id = sp_sview->faces[j];
+ args.offset = filt_args->offset;
+ args.gather_only = filt_args->control == tgsi_sampler_gather;
if ((unsigned)level0 >= psview->u.tex.last_level) {
if (level0 < 0)
- img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, s[j], t[j], p[j],
- psview->u.tex.first_level,
- sp_sview->faces[j], &rgba[0][j]);
+ args.level = psview->u.tex.first_level;
else
- img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, s[j], t[j], p[j],
- psview->u.tex.last_level,
- sp_sview->faces[j], &rgba[0][j]);
+ args.level = psview->u.tex.last_level;
+ img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, &args,
+ &rgba[0][j]);
}
else {
@@ -2364,10 +2477,10 @@ mip_filter_linear_2d_linear_repeat_POT(
float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
int c;
- img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, s[j], t[j], p[j], level0,
- sp_sview->faces[j], &rgbax[0][0]);
- img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, s[j], t[j], p[j], level0+1,
- sp_sview->faces[j], &rgbax[0][1]);
+ args.level = level0;
+ img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, &args, &rgbax[0][0]);
+ args.level = level0+1;
+ img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, &args, &rgbax[0][1]);
for (c = 0; c < TGSI_NUM_CHANNELS; c++)
rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]);
@@ -2395,11 +2508,12 @@ sample_compare(struct sp_sampler_view *sp_sview,
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
{
const struct pipe_sampler_state *sampler = &sp_samp->base;
- int j;
- int k[4];
+ int j, v;
+ int k[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
float pc[4];
const struct util_format_description *format_desc;
unsigned chan_type;
+ bool is_gather = (control == tgsi_sampler_gather);
/**
* Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
@@ -2408,13 +2522,13 @@ sample_compare(struct sp_sampler_view *sp_sview,
* RGBA channels. We look at the red channel here.
*/
- if (sp_sview->base.texture->target == PIPE_TEXTURE_2D_ARRAY ||
- sp_sview->base.texture->target == PIPE_TEXTURE_CUBE) {
+ if (sp_sview->base.target == PIPE_TEXTURE_2D_ARRAY ||
+ sp_sview->base.target == PIPE_TEXTURE_CUBE) {
pc[0] = c0[0];
pc[1] = c0[1];
pc[2] = c0[2];
pc[3] = c0[3];
- } else if (sp_sview->base.texture->target == PIPE_TEXTURE_CUBE_ARRAY) {
+ } else if (sp_sview->base.target == PIPE_TEXTURE_CUBE_ARRAY) {
pc[0] = c1[0];
pc[1] = c1[1];
pc[2] = c1[2];
@@ -2443,65 +2557,74 @@ sample_compare(struct sp_sampler_view *sp_sview,
pc[3] = CLAMP(pc[3], 0.0F, 1.0F);
}
- /* compare four texcoords vs. four texture samples */
- switch (sampler->compare_func) {
- case PIPE_FUNC_LESS:
- k[0] = pc[0] < rgba[0][0];
- k[1] = pc[1] < rgba[0][1];
- k[2] = pc[2] < rgba[0][2];
- k[3] = pc[3] < rgba[0][3];
- break;
- case PIPE_FUNC_LEQUAL:
- k[0] = pc[0] <= rgba[0][0];
- k[1] = pc[1] <= rgba[0][1];
- k[2] = pc[2] <= rgba[0][2];
- k[3] = pc[3] <= rgba[0][3];
- break;
- case PIPE_FUNC_GREATER:
- k[0] = pc[0] > rgba[0][0];
- k[1] = pc[1] > rgba[0][1];
- k[2] = pc[2] > rgba[0][2];
- k[3] = pc[3] > rgba[0][3];
- break;
- case PIPE_FUNC_GEQUAL:
- k[0] = pc[0] >= rgba[0][0];
- k[1] = pc[1] >= rgba[0][1];
- k[2] = pc[2] >= rgba[0][2];
- k[3] = pc[3] >= rgba[0][3];
- break;
- case PIPE_FUNC_EQUAL:
- k[0] = pc[0] == rgba[0][0];
- k[1] = pc[1] == rgba[0][1];
- k[2] = pc[2] == rgba[0][2];
- k[3] = pc[3] == rgba[0][3];
- break;
- case PIPE_FUNC_NOTEQUAL:
- k[0] = pc[0] != rgba[0][0];
- k[1] = pc[1] != rgba[0][1];
- k[2] = pc[2] != rgba[0][2];
- k[3] = pc[3] != rgba[0][3];
- break;
- case PIPE_FUNC_ALWAYS:
- k[0] = k[1] = k[2] = k[3] = 1;
- break;
- case PIPE_FUNC_NEVER:
- k[0] = k[1] = k[2] = k[3] = 0;
- break;
- default:
- k[0] = k[1] = k[2] = k[3] = 0;
- assert(0);
- break;
+ for (v = 0; v < (is_gather ? TGSI_NUM_CHANNELS : 1); v++) {
+ /* compare four texcoords vs. four texture samples */
+ switch (sampler->compare_func) {
+ case PIPE_FUNC_LESS:
+ k[v][0] = pc[0] < rgba[v][0];
+ k[v][1] = pc[1] < rgba[v][1];
+ k[v][2] = pc[2] < rgba[v][2];
+ k[v][3] = pc[3] < rgba[v][3];
+ break;
+ case PIPE_FUNC_LEQUAL:
+ k[v][0] = pc[0] <= rgba[v][0];
+ k[v][1] = pc[1] <= rgba[v][1];
+ k[v][2] = pc[2] <= rgba[v][2];
+ k[v][3] = pc[3] <= rgba[v][3];
+ break;
+ case PIPE_FUNC_GREATER:
+ k[v][0] = pc[0] > rgba[v][0];
+ k[v][1] = pc[1] > rgba[v][1];
+ k[v][2] = pc[2] > rgba[v][2];
+ k[v][3] = pc[3] > rgba[v][3];
+ break;
+ case PIPE_FUNC_GEQUAL:
+ k[v][0] = pc[0] >= rgba[v][0];
+ k[v][1] = pc[1] >= rgba[v][1];
+ k[v][2] = pc[2] >= rgba[v][2];
+ k[v][3] = pc[3] >= rgba[v][3];
+ break;
+ case PIPE_FUNC_EQUAL:
+ k[v][0] = pc[0] == rgba[v][0];
+ k[v][1] = pc[1] == rgba[v][1];
+ k[v][2] = pc[2] == rgba[v][2];
+ k[v][3] = pc[3] == rgba[v][3];
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ k[v][0] = pc[0] != rgba[v][0];
+ k[v][1] = pc[1] != rgba[v][1];
+ k[v][2] = pc[2] != rgba[v][2];
+ k[v][3] = pc[3] != rgba[v][3];
+ break;
+ case PIPE_FUNC_ALWAYS:
+ k[v][0] = k[v][1] = k[v][2] = k[v][3] = 1;
+ break;
+ case PIPE_FUNC_NEVER:
+ k[v][0] = k[v][1] = k[v][2] = k[v][3] = 0;
+ break;
+ default:
+ k[v][0] = k[v][1] = k[v][2] = k[v][3] = 0;
+ assert(0);
+ break;
+ }
}
- for (j = 0; j < TGSI_QUAD_SIZE; j++) {
- rgba[0][j] = k[j];
- rgba[1][j] = k[j];
- rgba[2][j] = k[j];
- rgba[3][j] = 1.0F;
+ if (is_gather) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ for (v = 0; v < TGSI_NUM_CHANNELS; v++) {
+ rgba[v][j] = k[v][j];
+ }
+ }
+ } else {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ rgba[0][j] = k[0][j];
+ rgba[1][j] = k[0][j];
+ rgba[2][j] = k[0][j];
+ rgba[3][j] = 1.0F;
+ }
}
}
-
static void
do_swizzling(const struct pipe_sampler_view *sview,
float in[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
@@ -2679,9 +2802,9 @@ any_swizzle(const struct pipe_sampler_view *view)
static img_filter_func
get_img_filter(const struct sp_sampler_view *sp_sview,
const struct pipe_sampler_state *sampler,
- unsigned filter)
+ unsigned filter, bool gather)
{
- switch (sp_sview->base.texture->target) {
+ switch (sp_sview->base.target) {
case PIPE_BUFFER:
case PIPE_TEXTURE_1D:
if (filter == PIPE_TEX_FILTER_NEAREST)
@@ -2699,7 +2822,7 @@ get_img_filter(const struct sp_sampler_view *sp_sview,
case PIPE_TEXTURE_RECT:
/* Try for fast path:
*/
- if (sp_sview->pot2d &&
+ if (!gather && sp_sview->pot2d &&
sampler->wrap_s == sampler->wrap_t &&
sampler->normalized_coords)
{
@@ -2769,35 +2892,38 @@ sample_mip(struct sp_sampler_view *sp_sview,
const float p[TGSI_QUAD_SIZE],
const float c0[TGSI_QUAD_SIZE],
const float lod[TGSI_QUAD_SIZE],
- enum tgsi_sampler_control control,
+ const struct filter_args *filt_args,
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
{
mip_filter_func mip_filter;
img_filter_func min_img_filter = NULL;
img_filter_func mag_img_filter = NULL;
- if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
+ if (filt_args->control == tgsi_sampler_gather) {
+ mip_filter = mip_filter_nearest;
+ min_img_filter = get_img_filter(sp_sview, &sp_samp->base, PIPE_TEX_FILTER_LINEAR, true);
+ } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
mip_filter = mip_filter_linear_2d_linear_repeat_POT;
}
else {
mip_filter = sp_samp->mip_filter;
- min_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->min_img_filter);
+ min_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->min_img_filter, false);
if (sp_samp->min_mag_equal) {
mag_img_filter = min_img_filter;
}
else {
- mag_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->base.mag_img_filter);
+ mag_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->base.mag_img_filter, false);
}
}
mip_filter(sp_sview, sp_samp, min_img_filter, mag_img_filter,
- s, t, p, c0, lod, control, rgba);
+ s, t, p, c0, lod, filt_args, rgba);
if (sp_samp->base.compare_mode != PIPE_TEX_COMPARE_NONE) {
- sample_compare(sp_sview, sp_samp, s, t, p, c0, lod, control, rgba);
+ sample_compare(sp_sview, sp_samp, s, t, p, c0, lod, filt_args->control, rgba);
}
- if (sp_sview->need_swizzle) {
+ if (sp_sview->need_swizzle && filt_args->control != tgsi_sampler_gather) {
float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
memcpy(rgba_temp, rgba, sizeof(rgba_temp));
do_swizzling(&sp_sview->base, rgba_temp, rgba);
@@ -2818,7 +2944,7 @@ sample_cube(struct sp_sampler_view *sp_sview,
const float p[TGSI_QUAD_SIZE],
const float c0[TGSI_QUAD_SIZE],
const float c1[TGSI_QUAD_SIZE],
- enum tgsi_sampler_control control,
+ const struct filter_args *filt_args,
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
{
unsigned j;
@@ -2896,7 +3022,7 @@ sample_cube(struct sp_sampler_view *sp_sview,
}
}
- sample_mip(sp_sview, sp_samp, ssss, tttt, pppp, c0, c1, control, rgba);
+ sample_mip(sp_sview, sp_samp, ssss, tttt, pppp, c0, c1, filt_args, rgba);
}
@@ -2907,7 +3033,7 @@ sp_get_dims(struct sp_sampler_view *sp_sview, int level,
const struct pipe_sampler_view *view = &sp_sview->base;
const struct pipe_resource *texture = view->texture;
- if (texture->target == PIPE_BUFFER) {
+ if (view->target == PIPE_BUFFER) {
dims[0] = (view->u.buf.last_element - view->u.buf.first_element) + 1;
/* the other values are undefined, but let's avoid potential valgrind
* warnings.
@@ -2924,7 +3050,7 @@ sp_get_dims(struct sp_sampler_view *sp_sview, int level,
dims[3] = view->u.tex.last_level - view->u.tex.first_level + 1;
dims[0] = u_minify(texture->width0, level);
- switch(texture->target) {
+ switch (view->target) {
case PIPE_TEXTURE_1D_ARRAY:
dims[1] = view->u.tex.last_layer - view->u.tex.first_layer + 1;
/* fallthrough */
@@ -2975,13 +3101,16 @@ sp_get_texels(struct sp_sampler_view *sp_sview,
addr.value = 0;
/* TODO write a better test for LOD */
- addr.bits.level = lod[0];
+ addr.bits.level = sp_sview->base.target == PIPE_BUFFER ? 0 :
+ CLAMP(lod[0] + sp_sview->base.u.tex.first_level,
+ sp_sview->base.u.tex.first_level,
+ sp_sview->base.u.tex.last_level);
width = u_minify(texture->width0, addr.bits.level);
height = u_minify(texture->height0, addr.bits.level);
depth = u_minify(texture->depth0, addr.bits.level);
- switch(texture->target) {
+ switch (sp_sview->base.target) {
case PIPE_BUFFER:
case PIPE_TEXTURE_1D:
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
@@ -2995,7 +3124,8 @@ sp_get_texels(struct sp_sampler_view *sp_sview,
case PIPE_TEXTURE_1D_ARRAY:
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
- int y = CLAMP(v_j[j], sp_sview->base.u.tex.first_layer, sp_sview->base.u.tex.last_layer);
+ int y = CLAMP(v_j[j], sp_sview->base.u.tex.first_layer,
+ sp_sview->base.u.tex.last_layer);
tx = get_texel_2d_no_border(sp_sview, addr, x, y);
for (c = 0; c < 4; c++) {
rgba[c][j] = tx[c];
@@ -3017,7 +3147,8 @@ sp_get_texels(struct sp_sampler_view *sp_sview,
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
- int layer = CLAMP(v_k[j], sp_sview->base.u.tex.first_layer, sp_sview->base.u.tex.last_layer);
+ int layer = CLAMP(v_k[j], sp_sview->base.u.tex.first_layer,
+ sp_sview->base.u.tex.last_layer);
tx = get_texel_3d_no_border(sp_sview, addr, x, y, layer);
for (c = 0; c < 4; c++) {
rgba[c][j] = tx[c];
@@ -3140,7 +3271,7 @@ softpipe_get_lambda_func(const struct pipe_sampler_view *view, unsigned shader)
if (shader != PIPE_SHADER_FRAGMENT)
return compute_lambda_vert;
- switch (view->texture->target) {
+ switch (view->target) {
case PIPE_BUFFER:
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_1D_ARRAY:
@@ -3176,19 +3307,49 @@ softpipe_create_sampler_view(struct pipe_context *pipe,
pipe_resource_reference(&view->texture, resource);
view->context = pipe;
+#ifdef DEBUG
+ /*
+ * This is possibly too lenient, but the primary reason is just
+ * to catch state trackers which forget to initialize this, so
+ * it only catches clearly impossible view targets.
+ */
+ if (view->target != resource->target) {
+ if (view->target == PIPE_TEXTURE_1D)
+ assert(resource->target == PIPE_TEXTURE_1D_ARRAY);
+ else if (view->target == PIPE_TEXTURE_1D_ARRAY)
+ assert(resource->target == PIPE_TEXTURE_1D);
+ else if (view->target == PIPE_TEXTURE_2D)
+ assert(resource->target == PIPE_TEXTURE_2D_ARRAY ||
+ resource->target == PIPE_TEXTURE_CUBE ||
+ resource->target == PIPE_TEXTURE_CUBE_ARRAY);
+ else if (view->target == PIPE_TEXTURE_2D_ARRAY)
+ assert(resource->target == PIPE_TEXTURE_2D ||
+ resource->target == PIPE_TEXTURE_CUBE ||
+ resource->target == PIPE_TEXTURE_CUBE_ARRAY);
+ else if (view->target == PIPE_TEXTURE_CUBE)
+ assert(resource->target == PIPE_TEXTURE_CUBE_ARRAY ||
+ resource->target == PIPE_TEXTURE_2D_ARRAY);
+ else if (view->target == PIPE_TEXTURE_CUBE_ARRAY)
+ assert(resource->target == PIPE_TEXTURE_CUBE ||
+ resource->target == PIPE_TEXTURE_2D_ARRAY);
+ else
+ assert(0);
+ }
+#endif
+
if (any_swizzle(view)) {
sview->need_swizzle = TRUE;
}
- if (resource->target == PIPE_TEXTURE_CUBE ||
- resource->target == PIPE_TEXTURE_CUBE_ARRAY)
+ if (view->target == PIPE_TEXTURE_CUBE ||
+ view->target == PIPE_TEXTURE_CUBE_ARRAY)
sview->get_samples = sample_cube;
else {
sview->get_samples = sample_mip;
}
sview->pot2d = spr->pot &&
- (resource->target == PIPE_TEXTURE_2D ||
- resource->target == PIPE_TEXTURE_RECT);
+ (view->target == PIPE_TEXTURE_2D ||
+ view->target == PIPE_TEXTURE_RECT);
sview->xpot = util_logbase2( resource->width0 );
sview->ypot = util_logbase2( resource->height0 );
@@ -3230,7 +3391,7 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
{
struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
-
+ struct filter_args filt_args;
assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
assert(sampler_index < PIPE_MAX_SAMPLERS);
assert(sp_samp->sp_sampler[sampler_index]);
@@ -3244,9 +3405,12 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
}
return;
}
+
+ filt_args.control = control;
+ filt_args.offset = offset;
sp_samp->sp_sview[sview_index].get_samples(&sp_samp->sp_sview[sview_index],
sp_samp->sp_sampler[sampler_index],
- s, t, p, c0, lod, control, rgba);
+ s, t, p, c0, lod, &filt_args, rgba);
}
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h
index 00a97c5186b..7d1aafc4473 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.h
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.h
@@ -38,10 +38,12 @@ struct sp_sampler;
typedef void (*wrap_nearest_func)(float s,
unsigned size,
+ int offset,
int *icoord);
typedef void (*wrap_linear_func)(float s,
unsigned size,
+ int offset,
int *icoord0,
int *icoord1,
float *w);
@@ -51,15 +53,27 @@ typedef float (*compute_lambda_func)(const struct sp_sampler_view *sp_sview,
const float t[TGSI_QUAD_SIZE],
const float p[TGSI_QUAD_SIZE]);
+struct img_filter_args {
+ float s;
+ float t;
+ float p;
+ unsigned level;
+ unsigned face_id;
+ const int8_t *offset;
+ bool gather_only;
+ int gather_comp;
+};
+
typedef void (*img_filter_func)(struct sp_sampler_view *sp_sview,
struct sp_sampler *sp_samp,
- float s,
- float t,
- float p,
- unsigned level,
- unsigned face_id,
+ const struct img_filter_args *args,
float *rgba);
+struct filter_args {
+ enum tgsi_sampler_control control;
+ const int8_t *offset;
+};
+
typedef void (*mip_filter_func)(struct sp_sampler_view *sp_sview,
struct sp_sampler *sp_samp,
img_filter_func min_filter,
@@ -69,7 +83,7 @@ typedef void (*mip_filter_func)(struct sp_sampler_view *sp_sview,
const float p[TGSI_QUAD_SIZE],
const float c0[TGSI_QUAD_SIZE],
const float lod[TGSI_QUAD_SIZE],
- enum tgsi_sampler_control control,
+ const struct filter_args *args,
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
@@ -80,7 +94,7 @@ typedef void (*filter_func)(struct sp_sampler_view *sp_sview,
const float p[TGSI_QUAD_SIZE],
const float c0[TGSI_QUAD_SIZE],
const float lod[TGSI_QUAD_SIZE],
- enum tgsi_sampler_control control,
+ const struct filter_args *args,
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
index ab8ba60849a..4a421a8f882 100644
--- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
+++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
@@ -151,7 +151,7 @@ sp_tex_tile_cache_set_sampler_view(struct softpipe_tex_tile_cache *tc,
tc->entries[i].addr.bits.invalid = 1;
}
- tc->tex_face = -1; /* any invalid value here */
+ tc->tex_z = -1; /* any invalid value here */
}
}
@@ -172,7 +172,7 @@ sp_flush_tex_tile_cache(struct softpipe_tex_tile_cache *tc)
for (pos = 0; pos < Elements(tc->entries); pos++) {
tc->entries[pos].addr.bits.invalid = 1;
}
- tc->tex_face = -1;
+ tc->tex_z = -1;
}
}
@@ -190,8 +190,7 @@ tex_cache_pos( union tex_tile_address addr )
{
uint entry = (addr.bits.x +
addr.bits.y * 9 +
- addr.bits.z * 3 +
- addr.bits.face +
+ addr.bits.z +
addr.bits.level * 7);
return entry % NUM_TEX_TILE_ENTRIES;
@@ -226,7 +225,6 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc,
/* check if we need to get a new transfer */
if (!tc->tex_trans ||
- tc->tex_face != addr.bits.face ||
tc->tex_level != addr.bits.level ||
tc->tex_z != addr.bits.z) {
/* get new transfer (view into texture) */
@@ -245,7 +243,7 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc,
}
else {
height = u_minify(tc->texture->height0, addr.bits.level);
- layer = addr.bits.face + addr.bits.z;
+ layer = addr.bits.z;
}
tc->tex_trans_map =
@@ -255,7 +253,6 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc,
PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED,
0, 0, width, height, &tc->tex_trans);
- tc->tex_face = addr.bits.face;
tc->tex_level = addr.bits.level;
tc->tex_z = addr.bits.z;
}
diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h
index 4eb42460552..2233effc439 100644
--- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h
+++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h
@@ -55,7 +55,6 @@ union tex_tile_address {
unsigned x:TEX_ADDR_BITS; /* 16K / TILE_SIZE */
unsigned y:TEX_ADDR_BITS; /* 16K / TILE_SIZE */
unsigned z:TEX_Z_BITS; /* 16K -- z not tiled */
- unsigned face:3;
unsigned level:4;
unsigned invalid:1;
} bits;
@@ -94,7 +93,7 @@ struct softpipe_tex_tile_cache
struct pipe_transfer *tex_trans;
void *tex_trans_map;
- int tex_face, tex_level, tex_z;
+ int tex_level, tex_z;
unsigned swizzle_r;
unsigned swizzle_g;
@@ -141,7 +140,6 @@ tex_tile_address( unsigned x,
addr.bits.x = x / TEX_TILE_SIZE;
addr.bits.y = y / TEX_TILE_SIZE;
addr.bits.z = z;
- addr.bits.face = face;
addr.bits.level = level;
return addr;
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index b75f0386449..56e486786df 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -308,6 +308,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
return 1;
case PIPE_CAP_UMA:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
+ case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
return 0;
}
@@ -376,6 +377,7 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0;
}
/* If we get here, we failed to handle a cap above */
@@ -433,6 +435,7 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0;
}
/* If we get here, we failed to handle a cap above */
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
index 7a12b52e2dd..bac956066a5 100644
--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -1900,7 +1900,7 @@ emit_tex(struct svga_shader_emitter *emit,
emit->key.fkey.tex[unit].swizzle_b != PIPE_SWIZZLE_BLUE ||
emit->key.fkey.tex[unit].swizzle_a != PIPE_SWIZZLE_ALPHA);
- boolean saturate = insn->Instruction.Saturate != TGSI_SAT_NONE;
+ boolean saturate = insn->Instruction.Saturate;
/* If doing compare processing or tex swizzle or saturation, we need to put
* the fetched color into a temporary so it can be used as a source later on.
diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index 0b56517e696..0013c963e7a 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -553,6 +553,8 @@ trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe,
TRACE_SHADER_STATE(fs)
TRACE_SHADER_STATE(vs)
TRACE_SHADER_STATE(gs)
+TRACE_SHADER_STATE(tcs)
+TRACE_SHADER_STATE(tes)
#undef TRACE_SHADER_STATE
@@ -1508,6 +1510,23 @@ static void trace_context_memory_barrier(struct pipe_context *_context,
}
+static void trace_context_set_tess_state(struct pipe_context *_context,
+ const float default_outer_level[4],
+ const float default_inner_level[2])
+{
+ struct trace_context *tr_context = trace_context(_context);
+ struct pipe_context *context = tr_context->pipe;
+
+ trace_dump_call_begin("pipe_context", "set_tess_state");
+ trace_dump_arg(ptr, context);
+ trace_dump_arg_array(float, default_outer_level, 4);
+ trace_dump_arg_array(float, default_inner_level, 2);
+ trace_dump_call_end();
+
+ context->set_tess_state(context, default_outer_level, default_inner_level);
+}
+
+
static const struct debug_named_value rbug_blocker_flags[] = {
{"before", 1, NULL},
{"after", 2, NULL},
@@ -1566,6 +1585,12 @@ trace_context_create(struct trace_screen *tr_scr,
TR_CTX_INIT(create_gs_state);
TR_CTX_INIT(bind_gs_state);
TR_CTX_INIT(delete_gs_state);
+ TR_CTX_INIT(create_tcs_state);
+ TR_CTX_INIT(bind_tcs_state);
+ TR_CTX_INIT(delete_tcs_state);
+ TR_CTX_INIT(create_tes_state);
+ TR_CTX_INIT(bind_tes_state);
+ TR_CTX_INIT(delete_tes_state);
TR_CTX_INIT(create_vertex_elements_state);
TR_CTX_INIT(bind_vertex_elements_state);
TR_CTX_INIT(delete_vertex_elements_state);
@@ -1597,6 +1622,7 @@ trace_context_create(struct trace_screen *tr_scr,
TR_CTX_INIT(flush);
TR_CTX_INIT(texture_barrier);
TR_CTX_INIT(memory_barrier);
+ TR_CTX_INIT(set_tess_state);
TR_CTX_INIT(transfer_map);
TR_CTX_INIT(transfer_unmap);
diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c
index 71273380434..9bf4a722d80 100644
--- a/src/gallium/drivers/trace/tr_dump_state.c
+++ b/src/gallium/drivers/trace/tr_dump_state.c
@@ -709,6 +709,8 @@ void trace_dump_draw_info(const struct pipe_draw_info *state)
trace_dump_member(uint, state, start_instance);
trace_dump_member(uint, state, instance_count);
+ trace_dump_member(uint, state, vertices_per_patch);
+
trace_dump_member(int, state, index_bias);
trace_dump_member(uint, state, min_index);
trace_dump_member(uint, state, max_index);
diff --git a/src/gallium/drivers/trace/tr_public.h b/src/gallium/drivers/trace/tr_public.h
index aee4937dd4f..b03133f8d97 100644
--- a/src/gallium/drivers/trace/tr_public.h
+++ b/src/gallium/drivers/trace/tr_public.h
@@ -28,6 +28,8 @@
#ifndef TR_PUBLIC_H
#define TR_PUBLIC_H
+#include "pipe/p_compiler.h"
+
#ifdef __cplusplus
extern "C" {
#endif
diff --git a/src/gallium/drivers/vc4/kernel/Makefile.am b/src/gallium/drivers/vc4/Android.mk
index 1ae5f1c2e83..f42a152aa8c 100644
--- a/src/gallium/drivers/vc4/kernel/Makefile.am
+++ b/src/gallium/drivers/vc4/Android.mk
@@ -1,4 +1,4 @@
-# Copyright © 2014 Broadcom
+# Copyright (C) 2014 Emil Velikov <[email protected]>
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
@@ -7,34 +7,31 @@
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
-include Makefile.sources
-include $(top_srcdir)/src/gallium/Automake.inc
+LOCAL_PATH := $(call my-dir)
-if USE_VC4_SIMULATOR
-SIM_CFLAGS = -DUSE_VC4_SIMULATOR=1
-endif
+# get C_SOURCES
+include $(LOCAL_PATH)/Makefile.sources
-AM_CFLAGS = \
- $(LIBDRM_CFLAGS) \
- $(GALLIUM_DRIVER_CFLAGS) \
- $(SIM_CFLAGS) \
- -I$(top_srcdir)/src/mesa/ \
- -I$(srcdir)/../ \
- $()
+include $(CLEAR_VARS)
-noinst_LTLIBRARIES = libvc4_kernel.la
+LOCAL_SRC_FILES := \
+ $(C_SOURCES)
-libvc4_kernel_la_SOURCES = $(C_SOURCES)
-libvc4_kernel_la_LDFLAGS = $(SIM_LDFLAGS)
+LOCAL_SHARED_LIBRARIES := libdrm
+# We need libmesa_glsl to get NIR's generated include directories.
+LOCAL_STATIC_LIBRARIES := libmesa_glsl
+LOCAL_MODULE := libmesa_pipe_vc4
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/src/gallium/drivers/vc4/Makefile.am b/src/gallium/drivers/vc4/Makefile.am
index 3fc591f10c1..3f62ce21a9f 100644
--- a/src/gallium/drivers/vc4/Makefile.am
+++ b/src/gallium/drivers/vc4/Makefile.am
@@ -19,7 +19,7 @@
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
-SUBDIRS = kernel
+AUTOMAKE_OPTIONS = subdir-objects
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
@@ -39,5 +39,5 @@ AM_CFLAGS = \
noinst_LTLIBRARIES = libvc4.la
libvc4_la_SOURCES = $(C_SOURCES)
-libvc4_la_LIBADD = $(SIM_LIB) kernel/libvc4_kernel.la
+libvc4_la_LIBADD = $(SIM_LIB)
libvc4_la_LDFLAGS = $(SIM_LDFLAGS)
diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources
index 49474df3548..1eb029e67e7 100644
--- a/src/gallium/drivers/vc4/Makefile.sources
+++ b/src/gallium/drivers/vc4/Makefile.sources
@@ -1,4 +1,10 @@
C_SOURCES := \
+ kernel/vc4_drv.h \
+ kernel/vc4_gem.c \
+ kernel/vc4_packet.h \
+ kernel/vc4_render_cl.c \
+ kernel/vc4_validate.c \
+ kernel/vc4_validate_shaders.c \
vc4_blit.c \
vc4_bufmgr.c \
vc4_bufmgr.h \
@@ -20,7 +26,6 @@ C_SOURCES := \
vc4_opt_dead_code.c \
vc4_opt_small_immediates.c \
vc4_opt_vpm_writes.c \
- vc4_packet.h \
vc4_program.c \
vc4_qir.c \
vc4_qir_lower_uniforms.c \
diff --git a/src/gallium/drivers/vc4/kernel/Makefile.sources b/src/gallium/drivers/vc4/kernel/Makefile.sources
deleted file mode 100644
index 7d17a898ebf..00000000000
--- a/src/gallium/drivers/vc4/kernel/Makefile.sources
+++ /dev/null
@@ -1,6 +0,0 @@
-C_SOURCES := \
- vc4_drv.h \
- vc4_gem.c \
- vc4_validate.c \
- vc4_validate_shaders.c \
- $()
diff --git a/src/gallium/drivers/vc4/kernel/vc4_drv.h b/src/gallium/drivers/vc4/kernel/vc4_drv.h
index 325f944bf25..1fd8aa9fb28 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_drv.h
+++ b/src/gallium/drivers/vc4/kernel/vc4_drv.h
@@ -28,8 +28,6 @@
enum vc4_bo_mode {
VC4_MODE_UNDECIDED,
- VC4_MODE_TILE_ALLOC,
- VC4_MODE_TSDA,
VC4_MODE_RENDER,
VC4_MODE_SHADER,
};
@@ -52,6 +50,11 @@ struct vc4_exec_info {
struct vc4_bo_exec_state *bo;
uint32_t bo_count;
+ /* List of other BOs used in the job that need to be released
+ * once the job is complete.
+ */
+ struct list_head unref_list;
+
/* Current unvalidated indices into @bo loaded by the non-hardware
* VC4_PACKET_GEM_HANDLES.
*/
@@ -83,14 +86,11 @@ struct vc4_exec_info {
uint32_t shader_state_count;
bool found_tile_binning_mode_config_packet;
- bool found_tile_rendering_mode_config_packet;
bool found_start_tile_binning_packet;
bool found_increment_semaphore_packet;
- bool found_wait_on_semaphore_packet;
uint8_t bin_tiles_x, bin_tiles_y;
- uint32_t fb_width, fb_height;
- uint32_t tile_alloc_init_block_size;
- struct drm_gem_cma_object *tile_alloc_bo;
+ struct drm_gem_cma_object *tile_bo;
+ uint32_t tile_alloc_offset;
/**
* Computed addresses pointing into exec_bo where we start the
@@ -157,13 +157,10 @@ struct vc4_validated_shader_info
/* vc4_validate.c */
int
-vc4_validate_cl(struct drm_device *dev,
- void *validated,
- void *unvalidated,
- uint32_t len,
- bool is_bin,
- bool has_bin,
- struct vc4_exec_info *exec);
+vc4_validate_bin_cl(struct drm_device *dev,
+ void *validated,
+ void *unvalidated,
+ struct vc4_exec_info *exec);
int
vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec);
@@ -171,4 +168,16 @@ vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec);
struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_cma_object *shader_obj);
+bool vc4_use_bo(struct vc4_exec_info *exec,
+ uint32_t hindex,
+ enum vc4_bo_mode mode,
+ struct drm_gem_cma_object **obj);
+
+int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec);
+
+bool vc4_check_tex_size(struct vc4_exec_info *exec,
+ struct drm_gem_cma_object *fbo,
+ uint32_t offset, uint8_t tiling_format,
+ uint32_t width, uint32_t height, uint8_t cpp);
+
#endif /* VC4_DRV_H */
diff --git a/src/gallium/drivers/vc4/kernel/vc4_gem.c b/src/gallium/drivers/vc4/kernel/vc4_gem.c
index ac29ab35dbc..e4b7fea5968 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_gem.c
+++ b/src/gallium/drivers/vc4/kernel/vc4_gem.c
@@ -25,24 +25,26 @@
#include "vc4_drv.h"
-int
-vc4_cl_validate(struct drm_device *dev, struct vc4_exec_info *exec)
+/*
+ * Copies in the user's binning command list and generates the validated bin
+ * CL, along with associated data (shader records, uniforms).
+ */
+static int
+vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
{
struct drm_vc4_submit_cl *args = exec->args;
void *temp = NULL;
- void *bin, *render;
+ void *bin;
int ret = 0;
uint32_t bin_offset = 0;
- uint32_t render_offset = bin_offset + args->bin_cl_size;
- uint32_t shader_rec_offset = roundup(render_offset +
- args->render_cl_size, 16);
+ uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size,
+ 16);
uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size;
uint32_t exec_size = uniforms_offset + args->uniforms_size;
uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
args->shader_rec_count);
- if (shader_rec_offset < render_offset ||
- uniforms_offset < shader_rec_offset ||
+ if (uniforms_offset < shader_rec_offset ||
exec_size < uniforms_offset ||
args->shader_rec_count >= (UINT_MAX /
sizeof(struct vc4_shader_state)) ||
@@ -66,7 +68,6 @@ vc4_cl_validate(struct drm_device *dev, struct vc4_exec_info *exec)
goto fail;
}
bin = temp + bin_offset;
- render = temp + render_offset;
exec->shader_rec_u = temp + shader_rec_offset;
exec->uniforms_u = temp + uniforms_offset;
exec->shader_state = temp + exec_size;
@@ -80,14 +81,6 @@ vc4_cl_validate(struct drm_device *dev, struct vc4_exec_info *exec)
goto fail;
}
- ret = copy_from_user(render,
- (void __user *)(uintptr_t)args->render_cl,
- args->render_cl_size);
- if (ret) {
- DRM_ERROR("Failed to copy in render cl\n");
- goto fail;
- }
-
ret = copy_from_user(exec->shader_rec_u,
(void __user *)(uintptr_t)args->shader_rec,
args->shader_rec_size);
@@ -114,8 +107,10 @@ vc4_cl_validate(struct drm_device *dev, struct vc4_exec_info *exec)
}
#endif
+ list_addtail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
+ &exec->unref_list);
+
exec->ct0ca = exec->exec_bo->paddr + bin_offset;
- exec->ct1ca = exec->exec_bo->paddr + render_offset;
exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
@@ -125,23 +120,10 @@ vc4_cl_validate(struct drm_device *dev, struct vc4_exec_info *exec)
exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
exec->uniforms_size = args->uniforms_size;
- ret = vc4_validate_cl(dev,
- exec->exec_bo->vaddr + bin_offset,
- bin,
- args->bin_cl_size,
- true,
- args->bin_cl_size != 0,
- exec);
- if (ret)
- goto fail;
-
- ret = vc4_validate_cl(dev,
- exec->exec_bo->vaddr + render_offset,
- render,
- args->render_cl_size,
- false,
- args->bin_cl_size != 0,
- exec);
+ ret = vc4_validate_bin_cl(dev,
+ exec->exec_bo->vaddr + bin_offset,
+ bin,
+ exec);
if (ret)
goto fail;
@@ -152,4 +134,25 @@ fail:
return ret;
}
+int
+vc4_cl_validate(struct drm_device *dev, struct vc4_exec_info *exec)
+{
+ int ret = 0;
+
+ if (exec->args->bin_cl_size != 0) {
+ ret = vc4_get_bcl(dev, exec);
+ if (ret)
+ goto fail;
+ } else {
+ exec->ct0ca = exec->ct0ea = 0;
+ }
+
+ ret = vc4_get_rcl(dev, exec);
+ if (ret)
+ goto fail;
+
+fail:
+ return ret;
+}
+
#endif /* USE_VC4_SIMULATOR */
diff --git a/src/gallium/drivers/vc4/vc4_packet.h b/src/gallium/drivers/vc4/kernel/vc4_packet.h
index 181f2e01dc9..88cfc0fa9f0 100644
--- a/src/gallium/drivers/vc4/vc4_packet.h
+++ b/src/gallium/drivers/vc4/kernel/vc4_packet.h
@@ -81,6 +81,38 @@ enum vc4_packet {
VC4_PACKET_GEM_HANDLES = 254,
} __attribute__ ((__packed__));
+#define VC4_PACKET_HALT_SIZE 1
+#define VC4_PACKET_NOP_SIZE 1
+#define VC4_PACKET_FLUSH_SIZE 1
+#define VC4_PACKET_FLUSH_ALL_SIZE 1
+#define VC4_PACKET_START_TILE_BINNING_SIZE 1
+#define VC4_PACKET_INCREMENT_SEMAPHORE_SIZE 1
+#define VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE 1
+#define VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE 5
+#define VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE 1
+#define VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF_SIZE 1
+#define VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE 7
+#define VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE 7
+#define VC4_PACKET_GL_INDEXED_PRIMITIVE_SIZE 14
+#define VC4_PACKET_GL_ARRAY_PRIMITIVE_SIZE 10
+#define VC4_PACKET_PRIMITIVE_LIST_FORMAT_SIZE 2
+#define VC4_PACKET_GL_SHADER_STATE_SIZE 5
+#define VC4_PACKET_NV_SHADER_STATE_SIZE 5
+#define VC4_PACKET_CONFIGURATION_BITS_SIZE 4
+#define VC4_PACKET_FLAT_SHADE_FLAGS_SIZE 5
+#define VC4_PACKET_POINT_SIZE_SIZE 5
+#define VC4_PACKET_LINE_WIDTH_SIZE 5
+#define VC4_PACKET_RHT_X_BOUNDARY_SIZE 3
+#define VC4_PACKET_DEPTH_OFFSET_SIZE 5
+#define VC4_PACKET_CLIP_WINDOW_SIZE 9
+#define VC4_PACKET_VIEWPORT_OFFSET_SIZE 5
+#define VC4_PACKET_CLIPPER_XY_SCALING_SIZE 9
+#define VC4_PACKET_CLIPPER_Z_SCALING_SIZE 9
+#define VC4_PACKET_TILE_BINNING_MODE_CONFIG_SIZE 16
+#define VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE 11
+#define VC4_PACKET_CLEAR_COLORS_SIZE 14
+#define VC4_PACKET_TILE_COORDINATES_SIZE 3
+#define VC4_PACKET_GEM_HANDLES_SIZE 9
#define VC4_MASK(high, low) (((1 << ((high) - (low) + 1)) - 1) << (low))
/* Using the GNU statement expression extension */
@@ -117,18 +149,19 @@ enum vc4_packet {
/** @{
*
- * byte 1 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
+ * byte 0-1 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
* VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
*/
-#define VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR (1 << 7)
-#define VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR (1 << 6)
-#define VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR (1 << 5)
-#define VC4_STORE_TILE_BUFFER_DISABLE_SWAP (1 << 4)
-
-#define VC4_LOADSTORE_TILE_BUFFER_RGBA8888 (0 << 0)
-#define VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER (1 << 0)
-#define VC4_LOADSTORE_TILE_BUFFER_BGR565 (2 << 0)
-#define VC4_LOADSTORE_TILE_BUFFER_MASK (3 << 0)
+#define VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR (1 << 15)
+#define VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR (1 << 14)
+#define VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR (1 << 13)
+#define VC4_STORE_TILE_BUFFER_DISABLE_SWAP (1 << 12)
+
+#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK VC4_MASK(9, 8)
+#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT 8
+#define VC4_LOADSTORE_TILE_BUFFER_RGBA8888 0
+#define VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER 1
+#define VC4_LOADSTORE_TILE_BUFFER_BGR565 2
/** @} */
/** @{
@@ -136,21 +169,24 @@ enum vc4_packet {
* byte 0 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
* VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
*/
+#define VC4_STORE_TILE_BUFFER_MODE_MASK VC4_MASK(7, 6)
+#define VC4_STORE_TILE_BUFFER_MODE_SHIFT 6
#define VC4_STORE_TILE_BUFFER_MODE_SAMPLE0 (0 << 6)
#define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X4 (1 << 6)
#define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X16 (2 << 6)
/** The values of the field are VC4_TILING_FORMAT_* */
-#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK (3 << 4)
-#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT 4
-
-
-#define VC4_LOADSTORE_TILE_BUFFER_NONE (0 << 0)
-#define VC4_LOADSTORE_TILE_BUFFER_COLOR (1 << 0)
-#define VC4_LOADSTORE_TILE_BUFFER_ZS (2 << 0)
-#define VC4_LOADSTORE_TILE_BUFFER_Z (3 << 0)
-#define VC4_LOADSTORE_TILE_BUFFER_VG_MASK (4 << 0)
-#define VC4_LOADSTORE_TILE_BUFFER_FULL (5 << 0)
+#define VC4_LOADSTORE_TILE_BUFFER_TILING_MASK VC4_MASK(5, 4)
+#define VC4_LOADSTORE_TILE_BUFFER_TILING_SHIFT 4
+
+#define VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK VC4_MASK(2, 0)
+#define VC4_LOADSTORE_TILE_BUFFER_BUFFER_SHIFT 0
+#define VC4_LOADSTORE_TILE_BUFFER_NONE 0
+#define VC4_LOADSTORE_TILE_BUFFER_COLOR 1
+#define VC4_LOADSTORE_TILE_BUFFER_ZS 2
+#define VC4_LOADSTORE_TILE_BUFFER_Z 3
+#define VC4_LOADSTORE_TILE_BUFFER_VG_MASK 4
+#define VC4_LOADSTORE_TILE_BUFFER_FULL 5
/** @} */
#define VC4_INDEX_BUFFER_U8 (0 << 4)
@@ -196,15 +232,19 @@ enum vc4_packet {
/** @{ bits in the last u8 of VC4_PACKET_TILE_BINNING_MODE_CONFIG */
#define VC4_BIN_CONFIG_DB_NON_MS (1 << 7)
-#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32 (0 << 5)
-#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_64 (1 << 5)
-#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128 (2 << 5)
-#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_256 (3 << 5)
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK VC4_MASK(6, 5)
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_SHIFT 5
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32 0
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_64 1
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128 2
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_256 3
-#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32 (0 << 3)
-#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_64 (1 << 3)
-#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_128 (2 << 3)
-#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_256 (3 << 3)
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK VC4_MASK(4, 3)
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_SHIFT 3
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32 0
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_64 1
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_128 2
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_256 3
#define VC4_BIN_CONFIG_AUTO_INIT_TSDA (1 << 2)
#define VC4_BIN_CONFIG_TILE_BUFFER_64BIT (1 << 1)
@@ -219,17 +259,18 @@ enum vc4_packet {
#define VC4_RENDER_CONFIG_ENABLE_VG_MASK (1 << 8)
/** The values of the field are VC4_TILING_FORMAT_* */
-#define VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK (3 << 6)
+#define VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK VC4_MASK(7, 6)
#define VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT 6
#define VC4_RENDER_CONFIG_DECIMATE_MODE_1X (0 << 4)
#define VC4_RENDER_CONFIG_DECIMATE_MODE_4X (1 << 4)
#define VC4_RENDER_CONFIG_DECIMATE_MODE_16X (2 << 4)
-#define VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED (0 << 2)
-#define VC4_RENDER_CONFIG_FORMAT_RGBA8888 (1 << 2)
-#define VC4_RENDER_CONFIG_FORMAT_BGR565 (2 << 2)
-#define VC4_RENDER_CONFIG_FORMAT_MASK (3 << 2)
+#define VC4_RENDER_CONFIG_FORMAT_MASK VC4_MASK(3, 2)
+#define VC4_RENDER_CONFIG_FORMAT_SHIFT 2
+#define VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED 0
+#define VC4_RENDER_CONFIG_FORMAT_RGBA8888 1
+#define VC4_RENDER_CONFIG_FORMAT_BGR565 2
#define VC4_RENDER_CONFIG_TILE_BUFFER_64BIT (1 << 1)
#define VC4_RENDER_CONFIG_MS_MODE_4X (1 << 0)
diff --git a/src/gallium/drivers/vc4/kernel/vc4_render_cl.c b/src/gallium/drivers/vc4/kernel/vc4_render_cl.c
new file mode 100644
index 00000000000..e2d907ad91f
--- /dev/null
+++ b/src/gallium/drivers/vc4/kernel/vc4_render_cl.c
@@ -0,0 +1,447 @@
+/*
+ * Copyright © 2014-2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * DOC: Render command list generation
+ *
+ * In the VC4 driver, render command list generation is performed by the
+ * kernel instead of userspace. We do this because validating a
+ * user-submitted command list is hard to get right and has high CPU overhead,
+ * while the number of valid configurations for render command lists is
+ * actually fairly low.
+ */
+
+#include "vc4_drv.h"
+#include "vc4_packet.h"
+
+struct vc4_rcl_setup {
+ struct drm_gem_cma_object *color_read;
+ struct drm_gem_cma_object *color_ms_write;
+ struct drm_gem_cma_object *zs_read;
+ struct drm_gem_cma_object *zs_write;
+
+ struct drm_gem_cma_object *rcl;
+ u32 next_offset;
+};
+
+static inline void rcl_u8(struct vc4_rcl_setup *setup, u8 val)
+{
+ *(u8 *)(setup->rcl->vaddr + setup->next_offset) = val;
+ setup->next_offset += 1;
+}
+
+static inline void rcl_u16(struct vc4_rcl_setup *setup, u16 val)
+{
+ *(u16 *)(setup->rcl->vaddr + setup->next_offset) = val;
+ setup->next_offset += 2;
+}
+
+static inline void rcl_u32(struct vc4_rcl_setup *setup, u32 val)
+{
+ *(u32 *)(setup->rcl->vaddr + setup->next_offset) = val;
+ setup->next_offset += 4;
+}
+
+
+/*
+ * Emits a no-op STORE_TILE_BUFFER_GENERAL.
+ *
+ * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of
+ * some sort before another load is triggered.
+ */
+static void vc4_store_before_load(struct vc4_rcl_setup *setup)
+{
+ rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
+ rcl_u16(setup,
+ VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_NONE,
+ VC4_LOADSTORE_TILE_BUFFER_BUFFER) |
+ VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR |
+ VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR |
+ VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR);
+ rcl_u32(setup, 0); /* no address, since we're in None mode */
+}
+
+/*
+ * Emits a PACKET_TILE_COORDINATES if one isn't already pending.
+ *
+ * The tile coordinates packet triggers a pending load if there is one, is
+ * used for clipping during rendering, and determines where loads/stores
+ * happen relative to their base address.
+ */
+static void vc4_tile_coordinates(struct vc4_rcl_setup *setup,
+ uint32_t x, uint32_t y)
+{
+ rcl_u8(setup, VC4_PACKET_TILE_COORDINATES);
+ rcl_u8(setup, x);
+ rcl_u8(setup, y);
+}
+
+static void emit_tile(struct vc4_exec_info *exec,
+ struct vc4_rcl_setup *setup,
+ uint8_t x, uint8_t y, bool first, bool last)
+{
+ bool has_bin = exec->args->bin_cl_size != 0;
+
+ /* Note that the load doesn't actually occur until the
+ * tile coords packet is processed, and only one load
+ * may be outstanding at a time.
+ */
+ if (setup->color_read) {
+ rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
+ rcl_u16(setup, exec->args->color_read.bits);
+ rcl_u32(setup,
+ setup->color_read->paddr +
+ exec->args->color_read.offset);
+ }
+
+ if (setup->zs_read) {
+ if (setup->color_read) {
+ /* Exec previous load. */
+ vc4_tile_coordinates(setup, x, y);
+ vc4_store_before_load(setup);
+ }
+
+ rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
+ rcl_u16(setup, exec->args->zs_read.bits);
+ rcl_u32(setup,
+ setup->zs_read->paddr + exec->args->zs_read.offset);
+ }
+
+ /* Clipping depends on tile coordinates having been
+ * emitted, so we always need one here.
+ */
+ vc4_tile_coordinates(setup, x, y);
+
+ /* Wait for the binner before jumping to the first
+ * tile's lists.
+ */
+ if (first && has_bin)
+ rcl_u8(setup, VC4_PACKET_WAIT_ON_SEMAPHORE);
+
+ if (has_bin) {
+ rcl_u8(setup, VC4_PACKET_BRANCH_TO_SUB_LIST);
+ rcl_u32(setup, (exec->tile_bo->paddr +
+ exec->tile_alloc_offset +
+ (y * exec->bin_tiles_x + x) * 32));
+ }
+
+ if (setup->zs_write) {
+ rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
+ rcl_u16(setup, exec->args->zs_write.bits |
+ (setup->color_ms_write ?
+ VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR : 0));
+ rcl_u32(setup,
+ (setup->zs_write->paddr + exec->args->zs_write.offset) |
+ ((last && !setup->color_ms_write) ?
+ VC4_LOADSTORE_TILE_BUFFER_EOF : 0));
+ }
+
+ if (setup->color_ms_write) {
+ if (setup->zs_write) {
+ /* Reset after previous store */
+ vc4_tile_coordinates(setup, x, y);
+ }
+
+ if (last)
+ rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF);
+ else
+ rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER);
+ }
+}
+
+static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
+ struct vc4_rcl_setup *setup)
+{
+ bool has_bin = exec->args->bin_cl_size != 0;
+ uint8_t min_x_tile = exec->args->min_x_tile;
+ uint8_t min_y_tile = exec->args->min_y_tile;
+ uint8_t max_x_tile = exec->args->max_x_tile;
+ uint8_t max_y_tile = exec->args->max_y_tile;
+ uint8_t xtiles = max_x_tile - min_x_tile + 1;
+ uint8_t ytiles = max_y_tile - min_y_tile + 1;
+ uint8_t x, y;
+ uint32_t size, loop_body_size;
+
+ size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE;
+ loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE;
+
+ if (exec->args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
+ size += VC4_PACKET_CLEAR_COLORS_SIZE +
+ VC4_PACKET_TILE_COORDINATES_SIZE +
+ VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
+ }
+
+ if (setup->color_read) {
+ loop_body_size += (VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE);
+ }
+ if (setup->zs_read) {
+ if (setup->color_read) {
+ loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE;
+ loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
+ }
+ loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;
+ }
+
+ if (has_bin) {
+ size += VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE;
+ loop_body_size += VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE;
+ }
+
+ if (setup->zs_write)
+ loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;
+ if (setup->color_ms_write) {
+ if (setup->zs_write)
+ loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE;
+ loop_body_size += VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE;
+ }
+ size += xtiles * ytiles * loop_body_size;
+
+ setup->rcl = drm_gem_cma_create(dev, size);
+ if (!setup->rcl)
+ return -ENOMEM;
+ list_addtail(&to_vc4_bo(&setup->rcl->base)->unref_head,
+ &exec->unref_list);
+
+ rcl_u8(setup, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
+ rcl_u32(setup,
+ (setup->color_ms_write ?
+ (setup->color_ms_write->paddr +
+ exec->args->color_ms_write.offset) :
+ 0));
+ rcl_u16(setup, exec->args->width);
+ rcl_u16(setup, exec->args->height);
+ rcl_u16(setup, exec->args->color_ms_write.bits);
+
+ /* The tile buffer gets cleared when the previous tile is stored. If
+ * the clear values changed between frames, then the tile buffer has
+ * stale clear values in it, so we have to do a store in None mode (no
+ * writes) so that we trigger the tile buffer clear.
+ */
+ if (exec->args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
+ rcl_u8(setup, VC4_PACKET_CLEAR_COLORS);
+ rcl_u32(setup, exec->args->clear_color[0]);
+ rcl_u32(setup, exec->args->clear_color[1]);
+ rcl_u32(setup, exec->args->clear_z);
+ rcl_u8(setup, exec->args->clear_s);
+
+ vc4_tile_coordinates(setup, 0, 0);
+
+ rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
+ rcl_u16(setup, VC4_LOADSTORE_TILE_BUFFER_NONE);
+ rcl_u32(setup, 0); /* no address, since we're in None mode */
+ }
+
+ for (y = min_y_tile; y <= max_y_tile; y++) {
+ for (x = min_x_tile; x <= max_x_tile; x++) {
+ bool first = (x == min_x_tile && y == min_y_tile);
+ bool last = (x == max_x_tile && y == max_y_tile);
+ emit_tile(exec, setup, x, y, first, last);
+ }
+ }
+
+ BUG_ON(setup->next_offset != size);
+ exec->ct1ca = setup->rcl->paddr;
+ exec->ct1ea = setup->rcl->paddr + setup->next_offset;
+
+ return 0;
+}
+
+static int vc4_rcl_surface_setup(struct vc4_exec_info *exec,
+ struct drm_gem_cma_object **obj,
+ struct drm_vc4_submit_rcl_surface *surf)
+{
+ uint8_t tiling = VC4_GET_FIELD(surf->bits,
+ VC4_LOADSTORE_TILE_BUFFER_TILING);
+ uint8_t buffer = VC4_GET_FIELD(surf->bits,
+ VC4_LOADSTORE_TILE_BUFFER_BUFFER);
+ uint8_t format = VC4_GET_FIELD(surf->bits,
+ VC4_LOADSTORE_TILE_BUFFER_FORMAT);
+ int cpp;
+
+ if (surf->pad != 0) {
+ DRM_ERROR("Padding unset\n");
+ return -EINVAL;
+ }
+
+ if (surf->hindex == ~0)
+ return 0;
+
+ if (!vc4_use_bo(exec, surf->hindex, VC4_MODE_RENDER, obj))
+ return -EINVAL;
+
+ if (surf->bits & ~(VC4_LOADSTORE_TILE_BUFFER_TILING_MASK |
+ VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK |
+ VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK)) {
+ DRM_ERROR("Unknown bits in load/store: 0x%04x\n",
+ surf->bits);
+ return -EINVAL;
+ }
+
+ if (tiling > VC4_TILING_FORMAT_LT) {
+ DRM_ERROR("Bad tiling format\n");
+ return -EINVAL;
+ }
+
+ if (buffer == VC4_LOADSTORE_TILE_BUFFER_ZS) {
+ if (format != 0) {
+ DRM_ERROR("No color format should be set for ZS\n");
+ return -EINVAL;
+ }
+ cpp = 4;
+ } else if (buffer == VC4_LOADSTORE_TILE_BUFFER_COLOR) {
+ switch (format) {
+ case VC4_LOADSTORE_TILE_BUFFER_BGR565:
+ case VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER:
+ cpp = 2;
+ break;
+ case VC4_LOADSTORE_TILE_BUFFER_RGBA8888:
+ cpp = 4;
+ break;
+ default:
+ DRM_ERROR("Bad tile buffer format\n");
+ return -EINVAL;
+ }
+ } else {
+ DRM_ERROR("Bad load/store buffer %d.\n", buffer);
+ return -EINVAL;
+ }
+
+ if (surf->offset & 0xf) {
+ DRM_ERROR("load/store buffer must be 16b aligned.\n");
+ return -EINVAL;
+ }
+
+ if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling,
+ exec->args->width, exec->args->height, cpp)) {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+vc4_rcl_ms_surface_setup(struct vc4_exec_info *exec,
+ struct drm_gem_cma_object **obj,
+ struct drm_vc4_submit_rcl_surface *surf)
+{
+ uint8_t tiling = VC4_GET_FIELD(surf->bits,
+ VC4_RENDER_CONFIG_MEMORY_FORMAT);
+ uint8_t format = VC4_GET_FIELD(surf->bits,
+ VC4_RENDER_CONFIG_FORMAT);
+ int cpp;
+
+ if (surf->pad != 0) {
+ DRM_ERROR("Padding unset\n");
+ return -EINVAL;
+ }
+
+ if (surf->bits & ~(VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK |
+ VC4_RENDER_CONFIG_FORMAT_MASK)) {
+ DRM_ERROR("Unknown bits in render config: 0x%04x\n",
+ surf->bits);
+ return -EINVAL;
+ }
+
+ if (surf->hindex == ~0)
+ return 0;
+
+ if (!vc4_use_bo(exec, surf->hindex, VC4_MODE_RENDER, obj))
+ return -EINVAL;
+
+ if (tiling > VC4_TILING_FORMAT_LT) {
+ DRM_ERROR("Bad tiling format\n");
+ return -EINVAL;
+ }
+
+ switch (format) {
+ case VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED:
+ case VC4_RENDER_CONFIG_FORMAT_BGR565:
+ cpp = 2;
+ break;
+ case VC4_RENDER_CONFIG_FORMAT_RGBA8888:
+ cpp = 4;
+ break;
+ default:
+ DRM_ERROR("Bad tile buffer format\n");
+ return -EINVAL;
+ }
+
+ if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling,
+ exec->args->width, exec->args->height, cpp)) {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec)
+{
+ struct vc4_rcl_setup setup = {0};
+ struct drm_vc4_submit_cl *args = exec->args;
+ bool has_bin = args->bin_cl_size != 0;
+ int ret;
+
+ if (args->min_x_tile > args->max_x_tile ||
+ args->min_y_tile > args->max_y_tile) {
+ DRM_ERROR("Bad render tile set (%d,%d)-(%d,%d)\n",
+ args->min_x_tile, args->min_y_tile,
+ args->max_x_tile, args->max_y_tile);
+ return -EINVAL;
+ }
+
+ if (has_bin &&
+ (args->max_x_tile > exec->bin_tiles_x ||
+ args->max_y_tile > exec->bin_tiles_y)) {
+ DRM_ERROR("Render tiles (%d,%d) outside of bin config (%d,%d)\n",
+ args->max_x_tile, args->max_y_tile,
+ exec->bin_tiles_x, exec->bin_tiles_y);
+ return -EINVAL;
+ }
+
+ ret = vc4_rcl_surface_setup(exec, &setup.color_read, &args->color_read);
+ if (ret)
+ return ret;
+
+ ret = vc4_rcl_ms_surface_setup(exec, &setup.color_ms_write,
+ &args->color_ms_write);
+ if (ret)
+ return ret;
+
+ ret = vc4_rcl_surface_setup(exec, &setup.zs_read, &args->zs_read);
+ if (ret)
+ return ret;
+
+ ret = vc4_rcl_surface_setup(exec, &setup.zs_write, &args->zs_write);
+ if (ret)
+ return ret;
+
+ /* We shouldn't even have the job submitted to us if there's no
+ * surface to write out.
+ */
+ if (!setup.color_ms_write && !setup.zs_write) {
+ DRM_ERROR("RCL requires color or Z/S write\n");
+ return -EINVAL;
+ }
+
+ return vc4_create_rcl_bo(dev, exec, &setup);
+}
diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate.c b/src/gallium/drivers/vc4/kernel/vc4_validate.c
index 2d04a4a7b9a..a0b67a7e50b 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_validate.c
+++ b/src/gallium/drivers/vc4/kernel/vc4_validate.c
@@ -94,7 +94,7 @@ size_is_lt(uint32_t width, uint32_t height, int cpp)
height <= 4 * utile_height(cpp));
}
-static bool
+bool
vc4_use_bo(struct vc4_exec_info *exec,
uint32_t hindex,
enum vc4_bo_mode mode,
@@ -147,33 +147,39 @@ gl_shader_rec_size(uint32_t pointer_bits)
return 36 + attribute_count * 8;
}
-static bool
-check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
- uint32_t offset, uint8_t tiling_format,
- uint32_t width, uint32_t height, uint8_t cpp)
+bool
+vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
+ uint32_t offset, uint8_t tiling_format,
+ uint32_t width, uint32_t height, uint8_t cpp)
{
uint32_t aligned_width, aligned_height, stride, size;
uint32_t utile_w = utile_width(cpp);
uint32_t utile_h = utile_height(cpp);
- /* The values are limited by the packet/texture parameter bitfields,
- * so we don't need to worry as much about integer overflow.
+ /* The shaded vertex format stores signed 12.4 fixed point
+ * (-2048,2047) offsets from the viewport center, so we should
+ * never have a render target larger than 4096. The texture
+ * unit can only sample from 2048x2048, so it's even more
+ * restricted. This lets us avoid worrying about overflow in
+ * our math.
*/
- BUG_ON(width > 65535);
- BUG_ON(height > 65535);
+ if (width > 4096 || height > 4096) {
+ DRM_ERROR("Surface dimesions (%d,%d) too large", width, height);
+ return false;
+ }
switch (tiling_format) {
case VC4_TILING_FORMAT_LINEAR:
- aligned_width = roundup(width, utile_w);
+ aligned_width = round_up(width, utile_w);
aligned_height = height;
break;
case VC4_TILING_FORMAT_T:
- aligned_width = roundup(width, utile_w * 8);
- aligned_height = roundup(height, utile_h * 8);
+ aligned_width = round_up(width, utile_w * 8);
+ aligned_height = round_up(height, utile_h * 8);
break;
case VC4_TILING_FORMAT_LT:
- aligned_width = roundup(width, utile_w);
- aligned_height = roundup(height, utile_h);
+ aligned_width = round_up(width, utile_w);
+ aligned_height = round_up(height, utile_h);
break;
default:
DRM_ERROR("buffer tiling %d unsupported\n", tiling_format);
@@ -181,13 +187,6 @@ check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
}
stride = aligned_width * cpp;
-
- if (INT_MAX / stride < aligned_height) {
- DRM_ERROR("Overflow in fbo size (%dx%d -> %dx%d)\n",
- width, height,
- aligned_width, aligned_height);
- return false;
- }
size = stride * aligned_height;
if (size + offset < size ||
@@ -249,122 +248,6 @@ validate_increment_semaphore(VALIDATE_ARGS)
}
static int
-validate_wait_on_semaphore(VALIDATE_ARGS)
-{
- if (exec->found_wait_on_semaphore_packet) {
- DRM_ERROR("Duplicate VC4_PACKET_WAIT_ON_SEMAPHORE\n");
- return -EINVAL;
- }
- exec->found_wait_on_semaphore_packet = true;
-
- if (!exec->found_increment_semaphore_packet) {
- DRM_ERROR("VC4_PACKET_WAIT_ON_SEMAPHORE without "
- "VC4_PACKET_INCREMENT_SEMAPHORE\n");
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int
-validate_branch_to_sublist(VALIDATE_ARGS)
-{
- struct drm_gem_cma_object *target;
- uint32_t offset;
-
- if (!vc4_use_handle(exec, 0, VC4_MODE_TILE_ALLOC, &target))
- return -EINVAL;
-
- if (target != exec->tile_alloc_bo) {
- DRM_ERROR("Jumping to BOs other than tile alloc unsupported\n");
- return -EINVAL;
- }
-
- if (!exec->found_wait_on_semaphore_packet) {
- DRM_ERROR("Jumping to tile alloc before binning finished.\n");
- return -EINVAL;
- }
-
- offset = *(uint32_t *)(untrusted + 0);
- if (offset % exec->tile_alloc_init_block_size ||
- offset / exec->tile_alloc_init_block_size >=
- exec->bin_tiles_x * exec->bin_tiles_y) {
- DRM_ERROR("VC4_PACKET_BRANCH_TO_SUB_LIST must jump to initial "
- "tile allocation space.\n");
- return -EINVAL;
- }
-
- *(uint32_t *)(validated + 0) = target->paddr + offset;
-
- return 0;
-}
-
-/**
- * validate_loadstore_tile_buffer_general() - Validation for
- * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL and
- * VC4_PACKET_STORE_TILE_BUFFER_GENERAL.
- *
- * The two packets are nearly the same, except for the TLB-clearing management
- * bits not being present for loads. Additionally, while stores are executed
- * immediately (using the current tile coordinates), loads are queued to be
- * executed when the tile coordinates packet occurs.
- *
- * Note that coordinates packets are validated to be within the declared
- * bin_x/y, which themselves are verified to match the rendering-configuration
- * FB width and height (which the hardware uses to clip loads and stores).
- */
-static int
-validate_loadstore_tile_buffer_general(VALIDATE_ARGS)
-{
- uint32_t packet_b0 = *(uint8_t *)(untrusted + 0);
- uint32_t packet_b1 = *(uint8_t *)(untrusted + 1);
- struct drm_gem_cma_object *fbo;
- uint32_t buffer_type = packet_b0 & 0xf;
- uint32_t untrusted_address, offset, cpp;
-
- switch (buffer_type) {
- case VC4_LOADSTORE_TILE_BUFFER_NONE:
- return 0;
- case VC4_LOADSTORE_TILE_BUFFER_COLOR:
- if ((packet_b1 & VC4_LOADSTORE_TILE_BUFFER_MASK) ==
- VC4_LOADSTORE_TILE_BUFFER_RGBA8888) {
- cpp = 4;
- } else {
- cpp = 2;
- }
- break;
-
- case VC4_LOADSTORE_TILE_BUFFER_Z:
- case VC4_LOADSTORE_TILE_BUFFER_ZS:
- cpp = 4;
- break;
-
- default:
- DRM_ERROR("Load/store type %d unsupported\n", buffer_type);
- return -EINVAL;
- }
-
- if (!vc4_use_handle(exec, 0, VC4_MODE_RENDER, &fbo))
- return -EINVAL;
-
- untrusted_address = *(uint32_t *)(untrusted + 2);
- offset = untrusted_address & ~0xf;
-
- if (!check_tex_size(exec, fbo, offset,
- ((packet_b0 &
- VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK) >>
- VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT),
- exec->fb_width, exec->fb_height, cpp)) {
- return -EINVAL;
- }
-
- *(uint32_t *)(validated + 2) = (offset + fbo->paddr +
- (untrusted_address & 0xf));
-
- return 0;
-}
-
-static int
validate_indexed_prim_list(VALIDATE_ARGS)
{
struct drm_gem_cma_object *ib;
@@ -492,14 +375,10 @@ validate_nv_shader_state(VALIDATE_ARGS)
static int
validate_tile_binning_config(VALIDATE_ARGS)
{
- struct drm_gem_cma_object *tile_allocation;
- struct drm_gem_cma_object *tile_state_data_array;
+ struct drm_device *dev = exec->exec_bo->base.dev;
uint8_t flags;
- uint32_t tile_allocation_size;
-
- if (!vc4_use_handle(exec, 0, VC4_MODE_TILE_ALLOC, &tile_allocation) ||
- !vc4_use_handle(exec, 1, VC4_MODE_TSDA, &tile_state_data_array))
- return -EINVAL;
+ uint32_t tile_state_size, tile_alloc_size;
+ uint32_t tile_count;
if (exec->found_tile_binning_mode_config_packet) {
DRM_ERROR("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
@@ -509,6 +388,7 @@ validate_tile_binning_config(VALIDATE_ARGS)
exec->bin_tiles_x = *(uint8_t *)(untrusted + 12);
exec->bin_tiles_y = *(uint8_t *)(untrusted + 13);
+ tile_count = exec->bin_tiles_x * exec->bin_tiles_y;
flags = *(uint8_t *)(untrusted + 14);
if (exec->bin_tiles_x == 0 ||
@@ -518,15 +398,6 @@ validate_tile_binning_config(VALIDATE_ARGS)
return -EINVAL;
}
- /* Our validation relies on the user not getting to set up their own
- * tile state/tile allocation BO contents.
- */
- if (!(flags & VC4_BIN_CONFIG_AUTO_INIT_TSDA)) {
- DRM_ERROR("binning config missing "
- "VC4_BIN_CONFIG_AUTO_INIT_TSDA\n");
- return -EINVAL;
- }
-
if (flags & (VC4_BIN_CONFIG_DB_NON_MS |
VC4_BIN_CONFIG_TILE_BUFFER_64BIT |
VC4_BIN_CONFIG_MS_MODE_4X)) {
@@ -534,94 +405,52 @@ validate_tile_binning_config(VALIDATE_ARGS)
return -EINVAL;
}
- if (*(uint32_t *)(untrusted + 0) != 0) {
- DRM_ERROR("tile allocation offset != 0 unsupported\n");
- return -EINVAL;
- }
- tile_allocation_size = *(uint32_t *)(untrusted + 4);
- if (tile_allocation_size > tile_allocation->base.size) {
- DRM_ERROR("tile allocation size %d > BO size %d\n",
- tile_allocation_size, tile_allocation->base.size);
- return -EINVAL;
- }
- *(uint32_t *)validated = tile_allocation->paddr;
- exec->tile_alloc_bo = tile_allocation;
-
- exec->tile_alloc_init_block_size = 1 << (5 + ((flags >> 5) & 3));
- if (exec->bin_tiles_x * exec->bin_tiles_y *
- exec->tile_alloc_init_block_size > tile_allocation_size) {
- DRM_ERROR("tile init exceeds tile alloc size (%d vs %d)\n",
- exec->bin_tiles_x * exec->bin_tiles_y *
- exec->tile_alloc_init_block_size,
- tile_allocation_size);
- return -EINVAL;
- }
- if (*(uint32_t *)(untrusted + 8) != 0) {
- DRM_ERROR("TSDA offset != 0 unsupported\n");
- return -EINVAL;
- }
- if (exec->bin_tiles_x * exec->bin_tiles_y * 48 >
- tile_state_data_array->base.size) {
- DRM_ERROR("TSDA of %db too small for %dx%d bin config\n",
- tile_state_data_array->base.size,
- exec->bin_tiles_x, exec->bin_tiles_y);
- }
- *(uint32_t *)(validated + 8) = tile_state_data_array->paddr;
-
- return 0;
-}
-
-static int
-validate_tile_rendering_mode_config(VALIDATE_ARGS)
-{
- struct drm_gem_cma_object *fbo;
- uint32_t flags, offset, cpp;
-
- if (exec->found_tile_rendering_mode_config_packet) {
- DRM_ERROR("Duplicate VC4_PACKET_TILE_RENDERING_MODE_CONFIG\n");
- return -EINVAL;
- }
- exec->found_tile_rendering_mode_config_packet = true;
-
- if (!vc4_use_handle(exec, 0, VC4_MODE_RENDER, &fbo))
- return -EINVAL;
-
- exec->fb_width = *(uint16_t *)(untrusted + 4);
- exec->fb_height = *(uint16_t *)(untrusted + 6);
-
- flags = *(uint16_t *)(untrusted + 8);
- if ((flags & VC4_RENDER_CONFIG_FORMAT_MASK) ==
- VC4_RENDER_CONFIG_FORMAT_RGBA8888) {
- cpp = 4;
- } else {
- cpp = 2;
- }
-
- offset = *(uint32_t *)untrusted;
- if (!check_tex_size(exec, fbo, offset,
- ((flags &
- VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK) >>
- VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT),
- exec->fb_width, exec->fb_height, cpp)) {
- return -EINVAL;
- }
-
- *(uint32_t *)validated = fbo->paddr + offset;
-
- return 0;
-}
-
-static int
-validate_tile_coordinates(VALIDATE_ARGS)
-{
- uint8_t tile_x = *(uint8_t *)(untrusted + 0);
- uint8_t tile_y = *(uint8_t *)(untrusted + 1);
+ /* The tile state data array is 48 bytes per tile, and we put it at
+ * the start of a BO containing both it and the tile alloc.
+ */
+ tile_state_size = 48 * tile_count;
+
+ /* Since the tile alloc array will follow us, align. */
+ exec->tile_alloc_offset = roundup(tile_state_size, 4096);
+
+ *(uint8_t *)(validated + 14) =
+ ((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK |
+ VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK)) |
+ VC4_BIN_CONFIG_AUTO_INIT_TSDA |
+ VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32,
+ VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE) |
+ VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128,
+ VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE));
+
+ /* Initial block size. */
+ tile_alloc_size = 32 * tile_count;
+
+ /*
+ * The initial allocation gets rounded to the next 256 bytes before
+ * the hardware starts fulfilling further allocations.
+ */
+ tile_alloc_size = roundup(tile_alloc_size, 256);
- if (tile_x * 64 >= exec->fb_width || tile_y * 64 >= exec->fb_height) {
- DRM_ERROR("Tile coordinates %d,%d > render config %dx%d\n",
- tile_x, tile_y, exec->fb_width, exec->fb_height);
- return -EINVAL;
- }
+ /* Add space for the extra allocations. This is what gets used first,
+ * before overflow memory. It must have at least 4096 bytes, but we
+ * want to avoid overflow memory usage if possible.
+ */
+ tile_alloc_size += 1024 * 1024;
+
+ exec->tile_bo = drm_gem_cma_create(dev, exec->tile_alloc_offset +
+ tile_alloc_size);
+ if (!exec->tile_bo)
+ return -ENOMEM;
+ list_addtail(&to_vc4_bo(&exec->tile_bo->base)->unref_head,
+ &exec->unref_list);
+
+ /* tile alloc address. */
+ *(uint32_t *)(validated + 0) = (exec->tile_bo->paddr +
+ exec->tile_alloc_offset);
+ /* tile alloc size. */
+ *(uint32_t *)(validated + 4) = tile_alloc_size;
+ /* tile state address. */
+ *(uint32_t *)(validated + 8) = exec->tile_bo->paddr;
return 0;
}
@@ -633,78 +462,60 @@ validate_gem_handles(VALIDATE_ARGS)
return 0;
}
+#define VC4_DEFINE_PACKET(packet, name, func) \
+ [packet] = { packet ## _SIZE, name, func }
+
static const struct cmd_info {
- bool bin;
- bool render;
uint16_t len;
const char *name;
int (*func)(struct vc4_exec_info *exec, void *validated,
void *untrusted);
} cmd_info[] = {
- [VC4_PACKET_HALT] = { 1, 1, 1, "halt", NULL },
- [VC4_PACKET_NOP] = { 1, 1, 1, "nop", NULL },
- [VC4_PACKET_FLUSH] = { 1, 1, 1, "flush", NULL },
- [VC4_PACKET_FLUSH_ALL] = { 1, 0, 1, "flush all state", validate_flush_all },
- [VC4_PACKET_START_TILE_BINNING] = { 1, 0, 1, "start tile binning", validate_start_tile_binning },
- [VC4_PACKET_INCREMENT_SEMAPHORE] = { 1, 0, 1, "increment semaphore", validate_increment_semaphore },
- [VC4_PACKET_WAIT_ON_SEMAPHORE] = { 0, 1, 1, "wait on semaphore", validate_wait_on_semaphore },
- /* BRANCH_TO_SUB_LIST is actually supported in the binner as well, but
- * we only use it from the render CL in order to jump into the tile
- * allocation BO.
- */
- [VC4_PACKET_BRANCH_TO_SUB_LIST] = { 0, 1, 5, "branch to sublist", validate_branch_to_sublist },
- [VC4_PACKET_STORE_MS_TILE_BUFFER] = { 0, 1, 1, "store MS resolved tile color buffer", NULL },
- [VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF] = { 0, 1, 1, "store MS resolved tile color buffer and EOF", NULL },
+ VC4_DEFINE_PACKET(VC4_PACKET_HALT, "halt", NULL),
+ VC4_DEFINE_PACKET(VC4_PACKET_NOP, "nop", NULL),
+ VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, "flush", NULL),
+ VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, "flush all state", validate_flush_all),
+ VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING, "start tile binning", validate_start_tile_binning),
+ VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, "increment semaphore", validate_increment_semaphore),
- [VC4_PACKET_STORE_TILE_BUFFER_GENERAL] = { 0, 1, 7, "Store Tile Buffer General", validate_loadstore_tile_buffer_general },
- [VC4_PACKET_LOAD_TILE_BUFFER_GENERAL] = { 0, 1, 7, "Load Tile Buffer General", validate_loadstore_tile_buffer_general },
+ VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE, "Indexed Primitive List", validate_indexed_prim_list),
- [VC4_PACKET_GL_INDEXED_PRIMITIVE] = { 1, 1, 14, "Indexed Primitive List", validate_indexed_prim_list },
-
- [VC4_PACKET_GL_ARRAY_PRIMITIVE] = { 1, 1, 10, "Vertex Array Primitives", validate_gl_array_primitive },
+ VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE, "Vertex Array Primitives", validate_gl_array_primitive),
/* This is only used by clipped primitives (packets 48 and 49), which
* we don't support parsing yet.
*/
- [VC4_PACKET_PRIMITIVE_LIST_FORMAT] = { 1, 1, 2, "primitive list format", NULL },
-
- [VC4_PACKET_GL_SHADER_STATE] = { 1, 1, 5, "GL Shader State", validate_gl_shader_state },
- [VC4_PACKET_NV_SHADER_STATE] = { 1, 1, 5, "NV Shader State", validate_nv_shader_state },
-
- [VC4_PACKET_CONFIGURATION_BITS] = { 1, 1, 4, "configuration bits", NULL },
- [VC4_PACKET_FLAT_SHADE_FLAGS] = { 1, 1, 5, "flat shade flags", NULL },
- [VC4_PACKET_POINT_SIZE] = { 1, 1, 5, "point size", NULL },
- [VC4_PACKET_LINE_WIDTH] = { 1, 1, 5, "line width", NULL },
- [VC4_PACKET_RHT_X_BOUNDARY] = { 1, 1, 3, "RHT X boundary", NULL },
- [VC4_PACKET_DEPTH_OFFSET] = { 1, 1, 5, "Depth Offset", NULL },
- [VC4_PACKET_CLIP_WINDOW] = { 1, 1, 9, "Clip Window", NULL },
- [VC4_PACKET_VIEWPORT_OFFSET] = { 1, 1, 5, "Viewport Offset", NULL },
- [VC4_PACKET_CLIPPER_XY_SCALING] = { 1, 1, 9, "Clipper XY Scaling", NULL },
+ VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, "primitive list format", NULL),
+
+ VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, "GL Shader State", validate_gl_shader_state),
+ VC4_DEFINE_PACKET(VC4_PACKET_NV_SHADER_STATE, "NV Shader State", validate_nv_shader_state),
+
+ VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, "configuration bits", NULL),
+ VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, "flat shade flags", NULL),
+ VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, "point size", NULL),
+ VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, "line width", NULL),
+ VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, "RHT X boundary", NULL),
+ VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, "Depth Offset", NULL),
+ VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, "Clip Window", NULL),
+ VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, "Viewport Offset", NULL),
+ VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, "Clipper XY Scaling", NULL),
/* Note: The docs say this was also 105, but it was 106 in the
* initial userland code drop.
*/
- [VC4_PACKET_CLIPPER_Z_SCALING] = { 1, 1, 9, "Clipper Z Scale and Offset", NULL },
-
- [VC4_PACKET_TILE_BINNING_MODE_CONFIG] = { 1, 0, 16, "tile binning configuration", validate_tile_binning_config },
+ VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, "Clipper Z Scale and Offset", NULL),
- [VC4_PACKET_TILE_RENDERING_MODE_CONFIG] = { 0, 1, 11, "tile rendering mode configuration", validate_tile_rendering_mode_config},
+ VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG, "tile binning configuration", validate_tile_binning_config),
- [VC4_PACKET_CLEAR_COLORS] = { 0, 1, 14, "Clear Colors", NULL },
-
- [VC4_PACKET_TILE_COORDINATES] = { 0, 1, 3, "Tile Coordinates", validate_tile_coordinates },
-
- [VC4_PACKET_GEM_HANDLES] = { 1, 1, 9, "GEM handles", validate_gem_handles },
+ VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, "GEM handles", validate_gem_handles),
};
int
-vc4_validate_cl(struct drm_device *dev,
- void *validated,
- void *unvalidated,
- uint32_t len,
- bool is_bin,
- bool has_bin,
- struct vc4_exec_info *exec)
+vc4_validate_bin_cl(struct drm_device *dev,
+ void *validated,
+ void *unvalidated,
+ struct vc4_exec_info *exec)
{
+ uint32_t len = exec->args->bin_cl_size;
uint32_t dst_offset = 0;
uint32_t src_offset = 0;
@@ -732,14 +543,6 @@ vc4_validate_cl(struct drm_device *dev,
src_offset, cmd, info->name, info->len);
#endif
- if ((is_bin && !info->bin) ||
- (!is_bin && !info->render)) {
- DRM_ERROR("0x%08x: packet %d (%s) invalid for %s\n",
- src_offset, cmd, info->name,
- is_bin ? "binner" : "render");
- return -EINVAL;
- }
-
if (src_offset + info->len > len) {
DRM_ERROR("0x%08x: packet %d (%s) length 0x%08x "
"exceeds bounds (0x%08x)\n",
@@ -770,30 +573,16 @@ vc4_validate_cl(struct drm_device *dev,
break;
}
- if (is_bin) {
- exec->ct0ea = exec->ct0ca + dst_offset;
+ exec->ct0ea = exec->ct0ca + dst_offset;
- if (has_bin && !exec->found_start_tile_binning_packet) {
- DRM_ERROR("Bin CL missing VC4_PACKET_START_TILE_BINNING\n");
- return -EINVAL;
- }
- } else {
- if (!exec->found_tile_rendering_mode_config_packet) {
- DRM_ERROR("Render CL missing VC4_PACKET_TILE_RENDERING_MODE_CONFIG\n");
- return -EINVAL;
- }
+ if (!exec->found_start_tile_binning_packet) {
+ DRM_ERROR("Bin CL missing VC4_PACKET_START_TILE_BINNING\n");
+ return -EINVAL;
+ }
- /* Make sure that they actually consumed the semaphore
- * increment from the bin CL. Otherwise a later submit would
- * have render execute immediately.
- */
- if (exec->found_wait_on_semaphore_packet != has_bin) {
- DRM_ERROR("Render CL %s VC4_PACKET_WAIT_ON_SEMAPHORE\n",
- exec->found_wait_on_semaphore_packet ?
- "has" : "missing");
- return -EINVAL;
- }
- exec->ct1ea = exec->ct1ca + dst_offset;
+ if (!exec->found_increment_semaphore_packet) {
+ DRM_ERROR("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE\n");
+ return -EINVAL;
}
return 0;
@@ -814,10 +603,10 @@ reloc_tex(struct vc4_exec_info *exec,
uint32_t p3 = (sample->p_offset[3] != ~0 ?
*(uint32_t *)(uniform_data_u + sample->p_offset[3]) : 0);
uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0];
- uint32_t offset = p0 & ~0xfff;
- uint32_t miplevels = (p0 & 15);
- uint32_t width = (p1 >> 8) & 2047;
- uint32_t height = (p1 >> 20) & 2047;
+ uint32_t offset = p0 & VC4_TEX_P0_OFFSET_MASK;
+ uint32_t miplevels = VC4_GET_FIELD(p0, VC4_TEX_P0_MIPLVLS);
+ uint32_t width = VC4_GET_FIELD(p1, VC4_TEX_P1_WIDTH);
+ uint32_t height = VC4_GET_FIELD(p1, VC4_TEX_P1_HEIGHT);
uint32_t cpp, tiling_format, utile_w, utile_h;
uint32_t i;
uint32_t cube_map_stride = 0;
@@ -845,16 +634,18 @@ reloc_tex(struct vc4_exec_info *exec,
if (height == 0)
height = 2048;
- if (p0 & (1 << 9)) {
- if ((p2 & (3 << 30)) == (1 << 30))
- cube_map_stride = p2 & 0x3ffff000;
- if ((p3 & (3 << 30)) == (1 << 30)) {
+ if (p0 & VC4_TEX_P0_CMMODE_MASK) {
+ if (VC4_GET_FIELD(p2, VC4_TEX_P2_PTYPE) ==
+ VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE)
+ cube_map_stride = p2 & VC4_TEX_P2_CMST_MASK;
+ if (VC4_GET_FIELD(p3, VC4_TEX_P2_PTYPE) ==
+ VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) {
if (cube_map_stride) {
DRM_ERROR("Cube map stride set twice\n");
return false;
}
- cube_map_stride = p3 & 0x3ffff000;
+ cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK;
}
if (!cube_map_stride) {
DRM_ERROR("Cube map stride not set\n");
@@ -862,7 +653,8 @@ reloc_tex(struct vc4_exec_info *exec,
}
}
- type = ((p0 >> 4) & 15) | ((p1 >> 31) << 4);
+ type = (VC4_GET_FIELD(p0, VC4_TEX_P0_TYPE) |
+ (VC4_GET_FIELD(p1, VC4_TEX_P1_TYPE4) << 4));
switch (type) {
case VC4_TEXTURE_TYPE_RGBA8888:
@@ -905,8 +697,8 @@ reloc_tex(struct vc4_exec_info *exec,
tiling_format = VC4_TILING_FORMAT_T;
}
- if (!check_tex_size(exec, tex, offset + cube_map_stride * 5,
- tiling_format, width, height, cpp)) {
+ if (!vc4_check_tex_size(exec, tex, offset + cube_map_stride * 5,
+ tiling_format, width, height, cpp)) {
return false;
}
@@ -927,15 +719,15 @@ reloc_tex(struct vc4_exec_info *exec,
switch (tiling_format) {
case VC4_TILING_FORMAT_T:
- aligned_width = roundup(level_width, utile_w * 8);
- aligned_height = roundup(level_height, utile_h * 8);
+ aligned_width = round_up(level_width, utile_w * 8);
+ aligned_height = round_up(level_height, utile_h * 8);
break;
case VC4_TILING_FORMAT_LT:
- aligned_width = roundup(level_width, utile_w);
- aligned_height = roundup(level_height, utile_h);
+ aligned_width = round_up(level_width, utile_w);
+ aligned_height = round_up(level_height, utile_h);
break;
default:
- aligned_width = roundup(level_width, utile_w);
+ aligned_width = round_up(level_width, utile_w);
aligned_height = level_height;
break;
}
diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c
index e5a75c5f8c2..ab9a6512e82 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c
+++ b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c
@@ -58,7 +58,8 @@ struct vc4_shader_validation_state {
*
* This is used for the validation of direct address memory reads.
*/
- uint32_t live_clamp_offsets[32 + 32 + 4];
+ uint32_t live_min_clamp_offsets[32 + 32 + 4];
+ bool live_max_clamp_regs[32 + 32 + 4];
};
static uint32_t
@@ -77,6 +78,25 @@ waddr_to_live_reg_index(uint32_t waddr, bool is_b)
}
}
+static uint32_t
+raddr_add_a_to_live_reg_index(uint64_t inst)
+{
+ uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+ uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
+ uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
+ uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
+
+ if (add_a == QPU_MUX_A) {
+ return raddr_a;
+ } else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM) {
+ return 32 + raddr_b;
+ } else if (add_a <= QPU_MUX_R3) {
+ return 64 + add_a;
+ } else {
+ return ~0;
+ }
+}
+
static bool
is_tmu_submit(uint32_t waddr)
{
@@ -136,9 +156,8 @@ check_tmu_write(uint64_t inst,
uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
if (is_direct) {
- uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
- uint32_t clamp_offset = ~0;
+ uint32_t clamp_reg, clamp_offset;
if (sig == QPU_SIG_SMALL_IMM) {
DRM_ERROR("direct TMU read used small immediate\n");
@@ -159,14 +178,13 @@ check_tmu_write(uint64_t inst,
* This is arbitrary, but simpler than supporting flipping the
* two either way.
*/
- if (add_a == QPU_MUX_A) {
- clamp_offset = validation_state->live_clamp_offsets[raddr_a];
- } else if (add_a == QPU_MUX_B) {
- clamp_offset = validation_state->live_clamp_offsets[32 + raddr_b];
- } else if (add_a <= QPU_MUX_R4) {
- clamp_offset = validation_state->live_clamp_offsets[64 + add_a];
+ clamp_reg = raddr_add_a_to_live_reg_index(inst);
+ if (clamp_reg == ~0) {
+ DRM_ERROR("direct TMU load wasn't clamped\n");
+ return false;
}
+ clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg];
if (clamp_offset == ~0) {
DRM_ERROR("direct TMU load wasn't clamped\n");
return false;
@@ -229,8 +247,6 @@ check_register_write(uint64_t inst,
uint32_t waddr = (is_mul ?
QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
QPU_GET_FIELD(inst, QPU_WADDR_ADD));
- bool is_b = is_mul != ((inst & QPU_WS) != 0);
- uint32_t live_reg_index;
switch (waddr) {
case QPU_W_UNIFORMS_ADDRESS:
@@ -285,14 +301,6 @@ check_register_write(uint64_t inst,
return true;
}
- /* Clear out the live offset clamp tracking for the written register.
- * If this particular instruction is setting up an offset clamp, it'll
- * get tracked immediately after we return.
- */
- live_reg_index = waddr_to_live_reg_index(waddr, is_b);
- if (live_reg_index != ~0)
- validation_state->live_clamp_offsets[live_reg_index] = ~0;
-
return true;
}
@@ -301,26 +309,72 @@ track_live_clamps(uint64_t inst,
struct vc4_validated_shader_info *validated_shader,
struct vc4_shader_validation_state *validation_state)
{
+ uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
+ uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
+ uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
+ uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
- bool is_b = inst & QPU_WS;
- uint32_t live_reg_index;
+ bool ws = inst & QPU_WS;
+ uint32_t lri_add_a, lri_add, lri_mul;
+ bool add_a_is_min_0;
- if (QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_MIN)
+ /* Check whether OP_ADD's A argumennt comes from a live MAX(x, 0),
+ * before we clear previous live state.
+ */
+ lri_add_a = raddr_add_a_to_live_reg_index(inst);
+ add_a_is_min_0 = (lri_add_a != ~0 &&
+ validation_state->live_max_clamp_regs[lri_add_a]);
+
+ /* Clear live state for registers written by our instruction. */
+ lri_add = waddr_to_live_reg_index(waddr_add, ws);
+ lri_mul = waddr_to_live_reg_index(waddr_mul, !ws);
+ if (lri_mul != ~0) {
+ validation_state->live_max_clamp_regs[lri_mul] = false;
+ validation_state->live_min_clamp_offsets[lri_mul] = ~0;
+ }
+ if (lri_add != ~0) {
+ validation_state->live_max_clamp_regs[lri_add] = false;
+ validation_state->live_min_clamp_offsets[lri_add] = ~0;
+ } else {
+ /* Nothing further to do for live tracking, since only ADDs
+ * generate new live clamp registers.
+ */
return;
+ }
+
+ /* Now, handle remaining live clamp tracking for the ADD operation. */
- if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
- !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF &&
- sig != QPU_SIG_SMALL_IMM)) {
+ if (cond_add != QPU_COND_ALWAYS)
return;
- }
- live_reg_index = waddr_to_live_reg_index(waddr_add, is_b);
- if (live_reg_index != ~0) {
- validation_state->live_clamp_offsets[live_reg_index] =
+ if (op_add == QPU_A_MAX) {
+ /* Track live clamps of a value to a minimum of 0 (in either
+ * arg).
+ */
+ if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 ||
+ (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) {
+ return;
+ }
+
+ validation_state->live_max_clamp_regs[lri_add] = true;
+ } if (op_add == QPU_A_MIN) {
+ /* Track live clamps of a value clamped to a minimum of 0 and
+ * a maximum of some uniform's offset.
+ */
+ if (!add_a_is_min_0)
+ return;
+
+ if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
+ !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF &&
+ sig != QPU_SIG_SMALL_IMM)) {
+ return;
+ }
+
+ validation_state->live_min_clamp_offsets[lri_add] =
validated_shader->uniforms_size;
}
}
@@ -382,8 +436,8 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
for (i = 0; i < 8; i++)
validation_state.tmu_setup[i / 4].p_offset[i % 4] = ~0;
- for (i = 0; i < ARRAY_SIZE(validation_state.live_clamp_offsets); i++)
- validation_state.live_clamp_offsets[i] = ~0;
+ for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++)
+ validation_state.live_min_clamp_offsets[i] = ~0;
shader = shader_obj->vaddr;
max_ip = shader_obj->base.size / sizeof(uint64_t);
diff --git a/src/gallium/drivers/vc4/vc4_blit.c b/src/gallium/drivers/vc4/vc4_blit.c
index 2d524c40b4d..d29e2c9c318 100644
--- a/src/gallium/drivers/vc4/vc4_blit.c
+++ b/src/gallium/drivers/vc4/vc4_blit.c
@@ -26,86 +26,7 @@
#include "util/u_blitter.h"
#include "vc4_context.h"
-static void
-vc4_tile_blit_color_rcl(struct vc4_context *vc4,
- struct vc4_surface *dst_surf,
- struct vc4_surface *src_surf)
-{
- struct vc4_resource *src = vc4_resource(src_surf->base.texture);
- struct vc4_resource *dst = vc4_resource(dst_surf->base.texture);
-
- uint32_t min_x_tile = 0;
- uint32_t min_y_tile = 0;
- uint32_t max_x_tile = (dst_surf->base.width - 1) / 64;
- uint32_t max_y_tile = (dst_surf->base.height - 1) / 64;
- uint32_t xtiles = max_x_tile - min_x_tile + 1;
- uint32_t ytiles = max_y_tile - min_y_tile + 1;
- uint32_t reloc_size = 9;
- uint32_t config_size = 11 + reloc_size;
- uint32_t loadstore_size = 7 + reloc_size;
- uint32_t tilecoords_size = 3;
- cl_ensure_space(&vc4->rcl,
- config_size +
- xtiles * ytiles * (loadstore_size * 2 +
- tilecoords_size * 1));
- cl_ensure_space(&vc4->bo_handles, 2 * sizeof(uint32_t));
- cl_ensure_space(&vc4->bo_pointers, 2 * sizeof(struct vc4_bo *));
-
- cl_start_reloc(&vc4->rcl, 1);
- cl_u8(&vc4->rcl, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
- cl_reloc(vc4, &vc4->rcl, dst->bo, dst_surf->offset);
- cl_u16(&vc4->rcl, dst_surf->base.width);
- cl_u16(&vc4->rcl, dst_surf->base.height);
- cl_u16(&vc4->rcl, ((dst_surf->tiling <<
- VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT) |
- (vc4_rt_format_is_565(dst_surf->base.format) ?
- VC4_RENDER_CONFIG_FORMAT_BGR565 :
- VC4_RENDER_CONFIG_FORMAT_RGBA8888)));
-
- uint32_t src_hindex = vc4_gem_hindex(vc4, src->bo);
-
- for (int y = min_y_tile; y <= max_y_tile; y++) {
- for (int x = min_x_tile; x <= max_x_tile; x++) {
- bool end_of_frame = (x == max_x_tile &&
- y == max_y_tile);
-
- cl_start_reloc(&vc4->rcl, 1);
- cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
- cl_u8(&vc4->rcl,
- VC4_LOADSTORE_TILE_BUFFER_COLOR |
- (src_surf->tiling <<
- VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
- cl_u8(&vc4->rcl,
- vc4_rt_format_is_565(src_surf->base.format) ?
- VC4_LOADSTORE_TILE_BUFFER_BGR565 :
- VC4_LOADSTORE_TILE_BUFFER_RGBA8888);
- cl_reloc_hindex(&vc4->rcl, src_hindex,
- src_surf->offset);
-
- cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
- cl_u8(&vc4->rcl, x);
- cl_u8(&vc4->rcl, y);
-
- if (end_of_frame) {
- cl_u8(&vc4->rcl,
- VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF);
- } else {
- cl_u8(&vc4->rcl,
- VC4_PACKET_STORE_MS_TILE_BUFFER);
- }
- }
- }
-
- vc4->draw_min_x = 0;
- vc4->draw_min_y = 0;
- vc4->draw_max_x = dst_surf->base.width;
- vc4->draw_max_y = dst_surf->base.height;
-
- dst->writes++;
- vc4->needs_flush = true;
-}
-
-static struct vc4_surface *
+static struct pipe_surface *
vc4_get_blit_surface(struct pipe_context *pctx,
struct pipe_resource *prsc, unsigned level)
{
@@ -117,7 +38,7 @@ vc4_get_blit_surface(struct pipe_context *pctx,
tmpl.u.tex.first_layer = 0;
tmpl.u.tex.last_layer = 0;
- return vc4_surface(pctx->create_surface(pctx, prsc, &tmpl));
+ return pctx->create_surface(pctx, prsc, &tmpl);
}
static bool
@@ -141,17 +62,28 @@ vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
if (info->dst.resource->format != info->src.resource->format)
return false;
- struct vc4_surface *dst_surf =
+ vc4_flush(pctx);
+
+ struct pipe_surface *dst_surf =
vc4_get_blit_surface(pctx, info->dst.resource, info->dst.level);
- struct vc4_surface *src_surf =
+ struct pipe_surface *src_surf =
vc4_get_blit_surface(pctx, info->src.resource, info->src.level);
- vc4_flush(pctx);
- vc4_tile_blit_color_rcl(vc4, dst_surf, src_surf);
+ pipe_surface_reference(&vc4->color_read, src_surf);
+ pipe_surface_reference(&vc4->color_write, dst_surf);
+ pipe_surface_reference(&vc4->zs_read, NULL);
+ pipe_surface_reference(&vc4->zs_write, NULL);
+ vc4->draw_min_x = 0;
+ vc4->draw_min_y = 0;
+ vc4->draw_max_x = dst_surf->width;
+ vc4->draw_max_y = dst_surf->height;
+ vc4->draw_width = dst_surf->width;
+ vc4->draw_height = dst_surf->height;
+ vc4->needs_flush = true;
vc4_job_submit(vc4);
- pctx->surface_destroy(pctx, &dst_surf->base);
- pctx->surface_destroy(pctx, &src_surf->base);
+ pipe_surface_reference(&dst_surf, NULL);
+ pipe_surface_reference(&src_surf, NULL);
return true;
}
diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c
index 4bb2c711e16..cbdb9e89cf6 100644
--- a/src/gallium/drivers/vc4/vc4_bufmgr.c
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.c
@@ -34,8 +34,46 @@
#include "vc4_context.h"
#include "vc4_screen.h"
-#define container_of(ptr, type, field) \
- (type*)((char*)ptr - offsetof(type, field))
+static bool dump_stats = false;
+
+static void
+vc4_bo_dump_stats(struct vc4_screen *screen)
+{
+ struct vc4_bo_cache *cache = &screen->bo_cache;
+
+ fprintf(stderr, " BOs allocated: %d\n", screen->bo_count);
+ fprintf(stderr, " BOs size: %dkb\n", screen->bo_size / 102);
+ fprintf(stderr, " BOs cached: %d\n", cache->bo_count);
+ fprintf(stderr, " BOs cached size: %dkb\n", cache->bo_size / 102);
+
+ if (!list_empty(&cache->time_list)) {
+ struct vc4_bo *first = LIST_ENTRY(struct vc4_bo,
+ cache->time_list.next,
+ time_list);
+ struct vc4_bo *last = LIST_ENTRY(struct vc4_bo,
+ cache->time_list.prev,
+ time_list);
+
+ fprintf(stderr, " oldest cache time: %ld\n",
+ (long)first->free_time);
+ fprintf(stderr, " newest cache time: %ld\n",
+ (long)last->free_time);
+
+ struct timespec time;
+ clock_gettime(CLOCK_MONOTONIC, &time);
+ fprintf(stderr, " now: %ld\n",
+ time.tv_sec);
+ }
+}
+
+static void
+vc4_bo_remove_from_cache(struct vc4_bo_cache *cache, struct vc4_bo *bo)
+{
+ list_del(&bo->time_list);
+ list_del(&bo->size_list);
+ cache->bo_count--;
+ cache->bo_size -= bo->size;
+}
static struct vc4_bo *
vc4_bo_from_cache(struct vc4_screen *screen, uint32_t size, const char *name)
@@ -48,12 +86,21 @@ vc4_bo_from_cache(struct vc4_screen *screen, uint32_t size, const char *name)
struct vc4_bo *bo = NULL;
pipe_mutex_lock(cache->lock);
- if (!is_empty_list(&cache->size_list[page_index])) {
- struct simple_node *node = last_elem(&cache->size_list[page_index]);
- bo = container_of(node, struct vc4_bo, size_list);
+ if (!list_empty(&cache->size_list[page_index])) {
+ bo = LIST_ENTRY(struct vc4_bo, cache->size_list[page_index].next,
+ size_list);
+
+ /* Check that the BO has gone idle. If not, then we want to
+ * allocate something new instead, since we assume that the
+ * user will proceed to CPU map it and fill it with stuff.
+ */
+ if (!vc4_bo_wait(bo, 0)) {
+ pipe_mutex_unlock(cache->lock);
+ return NULL;
+ }
+
pipe_reference_init(&bo->reference, 1);
- remove_from_list(&bo->time_list);
- remove_from_list(&bo->size_list);
+ vc4_bo_remove_from_cache(cache, bo);
bo->name = name;
}
@@ -70,8 +117,14 @@ vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name)
size = align(size, 4096);
bo = vc4_bo_from_cache(screen, size, name);
- if (bo)
+ if (bo) {
+ if (dump_stats) {
+ fprintf(stderr, "Allocated %s %dkb from cache:\n",
+ name, size / 1024);
+ vc4_bo_dump_stats(screen);
+ }
return bo;
+ }
bo = CALLOC_STRUCT(vc4_bo);
if (!bo)
@@ -108,6 +161,13 @@ vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name)
abort();
}
+ screen->bo_count++;
+ screen->bo_size += bo->size;
+ if (dump_stats) {
+ fprintf(stderr, "Allocated %s %dkb:\n", name, size / 1024);
+ vc4_bo_dump_stats(screen);
+ }
+
return bo;
}
@@ -145,26 +205,47 @@ vc4_bo_free(struct vc4_bo *bo)
if (ret != 0)
fprintf(stderr, "close object %d: %s\n", bo->handle, strerror(errno));
+ screen->bo_count--;
+ screen->bo_size -= bo->size;
+
+ if (dump_stats) {
+ fprintf(stderr, "Freed %s%s%dkb:\n",
+ bo->name ? bo->name : "",
+ bo->name ? " " : "",
+ bo->size / 1024);
+ vc4_bo_dump_stats(screen);
+ }
+
free(bo);
}
static void
free_stale_bos(struct vc4_screen *screen, time_t time)
{
- while (!is_empty_list(&screen->bo_cache.time_list)) {
- struct simple_node *node =
- first_elem(&screen->bo_cache.time_list);
- struct vc4_bo *bo = container_of(node, struct vc4_bo, time_list);
+ struct vc4_bo_cache *cache = &screen->bo_cache;
+ bool freed_any = false;
+
+ list_for_each_entry_safe(struct vc4_bo, bo, &cache->time_list,
+ time_list) {
+ if (dump_stats && !freed_any) {
+ fprintf(stderr, "Freeing stale BOs:\n");
+ vc4_bo_dump_stats(screen);
+ freed_any = true;
+ }
/* If it's more than a second old, free it. */
if (time - bo->free_time > 2) {
- remove_from_list(&bo->time_list);
- remove_from_list(&bo->size_list);
+ vc4_bo_remove_from_cache(cache, bo);
vc4_bo_free(bo);
} else {
break;
}
}
+
+ if (dump_stats && freed_any) {
+ fprintf(stderr, "Freed stale BOs:\n");
+ vc4_bo_dump_stats(screen);
+ }
}
void
@@ -180,16 +261,16 @@ vc4_bo_last_unreference_locked_timed(struct vc4_bo *bo, time_t time)
}
if (cache->size_list_size <= page_index) {
- struct simple_node *new_list =
- ralloc_array(screen, struct simple_node, page_index + 1);
+ struct list_head *new_list =
+ ralloc_array(screen, struct list_head, page_index + 1);
/* Move old list contents over (since the array has moved, and
- * therefore the pointers to the list heads have to change.
+ * therefore the pointers to the list heads have to change).
*/
for (int i = 0; i < cache->size_list_size; i++) {
- struct simple_node *old_head = &cache->size_list[i];
- if (is_empty_list(old_head))
- make_empty_list(&new_list[i]);
+ struct list_head *old_head = &cache->size_list[i];
+ if (list_empty(old_head))
+ list_inithead(&new_list[i]);
else {
new_list[i].next = old_head->next;
new_list[i].prev = old_head->prev;
@@ -198,15 +279,23 @@ vc4_bo_last_unreference_locked_timed(struct vc4_bo *bo, time_t time)
}
}
for (int i = cache->size_list_size; i < page_index + 1; i++)
- make_empty_list(&new_list[i]);
+ list_inithead(&new_list[i]);
cache->size_list = new_list;
cache->size_list_size = page_index + 1;
}
bo->free_time = time;
- insert_at_tail(&cache->size_list[page_index], &bo->size_list);
- insert_at_tail(&cache->time_list, &bo->time_list);
+ list_addtail(&bo->size_list, &cache->size_list[page_index]);
+ list_addtail(&bo->time_list, &cache->time_list);
+ cache->bo_count++;
+ cache->bo_size += bo->size;
+ if (dump_stats) {
+ fprintf(stderr, "Freed %s %dkb to cache:\n",
+ bo->name, bo->size / 1024);
+ vc4_bo_dump_stats(screen);
+ }
+ bo->name = NULL;
free_stale_bos(screen, time);
}
@@ -286,6 +375,7 @@ vc4_bo_get_dmabuf(struct vc4_bo *bo)
bo->handle);
return -1;
}
+ bo->private = false;
return fd;
}
@@ -342,15 +432,17 @@ vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns)
ret = 0;
}
- if (ret == -ETIME) {
- return false;
- } else if (ret != 0) {
- fprintf(stderr, "wait failed\n");
- abort();
- } else {
+ if (ret == 0) {
screen->finished_seqno = wait.seqno;
return true;
}
+
+ if (errno != ETIME) {
+ fprintf(stderr, "wait failed: %d\n", ret);
+ abort();
+ }
+
+ return false;
}
bool
@@ -369,14 +461,15 @@ vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns)
else
ret = 0;
- if (ret == -ETIME) {
- return false;
- } else if (ret != 0) {
- fprintf(stderr, "wait failed\n");
- abort();
- } else {
+ if (ret == 0)
return true;
+
+ if (errno != ETIME) {
+ fprintf(stderr, "wait failed: %d\n", ret);
+ abort();
}
+
+ return false;
}
void *
@@ -437,12 +530,14 @@ vc4_bufmgr_destroy(struct pipe_screen *pscreen)
struct vc4_screen *screen = vc4_screen(pscreen);
struct vc4_bo_cache *cache = &screen->bo_cache;
- while (!is_empty_list(&cache->time_list)) {
- struct simple_node *node = first_elem(&cache->time_list);
- struct vc4_bo *bo = container_of(node, struct vc4_bo, time_list);
-
- remove_from_list(&bo->time_list);
- remove_from_list(&bo->size_list);
+ list_for_each_entry_safe(struct vc4_bo, bo, &cache->time_list,
+ time_list) {
+ vc4_bo_remove_from_cache(cache, bo);
vc4_bo_free(bo);
}
+
+ if (dump_stats) {
+ fprintf(stderr, "BO stats after screen destroy:\n");
+ vc4_bo_dump_stats(screen);
+ }
}
diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.h b/src/gallium/drivers/vc4/vc4_bufmgr.h
index f9559e999a1..7320695ca8e 100644
--- a/src/gallium/drivers/vc4/vc4_bufmgr.h
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.h
@@ -44,9 +44,9 @@ struct vc4_bo {
#endif
/** Entry in the linked list of buffers freed, by age. */
- struct simple_node time_list;
+ struct list_head time_list;
/** Entry in the per-page-count linked list of buffers freed (by age). */
- struct simple_node size_list;
+ struct list_head size_list;
/** Approximate second when the bo was freed. */
time_t free_time;
/**
diff --git a/src/gallium/drivers/vc4/vc4_cl.h b/src/gallium/drivers/vc4/vc4_cl.h
index 32a2e717379..4a50e790942 100644
--- a/src/gallium/drivers/vc4/vc4_cl.h
+++ b/src/gallium/drivers/vc4/vc4_cl.h
@@ -29,7 +29,7 @@
#include "util/u_math.h"
#include "util/macros.h"
-#include "vc4_packet.h"
+#include "kernel/vc4_packet.h"
struct vc4_bo;
diff --git a/src/gallium/drivers/vc4/vc4_cl_dump.c b/src/gallium/drivers/vc4/vc4_cl_dump.c
index 14239840d32..69055081daa 100644
--- a/src/gallium/drivers/vc4/vc4_cl_dump.c
+++ b/src/gallium/drivers/vc4/vc4_cl_dump.c
@@ -174,6 +174,37 @@ dump_VC4_PACKET_CLIPPER_Z_SCALING(void *cl, uint32_t offset, uint32_t hw_offset)
}
static void
+dump_VC4_PACKET_TILE_BINNING_MODE_CONFIG(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+ uint32_t *tile_alloc_addr = cl + offset;
+ uint32_t *tile_alloc_size = cl + offset + 4;
+ uint32_t *tile_state_addr = cl + offset + 8;
+ uint8_t *bin_x = cl + offset + 12;
+ uint8_t *bin_y = cl + offset + 13;
+ uint8_t *flags = cl + offset + 14;
+
+ fprintf(stderr, "0x%08x 0x%08x: tile alloc addr 0x%08x\n",
+ offset, hw_offset,
+ *tile_alloc_addr);
+
+ fprintf(stderr, "0x%08x 0x%08x: tile alloc size %db\n",
+ offset + 4, hw_offset + 4,
+ *tile_alloc_size);
+
+ fprintf(stderr, "0x%08x 0x%08x: tile state addr 0x%08x\n",
+ offset + 8, hw_offset + 8,
+ *tile_state_addr);
+
+ fprintf(stderr, "0x%08x 0x%08x: tiles (%d, %d)\n",
+ offset + 12, hw_offset + 12,
+ *bin_x, *bin_y);
+
+ fprintf(stderr, "0x%08x 0x%08x: flags 0x%02x\n",
+ offset + 14, hw_offset + 14,
+ *flags);
+}
+
+static void
dump_VC4_PACKET_TILE_RENDERING_MODE_CONFIG(void *cl, uint32_t offset, uint32_t hw_offset)
{
uint32_t *render_offset = cl + offset;
@@ -311,7 +342,7 @@ static const struct packet_info {
PACKET_DUMP(VC4_PACKET_CLIPPER_XY_SCALING, 9),
PACKET_DUMP(VC4_PACKET_CLIPPER_Z_SCALING, 9),
- PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG, 16),
+ PACKET_DUMP(VC4_PACKET_TILE_BINNING_MODE_CONFIG, 16),
PACKET_DUMP(VC4_PACKET_TILE_RENDERING_MODE_CONFIG, 11),
PACKET(VC4_PACKET_CLEAR_COLORS, 14),
PACKET_DUMP(VC4_PACKET_TILE_COORDINATES, 3),
diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c
index b394c186efb..630f8e68896 100644
--- a/src/gallium/drivers/vc4/vc4_context.c
+++ b/src/gallium/drivers/vc4/vc4_context.c
@@ -29,6 +29,7 @@
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_blitter.h"
+#include "util/u_upload_mgr.h"
#include "indices/u_primconvert.h"
#include "pipe/p_screen.h"
@@ -36,270 +37,12 @@
#include "vc4_context.h"
#include "vc4_resource.h"
-/**
- * Emits a no-op STORE_TILE_BUFFER_GENERAL.
- *
- * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of
- * some sort before another load is triggered.
- */
-static void
-vc4_store_before_load(struct vc4_context *vc4, bool *coords_emitted)
-{
- if (!*coords_emitted)
- return;
-
- cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
- cl_u8(&vc4->rcl, VC4_LOADSTORE_TILE_BUFFER_NONE);
- cl_u8(&vc4->rcl, (VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR |
- VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR |
- VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR));
- cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */
-
- *coords_emitted = false;
-}
-
-/**
- * Emits a PACKET_TILE_COORDINATES if one isn't already pending.
- *
- * The tile coordinates packet triggers a pending load if there is one, are
- * used for clipping during rendering, and determine where loads/stores happen
- * relative to their base address.
- */
-static void
-vc4_tile_coordinates(struct vc4_context *vc4, uint32_t x, uint32_t y,
- bool *coords_emitted)
-{
- if (*coords_emitted)
- return;
-
- cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
- cl_u8(&vc4->rcl, x);
- cl_u8(&vc4->rcl, y);
-
- *coords_emitted = true;
-}
-
-static void
-vc4_setup_rcl(struct vc4_context *vc4)
-{
- struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
- struct vc4_resource *ctex = csurf ? vc4_resource(csurf->base.texture) : NULL;
- struct vc4_surface *zsurf = vc4_surface(vc4->framebuffer.zsbuf);
- struct vc4_resource *ztex = zsurf ? vc4_resource(zsurf->base.texture) : NULL;
-
- if (!csurf)
- vc4->resolve &= ~PIPE_CLEAR_COLOR0;
- if (!zsurf)
- vc4->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL);
- uint32_t resolve_uncleared = vc4->resolve & ~vc4->cleared;
- uint32_t width = vc4->framebuffer.width;
- uint32_t height = vc4->framebuffer.height;
- uint32_t stride_in_tiles = align(width, 64) / 64;
-
- assert(vc4->draw_min_x != ~0 && vc4->draw_min_y != ~0);
- uint32_t min_x_tile = vc4->draw_min_x / 64;
- uint32_t min_y_tile = vc4->draw_min_y / 64;
- uint32_t max_x_tile = (vc4->draw_max_x - 1) / 64;
- uint32_t max_y_tile = (vc4->draw_max_y - 1) / 64;
- uint32_t xtiles = max_x_tile - min_x_tile + 1;
- uint32_t ytiles = max_y_tile - min_y_tile + 1;
-
-#if 0
- fprintf(stderr, "RCL: resolve 0x%x clear 0x%x resolve uncleared 0x%x\n",
- vc4->resolve,
- vc4->cleared,
- resolve_uncleared);
-#endif
-
- uint32_t reloc_size = 9;
- uint32_t clear_size = 14;
- uint32_t config_size = 11 + reloc_size;
- uint32_t loadstore_size = 7 + reloc_size;
- uint32_t tilecoords_size = 3;
- uint32_t branch_size = 5 + reloc_size;
- uint32_t color_store_size = 1;
- uint32_t semaphore_size = 1;
- cl_ensure_space(&vc4->rcl,
- clear_size +
- config_size +
- loadstore_size +
- semaphore_size +
- xtiles * ytiles * (loadstore_size * 4 +
- tilecoords_size * 3 +
- branch_size +
- color_store_size));
-
- if (vc4->cleared) {
- cl_u8(&vc4->rcl, VC4_PACKET_CLEAR_COLORS);
- cl_u32(&vc4->rcl, vc4->clear_color[0]);
- cl_u32(&vc4->rcl, vc4->clear_color[1]);
- cl_u32(&vc4->rcl, vc4->clear_depth);
- cl_u8(&vc4->rcl, vc4->clear_stencil);
- }
-
- /* The rendering mode config determines the pointer that's used for
- * VC4_PACKET_STORE_MS_TILE_BUFFER address computations. The kernel
- * could handle a no-relocation rendering mode config and deny those
- * packets, but instead we just tell the kernel we're doing our color
- * rendering to the Z buffer, and just don't emit any of those
- * packets.
- */
- struct vc4_surface *render_surf = csurf ? csurf : zsurf;
- struct vc4_resource *render_tex = vc4_resource(render_surf->base.texture);
- cl_start_reloc(&vc4->rcl, 1);
- cl_u8(&vc4->rcl, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
- cl_reloc(vc4, &vc4->rcl, render_tex->bo, render_surf->offset);
- cl_u16(&vc4->rcl, width);
- cl_u16(&vc4->rcl, height);
- cl_u16(&vc4->rcl, ((render_surf->tiling <<
- VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT) |
- (vc4_rt_format_is_565(render_surf->base.format) ?
- VC4_RENDER_CONFIG_FORMAT_BGR565 :
- VC4_RENDER_CONFIG_FORMAT_RGBA8888)));
-
- /* The tile buffer normally gets cleared when the previous tile is
- * stored. If the clear values changed between frames, then the tile
- * buffer has stale clear values in it, so we have to do a store in
- * None mode (no writes) so that we trigger the tile buffer clear.
- *
- * Excess clearing is only a performance cost, since per-tile contents
- * will be loaded/stored in the loop below.
- */
- if (vc4->cleared & (PIPE_CLEAR_COLOR0 |
- PIPE_CLEAR_DEPTH |
- PIPE_CLEAR_STENCIL)) {
- cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
- cl_u8(&vc4->rcl, 0);
- cl_u8(&vc4->rcl, 0);
-
- cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
- cl_u16(&vc4->rcl, VC4_LOADSTORE_TILE_BUFFER_NONE);
- cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */
- }
-
- uint32_t color_hindex = ctex ? vc4_gem_hindex(vc4, ctex->bo) : 0;
- uint32_t depth_hindex = ztex ? vc4_gem_hindex(vc4, ztex->bo) : 0;
- uint32_t tile_alloc_hindex = vc4_gem_hindex(vc4, vc4->tile_alloc);
-
- for (int y = min_y_tile; y <= max_y_tile; y++) {
- for (int x = min_x_tile; x <= max_x_tile; x++) {
- bool end_of_frame = (x == max_x_tile &&
- y == max_y_tile);
- bool coords_emitted = false;
-
- /* Note that the load doesn't actually occur until the
- * tile coords packet is processed, and only one load
- * may be outstanding at a time.
- */
- if (resolve_uncleared & PIPE_CLEAR_COLOR) {
- vc4_store_before_load(vc4, &coords_emitted);
-
- cl_start_reloc(&vc4->rcl, 1);
- cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
- cl_u8(&vc4->rcl,
- VC4_LOADSTORE_TILE_BUFFER_COLOR |
- (csurf->tiling <<
- VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
- cl_u8(&vc4->rcl,
- vc4_rt_format_is_565(csurf->base.format) ?
- VC4_LOADSTORE_TILE_BUFFER_BGR565 :
- VC4_LOADSTORE_TILE_BUFFER_RGBA8888);
- cl_reloc_hindex(&vc4->rcl, color_hindex,
- csurf->offset);
-
- vc4_tile_coordinates(vc4, x, y, &coords_emitted);
- }
-
- if (resolve_uncleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
- vc4_store_before_load(vc4, &coords_emitted);
-
- cl_start_reloc(&vc4->rcl, 1);
- cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
- cl_u8(&vc4->rcl,
- VC4_LOADSTORE_TILE_BUFFER_ZS |
- (zsurf->tiling <<
- VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
- cl_u8(&vc4->rcl, 0);
- cl_reloc_hindex(&vc4->rcl, depth_hindex,
- zsurf->offset);
-
- vc4_tile_coordinates(vc4, x, y, &coords_emitted);
- }
-
- /* Clipping depends on tile coordinates having been
- * emitted, so make sure it's happened even if
- * everything was cleared to start.
- */
- vc4_tile_coordinates(vc4, x, y, &coords_emitted);
-
- /* Wait for the binner before jumping to the first
- * tile's lists.
- */
- if (x == min_x_tile && y == min_y_tile)
- cl_u8(&vc4->rcl, VC4_PACKET_WAIT_ON_SEMAPHORE);
-
- cl_start_reloc(&vc4->rcl, 1);
- cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST);
- cl_reloc_hindex(&vc4->rcl, tile_alloc_hindex,
- (y * stride_in_tiles + x) * 32);
-
- if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
- vc4_tile_coordinates(vc4, x, y, &coords_emitted);
-
- cl_start_reloc(&vc4->rcl, 1);
- cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
- cl_u8(&vc4->rcl,
- VC4_LOADSTORE_TILE_BUFFER_ZS |
- (zsurf->tiling <<
- VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
- cl_u8(&vc4->rcl,
- VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR);
- cl_reloc_hindex(&vc4->rcl, depth_hindex,
- zsurf->offset |
- ((end_of_frame &&
- !(vc4->resolve & PIPE_CLEAR_COLOR0)) ?
- VC4_LOADSTORE_TILE_BUFFER_EOF : 0));
-
- coords_emitted = false;
- }
-
- if (vc4->resolve & PIPE_CLEAR_COLOR0) {
- vc4_tile_coordinates(vc4, x, y, &coords_emitted);
- if (end_of_frame) {
- cl_u8(&vc4->rcl,
- VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF);
- } else {
- cl_u8(&vc4->rcl,
- VC4_PACKET_STORE_MS_TILE_BUFFER);
- }
-
- coords_emitted = false;
- }
-
- /* One of the bits needs to have been set that would
- * have triggered an EOF.
- */
- assert(vc4->resolve & (PIPE_CLEAR_COLOR0 |
- PIPE_CLEAR_DEPTH |
- PIPE_CLEAR_STENCIL));
- /* Any coords emitted must also have been consumed by
- * a store.
- */
- assert(!coords_emitted);
- }
- }
-
- if (vc4->resolve & PIPE_CLEAR_COLOR0)
- ctex->writes++;
-
- if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))
- ztex->writes++;
-}
-
void
vc4_flush(struct pipe_context *pctx)
{
struct vc4_context *vc4 = vc4_context(pctx);
+ struct pipe_surface *cbuf = vc4->framebuffer.cbufs[0];
+ struct pipe_surface *zsbuf = vc4->framebuffer.zsbuf;
if (!vc4->needs_flush)
return;
@@ -322,7 +65,31 @@ vc4_flush(struct pipe_context *pctx)
/* The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN. */
cl_u8(&vc4->bcl, VC4_PACKET_FLUSH);
- vc4_setup_rcl(vc4);
+ if (cbuf && (vc4->resolve & PIPE_CLEAR_COLOR0)) {
+ pipe_surface_reference(&vc4->color_write, cbuf);
+ if (!(vc4->cleared & PIPE_CLEAR_COLOR0)) {
+ pipe_surface_reference(&vc4->color_read, cbuf);
+ } else {
+ pipe_surface_reference(&vc4->color_read, NULL);
+ }
+
+ } else {
+ pipe_surface_reference(&vc4->color_write, NULL);
+ pipe_surface_reference(&vc4->color_read, NULL);
+ }
+
+ if (vc4->framebuffer.zsbuf &&
+ (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
+ pipe_surface_reference(&vc4->zs_write, zsbuf);
+ if (!(vc4->cleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
+ pipe_surface_reference(&vc4->zs_read, zsbuf);
+ } else {
+ pipe_surface_reference(&vc4->zs_read, NULL);
+ }
+ } else {
+ pipe_surface_reference(&vc4->zs_write, NULL);
+ pipe_surface_reference(&vc4->zs_read, NULL);
+ }
vc4_job_submit(vc4);
}
@@ -410,12 +177,13 @@ vc4_context_destroy(struct pipe_context *pctx)
if (vc4->primconvert)
util_primconvert_destroy(vc4->primconvert);
+ if (vc4->uploader)
+ u_upload_destroy(vc4->uploader);
+
util_slab_destroy(&vc4->transfer_pool);
pipe_surface_reference(&vc4->framebuffer.cbufs[0], NULL);
pipe_surface_reference(&vc4->framebuffer.zsbuf, NULL);
- vc4_bo_unreference(&vc4->tile_alloc);
- vc4_bo_unreference(&vc4->tile_state);
vc4_program_fini(pctx);
@@ -466,6 +234,9 @@ vc4_context_create(struct pipe_screen *pscreen, void *priv)
if (!vc4->primconvert)
goto fail;
+ vc4->uploader = u_upload_create(pctx, 16 * 1024, 4,
+ PIPE_BIND_INDEX_BUFFER);
+
vc4_debug |= saved_shaderdb_flag;
return &vc4->base;
diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h
index d89f1974e12..d5d6be16f6e 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -178,12 +178,18 @@ struct vc4_context {
struct vc4_screen *screen;
struct vc4_cl bcl;
- struct vc4_cl rcl;
struct vc4_cl shader_rec;
struct vc4_cl uniforms;
struct vc4_cl bo_handles;
struct vc4_cl bo_pointers;
uint32_t shader_rec_count;
+
+ /** @{ Surfaces to submit rendering for. */
+ struct pipe_surface *color_read;
+ struct pipe_surface *color_write;
+ struct pipe_surface *zs_read;
+ struct pipe_surface *zs_write;
+ /** @} */
/** @{
* Bounding box of the scissor across all queued drawing.
*
@@ -194,9 +200,13 @@ struct vc4_context {
uint32_t draw_max_x;
uint32_t draw_max_y;
/** @} */
-
- struct vc4_bo *tile_alloc;
- struct vc4_bo *tile_state;
+ /** @{
+ * Width/height of the color framebuffer being rendered to,
+ * for VC4_TILE_RENDERING_MODE_CONFIG.
+ */
+ uint32_t draw_width;
+ uint32_t draw_height;
+ /** @} */
struct util_slab_mempool transfer_pool;
struct blitter_context *blitter;
@@ -243,6 +253,8 @@ struct vc4_context {
/** Seqno of the last CL flush's job. */
uint64_t last_emit_seqno;
+ struct u_upload_mgr *uploader;
+
/** @{ Current pipeline state objects */
struct pipe_scissor_state scissor;
struct pipe_blend_state *blend;
diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c
index 16418bf12da..5e6d70d6f33 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -72,44 +72,15 @@ vc4_start_draw(struct vc4_context *vc4)
uint32_t tilew = align(width, 64) / 64;
uint32_t tileh = align(height, 64) / 64;
- /* Tile alloc memory setup: We use an initial alloc size of 32b. The
- * hardware then aligns that to 256b (we use 4096, because all of our
- * BO allocations align to that anyway), then for some reason the
- * simulator wants an extra page available, even if you have overflow
- * memory set up.
- *
- * XXX: The binner only does 28-bit addressing math, so the tile alloc
- * and tile state should be in the same BO and that BO needs to not
- * cross a 256MB boundary, somehow.
- */
- uint32_t tile_alloc_size = 32 * tilew * tileh;
- tile_alloc_size = align(tile_alloc_size, 4096);
- tile_alloc_size += 4096;
- uint32_t tile_state_size = 48 * tilew * tileh;
- if (!vc4->tile_alloc || vc4->tile_alloc->size < tile_alloc_size) {
- vc4_bo_unreference(&vc4->tile_alloc);
- vc4->tile_alloc = vc4_bo_alloc(vc4->screen, tile_alloc_size,
- "tile_alloc");
- }
- if (!vc4->tile_state || vc4->tile_state->size < tile_state_size) {
- vc4_bo_unreference(&vc4->tile_state);
- vc4->tile_state = vc4_bo_alloc(vc4->screen, tile_state_size,
- "tile_state");
- }
-
// Tile state data is 48 bytes per tile, I think it can be thrown away
// as soon as binning is finished.
- cl_start_reloc(&vc4->bcl, 2);
cl_u8(&vc4->bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG);
- cl_reloc(vc4, &vc4->bcl, vc4->tile_alloc, 0);
- cl_u32(&vc4->bcl, vc4->tile_alloc->size);
- cl_reloc(vc4, &vc4->bcl, vc4->tile_state, 0);
+ cl_u32(&vc4->bcl, 0); /* tile alloc addr, filled by kernel */
+ cl_u32(&vc4->bcl, 0); /* tile alloc size, filled by kernel */
+ cl_u32(&vc4->bcl, 0); /* tile state addr, filled by kernel */
cl_u8(&vc4->bcl, tilew);
cl_u8(&vc4->bcl, tileh);
- cl_u8(&vc4->bcl,
- VC4_BIN_CONFIG_AUTO_INIT_TSDA |
- VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32 |
- VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32);
+ cl_u8(&vc4->bcl, 0); /* flags, filled by kernel. */
/* START_TILE_BINNING resets the statechange counters in the hardware,
* which are what is used when a primitive is binned to a tile to
@@ -129,6 +100,8 @@ vc4_start_draw(struct vc4_context *vc4)
vc4->needs_flush = true;
vc4->draw_call_queued = true;
+ vc4->draw_width = width;
+ vc4->draw_height = height;
}
static void
@@ -266,13 +239,17 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
* definitions, up to but not including QUADS.
*/
if (info->indexed) {
- struct vc4_resource *rsc = vc4_resource(vc4->indexbuf.buffer);
uint32_t offset = vc4->indexbuf.offset;
uint32_t index_size = vc4->indexbuf.index_size;
- if (rsc->shadow_parent) {
- vc4_update_shadow_index_buffer(pctx, &vc4->indexbuf);
- offset = 0;
+ struct pipe_resource *prsc;
+ if (vc4->indexbuf.index_size == 4) {
+ prsc = vc4_get_shadow_index_buffer(pctx, &vc4->indexbuf,
+ info->count, &offset);
+ index_size = 2;
+ } else {
+ prsc = vc4->indexbuf.buffer;
}
+ struct vc4_resource *rsc = vc4_resource(prsc);
cl_start_reloc(&vc4->bcl, 1);
cl_u8(&vc4->bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
@@ -284,6 +261,9 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
cl_u32(&vc4->bcl, info->count);
cl_reloc(vc4, &vc4->bcl, rsc->bo, offset);
cl_u32(&vc4->bcl, max_index);
+
+ if (vc4->indexbuf.index_size == 4)
+ pipe_resource_reference(&prsc, NULL);
} else {
cl_u8(&vc4->bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
cl_u8(&vc4->bcl, info->mode);
diff --git a/src/gallium/drivers/vc4/vc4_drm.h b/src/gallium/drivers/vc4/vc4_drm.h
index 062fd3b687e..5f1ee4fa125 100644
--- a/src/gallium/drivers/vc4/vc4_drm.h
+++ b/src/gallium/drivers/vc4/vc4_drm.h
@@ -38,6 +38,15 @@
#define DRM_IOCTL_VC4_CREATE_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo)
#define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo)
+struct drm_vc4_submit_rcl_surface {
+ uint32_t hindex; /* Handle index, or ~0 if not present. */
+ uint32_t offset; /* Offset to start of buffer. */
+ /*
+ * Bits for either render config (color_ms_write) or load/store packet.
+ */
+ uint16_t bits;
+ uint16_t pad;
+};
/**
* struct drm_vc4_submit_cl - ioctl argument for submitting commands to the 3D
@@ -62,16 +71,6 @@ struct drm_vc4_submit_cl {
*/
uint64_t bin_cl;
- /* Pointer to the render command list.
- *
- * The render command list contains a set of packets to load the
- * current tile's state (reading from memory, or just clearing it)
- * into the GPU, then call into the tile allocation BO to run the
- * stored rendering for that tile, then store the tile's state back to
- * memory.
- */
- uint64_t render_cl;
-
/* Pointer to the shader records.
*
* Shader records are the structures read by the hardware that contain
@@ -102,8 +101,6 @@ struct drm_vc4_submit_cl {
/* Size in bytes of the binner command list. */
uint32_t bin_cl_size;
- /* Size in bytes of the render command list */
- uint32_t render_cl_size;
/* Size in bytes of the set of shader records. */
uint32_t shader_rec_size;
/* Number of shader records.
@@ -119,8 +116,25 @@ struct drm_vc4_submit_cl {
/* Number of BO handles passed in (size is that times 4). */
uint32_t bo_handle_count;
+ /* RCL setup: */
+ uint16_t width;
+ uint16_t height;
+ uint8_t min_x_tile;
+ uint8_t min_y_tile;
+ uint8_t max_x_tile;
+ uint8_t max_y_tile;
+ struct drm_vc4_submit_rcl_surface color_read;
+ struct drm_vc4_submit_rcl_surface color_ms_write;
+ struct drm_vc4_submit_rcl_surface zs_read;
+ struct drm_vc4_submit_rcl_surface zs_write;
+ uint32_t clear_color[2];
+ uint32_t clear_z;
+ uint8_t clear_s;
+
+ uint32_t pad:24;
+
+#define VC4_SUBMIT_CL_USE_CLEAR_COLOR (1 << 0)
uint32_t flags;
- uint32_t pad;
/* Returned value of the seqno of this render job (for the
* wait ioctl).
diff --git a/src/gallium/drivers/vc4/vc4_job.c b/src/gallium/drivers/vc4/vc4_job.c
index 76037162102..dcade15443a 100644
--- a/src/gallium/drivers/vc4/vc4_job.c
+++ b/src/gallium/drivers/vc4/vc4_job.c
@@ -33,7 +33,6 @@ void
vc4_job_init(struct vc4_context *vc4)
{
vc4_init_cl(vc4, &vc4->bcl);
- vc4_init_cl(vc4, &vc4->rcl);
vc4_init_cl(vc4, &vc4->shader_rec);
vc4_init_cl(vc4, &vc4->uniforms);
vc4_init_cl(vc4, &vc4->bo_handles);
@@ -50,7 +49,6 @@ vc4_job_reset(struct vc4_context *vc4)
vc4_bo_unreference(&referenced_bos[i]);
}
vc4_reset_cl(&vc4->bcl);
- vc4_reset_cl(&vc4->rcl);
vc4_reset_cl(&vc4->shader_rec);
vc4_reset_cl(&vc4->uniforms);
vc4_reset_cl(&vc4->bo_handles);
@@ -75,6 +73,70 @@ vc4_job_reset(struct vc4_context *vc4)
vc4->draw_max_y = 0;
}
+static void
+vc4_submit_setup_rcl_surface(struct vc4_context *vc4,
+ struct drm_vc4_submit_rcl_surface *submit_surf,
+ struct pipe_surface *psurf,
+ bool is_depth, bool is_write)
+{
+ struct vc4_surface *surf = vc4_surface(psurf);
+
+ if (!surf) {
+ submit_surf->hindex = ~0;
+ return;
+ }
+
+ struct vc4_resource *rsc = vc4_resource(psurf->texture);
+ submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo);
+ submit_surf->offset = surf->offset;
+
+ if (is_depth) {
+ submit_surf->bits =
+ VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS,
+ VC4_LOADSTORE_TILE_BUFFER_BUFFER);
+
+ } else {
+ submit_surf->bits =
+ VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR,
+ VC4_LOADSTORE_TILE_BUFFER_BUFFER) |
+ VC4_SET_FIELD(vc4_rt_format_is_565(psurf->format) ?
+ VC4_LOADSTORE_TILE_BUFFER_BGR565 :
+ VC4_LOADSTORE_TILE_BUFFER_RGBA8888,
+ VC4_LOADSTORE_TILE_BUFFER_FORMAT);
+ }
+ submit_surf->bits |=
+ VC4_SET_FIELD(surf->tiling, VC4_LOADSTORE_TILE_BUFFER_TILING);
+
+ if (is_write)
+ rsc->writes++;
+}
+
+static void
+vc4_submit_setup_ms_rcl_surface(struct vc4_context *vc4,
+ struct drm_vc4_submit_rcl_surface *submit_surf,
+ struct pipe_surface *psurf)
+{
+ struct vc4_surface *surf = vc4_surface(psurf);
+
+ if (!surf) {
+ submit_surf->hindex = ~0;
+ return;
+ }
+
+ struct vc4_resource *rsc = vc4_resource(psurf->texture);
+ submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo);
+ submit_surf->offset = surf->offset;
+
+ submit_surf->bits =
+ VC4_SET_FIELD(vc4_rt_format_is_565(surf->base.format) ?
+ VC4_RENDER_CONFIG_FORMAT_BGR565 :
+ VC4_RENDER_CONFIG_FORMAT_RGBA8888,
+ VC4_RENDER_CONFIG_FORMAT) |
+ VC4_SET_FIELD(surf->tiling, VC4_RENDER_CONFIG_MEMORY_FORMAT);
+
+ rsc->writes++;
+}
+
/**
* Submits the job to the kernel and then reinitializes it.
*/
@@ -84,26 +146,49 @@ vc4_job_submit(struct vc4_context *vc4)
if (vc4_debug & VC4_DEBUG_CL) {
fprintf(stderr, "BCL:\n");
vc4_dump_cl(vc4->bcl.base, vc4->bcl.next - vc4->bcl.base, false);
- fprintf(stderr, "RCL:\n");
- vc4_dump_cl(vc4->rcl.base, vc4->rcl.next - vc4->rcl.base, true);
}
struct drm_vc4_submit_cl submit;
memset(&submit, 0, sizeof(submit));
+ cl_ensure_space(&vc4->bo_handles, 4 * sizeof(uint32_t));
+ cl_ensure_space(&vc4->bo_pointers, 4 * sizeof(struct vc4_bo *));
+
+ vc4_submit_setup_rcl_surface(vc4, &submit.color_read,
+ vc4->color_read, false, false);
+ vc4_submit_setup_ms_rcl_surface(vc4, &submit.color_ms_write,
+ vc4->color_write);
+ vc4_submit_setup_rcl_surface(vc4, &submit.zs_read,
+ vc4->zs_read, true, false);
+ vc4_submit_setup_rcl_surface(vc4, &submit.zs_write,
+ vc4->zs_write, true, true);
+
submit.bo_handles = (uintptr_t)vc4->bo_handles.base;
submit.bo_handle_count = (vc4->bo_handles.next -
vc4->bo_handles.base) / 4;
submit.bin_cl = (uintptr_t)vc4->bcl.base;
submit.bin_cl_size = vc4->bcl.next - vc4->bcl.base;
- submit.render_cl = (uintptr_t)vc4->rcl.base;
- submit.render_cl_size = vc4->rcl.next - vc4->rcl.base;
submit.shader_rec = (uintptr_t)vc4->shader_rec.base;
submit.shader_rec_size = vc4->shader_rec.next - vc4->shader_rec.base;
submit.shader_rec_count = vc4->shader_rec_count;
submit.uniforms = (uintptr_t)vc4->uniforms.base;
submit.uniforms_size = vc4->uniforms.next - vc4->uniforms.base;
+ assert(vc4->draw_min_x != ~0 && vc4->draw_min_y != ~0);
+ submit.min_x_tile = vc4->draw_min_x / 64;
+ submit.min_y_tile = vc4->draw_min_y / 64;
+ submit.max_x_tile = (vc4->draw_max_x - 1) / 64;
+ submit.max_y_tile = (vc4->draw_max_y - 1) / 64;
+ submit.width = vc4->draw_width;
+ submit.height = vc4->draw_height;
+ if (vc4->cleared) {
+ submit.flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR;
+ submit.clear_color[0] = vc4->clear_color[0];
+ submit.clear_color[1] = vc4->clear_color[1];
+ submit.clear_z = vc4->clear_depth;
+ submit.clear_s = vc4->clear_stencil;
+ }
+
if (!(vc4_debug & VC4_DEBUG_NORAST)) {
int ret;
diff --git a/src/gallium/drivers/vc4/vc4_opt_algebraic.c b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
index e40e0f3b71b..7978ea1829f 100644
--- a/src/gallium/drivers/vc4/vc4_opt_algebraic.c
+++ b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
@@ -136,11 +136,8 @@ bool
qir_opt_algebraic(struct vc4_compile *c)
{
bool progress = false;
- struct simple_node *node;
-
- foreach(node, &c->instructions) {
- struct qinst *inst = (struct qinst *)node;
+ list_for_each_entry(struct qinst, inst, &c->instructions, link) {
switch (inst->op) {
case QOP_SEL_X_Y_ZS:
case QOP_SEL_X_Y_ZC:
diff --git a/src/gallium/drivers/vc4/vc4_opt_constant_folding.c b/src/gallium/drivers/vc4/vc4_opt_constant_folding.c
index ac9be5c9642..15ec9f07260 100644
--- a/src/gallium/drivers/vc4/vc4_opt_constant_folding.c
+++ b/src/gallium/drivers/vc4/vc4_opt_constant_folding.c
@@ -98,10 +98,8 @@ bool
qir_opt_constant_folding(struct vc4_compile *c)
{
bool progress = false;
- struct simple_node *node;
- foreach(node, &c->instructions) {
- struct qinst *inst = (struct qinst *)node;
+ list_for_each_entry(struct qinst, inst, &c->instructions, link) {
if (constant_fold(c, inst))
progress = true;
}
diff --git a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
index 5189a401248..d6d2fbf257f 100644
--- a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
+++ b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
@@ -38,13 +38,10 @@ bool
qir_opt_copy_propagation(struct vc4_compile *c)
{
bool progress = false;
- struct simple_node *node;
bool debug = false;
struct qreg *movs = calloc(c->num_temps, sizeof(struct qreg));
- foreach(node, &c->instructions) {
- struct qinst *inst = (struct qinst *)node;
-
+ list_for_each_entry(struct qinst, inst, &c->instructions, link) {
for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
int index = inst->src[i].index;
if (inst->src[i].file == QFILE_TEMP &&
diff --git a/src/gallium/drivers/vc4/vc4_opt_cse.c b/src/gallium/drivers/vc4/vc4_opt_cse.c
index 71794f7d1cf..92c8260eb59 100644
--- a/src/gallium/drivers/vc4/vc4_opt_cse.c
+++ b/src/gallium/drivers/vc4/vc4_opt_cse.c
@@ -121,7 +121,6 @@ bool
qir_opt_cse(struct vc4_compile *c)
{
bool progress = false;
- struct simple_node *node, *t;
uint32_t sf_count = 0, r4_count = 0;
struct hash_table *ht = _mesa_hash_table_create(NULL, NULL,
@@ -129,9 +128,7 @@ qir_opt_cse(struct vc4_compile *c)
if (!ht)
return false;
- foreach_s(node, t, &c->instructions) {
- struct qinst *inst = (struct qinst *)node;
-
+ list_for_each_entry(struct qinst, inst, &c->instructions, link) {
if (qir_has_side_effects(c, inst) ||
qir_has_side_effect_reads(c, inst)) {
continue;
diff --git a/src/gallium/drivers/vc4/vc4_opt_dead_code.c b/src/gallium/drivers/vc4/vc4_opt_dead_code.c
index e4ead46c9c2..ffd42422de8 100644
--- a/src/gallium/drivers/vc4/vc4_opt_dead_code.c
+++ b/src/gallium/drivers/vc4/vc4_opt_dead_code.c
@@ -86,7 +86,7 @@ qir_opt_dead_code(struct vc4_compile *c)
/* Whether we're eliminating texture setup currently. */
bool dce_tex = false;
- struct simple_node *node, *t;
+ struct list_head *node, *t;
for (node = c->instructions.prev, t = node->prev;
&c->instructions != node;
node = t, t = t->prev) {
diff --git a/src/gallium/drivers/vc4/vc4_opt_small_immediates.c b/src/gallium/drivers/vc4/vc4_opt_small_immediates.c
index a329ac69d11..d6e98f0aebf 100644
--- a/src/gallium/drivers/vc4/vc4_opt_small_immediates.c
+++ b/src/gallium/drivers/vc4/vc4_opt_small_immediates.c
@@ -37,11 +37,8 @@ bool
qir_opt_small_immediates(struct vc4_compile *c)
{
bool progress = false;
- struct simple_node *node;
-
- foreach(node, &c->instructions) {
- struct qinst *inst = (struct qinst *)node;
+ list_for_each_entry(struct qinst, inst, &c->instructions, link) {
/* The small immediate value sits in the raddr B field, so we
* can't have 2 small immediates in one instruction (unless
* they're the same value, but that should be optimized away
diff --git a/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c b/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c
index e9711f222cd..e04f02859d5 100644
--- a/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c
+++ b/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c
@@ -37,15 +37,12 @@ qir_opt_vpm_writes(struct vc4_compile *c)
return false;
bool progress = false;
- struct simple_node *node;
struct qinst *vpm_writes[64] = { 0 };
uint32_t use_count[c->num_temps];
uint32_t vpm_write_count = 0;
memset(&use_count, 0, sizeof(use_count));
- foreach(node, &c->instructions) {
- struct qinst *inst = (struct qinst *)node;
-
+ list_for_each_entry(struct qinst, inst, &c->instructions, link) {
switch (inst->dst.file) {
case QFILE_VPM:
vpm_writes[vpm_write_count++] = inst;
@@ -102,7 +99,8 @@ qir_opt_vpm_writes(struct vc4_compile *c)
* to maintain the order of the VPM writes.
*/
assert(!vpm_writes[i]->sf);
- move_to_tail(&vpm_writes[i]->link, &inst->link);
+ list_del(&inst->link);
+ list_addtail(&inst->link, &vpm_writes[i]->link);
qir_remove_instruction(c, vpm_writes[i]);
c->defs[inst->dst.index] = NULL;
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index bf156f9b42d..ba47c51d9bd 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -147,6 +147,9 @@ indirect_uniform_load(struct vc4_compile *c,
indirect_offset = qir_ADD(c, indirect_offset,
qir_uniform_ui(c, (range->dst_offset +
offset)));
+
+ /* Clamp to [0, array size). Note that MIN/MAX are signed. */
+ indirect_offset = qir_MAX(c, indirect_offset, qir_uniform_ui(c, 0));
indirect_offset = qir_MIN(c, indirect_offset,
qir_uniform_ui(c, (range->dst_offset +
range->size - 4)));
@@ -322,7 +325,9 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
switch (instr->src[i].src_type) {
case nir_tex_src_coord:
s = ntq_get_src(c, instr->src[i].src, 0);
- if (instr->sampler_dim != GLSL_SAMPLER_DIM_1D)
+ if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D)
+ t = qir_uniform_f(c, 0.5);
+ else
t = ntq_get_src(c, instr->src[i].src, 1);
if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
r = ntq_get_src(c, instr->src[i].src, 2);
@@ -1849,8 +1854,6 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
switch (instr->intrinsic) {
case nir_intrinsic_load_uniform:
- assert(instr->const_index[1] == 1);
-
for (int i = 0; i < instr->num_components; i++) {
dest[i] = qir_uniform(c, QUNIFORM_UNIFORM,
instr->const_index[0] * 4 + i);
@@ -1858,8 +1861,6 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
break;
case nir_intrinsic_load_uniform_indirect:
- assert(instr->const_index[1] == 1);
-
for (int i = 0; i < instr->num_components; i++) {
dest[i] = indirect_uniform_load(c,
ntq_get_src(c, instr->src[0], 0),
@@ -1870,8 +1871,6 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
break;
case nir_intrinsic_load_input:
- assert(instr->const_index[1] == 1);
-
for (int i = 0; i < instr->num_components; i++)
dest[i] = c->inputs[instr->const_index[0] * 4 + i];
@@ -2215,11 +2214,9 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage,
shader->program_id = vc4->next_compiled_program_id++;
if (stage == QSTAGE_FRAG) {
bool input_live[c->num_input_semantics];
- struct simple_node *node;
memset(input_live, 0, sizeof(input_live));
- foreach(node, &c->instructions) {
- struct qinst *inst = (struct qinst *)node;
+ list_for_each_entry(struct qinst, inst, &c->instructions, link) {
for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
if (inst->src[i].file == QFILE_VARY)
input_live[inst->src[i].index] = true;
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index e2e6a5cdf16..1c96ef4795f 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -22,7 +22,6 @@
*/
#include "util/u_memory.h"
-#include "util/simple_list.h"
#include "util/ralloc.h"
#include "vc4_qir.h"
@@ -301,10 +300,7 @@ qir_dump_inst(struct vc4_compile *c, struct qinst *inst)
void
qir_dump(struct vc4_compile *c)
{
- struct simple_node *node;
-
- foreach(node, &c->instructions) {
- struct qinst *inst = (struct qinst *)node;
+ list_for_each_entry(struct qinst, inst, &c->instructions, link) {
qir_dump_inst(c, inst);
fprintf(stderr, "\n");
}
@@ -370,7 +366,7 @@ qir_emit(struct vc4_compile *c, struct qinst *inst)
if (inst->dst.file == QFILE_TEMP)
c->defs[inst->dst.index] = inst;
- insert_at_tail(&c->instructions, &inst->link);
+ list_addtail(&inst->link, &c->instructions);
}
bool
@@ -384,7 +380,7 @@ qir_compile_init(void)
{
struct vc4_compile *c = rzalloc(NULL, struct vc4_compile);
- make_empty_list(&c->instructions);
+ list_inithead(&c->instructions);
c->output_position_index = -1;
c->output_clipvertex_index = -1;
@@ -403,7 +399,7 @@ qir_remove_instruction(struct vc4_compile *c, struct qinst *qinst)
if (qinst->dst.file == QFILE_TEMP)
c->defs[qinst->dst.index] = NULL;
- remove_from_list(&qinst->link);
+ list_del(&qinst->link);
free(qinst->src);
free(qinst);
}
@@ -420,9 +416,9 @@ qir_follow_movs(struct vc4_compile *c, struct qreg reg)
void
qir_compile_destroy(struct vc4_compile *c)
{
- while (!is_empty_list(&c->instructions)) {
+ while (!list_empty(&c->instructions)) {
struct qinst *qinst =
- (struct qinst *)first_elem(&c->instructions);
+ (struct qinst *)c->instructions.next;
qir_remove_instruction(c, qinst);
}
@@ -478,7 +474,7 @@ void
qir_SF(struct vc4_compile *c, struct qreg src)
{
struct qinst *last_inst = NULL;
- if (!is_empty_list(&c->instructions))
+ if (!list_empty(&c->instructions))
last_inst = (struct qinst *)c->instructions.prev;
if (!last_inst ||
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index adc2c89d2c1..732cfd0b306 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -33,7 +33,7 @@
#include "util/macros.h"
#include "glsl/nir/nir.h"
-#include "util/simple_list.h"
+#include "util/list.h"
#include "util/u_math.h"
enum qfile {
@@ -162,12 +162,12 @@ enum qop {
};
struct queued_qpu_inst {
- struct simple_node link;
+ struct list_head link;
uint64_t inst;
};
struct qinst {
- struct simple_node link;
+ struct list_head link;
enum qop op;
struct qreg dst;
@@ -356,10 +356,10 @@ struct vc4_compile {
struct qreg undef;
enum qstage stage;
uint32_t num_temps;
- struct simple_node instructions;
+ struct list_head instructions;
uint32_t immediates[1024];
- struct simple_node qpu_inst_list;
+ struct list_head qpu_inst_list;
uint64_t *qpu_insts;
uint32_t qpu_inst_count;
uint32_t qpu_inst_size;
diff --git a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
index 63f5eb22858..910c89dca79 100644
--- a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
+++ b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
@@ -88,7 +88,6 @@ is_lowerable_uniform(struct qinst *inst, int i)
void
qir_lower_uniforms(struct vc4_compile *c)
{
- struct simple_node *node;
struct hash_table *ht =
_mesa_hash_table_create(c, index_hash, index_compare);
@@ -96,8 +95,7 @@ qir_lower_uniforms(struct vc4_compile *c)
* than one uniform referenced, and add those uniform values to the
* ht.
*/
- foreach(node, &c->instructions) {
- struct qinst *inst = (struct qinst *)node;
+ list_for_each_entry(struct qinst, inst, &c->instructions, link) {
uint32_t nsrc = qir_get_op_nsrc(inst->op);
uint32_t count = 0;
@@ -137,10 +135,9 @@ qir_lower_uniforms(struct vc4_compile *c)
struct qreg temp = qir_get_temp(c);
struct qreg unif = { QFILE_UNIF, max_index };
struct qinst *mov = qir_inst(QOP_MOV, temp, unif, c->undef);
- insert_at_head(&c->instructions, &mov->link);
+ list_add(&mov->link, &c->instructions);
c->defs[temp.index] = mov;
- foreach(node, &c->instructions) {
- struct qinst *inst = (struct qinst *)node;
+ list_for_each_entry(struct qinst, inst, &c->instructions, link) {
uint32_t nsrc = qir_get_op_nsrc(inst->op);
uint32_t count = 0;
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index eeb8d3a21ff..99afe4b8798 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -47,14 +47,14 @@ queue(struct vc4_compile *c, uint64_t inst)
{
struct queued_qpu_inst *q = rzalloc(c, struct queued_qpu_inst);
q->inst = inst;
- insert_at_tail(&c->qpu_inst_list, &q->link);
+ list_addtail(&q->link, &c->qpu_inst_list);
}
static uint64_t *
last_inst(struct vc4_compile *c)
{
struct queued_qpu_inst *q =
- (struct queued_qpu_inst *)last_elem(&c->qpu_inst_list);
+ (struct queued_qpu_inst *)c->qpu_inst_list.prev;
return &q->inst;
}
@@ -117,11 +117,11 @@ fixup_raddr_conflict(struct vc4_compile *c,
return;
if (mux0 == QPU_MUX_A) {
- queue(c, qpu_a_MOV(qpu_rb(31), *src1));
- *src1 = qpu_rb(31);
+ queue(c, qpu_a_MOV(qpu_rb(31), *src0));
+ *src0 = qpu_rb(31);
} else {
- queue(c, qpu_a_MOV(qpu_ra(31), *src1));
- *src1 = qpu_ra(31);
+ queue(c, qpu_a_MOV(qpu_ra(31), *src0));
+ *src0 = qpu_ra(31);
}
}
@@ -144,7 +144,7 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
QPU_UNPACK_16B_TO_F32,
};
- make_empty_list(&c->qpu_inst_list);
+ list_inithead(&c->qpu_inst_list);
switch (c->stage) {
case QSTAGE_VERT:
@@ -170,10 +170,7 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
break;
}
- struct simple_node *node;
- foreach(node, &c->instructions) {
- struct qinst *qinst = (struct qinst *)node;
-
+ list_for_each_entry(struct qinst, qinst, &c->instructions, link) {
#if 0
fprintf(stderr, "translating qinst to qpu: ");
qir_dump_inst(qinst);
diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
index f523b4c6fb0..19cbf7bb98c 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
@@ -43,7 +43,7 @@ static bool debug;
struct schedule_node_child;
struct schedule_node {
- struct simple_node link;
+ struct list_head link;
struct queued_qpu_inst *inst;
struct schedule_node_child *children;
uint32_t child_count;
@@ -400,22 +400,21 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n)
}
static void
-calculate_forward_deps(struct vc4_compile *c, struct simple_node *schedule_list)
+calculate_forward_deps(struct vc4_compile *c, struct list_head *schedule_list)
{
- struct simple_node *node;
struct schedule_state state;
memset(&state, 0, sizeof(state));
state.dir = F;
- foreach(node, schedule_list)
- calculate_deps(&state, (struct schedule_node *)node);
+ list_for_each_entry(struct schedule_node, node, schedule_list, link)
+ calculate_deps(&state, node);
}
static void
-calculate_reverse_deps(struct vc4_compile *c, struct simple_node *schedule_list)
+calculate_reverse_deps(struct vc4_compile *c, struct list_head *schedule_list)
{
- struct simple_node *node;
+ struct list_head *node;
struct schedule_state state;
memset(&state, 0, sizeof(state));
@@ -507,15 +506,13 @@ get_instruction_priority(uint64_t inst)
static struct schedule_node *
choose_instruction_to_schedule(struct choose_scoreboard *scoreboard,
- struct simple_node *schedule_list,
+ struct list_head *schedule_list,
struct schedule_node *prev_inst)
{
struct schedule_node *chosen = NULL;
- struct simple_node *node;
int chosen_prio = 0;
- foreach(node, schedule_list) {
- struct schedule_node *n = (struct schedule_node *)node;
+ list_for_each_entry(struct schedule_node, n, schedule_list, link) {
uint64_t inst = n->inst->inst;
/* "An instruction must not read from a location in physical
@@ -596,14 +593,11 @@ update_scoreboard_for_chosen(struct choose_scoreboard *scoreboard,
}
static void
-dump_state(struct simple_node *schedule_list)
+dump_state(struct list_head *schedule_list)
{
- struct simple_node *node;
-
uint32_t i = 0;
- foreach(node, schedule_list) {
- struct schedule_node *n = (struct schedule_node *)node;
+ list_for_each_entry(struct schedule_node, n, schedule_list, link) {
fprintf(stderr, "%3d: ", i++);
vc4_qpu_disasm(&n->inst->inst, 1);
fprintf(stderr, "\n");
@@ -639,7 +633,7 @@ compute_delay(struct schedule_node *n)
}
static void
-mark_instruction_scheduled(struct simple_node *schedule_list,
+mark_instruction_scheduled(struct list_head *schedule_list,
struct schedule_node *node,
bool war_only)
{
@@ -658,16 +652,15 @@ mark_instruction_scheduled(struct simple_node *schedule_list,
child->parent_count--;
if (child->parent_count == 0)
- insert_at_head(schedule_list, &child->link);
+ list_add(&child->link, schedule_list);
node->children[i].node = NULL;
}
}
static void
-schedule_instructions(struct vc4_compile *c, struct simple_node *schedule_list)
+schedule_instructions(struct vc4_compile *c, struct list_head *schedule_list)
{
- struct simple_node *node, *t;
struct choose_scoreboard scoreboard;
/* We reorder the uniforms as we schedule instructions, so save the
@@ -693,14 +686,12 @@ schedule_instructions(struct vc4_compile *c, struct simple_node *schedule_list)
}
/* Remove non-DAG heads from the list. */
- foreach_s(node, t, schedule_list) {
- struct schedule_node *n = (struct schedule_node *)node;
-
+ list_for_each_entry_safe(struct schedule_node, n, schedule_list, link) {
if (n->parent_count != 0)
- remove_from_list(&n->link);
+ list_del(&n->link);
}
- while (!is_empty_list(schedule_list)) {
+ while (!list_empty(schedule_list)) {
struct schedule_node *chosen =
choose_instruction_to_schedule(&scoreboard,
schedule_list,
@@ -724,7 +715,7 @@ schedule_instructions(struct vc4_compile *c, struct simple_node *schedule_list)
* find an instruction to pair with it.
*/
if (chosen) {
- remove_from_list(&chosen->link);
+ list_del(&chosen->link);
mark_instruction_scheduled(schedule_list, chosen, true);
if (chosen->uniform != -1) {
c->uniform_data[next_uniform] =
@@ -738,7 +729,7 @@ schedule_instructions(struct vc4_compile *c, struct simple_node *schedule_list)
schedule_list,
chosen);
if (merge) {
- remove_from_list(&merge->link);
+ list_del(&merge->link);
inst = qpu_merge_inst(inst, merge->inst->inst);
assert(inst != 0);
if (merge->uniform != -1) {
@@ -813,16 +804,14 @@ void
qpu_schedule_instructions(struct vc4_compile *c)
{
void *mem_ctx = ralloc_context(NULL);
- struct simple_node schedule_list;
- struct simple_node *node;
+ struct list_head schedule_list;
- make_empty_list(&schedule_list);
+ list_inithead(&schedule_list);
if (debug) {
fprintf(stderr, "Pre-schedule instructions\n");
- foreach(node, &c->qpu_inst_list) {
- struct queued_qpu_inst *q =
- (struct queued_qpu_inst *)node;
+ list_for_each_entry(struct queued_qpu_inst, q,
+ &c->qpu_inst_list, link) {
vc4_qpu_disasm(&q->inst, 1);
fprintf(stderr, "\n");
}
@@ -831,7 +820,7 @@ qpu_schedule_instructions(struct vc4_compile *c)
/* Wrap each instruction in a scheduler structure. */
uint32_t next_uniform = 0;
- while (!is_empty_list(&c->qpu_inst_list)) {
+ while (!list_empty(&c->qpu_inst_list)) {
struct queued_qpu_inst *inst =
(struct queued_qpu_inst *)c->qpu_inst_list.next;
struct schedule_node *n = rzalloc(mem_ctx, struct schedule_node);
@@ -844,16 +833,15 @@ qpu_schedule_instructions(struct vc4_compile *c)
} else {
n->uniform = -1;
}
- remove_from_list(&inst->link);
- insert_at_tail(&schedule_list, &n->link);
+ list_del(&inst->link);
+ list_addtail(&n->link, &schedule_list);
}
assert(next_uniform == c->num_uniforms);
calculate_forward_deps(c, &schedule_list);
calculate_reverse_deps(c, &schedule_list);
- foreach(node, &schedule_list) {
- struct schedule_node *n = (struct schedule_node *)node;
+ list_for_each_entry(struct schedule_node, n, &schedule_list, link) {
compute_delay(n);
}
diff --git a/src/gallium/drivers/vc4/vc4_query.c b/src/gallium/drivers/vc4/vc4_query.c
index 1792becb08f..270832eae3a 100644
--- a/src/gallium/drivers/vc4/vc4_query.c
+++ b/src/gallium/drivers/vc4/vc4_query.c
@@ -50,9 +50,10 @@ vc4_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
free(query);
}
-static void
+static boolean
vc4_begin_query(struct pipe_context *ctx, struct pipe_query *query)
{
+ return true;
}
static void
diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c
index f40547b8154..3b0b890b66a 100644
--- a/src/gallium/drivers/vc4/vc4_register_allocate.c
+++ b/src/gallium/drivers/vc4/vc4_register_allocate.c
@@ -161,7 +161,6 @@ node_to_temp_priority(const void *in_a, const void *in_b)
struct qpu_reg *
vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
{
- struct simple_node *node;
struct node_to_temp_map map[c->num_temps];
uint32_t temp_to_node[c->num_temps];
uint32_t def[c->num_temps];
@@ -189,9 +188,7 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
/* Compute the live ranges so we can figure out interference.
*/
uint32_t ip = 0;
- foreach(node, &c->instructions) {
- struct qinst *inst = (struct qinst *)node;
-
+ list_for_each_entry(struct qinst, inst, &c->instructions, link) {
if (inst->dst.file == QFILE_TEMP) {
def[inst->dst.index] = ip;
use[inst->dst.index] = ip;
@@ -227,9 +224,7 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
}
/* Figure out our register classes and preallocated registers */
- foreach(node, &c->instructions) {
- struct qinst *inst = (struct qinst *)node;
-
+ list_for_each_entry(struct qinst, inst, &c->instructions, link) {
switch (inst->op) {
case QOP_FRAG_Z:
ra_set_node_reg(g, temp_to_node[inst->dst.index],
diff --git a/src/gallium/drivers/vc4/vc4_reorder_uniforms.c b/src/gallium/drivers/vc4/vc4_reorder_uniforms.c
index 109724369d5..7f11fba2340 100644
--- a/src/gallium/drivers/vc4/vc4_reorder_uniforms.c
+++ b/src/gallium/drivers/vc4/vc4_reorder_uniforms.c
@@ -42,10 +42,8 @@ qir_reorder_uniforms(struct vc4_compile *c)
uint32_t *uniform_index = NULL;
uint32_t uniform_index_size = 0;
uint32_t next_uniform = 0;
- struct simple_node *node;
- foreach(node, &c->instructions) {
- struct qinst *inst = (struct qinst *)node;
+ list_for_each_entry(struct qinst, inst, &c->instructions, link) {
for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
if (inst->src[i].file != QFILE_UNIF)
continue;
diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c
index 3f180d5845d..cab76406055 100644
--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -26,6 +26,7 @@
#include "util/u_format.h"
#include "util/u_inlines.h"
#include "util/u_surface.h"
+#include "util/u_upload_mgr.h"
#include "vc4_screen.h"
#include "vc4_context.h"
@@ -161,6 +162,8 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
/* We need to align the box to utile boundaries, since that's
* what load/store operate on.
*/
+ uint32_t orig_width = ptrans->box.width;
+ uint32_t orig_height = ptrans->box.height;
uint32_t box_start_x = ptrans->box.x & (utile_w - 1);
uint32_t box_start_y = ptrans->box.y & (utile_h - 1);
ptrans->box.width += box_start_x;
@@ -174,7 +177,9 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
ptrans->layer_stride = ptrans->stride;
trans->map = malloc(ptrans->stride * ptrans->box.height);
- if (usage & PIPE_TRANSFER_READ) {
+ if (usage & PIPE_TRANSFER_READ ||
+ ptrans->box.width != orig_width ||
+ ptrans->box.height != orig_height) {
vc4_load_tiled_image(trans->map, ptrans->stride,
buf + slice->offset +
box->z * rsc->cube_map_stride,
@@ -638,41 +643,37 @@ vc4_update_shadow_baselevel_texture(struct pipe_context *pctx,
* was in user memory, it would be nice to not have uploaded it to a VBO
* before translating.
*/
-void
-vc4_update_shadow_index_buffer(struct pipe_context *pctx,
- const struct pipe_index_buffer *ib)
+struct pipe_resource *
+vc4_get_shadow_index_buffer(struct pipe_context *pctx,
+ const struct pipe_index_buffer *ib,
+ uint32_t count,
+ uint32_t *shadow_offset)
{
- struct vc4_resource *shadow = vc4_resource(ib->buffer);
- struct vc4_resource *orig = vc4_resource(shadow->shadow_parent);
- uint32_t count = shadow->base.b.width0 / 2;
-
- if (shadow->writes == orig->writes)
- return;
-
+ struct vc4_context *vc4 = vc4_context(pctx);
+ struct vc4_resource *orig = vc4_resource(ib->buffer);
perf_debug("Fallback conversion for %d uint indices\n", count);
+ void *data;
+ struct pipe_resource *shadow_rsc = NULL;
+ u_upload_alloc(vc4->uploader, 0, count * 2,
+ shadow_offset, &shadow_rsc, &data);
+ uint16_t *dst = data;
+
struct pipe_transfer *src_transfer;
uint32_t *src = pipe_buffer_map_range(pctx, &orig->base.b,
ib->offset,
count * 4,
PIPE_TRANSFER_READ, &src_transfer);
- struct pipe_transfer *dst_transfer;
- uint16_t *dst = pipe_buffer_map_range(pctx, &shadow->base.b,
- 0,
- count * 2,
- PIPE_TRANSFER_WRITE, &dst_transfer);
-
for (int i = 0; i < count; i++) {
uint32_t src_index = src[i];
assert(src_index <= 0xffff);
dst[i] = src_index;
}
- pctx->transfer_unmap(pctx, dst_transfer);
pctx->transfer_unmap(pctx, src_transfer);
- shadow->writes = orig->writes;
+ return shadow_rsc;
}
void
diff --git a/src/gallium/drivers/vc4/vc4_resource.h b/src/gallium/drivers/vc4/vc4_resource.h
index 2ed848bc7b9..ab8f5d3cd55 100644
--- a/src/gallium/drivers/vc4/vc4_resource.h
+++ b/src/gallium/drivers/vc4/vc4_resource.h
@@ -26,7 +26,7 @@
#define VC4_RESOURCE_H
#include "vc4_screen.h"
-#include "vc4_packet.h"
+#include "kernel/vc4_packet.h"
#include "util/u_transfer.h"
struct vc4_transfer {
@@ -45,7 +45,6 @@ struct vc4_resource_slice {
struct vc4_surface {
struct pipe_surface base;
uint32_t offset;
- uint32_t stride;
uint8_t tiling;
};
@@ -107,8 +106,10 @@ struct pipe_resource *vc4_resource_create(struct pipe_screen *pscreen,
const struct pipe_resource *tmpl);
void vc4_update_shadow_baselevel_texture(struct pipe_context *pctx,
struct pipe_sampler_view *view);
-void vc4_update_shadow_index_buffer(struct pipe_context *pctx,
- const struct pipe_index_buffer *ib);
+struct pipe_resource *vc4_get_shadow_index_buffer(struct pipe_context *pctx,
+ const struct pipe_index_buffer *ib,
+ uint32_t count,
+ uint32_t *offset);
void vc4_dump_surface(struct pipe_surface *psurf);
#endif /* VC4_RESOURCE_H */
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
index 84aae918326..f63bead0fbb 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -175,6 +175,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
+ case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
return 0;
/* Stream output. */
@@ -322,6 +323,7 @@ vc4_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
@@ -458,7 +460,7 @@ vc4_screen_create(int fd)
pscreen->is_format_supported = vc4_screen_is_format_supported;
screen->fd = fd;
- make_empty_list(&screen->bo_cache.time_list);
+ list_inithead(&screen->bo_cache.time_list);
vc4_fence_init(screen);
diff --git a/src/gallium/drivers/vc4/vc4_screen.h b/src/gallium/drivers/vc4/vc4_screen.h
index 60626285d4d..5992e371093 100644
--- a/src/gallium/drivers/vc4/vc4_screen.h
+++ b/src/gallium/drivers/vc4/vc4_screen.h
@@ -27,7 +27,7 @@
#include "pipe/p_screen.h"
#include "os/os_thread.h"
#include "state_tracker/drm_driver.h"
-#include "vc4_qir.h"
+#include "util/list.h"
struct vc4_bo;
@@ -61,13 +61,19 @@ struct vc4_screen {
struct vc4_bo_cache {
/** List of struct vc4_bo freed, by age. */
- struct simple_node time_list;
+ struct list_head time_list;
/** List of struct vc4_bo freed, per size, by age. */
- struct simple_node *size_list;
+ struct list_head *size_list;
uint32_t size_list_size;
pipe_mutex lock;
+
+ uint32_t bo_size;
+ uint32_t bo_count;
} bo_cache;
+
+ uint32_t bo_size;
+ uint32_t bo_count;
};
static inline struct vc4_screen *
diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c
index 2f72e722fc5..b58013dd2ee 100644
--- a/src/gallium/drivers/vc4/vc4_simulator.c
+++ b/src/gallium/drivers/vc4/vc4_simulator.c
@@ -39,11 +39,13 @@ vc4_wrap_bo_with_cma(struct drm_device *dev, struct vc4_bo *bo)
{
struct vc4_context *vc4 = dev->vc4;
struct vc4_screen *screen = vc4->screen;
- struct drm_gem_cma_object *obj = CALLOC_STRUCT(drm_gem_cma_object);
+ struct drm_vc4_bo *drm_bo = CALLOC_STRUCT(drm_vc4_bo);
+ struct drm_gem_cma_object *obj = &drm_bo->base;
uint32_t size = align(bo->size, 4096);
- obj->bo = bo;
+ drm_bo->bo = bo;
obj->base.size = size;
+ obj->base.dev = dev;
obj->vaddr = screen->simulator_mem_base + dev->simulator_mem_next;
obj->paddr = simpenrose_hw_addr(obj->vaddr);
@@ -94,7 +96,7 @@ vc4_simulator_unpin_bos(struct vc4_exec_info *exec)
{
for (int i = 0; i < exec->bo_count; i++) {
struct drm_gem_cma_object *obj = exec->bo[i].bo;
- struct vc4_bo *bo = obj->bo;
+ struct vc4_bo *bo = to_vc4_bo(&obj->base)->bo;
memcpy(bo->map, obj->vaddr, bo->size);
@@ -124,6 +126,7 @@ vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args)
int ret;
memset(&exec, 0, sizeof(exec));
+ list_inithead(&exec.unref_list);
if (ctex && ctex->bo->simulator_winsys_map) {
#if 0
@@ -176,8 +179,12 @@ vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args)
if (ret)
return ret;
- vc4_bo_unreference(&exec.exec_bo->bo);
- free(exec.exec_bo);
+ list_for_each_entry_safe(struct drm_vc4_bo, bo, &exec.unref_list,
+ unref_head) {
+ list_del(&bo->unref_head);
+ vc4_bo_unreference(&bo->bo);
+ free(bo);
+ }
if (ctex && ctex->bo->simulator_winsys_map) {
for (int y = 0; y < ctex->base.b.height0; y++) {
diff --git a/src/gallium/drivers/vc4/vc4_simulator_validate.h b/src/gallium/drivers/vc4/vc4_simulator_validate.h
index 1f0c6b67c0f..2bb36b253bb 100644
--- a/src/gallium/drivers/vc4/vc4_simulator_validate.h
+++ b/src/gallium/drivers/vc4/vc4_simulator_validate.h
@@ -43,6 +43,7 @@ struct vc4_exec_info;
#define kfree(ptr) free(ptr)
#define krealloc(ptr, size, args) realloc(ptr, size)
#define roundup(x, y) align(x, y)
+#define round_up(x, y) align(x, y)
#define max(x, y) MAX2(x, y)
#define min(x, y) MIN2(x, y)
#define BUG_ON(condition) assert(!(condition))
@@ -63,16 +64,27 @@ struct drm_device {
uint32_t simulator_mem_next;
};
-struct drm_gem_cma_object {
- struct vc4_bo *bo;
+struct drm_gem_object {
+ uint32_t size;
+ struct drm_device *dev;
+};
- struct {
- uint32_t size;
- } base;
+struct drm_gem_cma_object {
+ struct drm_gem_object base;
uint32_t paddr;
void *vaddr;
};
+struct drm_vc4_bo {
+ struct drm_gem_cma_object base;
+ struct vc4_bo *bo;
+ struct list_head unref_head;
+};
+
+static inline struct drm_vc4_bo *to_vc4_bo(struct drm_gem_object *obj)
+{
+ return (struct drm_vc4_bo *)obj;
+}
struct drm_gem_cma_object *
drm_gem_cma_create(struct drm_device *dev, size_t size);
diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c
index 80e963ea2ee..4a1d4c3a4d6 100644
--- a/src/gallium/drivers/vc4/vc4_state.c
+++ b/src/gallium/drivers/vc4/vc4_state.c
@@ -304,24 +304,8 @@ vc4_set_index_buffer(struct pipe_context *pctx,
if (ib) {
assert(!ib->user_buffer);
-
- if (ib->index_size == 4) {
- struct pipe_resource tmpl = *ib->buffer;
- assert(tmpl.format == PIPE_FORMAT_R8_UNORM);
- assert(tmpl.height0 == 1);
- tmpl.width0 = (tmpl.width0 - ib->offset) / 2;
- struct pipe_resource *pshadow =
- vc4_resource_create(&vc4->screen->base, &tmpl);
- struct vc4_resource *shadow = vc4_resource(pshadow);
- pipe_resource_reference(&shadow->shadow_parent, ib->buffer);
-
- pipe_resource_reference(&vc4->indexbuf.buffer, NULL);
- vc4->indexbuf.buffer = pshadow;
- vc4->indexbuf.index_size = 2;
- } else {
- pipe_resource_reference(&vc4->indexbuf.buffer, ib->buffer);
- vc4->indexbuf.index_size = ib->index_size;
- }
+ pipe_resource_reference(&vc4->indexbuf.buffer, ib->buffer);
+ vc4->indexbuf.index_size = ib->index_size;
vc4->indexbuf.offset = ib->offset;
} else {
pipe_resource_reference(&vc4->indexbuf.buffer, NULL);
@@ -538,6 +522,7 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
struct pipe_resource tmpl = shadow_parent->base.b;
struct vc4_resource *clone;
+ tmpl.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
tmpl.width0 = u_minify(tmpl.width0, so->u.tex.first_level);
tmpl.height0 = u_minify(tmpl.height0, so->u.tex.first_level);
tmpl.last_level = so->u.tex.last_level - so->u.tex.first_level;
@@ -547,6 +532,8 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
clone->shadow_parent = &shadow_parent->base.b;
/* Flag it as needing update of the contents from the parent. */
clone->writes = shadow_parent->writes - 1;
+
+ assert(clone->vc4_format != VC4_TEXTURE_TYPE_RGBA32R);
}
so->texture = prsc;
so->reference.count = 1;