summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorJason Ekstrand <[email protected]>2016-02-05 15:21:11 -0800
committerJason Ekstrand <[email protected]>2016-02-05 15:21:11 -0800
commit9401516113152ea2a571dc1103a2fa7ce68d4ee8 (patch)
tree2472a42bd85a3d3b9697be9235c029099c3d575d /src
parent741744f691d6ef63e9f9a4c03136f969f2ffb0bf (diff)
parent5b51b2e00013af70072106e9d34905326fc357fc (diff)
Merge remote-tracking branch 'mesa-public/master' into vulkan
Diffstat (limited to 'src')
-rw-r--r--src/compiler/.gitignore1
-rw-r--r--src/compiler/Makefile.am2
-rw-r--r--src/compiler/glsl/.gitignore1
-rw-r--r--src/compiler/glsl/ast_to_hir.cpp4
-rw-r--r--src/compiler/glsl/builtin_functions.cpp35
-rw-r--r--src/compiler/glsl/builtin_variables.cpp12
-rw-r--r--src/compiler/glsl/glcpp/glcpp-parse.y7
-rw-r--r--src/compiler/glsl/glsl_parser_extras.cpp82
-rw-r--r--src/compiler/glsl/glsl_parser_extras.h2
-rw-r--r--src/compiler/glsl/link_uniforms.cpp84
-rw-r--r--src/compiler/glsl/link_varyings.cpp9
-rw-r--r--src/compiler/glsl/linker.cpp2
-rw-r--r--src/compiler/glsl/lower_buffer_access.cpp6
-rw-r--r--src/compiler/glsl/lower_buffer_access.h1
-rw-r--r--src/compiler/glsl/lower_shared_reference.cpp6
-rw-r--r--src/compiler/glsl/lower_ubo_reference.cpp40
-rw-r--r--src/compiler/glsl/opt_tree_grafting.cpp11
-rw-r--r--src/compiler/glsl_types.cpp12
-rw-r--r--src/compiler/glsl_types.h3
-rw-r--r--src/compiler/nir/nir_lower_alu_to_scalar.c4
-rw-r--r--src/compiler/nir/nir_opcodes.py22
-rw-r--r--src/compiler/nir/nir_opt_algebraic.py34
-rw-r--r--src/compiler/shader_enums.h10
-rw-r--r--src/gallium/Android.mk2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_conv.c16
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_pack.c26
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_pack.h5
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi.c1
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c75
-rw-r--r--src/gallium/auxiliary/target-helpers/drm_helper.h7
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c44
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_info.c3
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.h1
-rw-r--r--src/gallium/auxiliary/util/u_box.h12
-rw-r--r--src/gallium/auxiliary/util/u_cpu_detect.c2
-rwxr-xr-xsrc/gallium/auxiliary/util/u_format_parse.py2
-rw-r--r--src/gallium/auxiliary/util/u_half.h7
-rw-r--r--src/gallium/auxiliary/vl/vl_zscan.c7
-rw-r--r--src/gallium/auxiliary/vl/vl_zscan.h1
-rw-r--r--src/gallium/docs/source/context.rst5
-rw-r--r--src/gallium/docs/source/screen.rst15
-rw-r--r--src/gallium/docs/source/tgsi.rst17
-rw-r--r--src/gallium/drivers/freedreno/freedreno_context.c3
-rw-r--r--src/gallium/drivers/freedreno/freedreno_screen.c5
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c45
-rw-r--r--src/gallium/drivers/i915/i915_screen.c3
-rw-r--r--src/gallium/drivers/ilo/ilo_screen.c5
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.h13
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_tri.c48
-rw-r--r--src/gallium/drivers/llvmpipe/lp_screen.c4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_context.h15
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_line.c81
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_tri.c109
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir.cpp3
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir.h1
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp8
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp36
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp14
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp122
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp74
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h3
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp45
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp1
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp3
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp6
-rw-r--r--src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c2
-rw-r--r--src/gallium/drivers/nouveau/nv30/nv30_screen.c4
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_program.c9
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_screen.c4
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_surface.c120
-rw-r--r--src/gallium/drivers/nouveau/nvc0/mme/com9097.mme49
-rw-r--r--src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h33
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_context.c32
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_context.h13
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_macros.h2
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_program.c3
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query.c21
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query.h7
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c123
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h2
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_screen.c34
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_screen.h2
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c2
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_state.c48
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c46
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_surface.c179
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_tex.c4
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c29
-rw-r--r--src/gallium/drivers/r300/r300_screen.c4
-rw-r--r--src/gallium/drivers/r600/evergreen_compute.c32
-rw-r--r--src/gallium/drivers/r600/evergreen_state.c16
-rw-r--r--src/gallium/drivers/r600/r600_asm.h4
-rw-r--r--src/gallium/drivers/r600/r600_llvm.c13
-rw-r--r--src/gallium/drivers/r600/r600_llvm.h2
-rw-r--r--src/gallium/drivers/r600/r600_pipe.c10
-rw-r--r--src/gallium/drivers/r600/r600_uvd.c2
-rw-r--r--src/gallium/drivers/radeon/r600_buffer_common.c4
-rw-r--r--src/gallium/drivers/radeon/r600_cs.h2
-rw-r--r--src/gallium/drivers/radeon/r600_perfcounter.c38
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.c217
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.h14
-rw-r--r--src/gallium/drivers/radeon/r600_query.c94
-rw-r--r--src/gallium/drivers/radeon/r600_query.h32
-rw-r--r--src/gallium/drivers/radeon/r600_texture.c48
-rw-r--r--src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c76
-rw-r--r--src/gallium/drivers/radeon/radeon_winsys.h49
-rw-r--r--src/gallium/drivers/radeonsi/cik_sdma.c2
-rw-r--r--src/gallium/drivers/radeonsi/si_compute.c3
-rw-r--r--src/gallium/drivers/radeonsi/si_hw_context.c2
-rw-r--r--src/gallium/drivers/radeonsi/si_perfcounter.c121
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.c70
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.h2
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c31
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.h1
-rw-r--r--src/gallium/drivers/radeonsi/si_state.c365
-rw-r--r--src/gallium/drivers/radeonsi/si_state.h2
-rw-r--r--src/gallium/drivers/radeonsi/si_state_shaders.c49
-rw-r--r--src/gallium/drivers/softpipe/sp_screen.c5
-rw-r--r--src/gallium/drivers/svga/svga_screen.c4
-rw-r--r--src/gallium/drivers/trace/tr_context.c40
-rw-r--r--src/gallium/drivers/trace/tr_dump_state.c18
-rw-r--r--src/gallium/drivers/trace/tr_dump_state.h2
-rw-r--r--src/gallium/drivers/vc4/vc4_job.c11
-rw-r--r--src/gallium/drivers/vc4/vc4_screen.c4
-rw-r--r--src/gallium/drivers/virgl/virgl_screen.c5
-rw-r--r--src/gallium/drivers/virgl/virgl_screen.h6
-rw-r--r--src/gallium/include/pipe/p_context.h22
-rw-r--r--src/gallium/include/pipe/p_defines.h15
-rw-r--r--src/gallium/include/pipe/p_screen.h6
-rw-r--r--src/gallium/include/pipe/p_shader_tokens.h7
-rw-r--r--src/gallium/include/pipe/p_state.h13
-rw-r--r--src/gallium/state_trackers/nine/Makefile.sources2
-rw-r--r--src/gallium/state_trackers/nine/adapter9.c11
-rw-r--r--src/gallium/state_trackers/nine/basetexture9.c2
-rw-r--r--src/gallium/state_trackers/nine/buffer9.c189
-rw-r--r--src/gallium/state_trackers/nine/buffer9.h73
-rw-r--r--src/gallium/state_trackers/nine/cubetexture9.c2
-rw-r--r--src/gallium/state_trackers/nine/device9.c142
-rw-r--r--src/gallium/state_trackers/nine/device9.h12
-rw-r--r--src/gallium/state_trackers/nine/device9ex.c58
-rw-r--r--src/gallium/state_trackers/nine/device9ex.h17
-rw-r--r--src/gallium/state_trackers/nine/guid.c18
-rw-r--r--src/gallium/state_trackers/nine/guid.h4
-rw-r--r--src/gallium/state_trackers/nine/indexbuffer9.c101
-rw-r--r--src/gallium/state_trackers/nine/indexbuffer9.h9
-rw-r--r--src/gallium/state_trackers/nine/nine_ff.c85
-rw-r--r--src/gallium/state_trackers/nine/nine_limits.h211
-rw-r--r--src/gallium/state_trackers/nine/nine_pdata.h1
-rw-r--r--src/gallium/state_trackers/nine/nine_pipe.c11
-rw-r--r--src/gallium/state_trackers/nine/nine_shader.c57
-rw-r--r--src/gallium/state_trackers/nine/nine_shader.h1
-rw-r--r--src/gallium/state_trackers/nine/nine_state.c49
-rw-r--r--src/gallium/state_trackers/nine/nine_state.h39
-rw-r--r--src/gallium/state_trackers/nine/pixelshader9.c1
-rw-r--r--src/gallium/state_trackers/nine/pixelshader9.h5
-rw-r--r--src/gallium/state_trackers/nine/resource9.c49
-rw-r--r--src/gallium/state_trackers/nine/resource9.h2
-rw-r--r--src/gallium/state_trackers/nine/stateblock9.c16
-rw-r--r--src/gallium/state_trackers/nine/surface9.c17
-rw-r--r--src/gallium/state_trackers/nine/swapchain9.c50
-rw-r--r--src/gallium/state_trackers/nine/swapchain9.h3
-rw-r--r--src/gallium/state_trackers/nine/texture9.c2
-rw-r--r--src/gallium/state_trackers/nine/vertexbuffer9.c129
-rw-r--r--src/gallium/state_trackers/nine/vertexbuffer9.h13
-rw-r--r--src/gallium/state_trackers/nine/vertexdeclaration9.c27
-rw-r--r--src/gallium/state_trackers/nine/vertexdeclaration9.h2
-rw-r--r--src/gallium/state_trackers/nine/volume9.c9
-rw-r--r--src/gallium/state_trackers/omx/vid_dec_h264.c7
-rw-r--r--src/gallium/targets/d3dadapter9/drm.c61
-rw-r--r--src/gallium/targets/dri/Android.mk2
-rw-r--r--src/gallium/winsys/amdgpu/drm/amdgpu_bo.c17
-rw-r--r--src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c36
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_bo.c8
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_cs.c6
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_winsys.c62
-rw-r--r--src/gallium/winsys/virgl/drm/virgl_drm_public.h4
-rw-r--r--src/gallium/winsys/virgl/drm/virgl_drm_winsys.c133
-rw-r--r--src/gallium/winsys/virgl/drm/virgl_drm_winsys.h1
-rw-r--r--src/gallium/winsys/virgl/vtest/Android.mk33
-rw-r--r--src/glx/dri2_glx.c11
-rw-r--r--src/glx/dri3_glx.c7
-rw-r--r--src/glx/dri_common.c28
-rw-r--r--src/glx/drisw_glx.c4
-rw-r--r--src/glx/glxextensions.c1
-rw-r--r--src/glx/glxextensions.h1
-rw-r--r--src/mapi/glapi/gen/gl_API.xml23
-rw-r--r--src/mesa/Makefile.sources3
-rw-r--r--src/mesa/drivers/dri/common/xmlpool/t_options.h5
-rw-r--r--src/mesa/drivers/dri/i965/brw_compiler.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.c19
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp6
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_nir.cpp8
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_visitor.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp14
-rw-r--r--src/mesa/drivers/dri/i965/gen8_surface_state.c4
-rw-r--r--src/mesa/main/bufferobj.c73
-rw-r--r--src/mesa/main/dd.h17
-rw-r--r--src/mesa/main/extensions_table.h4
-rw-r--r--src/mesa/main/fbobject.c3
-rwxr-xr-xsrc/mesa/main/format_parser.py2
-rw-r--r--src/mesa/main/framebuffer.h5
-rw-r--r--src/mesa/main/get.c143
-rw-r--r--src/mesa/main/get_hash_params.py71
-rw-r--r--src/mesa/main/hash.c4
-rw-r--r--src/mesa/main/mtypes.h32
-rw-r--r--src/mesa/main/objectlabel.c13
-rw-r--r--src/mesa/main/queryobj.c285
-rw-r--r--src/mesa/main/shared.c2
-rw-r--r--src/mesa/main/state.c2
-rw-r--r--src/mesa/main/syncobj.c89
-rw-r--r--src/mesa/main/syncobj.h11
-rw-r--r--src/mesa/main/transformfeedback.h3
-rw-r--r--src/mesa/program/ir_to_mesa.cpp4
-rw-r--r--src/mesa/program/prog_parameter.c70
-rw-r--r--src/mesa/program/prog_parameter.h7
-rw-r--r--src/mesa/program/prog_statevars.c3
-rw-r--r--src/mesa/program/program.c7
-rw-r--r--src/mesa/state_tracker/st_atom.c10
-rw-r--r--src/mesa/state_tracker/st_atom.h10
-rw-r--r--src/mesa/state_tracker/st_atom_atomicbuf.c158
-rw-r--r--src/mesa/state_tracker/st_atom_storagebuf.c196
-rw-r--r--src/mesa/state_tracker/st_cb_bufferobjects.c7
-rw-r--r--src/mesa/state_tracker/st_cb_queryobj.c97
-rw-r--r--src/mesa/state_tracker/st_cb_texture.c1191
-rw-r--r--src/mesa/state_tracker/st_cb_texture.h5
-rw-r--r--src/mesa/state_tracker/st_cb_texturebarrier.c7
-rw-r--r--src/mesa/state_tracker/st_context.c29
-rw-r--r--src/mesa/state_tracker/st_context.h15
-rw-r--r--src/mesa/state_tracker/st_extensions.c33
-rw-r--r--src/mesa/state_tracker/st_glsl_to_tgsi.cpp397
-rw-r--r--src/mesa/vbo/vbo.h3
-rw-r--r--src/mesa/vbo/vbo_exec_array.c148
-rw-r--r--src/mesa/vbo/vbo_minmax_index.c378
-rw-r--r--src/mesa/x86-64/xform4.S40
-rw-r--r--src/util/hash_table.c29
-rw-r--r--src/util/hash_table.h7
-rw-r--r--src/util/set.c3
-rw-r--r--src/util/tests/hash_table/Makefile.am1
-rw-r--r--src/util/tests/hash_table/clear.c91
240 files changed, 7143 insertions, 2007 deletions
diff --git a/src/compiler/.gitignore b/src/compiler/.gitignore
new file mode 100644
index 00000000000..6fb069f0bcb
--- /dev/null
+++ b/src/compiler/.gitignore
@@ -0,0 +1 @@
+glsl_compiler
diff --git a/src/compiler/Makefile.am b/src/compiler/Makefile.am
index e3d297fe299..fe96cb3c879 100644
--- a/src/compiler/Makefile.am
+++ b/src/compiler/Makefile.am
@@ -220,9 +220,11 @@ YACC_GEN = $(AM_V_YACC)$(YACC) $(YFLAGS)
LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS)
glsl/glsl_parser.cpp glsl/glsl_parser.h: glsl/glsl_parser.yy
+ $(MKDIR_GEN)
$(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl/glsl_parser.h $(srcdir)/glsl/glsl_parser.yy
glsl/glsl_lexer.cpp: glsl/glsl_lexer.ll
+ $(MKDIR_GEN)
$(LEX_GEN) -o $@ $(srcdir)/glsl/glsl_lexer.ll
glsl/glcpp/glcpp-parse.c glsl/glcpp/glcpp-parse.h: glsl/glcpp/glcpp-parse.y
diff --git a/src/compiler/glsl/.gitignore b/src/compiler/glsl/.gitignore
index e80f8af6bfc..6db4e738f6e 100644
--- a/src/compiler/glsl/.gitignore
+++ b/src/compiler/glsl/.gitignore
@@ -1,4 +1,3 @@
-glsl_compiler
glsl_lexer.cpp
glsl_parser.cpp
glsl_parser.h
diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index 98d8bc5f268..7213ad8ebec 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -291,6 +291,10 @@ apply_implicit_conversion(const glsl_type *to, ir_rvalue * &from,
if (!state->is_version(120, 0))
return false;
+ /* ESSL does not allow implicit conversions */
+ if (state->es_shader)
+ return false;
+
/* From page 27 (page 33 of the PDF) of the GLSL 1.50 spec:
*
* "There are no implicit array or structure conversions. For
diff --git a/src/compiler/glsl/builtin_functions.cpp b/src/compiler/glsl/builtin_functions.cpp
index 95e86df1cdd..5512a33f114 100644
--- a/src/compiler/glsl/builtin_functions.cpp
+++ b/src/compiler/glsl/builtin_functions.cpp
@@ -661,7 +661,7 @@ private:
BA1(roundEven)
BA1(ceil)
BA1(fract)
- B2(mod)
+ BA2(mod)
BA1(modf)
BA2(min)
BA2(max)
@@ -1242,23 +1242,23 @@ builtin_builder::create_builtins()
FD(fract)
add_function("mod",
- _mod(glsl_type::float_type, glsl_type::float_type),
- _mod(glsl_type::vec2_type, glsl_type::float_type),
- _mod(glsl_type::vec3_type, glsl_type::float_type),
- _mod(glsl_type::vec4_type, glsl_type::float_type),
+ _mod(always_available, glsl_type::float_type, glsl_type::float_type),
+ _mod(always_available, glsl_type::vec2_type, glsl_type::float_type),
+ _mod(always_available, glsl_type::vec3_type, glsl_type::float_type),
+ _mod(always_available, glsl_type::vec4_type, glsl_type::float_type),
- _mod(glsl_type::vec2_type, glsl_type::vec2_type),
- _mod(glsl_type::vec3_type, glsl_type::vec3_type),
- _mod(glsl_type::vec4_type, glsl_type::vec4_type),
+ _mod(always_available, glsl_type::vec2_type, glsl_type::vec2_type),
+ _mod(always_available, glsl_type::vec3_type, glsl_type::vec3_type),
+ _mod(always_available, glsl_type::vec4_type, glsl_type::vec4_type),
- _mod(glsl_type::double_type, glsl_type::double_type),
- _mod(glsl_type::dvec2_type, glsl_type::double_type),
- _mod(glsl_type::dvec3_type, glsl_type::double_type),
- _mod(glsl_type::dvec4_type, glsl_type::double_type),
+ _mod(fp64, glsl_type::double_type, glsl_type::double_type),
+ _mod(fp64, glsl_type::dvec2_type, glsl_type::double_type),
+ _mod(fp64, glsl_type::dvec3_type, glsl_type::double_type),
+ _mod(fp64, glsl_type::dvec4_type, glsl_type::double_type),
- _mod(glsl_type::dvec2_type, glsl_type::dvec2_type),
- _mod(glsl_type::dvec3_type, glsl_type::dvec3_type),
- _mod(glsl_type::dvec4_type, glsl_type::dvec4_type),
+ _mod(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type),
+ _mod(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type),
+ _mod(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type),
NULL);
FD(modf)
@@ -3452,9 +3452,10 @@ UNOPA(ceil, ir_unop_ceil)
UNOPA(fract, ir_unop_fract)
ir_function_signature *
-builtin_builder::_mod(const glsl_type *x_type, const glsl_type *y_type)
+builtin_builder::_mod(builtin_available_predicate avail,
+ const glsl_type *x_type, const glsl_type *y_type)
{
- return binop(always_available, ir_binop_mod, x_type, x_type, y_type);
+ return binop(avail, ir_binop_mod, x_type, x_type, y_type);
}
ir_function_signature *
diff --git a/src/compiler/glsl/builtin_variables.cpp b/src/compiler/glsl/builtin_variables.cpp
index ccc04c00cea..6db74f1c634 100644
--- a/src/compiler/glsl/builtin_variables.cpp
+++ b/src/compiler/glsl/builtin_variables.cpp
@@ -328,6 +328,11 @@ per_vertex_accumulator::add_field(int slot, const glsl_type *type,
this->fields[this->num_fields].sample = 0;
this->fields[this->num_fields].patch = 0;
this->fields[this->num_fields].precision = GLSL_PRECISION_NONE;
+ this->fields[this->num_fields].image_read_only = 0;
+ this->fields[this->num_fields].image_write_only = 0;
+ this->fields[this->num_fields].image_coherent = 0;
+ this->fields[this->num_fields].image_volatile = 0;
+ this->fields[this->num_fields].image_restrict = 0;
this->num_fields++;
}
@@ -1201,7 +1206,12 @@ builtin_variable_generator::generate_varyings()
/* gl_Position and gl_PointSize are not visible from fragment shaders. */
if (state->stage != MESA_SHADER_FRAGMENT) {
add_varying(VARYING_SLOT_POS, vec4_t, "gl_Position");
- add_varying(VARYING_SLOT_PSIZ, float_t, "gl_PointSize");
+ if (!state->es_shader ||
+ state->stage == MESA_SHADER_VERTEX ||
+ (state->stage == MESA_SHADER_GEOMETRY &&
+ state->OES_geometry_point_size_enable)) {
+ add_varying(VARYING_SLOT_PSIZ, float_t, "gl_PointSize");
+ }
}
if (state->is_version(130, 0)) {
diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y b/src/compiler/glsl/glcpp/glcpp-parse.y
index ef1a6575aaa..43a1aa94aff 100644
--- a/src/compiler/glsl/glcpp/glcpp-parse.y
+++ b/src/compiler/glsl/glcpp/glcpp-parse.y
@@ -2386,6 +2386,13 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
add_builtin_define(parser, "GL_OES_texture_storage_multisample_2d_array", 1);
if (extensions->ARB_blend_func_extended)
add_builtin_define(parser, "GL_EXT_blend_func_extended", 1);
+
+ if (version >= 310) {
+ if (extensions->OES_geometry_shader) {
+ add_builtin_define(parser, "GL_OES_geometry_point_size", 1);
+ add_builtin_define(parser, "GL_OES_geometry_shader", 1);
+ }
+ }
}
} else {
add_builtin_define(parser, "GL_ARB_draw_buffers", 1);
diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp
index ecf0d7f76e5..d7a4b254aa2 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -600,6 +600,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
/* OES extensions go here, sorted alphabetically.
*/
EXT(OES_EGL_image_external, false, true, OES_EGL_image_external),
+ EXT(OES_geometry_point_size, false, true, OES_geometry_shader),
EXT(OES_geometry_shader, false, true, OES_geometry_shader),
EXT(OES_standard_derivatives, false, true, OES_standard_derivatives),
EXT(OES_texture_3D, false, true, dummy_true),
@@ -1867,59 +1868,76 @@ do_common_optimization(exec_list *ir, bool linked,
const struct gl_shader_compiler_options *options,
bool native_integers)
{
+ const bool debug = false;
GLboolean progress = GL_FALSE;
- progress = lower_instructions(ir, SUB_TO_ADD_NEG) || progress;
+#define OPT(PASS, ...) do { \
+ if (debug) { \
+ fprintf(stderr, "START GLSL optimization %s\n", #PASS); \
+ const bool opt_progress = PASS(__VA_ARGS__); \
+ progress = opt_progress || progress; \
+ if (opt_progress) \
+ _mesa_print_ir(stderr, ir, NULL); \
+ fprintf(stderr, "GLSL optimization %s: %s progress\n", \
+ #PASS, opt_progress ? "made" : "no"); \
+ } else { \
+ progress = PASS(__VA_ARGS__) || progress; \
+ } \
+ } while (false)
+
+ OPT(lower_instructions, ir, SUB_TO_ADD_NEG);
if (linked) {
- progress = do_function_inlining(ir) || progress;
- progress = do_dead_functions(ir) || progress;
- progress = do_structure_splitting(ir) || progress;
+ OPT(do_function_inlining, ir);
+ OPT(do_dead_functions, ir);
+ OPT(do_structure_splitting, ir);
}
- progress = do_if_simplification(ir) || progress;
- progress = opt_flatten_nested_if_blocks(ir) || progress;
- progress = opt_conditional_discard(ir) || progress;
- progress = do_copy_propagation(ir) || progress;
- progress = do_copy_propagation_elements(ir) || progress;
+ OPT(do_if_simplification, ir);
+ OPT(opt_flatten_nested_if_blocks, ir);
+ OPT(opt_conditional_discard, ir);
+ OPT(do_copy_propagation, ir);
+ OPT(do_copy_propagation_elements, ir);
if (options->OptimizeForAOS && !linked)
- progress = opt_flip_matrices(ir) || progress;
+ OPT(opt_flip_matrices, ir);
if (linked && options->OptimizeForAOS) {
- progress = do_vectorize(ir) || progress;
+ OPT(do_vectorize, ir);
}
if (linked)
- progress = do_dead_code(ir, uniform_locations_assigned) || progress;
+ OPT(do_dead_code, ir, uniform_locations_assigned);
else
- progress = do_dead_code_unlinked(ir) || progress;
- progress = do_dead_code_local(ir) || progress;
- progress = do_tree_grafting(ir) || progress;
- progress = do_constant_propagation(ir) || progress;
+ OPT(do_dead_code_unlinked, ir);
+ OPT(do_dead_code_local, ir);
+ OPT(do_tree_grafting, ir);
+ OPT(do_constant_propagation, ir);
if (linked)
- progress = do_constant_variable(ir) || progress;
+ OPT(do_constant_variable, ir);
else
- progress = do_constant_variable_unlinked(ir) || progress;
- progress = do_constant_folding(ir) || progress;
- progress = do_minmax_prune(ir) || progress;
- progress = do_rebalance_tree(ir) || progress;
- progress = do_algebraic(ir, native_integers, options) || progress;
- progress = do_lower_jumps(ir) || progress;
- progress = do_vec_index_to_swizzle(ir) || progress;
- progress = lower_vector_insert(ir, false) || progress;
- progress = do_swizzle_swizzle(ir) || progress;
- progress = do_noop_swizzle(ir) || progress;
-
- progress = optimize_split_arrays(ir, linked) || progress;
- progress = optimize_redundant_jumps(ir) || progress;
+ OPT(do_constant_variable_unlinked, ir);
+ OPT(do_constant_folding, ir);
+ OPT(do_minmax_prune, ir);
+ OPT(do_rebalance_tree, ir);
+ OPT(do_algebraic, ir, native_integers, options);
+ OPT(do_lower_jumps, ir);
+ OPT(do_vec_index_to_swizzle, ir);
+ OPT(lower_vector_insert, ir, false);
+ OPT(do_swizzle_swizzle, ir);
+ OPT(do_noop_swizzle, ir);
+
+ OPT(optimize_split_arrays, ir, linked);
+ OPT(optimize_redundant_jumps, ir);
loop_state *ls = analyze_loop_variables(ir);
if (ls->loop_found) {
- progress = set_loop_controls(ir, ls) || progress;
- progress = unroll_loops(ir, ls, options) || progress;
+ OPT(set_loop_controls, ir, ls);
+ OPT(unroll_loops, ir, ls, options);
}
delete ls;
+#undef OPT
+
return progress;
}
diff --git a/src/compiler/glsl/glsl_parser_extras.h b/src/compiler/glsl/glsl_parser_extras.h
index 3f88e01d599..a905b564787 100644
--- a/src/compiler/glsl/glsl_parser_extras.h
+++ b/src/compiler/glsl/glsl_parser_extras.h
@@ -591,6 +591,8 @@ struct _mesa_glsl_parse_state {
*/
bool OES_EGL_image_external_enable;
bool OES_EGL_image_external_warn;
+ bool OES_geometry_point_size_enable;
+ bool OES_geometry_point_size_warn;
bool OES_geometry_shader_enable;
bool OES_geometry_shader_warn;
bool OES_standard_derivatives_enable;
diff --git a/src/compiler/glsl/link_uniforms.cpp b/src/compiler/glsl/link_uniforms.cpp
index 33b2d4c8646..7072c16cb28 100644
--- a/src/compiler/glsl/link_uniforms.cpp
+++ b/src/compiler/glsl/link_uniforms.cpp
@@ -471,10 +471,11 @@ private:
*/
class parcel_out_uniform_storage : public program_resource_visitor {
public:
- parcel_out_uniform_storage(struct string_to_uint_map *map,
+ parcel_out_uniform_storage(struct gl_shader_program *prog,
+ struct string_to_uint_map *map,
struct gl_uniform_storage *uniforms,
union gl_constant_value *values)
- : map(map), uniforms(uniforms), values(values)
+ : prog(prog), map(map), uniforms(uniforms), values(values)
{
}
@@ -492,8 +493,7 @@ public:
memset(this->targets, 0, sizeof(this->targets));
}
- void set_and_process(struct gl_shader_program *prog,
- ir_variable *var)
+ void set_and_process(ir_variable *var)
{
current_var = var;
field_counter = 0;
@@ -643,6 +643,16 @@ private:
uniform->opaque[shader_type].index = this->next_image;
uniform->opaque[shader_type].active = true;
+ /* Set image access qualifiers */
+ const GLenum access =
+ (current_var->data.image_read_only ? GL_READ_ONLY :
+ current_var->data.image_write_only ? GL_WRITE_ONLY :
+ GL_READ_WRITE);
+
+ for (unsigned j = 0; j < MAX2(1, uniform->array_elements); ++j)
+ prog->_LinkedShaders[shader_type]->
+ ImageAccess[this->next_image + j] = access;
+
/* Increment the image index by 1 for non-arrays and by the
* number of array elements for arrays.
*/
@@ -844,6 +854,11 @@ private:
this->values += values_for_type(type);
}
+ /**
+ * Current program being processed.
+ */
+ struct gl_shader_program *prog;
+
struct string_to_uint_map *map;
struct gl_uniform_storage *uniforms;
@@ -1007,40 +1022,6 @@ link_update_uniform_buffer_variables(struct gl_shader *shader)
}
}
-static void
-link_set_image_access_qualifiers(struct gl_shader_program *prog,
- gl_shader *sh, unsigned shader_stage,
- ir_variable *var, const glsl_type *type,
- char **name, size_t name_length)
-{
- /* Handle arrays of arrays */
- if (type->is_array() && type->fields.array->is_array()) {
- for (unsigned i = 0; i < type->length; i++) {
- size_t new_length = name_length;
-
- /* Append the subscript to the current variable name */
- ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);
-
- link_set_image_access_qualifiers(prog, sh, shader_stage, var,
- type->fields.array, name,
- new_length);
- }
- } else {
- unsigned id = 0;
- bool found = prog->UniformHash->get(id, *name);
- assert(found);
- (void) found;
- const gl_uniform_storage *storage = &prog->UniformStorage[id];
- const unsigned index = storage->opaque[shader_stage].index;
- const GLenum access = (var->data.image_read_only ? GL_READ_ONLY :
- var->data.image_write_only ? GL_WRITE_ONLY :
- GL_READ_WRITE);
-
- for (unsigned j = 0; j < MAX2(1, storage->array_elements); ++j)
- sh->ImageAccess[index + j] = access;
- }
-}
-
/**
* Combine the hidden uniform hash map with the uniform hash map so that the
* hidden uniforms will be given indicies at the end of the uniform storage
@@ -1148,7 +1129,7 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
union gl_constant_value *data_end = &data[num_data_slots];
#endif
- parcel_out_uniform_storage parcel(prog->UniformHash, uniforms, data);
+ parcel_out_uniform_storage parcel(prog, prog->UniformHash, uniforms, data);
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
if (prog->_LinkedShaders[i] == NULL)
@@ -1163,7 +1144,7 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
var->data.mode != ir_var_shader_storage))
continue;
- parcel.set_and_process(prog, var);
+ parcel.set_and_process(var);
}
prog->_LinkedShaders[i]->active_samplers = parcel.shader_samplers_used;
@@ -1301,29 +1282,6 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
prog->NumHiddenUniforms = hidden_uniforms;
prog->UniformStorage = uniforms;
- /**
- * Scan the program for image uniforms and store image unit access
- * information into the gl_shader data structure.
- */
- for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
- gl_shader *sh = prog->_LinkedShaders[i];
-
- if (sh == NULL)
- continue;
-
- foreach_in_list(ir_instruction, node, sh->ir) {
- ir_variable *var = node->as_variable();
-
- if (var && var->data.mode == ir_var_uniform &&
- var->type->contains_image()) {
- char *name_copy = ralloc_strdup(NULL, var->name);
- link_set_image_access_qualifiers(prog, sh, i, var, var->type,
- &name_copy, strlen(var->name));
- ralloc_free(name_copy);
- }
- }
- }
-
link_set_uniform_initializers(prog, boolean_true);
return;
diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp
index 264b69ca619..a4c730ffdcf 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -967,11 +967,16 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
return;
}
- if ((consumer_var == NULL && producer_var->type->contains_integer()) ||
+ bool needs_flat_qualifier = consumer_var == NULL &&
+ (producer_var->type->contains_integer() ||
+ producer_var->type->contains_double());
+
+ if (needs_flat_qualifier ||
(consumer_stage != -1 && consumer_stage != MESA_SHADER_FRAGMENT)) {
/* Since this varying is not being consumed by the fragment shader, its
* interpolation type varying cannot possibly affect rendering.
- * Also, this variable is non-flat and is (or contains) an integer.
+ * Also, this variable is non-flat and is (or contains) an integer
+ * or a double.
* If the consumer stage is unknown, don't modify the interpolation
* type as it could affect rendering later with separate shaders.
*
diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index 6657777d74c..4776ffa6acd 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -4633,8 +4633,6 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
&prog->NumShaderStorageBlocks,
&prog->SsboInterfaceBlockIndex);
- /* FINISHME: Assign fragment shader output locations. */
-
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
if (prog->_LinkedShaders[i] == NULL)
continue;
diff --git a/src/compiler/glsl/lower_buffer_access.cpp b/src/compiler/glsl/lower_buffer_access.cpp
index f8c8d140ea8..9ad811de9f1 100644
--- a/src/compiler/glsl/lower_buffer_access.cpp
+++ b/src/compiler/glsl/lower_buffer_access.cpp
@@ -327,6 +327,7 @@ lower_buffer_access::setup_buffer_access(void *mem_ctx,
unsigned *const_offset,
bool *row_major,
int *matrix_columns,
+ const glsl_struct_field **struct_field,
unsigned packing)
{
*offset = new(mem_ctx) ir_constant(0u);
@@ -442,8 +443,11 @@ lower_buffer_access::setup_buffer_access(void *mem_ctx,
intra_struct_offset = glsl_align(intra_struct_offset, field_align);
if (strcmp(struct_type->fields.structure[i].name,
- deref_record->field) == 0)
+ deref_record->field) == 0) {
+ if (struct_field)
+ *struct_field = &struct_type->fields.structure[i];
break;
+ }
if (packing == GLSL_INTERFACE_PACKING_STD430)
intra_struct_offset += type->std430_size(field_row_major);
diff --git a/src/compiler/glsl/lower_buffer_access.h b/src/compiler/glsl/lower_buffer_access.h
index cc4614e9792..8772bdb76ff 100644
--- a/src/compiler/glsl/lower_buffer_access.h
+++ b/src/compiler/glsl/lower_buffer_access.h
@@ -57,6 +57,7 @@ public:
void setup_buffer_access(void *mem_ctx, ir_variable *var, ir_rvalue *deref,
ir_rvalue **offset, unsigned *const_offset,
bool *row_major, int *matrix_columns,
+ const glsl_struct_field **struct_field,
unsigned packing);
};
diff --git a/src/compiler/glsl/lower_shared_reference.cpp b/src/compiler/glsl/lower_shared_reference.cpp
index 533cd9202f4..12499695882 100644
--- a/src/compiler/glsl/lower_shared_reference.cpp
+++ b/src/compiler/glsl/lower_shared_reference.cpp
@@ -142,7 +142,7 @@ lower_shared_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
setup_buffer_access(mem_ctx, var, deref,
&offset, &const_offset,
- &row_major, &matrix_columns, packing);
+ &row_major, &matrix_columns, NULL, packing);
/* Now that we've calculated the offset to the start of the
* dereference, walk over the type and emit loads into a temporary.
@@ -210,7 +210,7 @@ lower_shared_reference_visitor::handle_assignment(ir_assignment *ir)
setup_buffer_access(mem_ctx, var, deref,
&offset, &const_offset,
- &row_major, &matrix_columns, packing);
+ &row_major, &matrix_columns, NULL, packing);
deref = new(mem_ctx) ir_dereference_variable(store_var);
@@ -370,7 +370,7 @@ lower_shared_reference_visitor::lower_shared_atomic_intrinsic(ir_call *ir)
setup_buffer_access(mem_ctx, var, deref,
&offset, &const_offset,
- &row_major, &matrix_columns, packing);
+ &row_major, &matrix_columns, NULL, packing);
assert(offset);
assert(!row_major);
diff --git a/src/compiler/glsl/lower_ubo_reference.cpp b/src/compiler/glsl/lower_ubo_reference.cpp
index a172054bac8..d6269f7cbac 100644
--- a/src/compiler/glsl/lower_ubo_reference.cpp
+++ b/src/compiler/glsl/lower_ubo_reference.cpp
@@ -45,7 +45,7 @@ class lower_ubo_reference_visitor :
public lower_buffer_access::lower_buffer_access {
public:
lower_ubo_reference_visitor(struct gl_shader *shader)
- : shader(shader)
+ : shader(shader), struct_field(NULL), variable(NULL)
{
}
@@ -60,6 +60,7 @@ public:
bool *row_major,
int *matrix_columns,
unsigned packing);
+ uint32_t ssbo_access_params();
ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type,
ir_rvalue *offset);
ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type,
@@ -104,6 +105,8 @@ public:
struct gl_shader *shader;
struct gl_uniform_buffer_variable *ubo_var;
+ const struct glsl_struct_field *struct_field;
+ ir_variable *variable;
ir_rvalue *uniform_block;
bool progress;
};
@@ -288,8 +291,9 @@ lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,
*const_offset = ubo_var->Offset;
+ this->struct_field = NULL;
setup_buffer_access(mem_ctx, var, deref, offset, const_offset, row_major,
- matrix_columns, packing);
+ matrix_columns, &this->struct_field, packing);
}
void
@@ -317,6 +321,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
this->buffer_access_type =
var->is_in_shader_storage_block() ?
ssbo_load_access : ubo_load_access;
+ this->variable = var;
/* Compute the offset to the start of the dereference as well as other
* information we need to configure the write
@@ -370,6 +375,24 @@ shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
return state->ARB_shader_storage_buffer_object_enable;
}
+/**
+ * Build the access-qualifier bitmask that is passed as the trailing
+ * "access" argument of the SSBO load/store calls.
+ *
+ * For an interface instance the coherent/restrict/volatile qualifiers are
+ * taken from the struct field recorded while setting up the buffer access;
+ * otherwise they are taken from the variable's own data.
+ *
+ * \return bitwise OR of ACCESS_COHERENT, ACCESS_RESTRICT and
+ *         ACCESS_VOLATILE for the qualifiers that are set.
+ */
+uint32_t
+lower_ubo_reference_visitor::ssbo_access_params()
+{
+   assert(variable);
+
+   bool is_coherent, is_restrict, is_volatile;
+
+   if (variable->is_interface_instance()) {
+      assert(struct_field);
+
+      is_coherent = struct_field->image_coherent;
+      is_restrict = struct_field->image_restrict;
+      is_volatile = struct_field->image_volatile;
+   } else {
+      is_coherent = variable->data.image_coherent;
+      is_restrict = variable->data.image_restrict;
+      is_volatile = variable->data.image_volatile;
+   }
+
+   uint32_t access = 0;
+   if (is_coherent)
+      access |= ACCESS_COHERENT;
+   if (is_restrict)
+      access |= ACCESS_RESTRICT;
+   if (is_volatile)
+      access |= ACCESS_VOLATILE;
+
+   return access;
+}
+
ir_call *
lower_ubo_reference_visitor::ssbo_store(void *mem_ctx,
ir_rvalue *deref,
@@ -394,6 +417,10 @@ lower_ubo_reference_visitor::ssbo_store(void *mem_ctx,
ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in);
sig_params.push_tail(writemask_ref);
+ ir_variable *access_ref = new(mem_ctx)
+ ir_variable(glsl_type::uint_type, "access" , ir_var_function_in);
+ sig_params.push_tail(access_ref);
+
ir_function_signature *sig = new(mem_ctx)
ir_function_signature(glsl_type::void_type, shader_storage_buffer_object);
assert(sig);
@@ -408,6 +435,7 @@ lower_ubo_reference_visitor::ssbo_store(void *mem_ctx,
call_params.push_tail(offset->clone(mem_ctx, NULL));
call_params.push_tail(deref->clone(mem_ctx, NULL));
call_params.push_tail(new(mem_ctx) ir_constant(write_mask));
+ call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));
return new(mem_ctx) ir_call(sig, NULL, &call_params);
}
@@ -426,6 +454,10 @@ lower_ubo_reference_visitor::ssbo_load(void *mem_ctx,
ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in);
sig_params.push_tail(offset_ref);
+ ir_variable *access_ref = new(mem_ctx)
+ ir_variable(glsl_type::uint_type, "access" , ir_var_function_in);
+ sig_params.push_tail(access_ref);
+
ir_function_signature *sig =
new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object);
assert(sig);
@@ -444,6 +476,7 @@ lower_ubo_reference_visitor::ssbo_load(void *mem_ctx,
exec_list call_params;
call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
call_params.push_tail(offset->clone(mem_ctx, NULL));
+ call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));
return new(mem_ctx) ir_call(sig, deref_result, &call_params);
}
@@ -499,6 +532,7 @@ lower_ubo_reference_visitor::write_to_memory(void *mem_ctx,
unsigned packing = var->get_interface_type()->interface_packing;
this->buffer_access_type = ssbo_store_access;
+ this->variable = var;
/* Compute the offset to the start of the dereference as well as other
* information we need to configure the write
@@ -678,6 +712,7 @@ lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalu
int unsized_array_stride = calculate_unsized_array_stride(deref, packing);
this->buffer_access_type = ssbo_unsized_array_length_access;
+ this->variable = var;
/* Compute the offset to the start of the dereference as well as other
* information we need to calculate the length.
@@ -910,6 +945,7 @@ lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir)
unsigned packing = var->get_interface_type()->interface_packing;
this->buffer_access_type = ssbo_atomic_access;
+ this->variable = var;
setup_for_load_or_store(mem_ctx, var, deref,
&offset, &const_offset,
diff --git a/src/compiler/glsl/opt_tree_grafting.cpp b/src/compiler/glsl/opt_tree_grafting.cpp
index 83effb7424c..812f996fb81 100644
--- a/src/compiler/glsl/opt_tree_grafting.cpp
+++ b/src/compiler/glsl/opt_tree_grafting.cpp
@@ -361,11 +361,12 @@ tree_grafting_basic_block(ir_instruction *bb_first,
if (!lhs_var)
continue;
- if (lhs_var->data.mode == ir_var_function_out ||
- lhs_var->data.mode == ir_var_function_inout ||
- lhs_var->data.mode == ir_var_shader_out ||
- lhs_var->data.mode == ir_var_shader_storage)
- continue;
+ if (lhs_var->data.mode == ir_var_function_out ||
+ lhs_var->data.mode == ir_var_function_inout ||
+ lhs_var->data.mode == ir_var_shader_out ||
+ lhs_var->data.mode == ir_var_shader_storage ||
+ lhs_var->data.mode == ir_var_shader_shared)
+ continue;
ir_variable_refcount_entry *entry = info->refs->get_variable_entry(lhs_var);
diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp
index 5920c2e2611..d2eaec173b3 100644
--- a/src/compiler/glsl_types.cpp
+++ b/src/compiler/glsl_types.cpp
@@ -164,6 +164,11 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
this->fields.structure[i].sample = fields[i].sample;
this->fields.structure[i].matrix_layout = fields[i].matrix_layout;
this->fields.structure[i].patch = fields[i].patch;
+ this->fields.structure[i].image_read_only = fields[i].image_read_only;
+ this->fields.structure[i].image_write_only = fields[i].image_write_only;
+ this->fields.structure[i].image_coherent = fields[i].image_coherent;
+ this->fields.structure[i].image_volatile = fields[i].image_volatile;
+ this->fields.structure[i].image_restrict = fields[i].image_restrict;
this->fields.structure[i].precision = fields[i].precision;
}
@@ -1330,6 +1335,13 @@ glsl_type::can_implicitly_convert_to(const glsl_type *desired,
if (this == desired)
return true;
+ /* ESSL does not allow implicit conversions. If there is no state, we're
+ * doing intra-stage function linking where these checks have already been
+ * done.
+ */
+ if (state && state->es_shader)
+ return false;
+
/* There is no conversion among matrix types. */
if (this->matrix_columns > 1 || desired->matrix_columns > 1)
return false;
diff --git a/src/compiler/glsl_types.h b/src/compiler/glsl_types.h
index a9b5281e774..5965cb2eedb 100644
--- a/src/compiler/glsl_types.h
+++ b/src/compiler/glsl_types.h
@@ -885,7 +885,8 @@ struct glsl_struct_field {
glsl_struct_field(const struct glsl_type *_type, const char *_name)
: type(_type), name(_name), location(-1), interpolation(0), centroid(0),
sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0),
- precision(GLSL_PRECISION_NONE)
+ precision(GLSL_PRECISION_NONE), image_read_only(0), image_write_only(0),
+ image_coherent(0), image_volatile(0), image_restrict(0)
{
/* empty */
}
diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c
index 37cb0221e0b..312d2f99a1c 100644
--- a/src/compiler/nir/nir_lower_alu_to_scalar.c
+++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
@@ -139,7 +139,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
b->shader->options->lower_pack_unorm_2x16);
nir_ssa_def *word =
- nir_extract_uword(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
+ nir_extract_u16(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
nir_ssa_def *val =
nir_ior(b, nir_ishl(b, nir_channel(b, word, 1), nir_imm_int(b, 16)),
nir_channel(b, word, 0));
@@ -154,7 +154,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
b->shader->options->lower_pack_unorm_4x8);
nir_ssa_def *byte =
- nir_extract_ubyte(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
+ nir_extract_u8(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
nir_ssa_def *val =
nir_ior(b, nir_ior(b, nir_ishl(b, nir_channel(b, byte, 3), nir_imm_int(b, 24)),
nir_ishl(b, nir_channel(b, byte, 2), nir_imm_int(b, 16))),
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 0eff89783dd..60ade4a80ae 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -238,15 +238,15 @@ unpack_2x16("unorm")
unpack_4x8("unorm")
unpack_2x16("half")
-unop_horiz("pack_uvec2_to_uint", 0, tuint, 2, tuint, """
-dst = (src0.x & 0xffff) | (src0.y >> 16);
+# Pack two components into one uint: the low 16 bits of src0.x go to bits
+# 0-15 and src0.y is shifted up into bits 16-31.
+unop_horiz("pack_uvec2_to_uint", 1, tuint, 2, tuint, """
+dst.x = (src0.x & 0xffff) | (src0.y << 16);
+""")
-unop_horiz("pack_uvec4_to_uint", 0, tuint, 4, tuint, """
-dst = (src0.x << 0) |
- (src0.y << 8) |
- (src0.z << 16) |
- (src0.w << 24);
+# Pack four components into one uint by shifting x, y, z and w to bit
+# offsets 0, 8, 16 and 24 and OR-ing them together.  The components are not
+# masked here, so any bits above the low 8 of x, y or z end up in the
+# higher byte lanes.
+unop_horiz("pack_uvec4_to_uint", 1, tuint, 4, tuint, """
+dst.x = (src0.x << 0) |
+ (src0.y << 8) |
+ (src0.z << 16) |
+ (src0.w << 24);
+""")
# Lowered floating point unpacking operations.
@@ -562,12 +562,12 @@ dst.y = src1.x;
""")
# Byte extraction
-binop("extract_ubyte", tuint, "", "(uint8_t)(src0 >> (src1 * 8))")
-binop("extract_ibyte", tint, "", "(int8_t)(src0 >> (src1 * 8))")
+binop("extract_u8", tuint, "", "(uint8_t)(src0 >> (src1 * 8))")
+binop("extract_i8", tint, "", "(int8_t)(src0 >> (src1 * 8))")
# Word extraction
-binop("extract_uword", tuint, "", "(uint16_t)(src0 >> (src1 * 16))")
-binop("extract_iword", tint, "", "(int16_t)(src0 >> (src1 * 16))")
+binop("extract_u16", tuint, "", "(uint16_t)(src0 >> (src1 * 16))")
+binop("extract_i16", tint, "", "(int16_t)(src0 >> (src1 * 16))")
def triop(name, ty, const_expr):
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index f4bfd3a921a..d4f4a3d903c 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -248,19 +248,19 @@ optimizations = [
('ubfe', 'value', 'offset', 'bits')),
'options->lower_bitfield_extract'),
- (('extract_ibyte', a, b),
- ('ishr', ('ishl', a, ('imul', ('isub', 3, b), 8)), 8),
+ (('extract_i8', a, b),
+ ('ishr', ('ishl', a, ('imul', ('isub', 3, b), 8)), 24),
'options->lower_extract_byte'),
- (('extract_ubyte', a, b),
+ (('extract_u8', a, b),
('iand', ('ushr', a, ('imul', b, 8)), 0xff),
'options->lower_extract_byte'),
- (('extract_iword', a, b),
+ (('extract_i16', a, b),
('ishr', ('ishl', a, ('imul', ('isub', 1, b), 16)), 16),
'options->lower_extract_word'),
- (('extract_uword', a, b),
+ (('extract_u16', a, b),
('iand', ('ushr', a, ('imul', b, 16)), 0xffff),
'options->lower_extract_word'),
@@ -285,30 +285,30 @@ optimizations = [
'options->lower_pack_snorm_4x8'),
(('unpack_unorm_2x16', 'v'),
- ('fdiv', ('u2f', ('vec4', ('extract_uword', 'v', 0),
- ('extract_uword', 'v', 1), 0, 0)),
+ ('fdiv', ('u2f', ('vec2', ('extract_u16', 'v', 0),
+ ('extract_u16', 'v', 1))),
65535.0),
'options->lower_unpack_unorm_2x16'),
(('unpack_unorm_4x8', 'v'),
- ('fdiv', ('u2f', ('vec4', ('extract_ubyte', 'v', 0),
- ('extract_ubyte', 'v', 1),
- ('extract_ubyte', 'v', 2),
- ('extract_ubyte', 'v', 3))),
+ ('fdiv', ('u2f', ('vec4', ('extract_u8', 'v', 0),
+ ('extract_u8', 'v', 1),
+ ('extract_u8', 'v', 2),
+ ('extract_u8', 'v', 3))),
255.0),
'options->lower_unpack_unorm_4x8'),
(('unpack_snorm_2x16', 'v'),
- ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_iword', 'v', 0),
- ('extract_iword', 'v', 1), 0, 0)),
+ ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec2', ('extract_i16', 'v', 0),
+ ('extract_i16', 'v', 1))),
32767.0))),
'options->lower_unpack_snorm_2x16'),
(('unpack_snorm_4x8', 'v'),
- ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_ibyte', 'v', 0),
- ('extract_ibyte', 'v', 1),
- ('extract_ibyte', 'v', 2),
- ('extract_ibyte', 'v', 3))),
+ ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_i8', 'v', 0),
+ ('extract_i8', 'v', 1),
+ ('extract_i8', 'v', 2),
+ ('extract_i8', 'v', 3))),
127.0))),
'options->lower_unpack_snorm_4x8'),
]
diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h
index e3f46e3d739..d44aabf8f3c 100644
--- a/src/compiler/shader_enums.h
+++ b/src/compiler/shader_enums.h
@@ -544,6 +544,16 @@ enum gl_frag_depth_layout
FRAG_DEPTH_LAYOUT_UNCHANGED
};
+/**
+ * \brief Buffer access qualifiers
+ *
+ * Each enumerator is a distinct bit so that several qualifiers can be
+ * OR'ed together into a single access mask.
+ */
+enum gl_buffer_access_qualifier
+{
+ ACCESS_COHERENT = 1, /* set when the access is qualified "coherent" */
+ ACCESS_RESTRICT = 2, /* set when the access is qualified "restrict" */
+ ACCESS_VOLATILE = 4, /* set when the access is qualified "volatile" */
+};
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/src/gallium/Android.mk b/src/gallium/Android.mk
index 749be7dfeb9..2b469b65ee4 100644
--- a/src/gallium/Android.mk
+++ b/src/gallium/Android.mk
@@ -85,7 +85,7 @@ endif
# virgl
ifneq ($(filter virgl, $(MESA_GPU_DRIVERS)),)
-SUBDIRS += winsys/virgl/drm drivers/virgl
+SUBDIRS += winsys/virgl/drm winsys/virgl/vtest drivers/virgl
endif
# vmwgfx
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
index 7854142f736..7cf0deece81 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@@ -130,6 +130,7 @@ lp_build_half_to_float(struct gallivm_state *gallivm,
*
* Convert float32 to half floats, preserving Infs and NaNs,
* with rounding towards zero (trunc).
+ * XXX: For GL, would prefer rounding towards nearest(-even).
*/
LLVMValueRef
lp_build_float_to_half(struct gallivm_state *gallivm,
@@ -143,6 +144,15 @@ lp_build_float_to_half(struct gallivm_state *gallivm,
struct lp_type i16_type = lp_type_int_vec(16, 16 * length);
LLVMValueRef result;
+ /*
+ * Note: Newer llvm versions (3.6 or so) support fptrunc to 16 bits
+ * directly, without any (x86 or generic) intrinsics.
+ * However, the rounding mode cannot be specified (and is undefined;
+ * in practice x86 seems to do nearest-even, but that may depend on
+ * instruction set support), so this is essentially useless here.
+ */
+
if (util_cpu_caps.has_f16c &&
(length == 4 || length == 8)) {
struct lp_type i168_type = lp_type_int_vec(16, 16 * 8);
@@ -187,7 +197,11 @@ lp_build_float_to_half(struct gallivm_state *gallivm,
LLVMValueRef index = LLVMConstInt(i32t, i, 0);
LLVMValueRef f32 = LLVMBuildExtractElement(builder, src, index, "");
#if 0
- /* XXX: not really supported by backends */
+ /*
+ * XXX: not really supported by backends.
+ * Even if they were by now, the rounding mode cannot be specified and
+ * is undefined.
+ */
LLVMValueRef f16 = lp_build_intrinsic_unary(builder, "llvm.convert.to.fp16", i16t, f32);
#else
LLVMValueRef f16 = LLVMBuildCall(builder, func, &f32, 1, "");
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
index 0b0f7f0147c..d80c997ad84 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
@@ -257,6 +257,32 @@ lp_build_concat_n(struct gallivm_state *gallivm,
/**
+ * Un-interleave a vector.
+ *
+ * Picks every second element of \p a, starting at element \p lo_hi
+ * (0 selects elements 0, 2, 4, ...; 1 selects elements 1, 3, 5, ...).
+ * The returned vector has num_elems / 2 elements, i.e. both its element
+ * count and its total width are half those of the source vector.
+ *
+ * \param gallivm    state providing the LLVM builder
+ * \param num_elems  number of elements in \p a, at most LP_MAX_VECTOR_LENGTH
+ * \param a          source vector
+ * \param lo_hi      index of the first element to pick (0 or 1)
+ */
+LLVMValueRef
+lp_build_uninterleave1(struct gallivm_state *gallivm,
+                       unsigned num_elems,
+                       LLVMValueRef a,
+                       unsigned lo_hi)
+{
+   const unsigned half = num_elems / 2;
+   LLVMValueRef indices[LP_MAX_VECTOR_LENGTH];
+   LLVMValueRef mask;
+   unsigned idx;
+
+   assert(num_elems <= LP_MAX_VECTOR_LENGTH);
+
+   /* Shuffle indices lo_hi, lo_hi + 2, lo_hi + 4, ... */
+   for (idx = 0; idx < half; idx++) {
+      indices[idx] = lp_build_const_int32(gallivm, lo_hi + 2 * idx);
+   }
+
+   mask = LLVMConstVector(indices, half);
+
+   return LLVMBuildShuffleVector(gallivm->builder, a, a, mask, "");
+}
+
+
+/**
* Interleave vector elements.
*
* Matches the PUNPCKLxx and PUNPCKHxx SSE instructions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
index 7cede35bbde..367fba1fd21 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
@@ -58,6 +58,11 @@ lp_build_interleave2(struct gallivm_state *gallivm,
LLVMValueRef b,
unsigned lo_hi);
+LLVMValueRef
+lp_build_uninterleave1(struct gallivm_state *gallivm,
+ unsigned num_elems,
+ LLVMValueRef a,
+ unsigned lo_hi);
void
lp_build_unpack2(struct gallivm_state *gallivm,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
index c88dfbf974a..1cbe47ca91f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
@@ -248,7 +248,6 @@ lp_build_tgsi_inst_llvm(
/* Ignore deprecated instructions */
switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_UP2H:
case TGSI_OPCODE_UP2US:
case TGSI_OPCODE_UP4B:
case TGSI_OPCODE_UP4UB:
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
index 6f75bec5005..43af6b4ea0d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -45,8 +45,10 @@
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_const.h"
+#include "lp_bld_conv.h"
#include "lp_bld_gather.h"
#include "lp_bld_logic.h"
+#include "lp_bld_pack.h"
#include "tgsi/tgsi_exec.h"
@@ -530,6 +532,77 @@ static struct lp_build_tgsi_action log_action = {
log_emit /* emit */
};
+/* TGSI_OPCODE_PK2H */
+
+/**
+ * Fetch the operands of PK2H: the X and Y channels of source 0 are stored
+ * in args[0] and args[1] respectively.
+ */
+static void
+pk2h_fetch_args(
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* args[0] = src0.x, args[1] = src0.y */
+   static const unsigned src0_chans[2] = { TGSI_CHAN_X, TGSI_CHAN_Y };
+   unsigned i;
+
+   for (i = 0; i < 2; i++) {
+      emit_data->args[i] = lp_build_emit_fetch(bld_base, emit_data->inst,
+                                               0, src0_chans[i]);
+   }
+}
+
+/**
+ * Emit PK2H: convert both fetched operands to half floats, pad each result
+ * to twice the base vector length and interleave them (lo_hi = 0) as
+ * 16-bit unsigned elements.  The result is stored in the output slot of the
+ * channel currently being emitted.
+ */
+static void
+pk2h_emit(
+   const struct lp_build_tgsi_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+   const unsigned length = bld_base->base.type.length;
+   /* 16-bit unsigned elements, 32 bits of vector width per base element */
+   struct lp_type half_pair_type = lp_type_uint_vec(16, length * 32);
+   LLVMValueRef half_x, half_y;
+
+   half_x = lp_build_float_to_half(gallivm, emit_data->args[0]);
+   half_y = lp_build_float_to_half(gallivm, emit_data->args[1]);
+   /* maybe some interleave doubling vector width would be useful... */
+   half_x = lp_build_pad_vector(gallivm, half_x, length * 2);
+   half_y = lp_build_pad_vector(gallivm, half_y, length * 2);
+
+   emit_data->output[emit_data->chan] =
+      lp_build_interleave2(gallivm, half_pair_type, half_x, half_y, 0);
+}
+
+/* Action for TGSI_OPCODE_PK2H; installed by lp_set_default_actions(). */
+static struct lp_build_tgsi_action pk2h_action = {
+ pk2h_fetch_args, /* fetch_args */
+ pk2h_emit /* emit */
+};
+
+/* TGSI_OPCODE_UP2H */
+
+/**
+ * Emit UP2H: reinterpret the fetched operand as a vector of 16-bit
+ * elements (twice the base vector length), split it into its even and odd
+ * elements and convert each half to float.  The even-element result is
+ * written to output channels 0 and 2, the odd-element result to channels
+ * 1 and 3.
+ */
+static void
+up2h_emit(
+   const struct lp_build_tgsi_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+   const unsigned length = bld_base->base.type.length;
+   LLVMTypeRef half_vec_type =
+      LLVMVectorType(LLVMInt16TypeInContext(gallivm->context), length * 2);
+   LLVMValueRef halves, even, odd, even_f, odd_f;
+
+   halves = LLVMBuildBitCast(gallivm->builder, emit_data->args[0],
+                             half_vec_type, "");
+   even = lp_build_uninterleave1(gallivm, length * 2, halves, 0);
+   odd = lp_build_uninterleave1(gallivm, length * 2, halves, 1);
+   even_f = lp_build_half_to_float(gallivm, even);
+   odd_f = lp_build_half_to_float(gallivm, odd);
+
+   emit_data->output[2] = even_f;
+   emit_data->output[0] = even_f;
+   emit_data->output[3] = odd_f;
+   emit_data->output[1] = odd_f;
+}
+
+/* Action for TGSI_OPCODE_UP2H; installed by lp_set_default_actions(). */
+static struct lp_build_tgsi_action up2h_action = {
+ scalar_unary_fetch_args, /* fetch_args */
+ up2h_emit /* emit */
+};
+
/* TGSI_OPCODE_LRP */
static void
@@ -1032,10 +1105,12 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action;
bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action;
bld_base->op_actions[TGSI_OPCODE_LOG] = log_action;
+ bld_base->op_actions[TGSI_OPCODE_PK2H] = pk2h_action;
bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action;
bld_base->op_actions[TGSI_OPCODE_SQRT] = sqrt_action;
bld_base->op_actions[TGSI_OPCODE_POW] = pow_action;
bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action;
+ bld_base->op_actions[TGSI_OPCODE_UP2H] = up2h_action;
bld_base->op_actions[TGSI_OPCODE_XPD] = xpd_action;
bld_base->op_actions[TGSI_OPCODE_BREAKC].fetch_args = scalar_unary_fetch_args;
diff --git a/src/gallium/auxiliary/target-helpers/drm_helper.h b/src/gallium/auxiliary/target-helpers/drm_helper.h
index 332b1cba984..90820d3fe91 100644
--- a/src/gallium/auxiliary/target-helpers/drm_helper.h
+++ b/src/gallium/auxiliary/target-helpers/drm_helper.h
@@ -226,14 +226,9 @@ pipe_freedreno_create_screen(int fd)
struct pipe_screen *
pipe_virgl_create_screen(int fd)
{
- struct virgl_winsys *vws;
struct pipe_screen *screen;
- vws = virgl_drm_winsys_create(fd);
- if (!vws)
- return NULL;
-
- screen = virgl_create_screen(vws);
+ screen = virgl_drm_screen_create(fd);
return screen ? debug_screen_wrap(screen) : NULL;
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index f67c16200a9..d898fd66f48 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -58,6 +58,7 @@
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi_exec.h"
+#include "util/u_half.h"
#include "util/u_memory.h"
#include "util/u_math.h"
@@ -3058,6 +3059,45 @@ exec_dp2(struct tgsi_exec_machine *mach,
}
+/**
+ * Execute TGSI_OPCODE_PK2H.
+ *
+ * Converts the X and Y channels of source 0 to half floats and packs them
+ * into one 32-bit value per quad element: X in bits 0-15, Y in bits 16-31.
+ * The same packed value is stored to every destination channel enabled in
+ * the write mask.
+ */
static void
+exec_pk2h(struct tgsi_exec_machine *mach,
+          const struct tgsi_full_instruction *inst)
+{
+   unsigned chan;
+   union tgsi_exec_channel arg[2], dst;
+
+   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
+   fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+   for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
+      /* Widen to 32 bits before shifting: a 16-bit value is promoted to
+       * signed int, and shifting one with its top bit set left by 16
+       * would overflow the int.
+       */
+      uint32_t lo = util_float_to_half(arg[0].f[chan]);
+      uint32_t hi = util_float_to_half(arg[1].f[chan]);
+
+      dst.u[chan] = lo | (hi << 16);
+   }
+   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+         store_dest(mach, &dst, &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_UINT);
+      }
+   }
+}
+
+/**
+ * Execute TGSI_OPCODE_UP2H.
+ *
+ * Reads the X channel of source 0 as unsigned integers and converts the
+ * low and the high 16 bits of each quad element from half float to float.
+ * Even destination channels enabled in the write mask receive the
+ * low-half result, odd ones the high-half result.
+ */
+static void
+exec_up2h(struct tgsi_exec_machine *mach,
+          const struct tgsi_full_instruction *inst)
+{
+   union tgsi_exec_channel packed, unpacked[2];
+   unsigned lane, chan;
+
+   fetch_source(mach, &packed, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
+
+   for (lane = 0; lane < TGSI_QUAD_SIZE; lane++) {
+      unpacked[0].f[lane] = util_half_to_float(packed.u[lane] & 0xffff);
+      unpacked[1].f[lane] = util_half_to_float(packed.u[lane] >> 16);
+   }
+
+   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+      if (!(inst->Dst[0].Register.WriteMask & (1 << chan)))
+         continue;
+
+      store_dest(mach, &unpacked[chan % 2], &inst->Dst[0], inst, chan,
+                 TGSI_EXEC_DATA_FLOAT);
+   }
+}
+
+static void
exec_scs(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst)
{
@@ -4339,7 +4379,7 @@ exec_instruction(
break;
case TGSI_OPCODE_PK2H:
- assert (0);
+ exec_pk2h(mach, inst);
break;
case TGSI_OPCODE_PK2US:
@@ -4425,7 +4465,7 @@ exec_instruction(
break;
case TGSI_OPCODE_UP2H:
- assert (0);
+ exec_up2h(mach, inst);
break;
case TGSI_OPCODE_UP2US:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index b270dd73b67..70fc4604537 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -149,7 +149,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSGE", TGSI_OPCODE_FSGE },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSLT", TGSI_OPCODE_FSLT },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSNE", TGSI_OPCODE_FSNE },
- { 0, 1, 0, 0, 0, 0, 1, NONE, "", 112 }, /* removed */
+ { 0, 1, 0, 0, 0, 0, 0, OTHR, "MEMBAR", TGSI_OPCODE_MEMBAR },
{ 0, 1, 0, 0, 0, 0, 0, NONE, "CALLNZ", TGSI_OPCODE_CALLNZ },
{ 0, 1, 0, 0, 0, 0, 0, NONE, "", 114 }, /* removed */
{ 0, 1, 0, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC },
@@ -426,6 +426,7 @@ tgsi_opcode_infer_src_type( uint opcode )
case TGSI_OPCODE_SAMPLE_I:
case TGSI_OPCODE_SAMPLE_I_MS:
case TGSI_OPCODE_UMUL_HI:
+ case TGSI_OPCODE_UP2H:
return TGSI_TYPE_UNSIGNED;
case TGSI_OPCODE_IMUL_HI:
case TGSI_OPCODE_I2F:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 7a02e27e01e..687fb54830d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -377,6 +377,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
info->reads_position = TRUE;
else if (semName == TGSI_SEMANTIC_FACE)
info->uses_frontface = TRUE;
+ else if (semName == TGSI_SEMANTIC_SAMPLEMASK)
+ info->reads_samplemask = TRUE;
}
else if (file == TGSI_FILE_OUTPUT) {
info->output_semantic_name[reg] = (ubyte) semName;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h
index b0b423ab528..0541255764c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -81,6 +81,7 @@ struct tgsi_shader_info
ubyte colors_written;
boolean reads_position; /**< does fragment shader read position? */
boolean reads_z; /**< does fragment shader read depth? */
+ boolean reads_samplemask; /**< does fragment shader read sample mask? */
boolean writes_z; /**< does fragment shader write Z value? */
boolean writes_stencil; /**< does fragment shader write stencil value? */
boolean writes_samplemask; /**< does fragment shader write sample mask? */
diff --git a/src/gallium/auxiliary/util/u_box.h b/src/gallium/auxiliary/util/u_box.h
index 66cf989a830..00f231dc683 100644
--- a/src/gallium/auxiliary/util/u_box.h
+++ b/src/gallium/auxiliary/util/u_box.h
@@ -195,4 +195,16 @@ u_box_minify_2d(struct pipe_box *dst,
dst->height = MAX2(src->height >> l, 1);
}
+/**
+ * Minify a 3D box by \p l levels.
+ *
+ * The origin (x, y, z) is shifted right by \p l; each extent (width,
+ * height, depth) is shifted right by \p l and clamped to at least 1.
+ */
+static inline void
+u_box_minify_3d(struct pipe_box *dst,
+                const struct pipe_box *src, unsigned l)
+{
+   /* X axis */
+   dst->x = src->x >> l;
+   dst->width = MAX2(src->width >> l, 1);
+
+   /* Y axis */
+   dst->y = src->y >> l;
+   dst->height = MAX2(src->height >> l, 1);
+
+   /* Z axis */
+   dst->z = src->z >> l;
+   dst->depth = MAX2(src->depth >> l, 1);
+}
+
#endif
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c
index c719d3a77f0..a84de4fef7b 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -52,7 +52,7 @@
#include <machine/cpu.h>
#endif
-#if defined(PIPE_OS_FREEBSD)
+#if defined(PIPE_OS_FREEBSD) || defined(PIPE_OS_DRAGONFLY)
#include <sys/types.h>
#include <sys/sysctl.h>
#endif
diff --git a/src/gallium/auxiliary/util/u_format_parse.py b/src/gallium/auxiliary/util/u_format_parse.py
index 929017a4486..d83603faa78 100755
--- a/src/gallium/auxiliary/util/u_format_parse.py
+++ b/src/gallium/auxiliary/util/u_format_parse.py
@@ -313,7 +313,7 @@ def _parse_channels(fields, layout, colorspace, swizzles):
return channels
def parse(filename):
- '''Parse the format descrition in CSV format in terms of the
+ '''Parse the format description in CSV format in terms of the
Channel and Format classes above.'''
stream = open(filename)
diff --git a/src/gallium/auxiliary/util/u_half.h b/src/gallium/auxiliary/util/u_half.h
index d28fae3c77d..966d213bdd5 100644
--- a/src/gallium/auxiliary/util/u_half.h
+++ b/src/gallium/auxiliary/util/u_half.h
@@ -74,7 +74,11 @@ util_float_to_half(float f)
f32.ui &= round_mask;
f32.f *= magic.f;
f32.ui -= round_mask;
-
+ /*
+ * XXX: The magic mul relies on denorms being available, otherwise
+ * all f16 denorms get flushed to zero - hence when this is used
+ * for tgsi_exec in softpipe we won't get f16 denorms.
+ */
/*
* Clamp to max finite value if overflowed.
* OpenGL has completely undefined rounding behavior for float to
@@ -112,6 +116,7 @@ util_half_to_float(uint16_t f16)
/* Adjust */
f32.f *= magic.f;
+ /* XXX: The magic mul relies on denorms being available */
/* Inf / NaN */
if (f32.f >= infnan.f)
diff --git a/src/gallium/auxiliary/vl/vl_zscan.c b/src/gallium/auxiliary/vl/vl_zscan.c
index 1c6cdd4f2c9..5241471f516 100644
--- a/src/gallium/auxiliary/vl/vl_zscan.c
+++ b/src/gallium/auxiliary/vl/vl_zscan.c
@@ -49,6 +49,13 @@ enum VS_OUTPUT
VS_O_VTEX = 0
};
+/*
+ * 16-entry zig-zag scan order; each index 0..15 appears exactly once.
+ * NOTE(review): presumably meant for 4x4 blocks -- confirm against users.
+ */
+const int vl_zscan_normal_16[] =
+{
+ /* Zig-Zag scan pattern */
+ 0, 1, 4, 8, 5, 2, 3, 6,
+ 9,12,13,10, 7,11,14,15
+};
+
const int vl_zscan_linear[] =
{
/* Linear scan pattern */
diff --git a/src/gallium/auxiliary/vl/vl_zscan.h b/src/gallium/auxiliary/vl/vl_zscan.h
index eacee2db64f..268cf0a6e32 100644
--- a/src/gallium/auxiliary/vl/vl_zscan.h
+++ b/src/gallium/auxiliary/vl/vl_zscan.h
@@ -64,6 +64,7 @@ struct vl_zscan_buffer
struct pipe_surface *dst;
};
+extern const int vl_zscan_normal_16[];
extern const int vl_zscan_linear[];
extern const int vl_zscan_normal[];
extern const int vl_zscan_alternate[];
diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst
index 4c03e00008c..904e1ff04e7 100644
--- a/src/gallium/docs/source/context.rst
+++ b/src/gallium/docs/source/context.rst
@@ -325,6 +325,11 @@ returned). Otherwise, if the ``wait`` parameter is FALSE, the call
will not block and the return value will be TRUE if the query has
completed or FALSE otherwise.
+``get_query_result_resource`` is used to store the result of a query into
+a resource without synchronizing with the CPU. This write will optionally
+wait for the query to complete, and will optionally write whether the value
+is available instead of the value itself.
+
The interface currently includes the following types of queries:
``PIPE_QUERY_OCCLUSION_COUNTER`` counts the number of fragments which
diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst
index b461810644a..3324bcca6f4 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -138,6 +138,10 @@ The integer capabilities:
* ``PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT``: Describes the required
alignment for pipe_sampler_view::u.buf.first_element, in bytes.
If a driver does not support first/last_element, it should return 0.
+* ``PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY``: Whether the driver only
+ supports R, RG, RGB and RGBA formats for PIPE_BUFFER sampler views.
+ When this is the case it should be assumed that the swizzle parameters
+ in the sampler view have no effect.
* ``PIPE_CAP_TGSI_TEXCOORD``: This CAP describes a hw limitation.
If true, the hardware cannot replace arbitrary shader inputs with sprite
coordinates and hence the inputs that are desired to be replaceable must
@@ -164,7 +168,7 @@ The integer capabilities:
view it is intended to be used with, or herein undefined results may occur
for permutational swizzles.
* ``PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE``: The maximum accessible size with
- a buffer sampler view, in bytes.
+ a buffer sampler view, in texels.
* ``PIPE_CAP_MAX_VIEWPORTS``: The maximum number of viewports (and scissors
since they are linked) a driver can support. Returning 0 is equivalent
to returning 1 because every driver has to support at least a single
@@ -306,6 +310,15 @@ The integer capabilities:
* ``PIPE_CAP_GENERATE_MIPMAP``: Indicates whether pipe_context::generate_mipmap
is supported.
* ``PIPE_CAP_STRING_MARKER``: Whether pipe->emit_string_marker() is supported.
+* ``PIPE_CAP_SURFACE_REINTERPRET_BLOCKS``: Indicates whether
+ pipe_context::create_surface supports reinterpreting a texture as a surface
+ of a format with different block width/height (but same block size in bits).
+ For example, a compressed texture image can be interpreted as a
+ non-compressed surface whose texels are the same number of bits as the
+ compressed blocks, and vice versa. The width and height of the surface are
+ adjusted appropriately.
+* ``PIPE_CAP_QUERY_BUFFER_OBJECT``: Driver supports the
+ pipe_context::get_query_result_resource callback.
.. _pipe_capf:
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 7810a3eb915..489cbb0bc2f 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -2372,6 +2372,23 @@ programs.
the program. Results are unspecified if any of the remaining
threads terminates or never reaches an executed BARRIER instruction.
+.. opcode:: MEMBAR - Memory barrier
+
+ ``MEMBAR type``
+
+ This opcode waits for the completion of all memory accesses based on
+ the type passed in. The type is an immediate bitfield with the following
+ meaning:
+
+ Bit 0: Shader storage buffers
+ Bit 1: Atomic buffers
+ Bit 2: Images
+ Bit 3: Shared memory
+ Bit 4: Thread group
+
+ These may be passed in any combination. An implementation is free to not
+ distinguish between these as it sees fit. However these map to all the
+ possibilities made available by GLSL.
.. _atomopcodes:
diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c
index c5ea86f9368..c54bb1091f7 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/src/gallium/drivers/freedreno/freedreno_context.c
@@ -152,6 +152,9 @@ fd_emit_string_marker(struct pipe_context *pctx, const char *string, int len)
struct fd_ringbuffer *ring = ctx->ring;
const uint32_t *buf = (const void *)string;
+ /* max packet size is 0x3fff dwords: */
+ len = MIN2(len, 0x3fff * 4);
+
OUT_PKT3(ring, CP_NOP, align(len, 4) / 4);
while (len >= 4) {
OUT_RING(ring, *buf);
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 640f50f5dcb..27f4d267438 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -165,6 +165,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_BARRIER:
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
case PIPE_CAP_COMPUTE:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
case PIPE_CAP_SM3:
@@ -183,6 +184,8 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_CLIP_HALFZ:
return is_a3xx(screen) || is_a4xx(screen);
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ return 0;
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
if (is_a3xx(screen)) return 16;
if (is_a4xx(screen)) return 32;
@@ -248,6 +251,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
return 0;
case PIPE_CAP_MAX_VIEWPORTS:
@@ -296,6 +300,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
/* Queries. */
case PIPE_CAP_QUERY_TIME_ELAPSED:
case PIPE_CAP_QUERY_TIMESTAMP:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
return 0;
case PIPE_CAP_OCCLUSION_QUERY:
return is_a3xx(screen) || is_a4xx(screen);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 1ea2dd9cbf7..6eb6a2d52ef 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -556,6 +556,10 @@ create_frag_coord(struct ir3_compile *ctx, unsigned comp)
}
}
+/* NOTE: this creates the "TGSI" style fragface (ie. input slot
+ * VARYING_SLOT_FACE). For NIR style nir_intrinsic_load_front_face
+ * we can just use the value from hw directly (since it is boolean)
+ */
static struct ir3_instruction *
create_frag_face(struct ir3_compile *ctx, unsigned comp)
{
@@ -1224,7 +1228,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
break;
case nir_intrinsic_load_vertex_id_zero_base:
if (!ctx->vertex_id) {
- ctx->vertex_id = create_input(ctx->block, 0);
+ ctx->vertex_id = create_input(b, 0);
add_sysval_input(ctx, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
ctx->vertex_id);
}
@@ -1232,7 +1236,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
break;
case nir_intrinsic_load_instance_id:
if (!ctx->instance_id) {
- ctx->instance_id = create_input(ctx->block, 0);
+ ctx->instance_id = create_input(b, 0);
add_sysval_input(ctx, SYSTEM_VALUE_INSTANCE_ID,
ctx->instance_id);
}
@@ -1244,6 +1248,14 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
dst[i] = create_driver_param(ctx, IR3_DP_UCP0_X + n);
}
break;
+ case nir_intrinsic_load_front_face:
+ if (!ctx->frag_face) {
+ ctx->so->frag_face = true;
+ ctx->frag_face = create_input(b, 0);
+ ctx->frag_face->regs[0]->flags |= IR3_REG_HALF;
+ }
+ dst[0] = ir3_ADD_S(b, ctx->frag_face, 0, create_immed(b, 1), 0);
+ break;
case nir_intrinsic_discard_if:
case nir_intrinsic_discard: {
struct ir3_instruction *cond, *kill;
@@ -1349,6 +1361,7 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
struct ir3_block *b = ctx->block;
struct ir3_instruction **dst, *sam, *src0[12], *src1[4];
struct ir3_instruction **coord, *lod, *compare, *proj, **off, **ddx, **ddy;
+ struct ir3_instruction *const_off[4];
bool has_bias = false, has_lod = false, has_proj = false, has_off = false;
unsigned i, coords, flags;
unsigned nsrc0 = 0, nsrc1 = 0;
@@ -1392,7 +1405,7 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
ddy = get_src(ctx, &tex->src[i].src);
break;
default:
- compile_error(ctx, "Unhandled NIR tex serc type: %d\n",
+ compile_error(ctx, "Unhandled NIR tex src type: %d\n",
tex->src[i].src_type);
return;
}
@@ -1417,6 +1430,21 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
tex_info(tex, &flags, &coords);
+ if (!has_off) {
+ /* could still have a constant offset: */
+ if (tex->const_offset[0] || tex->const_offset[1] ||
+ tex->const_offset[2] || tex->const_offset[3]) {
+ off = const_off;
+
+ off[0] = create_immed(b, tex->const_offset[0]);
+ off[1] = create_immed(b, tex->const_offset[1]);
+ off[2] = create_immed(b, tex->const_offset[2]);
+ off[3] = create_immed(b, tex->const_offset[3]);
+
+ has_off = true;
+ }
+ }
+
/* scale up integer coords for TXF based on the LOD */
if (ctx->unminify_coords && (opc == OPC_ISAML)) {
assert(has_lod);
@@ -2053,6 +2081,9 @@ setup_output(struct ir3_compile *ctx, nir_variable *out)
case VARYING_SLOT_CLIP_DIST0:
case VARYING_SLOT_CLIP_DIST1:
break;
+ case VARYING_SLOT_CLIP_VERTEX:
+ /* handled entirely in nir_lower_clip: */
+ return;
default:
if (slot >= VARYING_SLOT_VAR0)
break;
@@ -2135,11 +2166,17 @@ emit_instructions(struct ir3_compile *ctx)
setup_output(ctx, var);
}
- /* Setup variables (which should only be arrays): */
+ /* Setup global variables (which should only be arrays): */
nir_foreach_variable(var, &ctx->s->globals) {
declare_var(ctx, var);
}
+ /* Setup local variables (which should only be arrays): */
+ /* NOTE: need to do something more clever when we support >1 fxn */
+ nir_foreach_variable(var, &fxn->locals) {
+ declare_var(ctx, var);
+ }
+
/* And emit the body: */
ctx->impl = fxn;
emit_function(ctx, fxn);
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index 6b0ab587001..8d010f9dc8c 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -262,6 +262,9 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c
index 5171cca9ea6..44d7c11af43 100644
--- a/src/gallium/drivers/ilo/ilo_screen.c
+++ b/src/gallium/drivers/ilo/ilo_screen.c
@@ -428,6 +428,8 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_CUBE_MAP_ARRAY:
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
return true;
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ return 0;
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
return 1;
case PIPE_CAP_TGSI_TEXCOORD:
@@ -486,6 +488,9 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
index db45cbbb057..34008e1c01e 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -308,17 +308,4 @@ void
lp_debug_draw_bins_by_coverage( struct lp_scene *scene );
-#ifdef PIPE_ARCH_SSE
-#include <emmintrin.h>
-#include "util/u_sse.h"
-
-static inline __m128i
-lp_plane_to_m128i(const struct lp_rast_plane *plane)
-{
- return _mm_setr_epi32((int32_t)plane->c, (int32_t)plane->dcdx,
- (int32_t)plane->dcdy, (int32_t)plane->eo);
-}
-
-#endif
-
#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
index 0ae6ec28d35..f4a2f0268f0 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -239,7 +239,7 @@ sign_bits4(const __m128i *cstep, int cdiff)
void
lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
- const union lp_rast_cmd_arg arg)
+ const union lp_rast_cmd_arg arg)
{
const struct lp_rast_triangle *tri = arg.triangle.tri;
const struct lp_rast_plane *plane = GET_PLANES(tri);
@@ -250,26 +250,29 @@ lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
struct { unsigned mask:16; unsigned i:8; unsigned j:8; } out[16];
unsigned nr = 0;
- __m128i p0 = lp_plane_to_m128i(&plane[0]); /* c, dcdx, dcdy, eo */
- __m128i p1 = lp_plane_to_m128i(&plane[1]); /* c, dcdx, dcdy, eo */
- __m128i p2 = lp_plane_to_m128i(&plane[2]); /* c, dcdx, dcdy, eo */
+ /* p0 and p2 are aligned, p1 is not (plane size 24 bytes). */
+ __m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* clo, chi, dcdx, dcdy */
+ __m128i p1 = _mm_loadu_si128((__m128i *)&plane[1]);
+ __m128i p2 = _mm_load_si128((__m128i *)&plane[2]);
__m128i zero = _mm_setzero_si128();
- __m128i c;
- __m128i dcdx;
- __m128i dcdy;
- __m128i rej4;
-
- __m128i dcdx2;
- __m128i dcdx3;
+ __m128i c, dcdx, dcdy, rej4;
+ __m128i dcdx_neg_mask, dcdy_neg_mask;
+ __m128i dcdx2, dcdx3;
__m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */
__m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */
__m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */
__m128i unused;
-
+
transpose4_epi32(&p0, &p1, &p2, &zero,
- &c, &dcdx, &dcdy, &rej4);
+ &c, &unused, &dcdx, &dcdy);
+
+ /* recalc eo - easier than trying to load as scalars / shuffle... */
+ dcdx_neg_mask = _mm_srai_epi32(dcdx, 31);
+ dcdy_neg_mask = _mm_srai_epi32(dcdy, 31);
+ rej4 = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy),
+ _mm_and_si128(dcdx_neg_mask, dcdx));
/* Adjust dcdx;
*/
@@ -349,32 +352,29 @@ lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
void
lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task,
- const union lp_rast_cmd_arg arg)
+ const union lp_rast_cmd_arg arg)
{
const struct lp_rast_triangle *tri = arg.triangle.tri;
const struct lp_rast_plane *plane = GET_PLANES(tri);
unsigned x = (arg.triangle.plane_mask & 0xff) + task->x;
unsigned y = (arg.triangle.plane_mask >> 8) + task->y;
- __m128i p0 = lp_plane_to_m128i(&plane[0]); /* c, dcdx, dcdy, eo */
- __m128i p1 = lp_plane_to_m128i(&plane[1]); /* c, dcdx, dcdy, eo */
- __m128i p2 = lp_plane_to_m128i(&plane[2]); /* c, dcdx, dcdy, eo */
+ /* p0 and p2 are aligned, p1 is not (plane size 24 bytes). */
+ __m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* clo, chi, dcdx, dcdy */
+ __m128i p1 = _mm_loadu_si128((__m128i *)&plane[1]);
+ __m128i p2 = _mm_load_si128((__m128i *)&plane[2]);
__m128i zero = _mm_setzero_si128();
- __m128i c;
- __m128i dcdx;
- __m128i dcdy;
+ __m128i c, dcdx, dcdy;
+ __m128i dcdx2, dcdx3;
- __m128i dcdx2;
- __m128i dcdx3;
-
__m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */
__m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */
__m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */
__m128i unused;
transpose4_epi32(&p0, &p1, &p2, &zero,
- &c, &dcdx, &dcdy, &unused);
+ &c, &unused, &dcdx, &dcdy);
/* Adjust dcdx;
*/
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 879a2e7d2f0..2c66bf46332 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -311,6 +311,10 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
}
/* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h
index 03bb8ce2b6f..5ab297d7e1a 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h
@@ -168,6 +168,21 @@ struct lp_setup_context
const float (*v2)[4]);
};
+static inline void
+scissor_planes_needed(boolean scis_planes[4], struct u_rect *bbox,
+ struct u_rect *scissor)
+{
+ /* left */
+ scis_planes[0] = (bbox->x0 < scissor->x0);
+ /* right */
+ scis_planes[1] = (bbox->x1 > scissor->x1);
+ /* top */
+ scis_planes[2] = (bbox->y0 < scissor->y0);
+ /* bottom */
+ scis_planes[3] = (bbox->y1 > scissor->y1);
+}
+
+
void lp_setup_choose_triangle( struct lp_setup_context *setup );
void lp_setup_choose_line( struct lp_setup_context *setup );
void lp_setup_choose_point( struct lp_setup_context *setup );
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c
index f425825fc2a..af4e7900d3c 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_line.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c
@@ -336,13 +336,6 @@ try_setup_line( struct lp_setup_context *setup,
layer = MIN2(layer, scene->fb_max_layer);
}
- if (setup->scissor_test) {
- nr_planes = 8;
- }
- else {
- nr_planes = 4;
- }
-
dx = v1[0][0] - v2[0][0];
dy = v1[0][1] - v2[0][1];
area = (dx * dx + dy * dy);
@@ -591,6 +584,18 @@ try_setup_line( struct lp_setup_context *setup,
bbox.x0 = MAX2(bbox.x0, 0);
bbox.y0 = MAX2(bbox.y0, 0);
+ nr_planes = 4;
+ /*
+ * Determine how many scissor planes we need, that is drop scissor
+ * edges if the bounding box of the line is fully inside that edge.
+ */
+ if (setup->scissor_test) {
+ /* why not just use draw_regions */
+ boolean s_planes[4];
+ scissor_planes_needed(s_planes, &bbox, &setup->scissors[viewport_index]);
+ nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3];
+ }
+
line = lp_setup_alloc_triangle(scene,
key->num_inputs,
nr_planes,
@@ -708,30 +713,46 @@ try_setup_line( struct lp_setup_context *setup,
* Note that otherwise, the scissor planes only vary in 'C' value,
* and even then only on state-changes. Could alternatively store
* these planes elsewhere.
+ * (Or only store the c value together with a bit indicating which
+ * scissor edge this is, so rasterization would treat them differently
+ * (easier to evaluate) to ordinary planes.)
*/
- if (nr_planes == 8) {
- const struct u_rect *scissor =
- &setup->scissors[viewport_index];
-
- plane[4].dcdx = -1 << 8;
- plane[4].dcdy = 0;
- plane[4].c = (1-scissor->x0) << 8;
- plane[4].eo = 1 << 8;
-
- plane[5].dcdx = 1 << 8;
- plane[5].dcdy = 0;
- plane[5].c = (scissor->x1+1) << 8;
- plane[5].eo = 0;
-
- plane[6].dcdx = 0;
- plane[6].dcdy = 1 << 8;
- plane[6].c = (1-scissor->y0) << 8;
- plane[6].eo = 1 << 8;
-
- plane[7].dcdx = 0;
- plane[7].dcdy = -1 << 8;
- plane[7].c = (scissor->y1+1) << 8;
- plane[7].eo = 0;
+ if (nr_planes > 4) {
+ /* why not just use draw_regions */
+ struct u_rect *scissor = &setup->scissors[viewport_index];
+ struct lp_rast_plane *plane_s = &plane[4];
+ boolean s_planes[4];
+ scissor_planes_needed(s_planes, &bbox, scissor);
+
+ if (s_planes[0]) {
+ plane_s->dcdx = -1 << 8;
+ plane_s->dcdy = 0;
+ plane_s->c = (1-scissor->x0) << 8;
+ plane_s->eo = 1 << 8;
+ plane_s++;
+ }
+ if (s_planes[1]) {
+ plane_s->dcdx = 1 << 8;
+ plane_s->dcdy = 0;
+ plane_s->c = (scissor->x1+1) << 8;
+ plane_s->eo = 0 << 8;
+ plane_s++;
+ }
+ if (s_planes[2]) {
+ plane_s->dcdx = 0;
+ plane_s->dcdy = 1 << 8;
+ plane_s->c = (1-scissor->y0) << 8;
+ plane_s->eo = 1 << 8;
+ plane_s++;
+ }
+ if (s_planes[3]) {
+ plane_s->dcdx = 0;
+ plane_s->dcdy = -1 << 8;
+ plane_s->c = (scissor->y1+1) << 8;
+ plane_s->eo = 0;
+ plane_s++;
+ }
+ assert(plane_s == &plane[nr_planes]);
}
return lp_setup_bin_triangle(setup, line, &bbox, nr_planes, viewport_index);
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index 907129dbd1b..cdb3d015dec 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -302,13 +302,6 @@ do_triangle_ccw(struct lp_setup_context *setup,
layer = MIN2(layer, scene->fb_max_layer);
}
- if (setup->scissor_test) {
- nr_planes = 7;
- }
- else {
- nr_planes = 3;
- }
-
/* Bounding rectangle (in pixels) */
{
/* Yes this is necessary to accurately calculate bounding boxes
@@ -347,6 +340,18 @@ do_triangle_ccw(struct lp_setup_context *setup,
bbox.x0 = MAX2(bbox.x0, 0);
bbox.y0 = MAX2(bbox.y0, 0);
+ nr_planes = 3;
+ /*
+ * Determine how many scissor planes we need, that is drop scissor
+ * edges if the bounding box of the tri is fully inside that edge.
+ */
+ if (setup->scissor_test) {
+ /* why not just use draw_regions */
+ boolean s_planes[4];
+ scissor_planes_needed(s_planes, &bbox, &setup->scissors[viewport_index]);
+ nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3];
+ }
+
tri = lp_setup_alloc_triangle(scene,
key->num_inputs,
nr_planes,
@@ -367,13 +372,11 @@ do_triangle_ccw(struct lp_setup_context *setup,
/* Setup parameter interpolants:
*/
- setup->setup.variant->jit_function( v0,
- v1,
- v2,
- frontfacing,
- GET_A0(&tri->inputs),
- GET_DADX(&tri->inputs),
- GET_DADY(&tri->inputs) );
+ setup->setup.variant->jit_function(v0, v1, v2,
+ frontfacing,
+ GET_A0(&tri->inputs),
+ GET_DADX(&tri->inputs),
+ GET_DADY(&tri->inputs));
tri->inputs.frontfacing = frontfacing;
tri->inputs.disable = FALSE;
@@ -383,9 +386,9 @@ do_triangle_ccw(struct lp_setup_context *setup,
if (0)
lp_dump_setup_coef(&setup->setup.variant->key,
- (const float (*)[4])GET_A0(&tri->inputs),
- (const float (*)[4])GET_DADX(&tri->inputs),
- (const float (*)[4])GET_DADY(&tri->inputs));
+ (const float (*)[4])GET_A0(&tri->inputs),
+ (const float (*)[4])GET_DADX(&tri->inputs),
+ (const float (*)[4])GET_DADY(&tri->inputs));
plane = GET_PLANES(tri);
@@ -672,29 +675,46 @@ do_triangle_ccw(struct lp_setup_context *setup,
* Note that otherwise, the scissor planes only vary in 'C' value,
* and even then only on state-changes. Could alternatively store
* these planes elsewhere.
+ * (Or only store the c value together with a bit indicating which
+ * scissor edge this is, so rasterization would treat them differently
+ * (easier to evaluate) to ordinary planes.)
*/
- if (nr_planes == 7) {
- const struct u_rect *scissor = &setup->scissors[viewport_index];
-
- plane[3].dcdx = -1 << 8;
- plane[3].dcdy = 0;
- plane[3].c = (1-scissor->x0) << 8;
- plane[3].eo = 1 << 8;
-
- plane[4].dcdx = 1 << 8;
- plane[4].dcdy = 0;
- plane[4].c = (scissor->x1+1) << 8;
- plane[4].eo = 0;
-
- plane[5].dcdx = 0;
- plane[5].dcdy = 1 << 8;
- plane[5].c = (1-scissor->y0) << 8;
- plane[5].eo = 1 << 8;
-
- plane[6].dcdx = 0;
- plane[6].dcdy = -1 << 8;
- plane[6].c = (scissor->y1+1) << 8;
- plane[6].eo = 0;
+ if (nr_planes > 3) {
+ /* why not just use draw_regions */
+ struct u_rect *scissor = &setup->scissors[viewport_index];
+ struct lp_rast_plane *plane_s = &plane[3];
+ boolean s_planes[4];
+ scissor_planes_needed(s_planes, &bbox, scissor);
+
+ if (s_planes[0]) {
+ plane_s->dcdx = -1 << 8;
+ plane_s->dcdy = 0;
+ plane_s->c = (1-scissor->x0) << 8;
+ plane_s->eo = 1 << 8;
+ plane_s++;
+ }
+ if (s_planes[1]) {
+ plane_s->dcdx = 1 << 8;
+ plane_s->dcdy = 0;
+ plane_s->c = (scissor->x1+1) << 8;
+ plane_s->eo = 0 << 8;
+ plane_s++;
+ }
+ if (s_planes[2]) {
+ plane_s->dcdx = 0;
+ plane_s->dcdy = 1 << 8;
+ plane_s->c = (1-scissor->y0) << 8;
+ plane_s->eo = 1 << 8;
+ plane_s++;
+ }
+ if (s_planes[3]) {
+ plane_s->dcdx = 0;
+ plane_s->dcdy = -1 << 8;
+ plane_s->c = (scissor->y1+1) << 8;
+ plane_s->eo = 0;
+ plane_s++;
+ }
+ assert(plane_s == &plane[nr_planes]);
}
return lp_setup_bin_triangle(setup, tri, &bbox, nr_planes, viewport_index);
@@ -984,17 +1004,16 @@ calc_fixed_position(struct lp_setup_context *setup,
* Both should be acceptable, I think.
*/
#if defined(PIPE_ARCH_SSE)
- __m128d v0r, v1r, v2r;
+ __m128 v0r, v1r;
__m128 vxy0xy2, vxy1xy0;
__m128i vxy0xy2i, vxy1xy0i;
__m128i dxdy0120, x0x2y0y2, x1x0y1y0, x0120, y0120;
__m128 pix_offset = _mm_set1_ps(setup->pixel_offset);
__m128 fixed_one = _mm_set1_ps((float)FIXED_ONE);
- v0r = _mm_load_sd((const double *)v0[0]);
- v1r = _mm_load_sd((const double *)v1[0]);
- v2r = _mm_load_sd((const double *)v2[0]);
- vxy0xy2 = _mm_castpd_ps(_mm_unpacklo_pd(v0r, v2r));
- vxy1xy0 = _mm_castpd_ps(_mm_unpacklo_pd(v1r, v0r));
+ v0r = _mm_castpd_ps(_mm_load_sd((double *)v0[0]));
+ vxy0xy2 = _mm_loadh_pi(v0r, (__m64 *)v2[0]);
+ v1r = _mm_castpd_ps(_mm_load_sd((double *)v1[0]));
+ vxy1xy0 = _mm_movelh_ps(v1r, vxy0xy2);
vxy0xy2 = _mm_sub_ps(vxy0xy2, pix_offset);
vxy1xy0 = _mm_sub_ps(vxy1xy0, pix_offset);
vxy0xy2 = _mm_mul_ps(vxy0xy2, fixed_one);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index 6ad9dd31681..75e5fd843c2 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -393,6 +393,9 @@ ImmediateValue::isInteger(const int i) const
case TYPE_S32:
case TYPE_U32:
return reg.data.s32 == i; // as if ...
+ case TYPE_S64:
+ case TYPE_U64:
+ return reg.data.s64 == i; // as if ...
case TYPE_F32:
return reg.data.f32 == static_cast<float>(i);
case TYPE_F64:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index d1fdd75495f..9d7becf27d4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -132,6 +132,7 @@ enum operation
OP_SUBFM, // surface bitfield manipulation
OP_SUCLAMP, // clamp surface coordinates
OP_SUEAU, // surface effective address
+ OP_SUQ, // surface query
OP_MADSP, // special integer multiply-add
OP_TEXBAR, // texture dependency barrier
OP_DFDX,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index 17cb484d2ba..0c7cd1d8137 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -1947,10 +1947,16 @@ CodeEmitterGK110::emitInstruction(Instruction *insn)
case OP_CEIL:
case OP_FLOOR:
case OP_TRUNC:
- case OP_CVT:
case OP_SAT:
emitCVT(insn);
break;
+ case OP_CVT:
+ if (insn->def(0).getFile() == FILE_PREDICATE ||
+ insn->src(0).getFile() == FILE_PREDICATE)
+ emitMOV(insn);
+ else
+ emitCVT(insn);
+ break;
case OP_RSQ:
emitSFnOp(insn, 5 + 2 * insn->subOp);
break;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index 1fa0eb6da6d..dee26225b7e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -673,7 +673,12 @@ CodeEmitterGM107::emitMOV()
(insn->sType != TYPE_F32 && !longIMMD(insn->src(0)))) {
switch (insn->src(0).getFile()) {
case FILE_GPR:
- emitInsn(0x5c980000);
+ if (insn->def(0).getFile() == FILE_PREDICATE) {
+ emitInsn(0x5b6a0000);
+ emitGPR (0x08);
+ } else {
+ emitInsn(0x5c980000);
+ }
emitGPR (0x14, insn->src(0));
break;
case FILE_MEMORY_CONST:
@@ -684,18 +689,32 @@ CodeEmitterGM107::emitMOV()
emitInsn(0x38980000);
emitIMMD(0x14, 19, insn->src(0));
break;
+ case FILE_PREDICATE:
+ emitInsn(0x50880000);
+ emitPRED(0x0c, insn->src(0));
+ emitPRED(0x1d);
+ emitPRED(0x27);
+ break;
default:
assert(!"bad src file");
break;
}
- emitField(0x27, 4, insn->lanes);
+ if (insn->def(0).getFile() != FILE_PREDICATE &&
+ insn->src(0).getFile() != FILE_PREDICATE)
+ emitField(0x27, 4, insn->lanes);
} else {
emitInsn (0x01000000);
emitIMMD (0x14, 32, insn->src(0));
emitField(0x0c, 4, insn->lanes);
}
- emitGPR(0x00, insn->def(0));
+ if (insn->def(0).getFile() == FILE_PREDICATE) {
+ emitPRED(0x27);
+ emitPRED(0x03, insn->def(0));
+ emitPRED(0x00);
+ } else {
+ emitGPR(0x00, insn->def(0));
+ }
}
void
@@ -2684,11 +2703,7 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
emitRAM();
break;
case OP_MOV:
- if (insn->def(0).getFile() == FILE_GPR &&
- insn->src(0).getFile() != FILE_PREDICATE)
- emitMOV();
- else
- assert(!"R2P/P2R");
+ emitMOV();
break;
case OP_RDSV:
emitS2R();
@@ -2700,7 +2715,10 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
case OP_CEIL:
case OP_TRUNC:
case OP_CVT:
- if (isFloatType(insn->dType)) {
+ if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
+ insn->src(0).getFile() == FILE_PREDICATE)) {
+ emitMOV();
+ } else if (isFloatType(insn->dType)) {
if (isFloatType(insn->sType))
emitF2F();
else
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index 0b28047e22b..8637db91521 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -2021,8 +2021,10 @@ CodeEmitterNVC0::emitATOM(const Instruction *i)
code[0] |= 63 << 20;
}
- if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
- srcId(i->src(2), 32 + 17);
+ if (i->subOp == NV50_IR_SUBOP_ATOM_CAS) {
+ assert(i->src(1).getSize() == 2 * typeSizeof(i->sType));
+ code[1] |= (SDATA(i->src(1)).id + 1) << 17;
+ }
}
void
@@ -2433,10 +2435,16 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn)
case OP_CEIL:
case OP_FLOOR:
case OP_TRUNC:
- case OP_CVT:
case OP_SAT:
emitCVT(insn);
break;
+ case OP_CVT:
+ if (insn->def(0).getFile() == FILE_PREDICATE ||
+ insn->src(0).getFile() == FILE_PREDICATE)
+ emitMOV(insn);
+ else
+ emitCVT(insn);
+ break;
case OP_RSQ:
emitSFnOp(insn, 5 + 2 * insn->subOp);
break;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 9c4a38f291b..52ac198221d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -38,6 +38,7 @@ static nv50_ir::operation translateOpcode(uint opcode);
static nv50_ir::DataFile translateFile(uint file);
static nv50_ir::TexTarget translateTexture(uint texTarg);
static nv50_ir::SVSemantic translateSysVal(uint sysval);
+static nv50_ir::CacheMode translateCacheMode(uint qualifier);
class Instruction
{
@@ -213,6 +214,12 @@ public:
nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const;
+ nv50_ir::CacheMode getCacheMode() const {
+ if (!insn->Instruction.Memory)
+ return nv50_ir::CACHE_CA;
+ return translateCacheMode(insn->Memory.Qualifier);
+ }
+
inline uint getLabel() { return insn->Label.Label; }
unsigned getSaturate() const { return insn->Instruction.Saturate; }
@@ -366,7 +373,7 @@ static nv50_ir::DataFile translateFile(uint file)
case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE;
case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE;
case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE;
- //case TGSI_FILE_RESOURCE: return nv50_ir::FILE_MEMORY_GLOBAL;
+ case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_GLOBAL;
case TGSI_FILE_SAMPLER:
case TGSI_FILE_NULL:
default:
@@ -436,6 +443,15 @@ static nv50_ir::TexTarget translateTexture(uint tex)
}
}
+static nv50_ir::CacheMode translateCacheMode(uint qualifier)
+{
+ if (qualifier & TGSI_MEMORY_VOLATILE)
+ return nv50_ir::CACHE_CV;
+ if (qualifier & TGSI_MEMORY_COHERENT)
+ return nv50_ir::CACHE_CG;
+ return nv50_ir::CACHE_CA;
+}
+
nv50_ir::DataType Instruction::inferSrcType() const
{
switch (getOpcode()) {
@@ -1210,6 +1226,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
case TGSI_FILE_IMMEDIATE:
case TGSI_FILE_PREDICATE:
case TGSI_FILE_SAMPLER:
+ case TGSI_FILE_BUFFER:
break;
default:
ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
@@ -1255,6 +1272,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) {
if (insn.getDst(0).isIndirect(0))
indirectTempArrays.insert(insn.getDst(0).getArrayId());
+ } else
+ if (insn.getDst(0).getFile() == TGSI_FILE_BUFFER) {
+ info->io.globalAccess |= 0x2;
}
}
@@ -1264,13 +1284,10 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
if (src.isIndirect(0))
indirectTempArrays.insert(src.getArrayId());
} else
-/*
- if (src.getFile() == TGSI_FILE_RESOURCE) {
- if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL)
- info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
+ if (src.getFile() == TGSI_FILE_BUFFER) {
+ info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
0x1 : 0x2;
} else
-*/
if (src.getFile() == TGSI_FILE_OUTPUT) {
if (src.isIndirect(0)) {
// We don't know which one is accessed, just mark everything for
@@ -1752,7 +1769,7 @@ Converter::acquireDst(int d, int c)
int idx = dst.getIndex(0);
int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
- if (dst.isMasked(c)/* || f == TGSI_FILE_RESOURCE*/)
+ if (dst.isMasked(c) || f == TGSI_FILE_BUFFER)
return NULL;
if (dst.isIndirect(0) ||
@@ -2222,6 +2239,28 @@ Converter::handleLOAD(Value *dst0[4])
int c;
std::vector<Value *> off, src, ldv, def;
+ if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
+ for (c = 0; c < 4; ++c) {
+ if (!dst0[c])
+ continue;
+
+ Value *off = fetchSrc(1, c);
+ Symbol *sym;
+ if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) {
+ off = NULL;
+ sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(0, info) + 4 * c);
+ } else {
+ sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c);
+ }
+
+ Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off);
+ ld->cache = tgsi.getCacheMode();
+ if (tgsi.getSrc(0).isIndirect(0))
+ ld->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0));
+ }
+ return;
+ }
+
getResourceCoords(off, r, 1);
if (isResourceRaw(code, r)) {
@@ -2298,6 +2337,30 @@ Converter::handleSTORE()
int c;
std::vector<Value *> off, src, dummy;
+ if (tgsi.getDst(0).getFile() == TGSI_FILE_BUFFER) {
+ for (c = 0; c < 4; ++c) {
+ if (!(tgsi.getDst(0).getMask() & (1 << c)))
+ continue;
+
+ Symbol *sym;
+ Value *off;
+ if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMMEDIATE) {
+ off = NULL;
+ sym = makeSym(TGSI_FILE_BUFFER, r, -1, c,
+ tgsi.getSrc(0).getValueU32(0, info) + 4 * c);
+ } else {
+ off = fetchSrc(0, 0);
+ sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c);
+ }
+
+ Instruction *st = mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c));
+ st->cache = tgsi.getCacheMode();
+ if (tgsi.getDst(0).isIndirect(0))
+ st->setIndirect(0, 1, fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0));
+ }
+ return;
+ }
+
getResourceCoords(off, r, 0);
src = off;
const int s = src.size();
@@ -2359,6 +2422,37 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
std::vector<Value *> defv;
LValue *dst = getScratch();
+ if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
+ for (int c = 0; c < 4; ++c) {
+ if (!dst0[c])
+ continue;
+
+ Instruction *insn;
+ Value *off = fetchSrc(1, c), *off2 = NULL;
+ Value *sym;
+ if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE)
+ sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(c, info));
+ else
+ sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 0);
+ if (tgsi.getSrc(0).isIndirect(0))
+ off2 = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
+ if (subOp == NV50_IR_SUBOP_ATOM_CAS)
+ insn = mkOp3(OP_ATOM, ty, dst, sym, fetchSrc(2, c), fetchSrc(3, c));
+ else
+ insn = mkOp2(OP_ATOM, ty, dst, sym, fetchSrc(2, c));
+ if (tgsi.getSrc(1).getFile() != TGSI_FILE_IMMEDIATE)
+ insn->setIndirect(0, 0, off);
+ if (off2)
+ insn->setIndirect(0, 1, off2);
+ insn->subOp = subOp;
+ }
+ for (int c = 0; c < 4; ++c)
+ if (dst0[c])
+ dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
+ return;
+ }
+
+
getResourceCoords(srcv, r, 1);
if (isResourceSpecial(r)) {
@@ -3103,6 +3197,14 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
geni->fixed = 1;
geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());
break;
+ case TGSI_OPCODE_MEMBAR:
+ geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
+ geni->fixed = 1;
+ if (tgsi.getSrc(0).getValueU32(0, info) & TGSI_MEMBAR_THREAD_GROUP)
+ geni->subOp = NV50_IR_SUBOP_MEMBAR(M, CTA);
+ else
+ geni->subOp = NV50_IR_SUBOP_MEMBAR(M, GL);
+ break;
case TGSI_OPCODE_ATOMUADD:
case TGSI_OPCODE_ATOMXCHG:
case TGSI_OPCODE_ATOMCAS:
@@ -3115,6 +3217,12 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
case TGSI_OPCODE_ATOMIMAX:
handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode()));
break;
+ case TGSI_OPCODE_RESQ:
+ geni = mkOp1(OP_SUQ, TYPE_U32, dst0[0],
+ makeSym(TGSI_FILE_BUFFER, tgsi.getSrc(0).getIndex(0), -1, 0, 0));
+ if (tgsi.getSrc(0).isIndirect(0))
+ geni->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0));
+ break;
case TGSI_OPCODE_IBFE:
case TGSI_OPCODE_UBFE:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index dc1ab769b98..e7cb54bc426 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1022,11 +1022,22 @@ NVC0LoweringPass::handleTXLQ(TexInstruction *i)
return true;
}
+// Lower OP_SUQ (emitted by the TGSI front end for RESQ on a buffer) in place.
+// The instruction becomes an OP_MOV whose source is the value returned by
+// loadResLength32() for the buffer selected by source 0's fileIndex (the
+// offset passed is fileIndex * 16) and by the optional second-dimension
+// indirect. Always returns true.
+bool
+NVC0LoweringPass::handleSUQ(Instruction *suq)
+{
+ suq->op = OP_MOV;
+ suq->setSrc(0, loadResLength32(suq->getIndirect(0, 1),
+ suq->getSrc(0)->reg.fileIndex * 16));
+ // Source 0 is now the loaded value, so clear both of its indirect operands.
+ suq->setIndirect(0, 0, NULL);
+ suq->setIndirect(0, 1, NULL);
+ return true;
+}
bool
NVC0LoweringPass::handleATOM(Instruction *atom)
{
SVSemantic sv;
+ Value *ptr = atom->getIndirect(0, 0), *ind = atom->getIndirect(0, 1), *base;
switch (atom->src(0).getFile()) {
case FILE_MEMORY_LOCAL:
@@ -1037,16 +1048,22 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
break;
default:
assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL);
+ base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16);
+ assert(base->reg.size == 8);
+ if (ptr)
+ base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr);
+ assert(base->reg.size == 8);
+ atom->setIndirect(0, 0, base);
return true;
}
- Value *base =
+ base =
bld.mkOp1v(OP_RDSV, TYPE_U32, bld.getScratch(), bld.mkSysVal(sv, 0));
- Value *ptr = atom->getIndirect(0, 0);
atom->setSrc(0, cloneShallow(func, atom->getSrc(0)));
atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL;
if (ptr)
base = bld.mkOp2v(OP_ADD, TYPE_U32, base, base, ptr);
+ atom->setIndirect(0, 1, NULL);
atom->setIndirect(0, 0, base);
return true;
@@ -1069,7 +1086,7 @@ NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl)
cctl->setPredicate(cas->cc, cas->getPredicate());
}
- if (cas->defExists(0) && cas->subOp == NV50_IR_SUBOP_ATOM_CAS) {
+ if (cas->subOp == NV50_IR_SUBOP_ATOM_CAS) {
// CAS is crazy. It's 2nd source is a double reg, and the 3rd source
// should be set to the high part of the double reg or bad things will
// happen elsewhere in the universe.
@@ -1079,6 +1096,7 @@ NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl)
bld.setPosition(cas, false);
bld.mkOp2(OP_MERGE, TYPE_U64, dreg, cas->getSrc(1), cas->getSrc(2));
cas->setSrc(1, dreg);
+ cas->setSrc(2, dreg);
}
return true;
@@ -1094,6 +1112,32 @@ NVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off)
}
+// Emit a 64-bit load from the driver constant buffer in slot
+// io.resInfoCBSlot, at byte offset io.suInfoBase + off.
+//
+// ptr: optional index value; when non-NULL it is shifted left by 4 (i.e.
+//      multiplied by 16) and used as the indirect address of the load.
+// off: byte offset relative to io.suInfoBase.
+//
+// Returns the loaded value. Callers in this file add an offset to it and use
+// the result as the address of a FILE_MEMORY_GLOBAL access.
inline Value *
+NVC0LoweringPass::loadResInfo64(Value *ptr, uint32_t off)
+{
+ uint8_t b = prog->driver->io.resInfoCBSlot;
+ off += prog->driver->io.suInfoBase;
+
+ if (ptr)
+ ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getScratch(), ptr, bld.mkImm(4));
+
+ return bld.
+ mkLoadv(TYPE_U64, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U64, off), ptr);
+}
+
+// Emit a 32-bit load from the driver constant buffer in slot
+// io.resInfoCBSlot, at byte offset io.suInfoBase + off + 8, i.e. 8 bytes
+// past the location loadResInfo64() reads for the same arguments.
+//
+// ptr: optional index value; when non-NULL it is shifted left by 4 (i.e.
+//      multiplied by 16) and used as the indirect address of the load.
+// off: byte offset relative to io.suInfoBase.
+//
+// Returns the loaded value; callers in this file treat it as the buffer's
+// length (result of OP_SUQ, and the bound for the load/store range check).
+// NOTE(review): the symbol is created with TYPE_U64 although the load itself
+// is TYPE_U32 -- confirm this mismatch is intentional.
+inline Value *
+NVC0LoweringPass::loadResLength32(Value *ptr, uint32_t off)
+{
+ uint8_t b = prog->driver->io.resInfoCBSlot;
+ off += prog->driver->io.suInfoBase;
+
+ if (ptr)
+ ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getScratch(), ptr, bld.mkImm(4));
+
+ return bld.
+ mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U64, off + 8), ptr);
+}
+
+inline Value *
NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off)
{
uint8_t b = prog->driver->io.msInfoCBSlot;
@@ -1786,6 +1830,7 @@ NVC0LoweringPass::visit(Instruction *i)
return handleRDSV(i);
case OP_WRSV:
return handleWRSV(i);
+ case OP_STORE:
case OP_LOAD:
if (i->src(0).getFile() == FILE_SHADER_INPUT) {
if (prog->getType() == Program::TYPE_COMPUTE) {
@@ -1820,6 +1865,26 @@ NVC0LoweringPass::visit(Instruction *i)
} else if (i->src(0).getFile() == FILE_SHADER_OUTPUT) {
assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL);
i->op = OP_VFETCH;
+ } else if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
+ Value *ind = i->getIndirect(0, 1);
+ Value *ptr = loadResInfo64(ind, i->getSrc(0)->reg.fileIndex * 16);
+ // XXX come up with a way not to do this for EVERY little access but
+ // rather to batch these up somehow. Unfortunately we've lost the
+ // information about the field width by the time we get here.
+ Value *offset = bld.loadImm(NULL, i->getSrc(0)->reg.data.offset + typeSizeof(i->sType));
+ Value *length = loadResLength32(ind, i->getSrc(0)->reg.fileIndex * 16);
+ Value *pred = new_LValue(func, FILE_PREDICATE);
+ if (i->src(0).isIndirect(0)) {
+ bld.mkOp2(OP_ADD, TYPE_U64, ptr, ptr, i->getIndirect(0, 0));
+ bld.mkOp2(OP_ADD, TYPE_U32, offset, offset, i->getIndirect(0, 0));
+ }
+ i->setIndirect(0, 1, NULL);
+ i->setIndirect(0, 0, ptr);
+ bld.mkCmp(OP_SET, CC_GT, TYPE_U32, pred, TYPE_U32, offset, length);
+ i->setPredicate(CC_NOT_P, pred);
+ if (i->defExists(0)) {
+ bld.mkMov(i->getDef(0), bld.mkImm(0));
+ }
}
break;
case OP_ATOM:
@@ -1838,6 +1903,9 @@ NVC0LoweringPass::visit(Instruction *i)
if (targ->getChipset() >= NVISA_GK104_CHIPSET)
handleSurfaceOpNVE4(i->asTex());
break;
+ case OP_SUQ:
+ handleSUQ(i);
+ break;
default:
break;
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index adb400a559a..09ec7e69ddc 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -101,6 +101,7 @@ protected:
bool handleTXQ(TexInstruction *);
virtual bool handleManualTXD(TexInstruction *);
bool handleTXLQ(TexInstruction *);
+ bool handleSUQ(Instruction *);
bool handleATOM(Instruction *);
bool handleCasExch(Instruction *, bool needCctl);
void handleSurfaceOpNVE4(TexInstruction *);
@@ -116,6 +117,8 @@ private:
void readTessCoord(LValue *dst, int c);
Value *loadResInfo32(Value *ptr, uint32_t off);
+ Value *loadResInfo64(Value *ptr, uint32_t off);
+ Value *loadResLength32(Value *ptr, uint32_t off);
Value *loadMsInfo32(Value *ptr, uint32_t off);
Value *loadTexHandle(Value *ptr, unsigned int slot);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 95e9fdfc57d..05b8db4a3d8 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -336,6 +336,7 @@ private:
void expr(Instruction *, ImmediateValue&, ImmediateValue&);
void expr(Instruction *, ImmediateValue&, ImmediateValue&, ImmediateValue&);
void opnd(Instruction *, ImmediateValue&, int s);
+ void opnd3(Instruction *, ImmediateValue&);
void unary(Instruction *, const ImmediateValue&);
@@ -388,6 +389,8 @@ ConstantFolding::visit(BasicBlock *bb)
else
if (i->srcExists(1) && i->src(1).getImmediate(src1))
opnd(i, src1, 1);
+ if (i->srcExists(2) && i->src(2).getImmediate(src2))
+ opnd3(i, src2);
}
return true;
}
@@ -873,6 +876,24 @@ ConstantFolding::tryCollapseChainedMULs(Instruction *mul2,
}
+// Fold an instruction whose third source (src 2) is the immediate imm2.
+// Only one case is handled: OP_MAD / OP_FMA with an addend for which
+// isInteger(0) holds is rewritten into OP_MUL with src 2 removed, and
+// foldCount is bumped. Every other instruction is left unchanged.
void
+ConstantFolding::opnd3(Instruction *i, ImmediateValue &imm2)
+{
+ switch (i->op) {
+ case OP_MAD:
+ case OP_FMA:
+ if (imm2.isInteger(0)) {
+ // a * b + 0 -> a * b
+ i->op = OP_MUL;
+ i->setSrc(2, NULL);
+ foldCount++;
+ return;
+ }
+ break;
+ default:
+ return;
+ }
+}
+
+void
ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
{
const int t = !s;
@@ -1202,6 +1223,14 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 + imm1.reg.data.u32));
}
break;
+ case OP_SHR:
+ if (si->src(1).getImmediate(imm1) && imm0.reg.data.u32 == imm1.reg.data.u32) {
+ bld.setPosition(i, false);
+ i->op = OP_AND;
+ i->setSrc(0, si->getSrc(0));
+ i->setSrc(1, bld.loadImm(NULL, ~((1 << imm0.reg.data.u32) - 1)));
+ }
+ break;
case OP_MUL:
int muls;
if (isFloatType(si->dType))
@@ -2504,6 +2533,12 @@ MemoryOpt::runOpt(BasicBlock *bb)
}
} else
if (ldst->op == OP_STORE || ldst->op == OP_EXPORT) {
+ if (typeSizeof(ldst->dType) == 4 &&
+ ldst->src(1).getFile() == FILE_GPR &&
+ ldst->getSrc(1)->getInsn()->op == OP_NOP) {
+ delete_Instruction(prog, ldst);
+ continue;
+ }
isLoad = false;
} else {
// TODO: maybe have all fixed ops act as barrier ?
@@ -3015,7 +3050,7 @@ Instruction::isResultEqual(const Instruction *that) const
if (that->srcExists(s))
return false;
- if (op == OP_LOAD || op == OP_VFETCH) {
+ if (op == OP_LOAD || op == OP_VFETCH || op == OP_ATOM) {
switch (src(0).getFile()) {
case FILE_MEMORY_CONST:
case FILE_SHADER_INPUT:
@@ -3046,6 +3081,8 @@ GlobalCSE::visit(BasicBlock *bb)
ik = phi->getSrc(0)->getInsn();
if (!ik)
continue; // probably a function input
+ if (ik->defCount(0xff) > 1)
+ continue; // too painful to check if we can really push this forward
for (s = 1; phi->srcExists(s); ++s) {
if (phi->getSrc(s)->refCount() > 1)
break;
@@ -3179,10 +3216,10 @@ DeadCodeElim::buryAll(Program *prog)
bool
DeadCodeElim::visit(BasicBlock *bb)
{
- Instruction *next;
+ Instruction *prev;
- for (Instruction *i = bb->getFirst(); i; i = next) {
- next = i->next;
+ for (Instruction *i = bb->getExit(); i; i = prev) {
+ prev = i->prev;
if (i->isDead()) {
++deadCount;
delete_Instruction(prog, i);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index 0b02599dbdd..47285a25c33 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -161,6 +161,7 @@ const char *operationStr[OP_LAST + 1] =
"subfm",
"suclamp",
"sueau",
+ "suq",
"madsp",
"texbar",
"dfdx",
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index cd8c42ced5e..de39be872e4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -1544,6 +1544,9 @@ GCRA::cleanup(const bool success)
delete[] nodes;
nodes = NULL;
+ hi.next = hi.prev = &hi;
+ lo[0].next = lo[0].prev = &lo[0];
+ lo[1].next = lo[1].prev = &lo[1];
}
Symbol *
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
index 4390a726d1c..ae0a8bb61d1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -46,7 +46,7 @@ const uint8_t Target::operationSrcNr[] =
1, 1, 1, // TEX, TXB, TXL,
1, 1, 1, 1, 1, 1, 2, // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP
1, 1, 2, 2, 2, 2, 2, // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA
- 3, 3, 3, 3, // SUBFM, SUCLAMP, SUEAU, MADSP
+ 3, 3, 3, 1, 3, // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
0, // TEXBAR
1, 1, // DFDX, DFDY
1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
@@ -109,8 +109,8 @@ const OpClass Target::operationClass[] =
// SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA
OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE,
OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE,
- // SUBFM, SUCLAMP, SUEAU, MADSP
- OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH,
+ // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
+ OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH,
// TEXBAR
OPCLASS_OTHER,
// DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
diff --git a/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c b/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c
index a3d07deeb18..c6c287bb8bb 100644
--- a/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c
+++ b/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c
@@ -266,7 +266,9 @@ nouveau_vp3_bsp_next(struct nouveau_vp3_decoder *dec, unsigned num_buffers,
int i;
for (i = 0; i < num_buffers; ++i) {
+#ifndef NDEBUG
assert(bsp_bo->size >= str_bsp->w0[0] + num_bytes[i]);
+#endif
memcpy(dec->bsp_ptr, data[i], num_bytes[i]);
dec->bsp_ptr += num_bytes[i];
str_bsp->w0[0] += num_bytes[i];
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index 61d91fd4cce..b62889119c5 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -184,6 +184,10 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index 888d62e1c52..a67ef28abf8 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -369,7 +369,6 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
NOUVEAU_ERR("shader translation failed: %i\n", ret);
goto out;
}
- FREE(info->bin.syms);
prog->code = info->bin.code;
prog->code_size = info->bin.codeSize;
@@ -403,10 +402,13 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
break;
}
prog->gp.vert_count = info->prop.gp.maxVertices;
- } else
+ }
+
if (prog->type == PIPE_SHADER_COMPUTE) {
prog->cp.syms = info->bin.syms;
prog->cp.num_syms = info->bin.numSyms;
+ } else {
+ FREE(info->bin.syms);
}
if (prog->pipe.stream_output.num_outputs)
@@ -507,6 +509,9 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
FREE(p->interps);
FREE(p->so);
+ if (type == PIPE_SHADER_COMPUTE)
+ FREE(p->cp.syms);
+
memset(p, 0, sizeof(*p));
p->pipe = pipe;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 32da60e0a23..14d0085975b 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -227,6 +227,10 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
index 86be1b4c4ed..ec5cf376227 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -595,6 +595,82 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers,
}
+/* Fill `size` bytes of buffer `res`, starting at byte `offset`, with a
+ * repeated pattern by pushing the pattern words inline through the
+ * NV50_2D SIFC_* methods.
+ *
+ * pipe:      context; its pushbuf and bufctx are used.
+ * res:       destination buffer resource.
+ * offset:    start of the fill in bytes. Only the part above the low 8 bits
+ *            is used for the destination address; the low 8 bits are passed
+ *            separately as `xcoord`.
+ * size:      number of bytes to fill (programmed as SIFC_WIDTH).
+ * data:      pattern to replicate.
+ * data_size: size of the pattern in bytes. 1- and 2-byte patterns are
+ *            replicated into one 32-bit word first.
+ *
+ * NOTE(review): assumes data_size is 1, 2 or a multiple of 4; any other
+ * value makes data_words truncate (0 for data_size 3, which would divide by
+ * zero below) -- confirm callers guarantee this.
+ */
static void
+nv50_clear_buffer_push(struct pipe_context *pipe,
+ struct pipe_resource *res,
+ unsigned offset, unsigned size,
+ const void *data, int data_size)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv04_resource *buf = nv04_resource(res);
+ /* number of 32-bit words needed to cover `size` bytes, rounded up */
+ unsigned count = (size + 3) / 4;
+ unsigned xcoord = offset & 0xff;
+ unsigned tmp, i;
+
+ /* Widen 8- and 16-bit patterns to a full 32-bit word held in `tmp`. */
+ if (data_size == 1) {
+ tmp = *(unsigned char *)data;
+ tmp = (tmp << 24) | (tmp << 16) | (tmp << 8) | tmp;
+ data = &tmp;
+ data_size = 4;
+ } else if (data_size == 2) {
+ tmp = *(unsigned short *)data;
+ tmp = (tmp << 16) | tmp;
+ data = &tmp;
+ data_size = 4;
+ }
+
+ /* pattern length in 32-bit words */
+ unsigned data_words = data_size / 4;
+
+ nouveau_bufctx_refn(nv50->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR);
+ nouveau_pushbuf_bufctx(push, nv50->bufctx);
+ nouveau_pushbuf_validate(push);
+
+ /* Destination address is aligned down to 256 bytes; the remainder was
+ * saved in xcoord above.
+ */
+ offset &= ~0xff;
+
+ BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2);
+ PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_2D(DST_PITCH), 5);
+ PUSH_DATA (push, 262144);
+ PUSH_DATA (push, 65536);
+ PUSH_DATA (push, 1);
+ PUSH_DATAh(push, buf->address + offset);
+ PUSH_DATA (push, buf->address + offset);
+ BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+ /* 10 consecutive methods starting at SIFC_WIDTH; `size` is the first
+ * value and `xcoord` the eighth (presumably the destination X position --
+ * TODO confirm against the 2D class method layout).
+ */
+ BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10);
+ PUSH_DATA (push, size);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, xcoord);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+
+ /* Emit the pattern in packets of at most NV04_PFIFO_MAX_PACKET_LEN words,
+ * each holding a whole number of pattern repetitions.
+ */
+ while (count) {
+ unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / data_words;
+ unsigned nr = nr_data * data_words;
+
+ BEGIN_NI04(push, NV50_2D(SIFC_DATA), nr);
+ for (i = 0; i < nr_data; i++)
+ PUSH_DATAp(push, data, data_words);
+
+ count -= nr;
+ }
+
+ /* Only when buf->mm is set: attach the screen's current fence to the
+ * buffer as both `fence` and `fence_wr`.
+ */
+ if (buf->mm) {
+ nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence);
+ nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr);
+ }
+
+ nouveau_bufctx_reset(nv50->bufctx, 0);
+}
+
+static void
nv50_clear_buffer(struct pipe_context *pipe,
struct pipe_resource *res,
unsigned offset, unsigned size,
@@ -643,9 +719,22 @@ nv50_clear_buffer(struct pipe_context *pipe,
assert(size % data_size == 0);
+ if (offset & 0xff) {
+ unsigned fixup_size = MIN2(size, align(offset, 0x100) - offset);
+ assert(fixup_size % data_size == 0);
+ nv50_clear_buffer_push(pipe, res, offset, fixup_size, data, data_size);
+ offset += fixup_size;
+ size -= fixup_size;
+ if (!size)
+ return;
+ }
+
elements = size / data_size;
height = (elements + 8191) / 8192;
width = elements / height;
+ if (height > 1)
+ width &= ~0xff;
+ assert(width > 0);
BEGIN_NV04(push, NV50_3D(CLEAR_COLOR(0)), 4);
PUSH_DATAf(push, color.f[0]);
@@ -669,13 +758,13 @@ nv50_clear_buffer(struct pipe_context *pipe,
BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
PUSH_DATA (push, 1);
BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(0)), 5);
- PUSH_DATAh(push, buf->bo->offset + buf->offset + offset);
- PUSH_DATA (push, buf->bo->offset + buf->offset + offset);
+ PUSH_DATAh(push, buf->address + offset);
+ PUSH_DATA (push, buf->address + offset);
PUSH_DATA (push, nv50_format_table[dst_fmt].rt);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV50_3D(RT_HORIZ(0)), 2);
- PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | (width * data_size));
+ PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | align(width * data_size, 0x100));
PUSH_DATA (push, height);
BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1);
PUSH_DATA (push, 0);
@@ -694,26 +783,21 @@ nv50_clear_buffer(struct pipe_context *pipe,
BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1);
PUSH_DATA (push, 0x3c);
+ BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+ PUSH_DATA (push, nv50->cond_condmode);
+
+ if (buf->mm) {
+ nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence);
+ nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr);
+ }
+
if (width * height != elements) {
offset += width * height * data_size;
width = elements - width * height;
- height = 1;
- BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(0)), 2);
- PUSH_DATAh(push, buf->bo->offset + buf->offset + offset);
- PUSH_DATA (push, buf->bo->offset + buf->offset + offset);
- BEGIN_NV04(push, NV50_3D(RT_HORIZ(0)), 2);
- PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | (width * data_size));
- PUSH_DATA (push, height);
- BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1);
- PUSH_DATA (push, 0x3c);
+ nv50_clear_buffer_push(pipe, res, offset, width * data_size,
+ data, data_size);
}
- BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
- PUSH_DATA (push, nv50->cond_condmode);
-
- nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence);
- nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr);
-
nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
index 4daa57d47bb..7f76ec66edb 100644
--- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
@@ -491,3 +491,52 @@ daic_runout:
daic_runout_check:
branz annul $r7 #daic_runout
bra annul #daic_restore
+
+/* NVC0_3D_MACRO_QUERY_BUFFER_WRITE:
+ *
+ * This is a combination macro for all of our query buffer object needs.
+ * It has the option to clamp results to a configurable amount, as well as
+ * to write out one or two words.
+ *
+ * We use the query engine to write out the values, and expect the query
+ * address to point to the right place.
+ *
+ * arg = clamp value (0 means unclamped). clamped means just 1 written value.
+ * parm[0] = LSB of end value
+ * parm[1] = MSB of end value
+ * parm[2] = LSB of start value
+ * parm[3] = MSB of start value
+ * parm[4] = desired sequence
+ * parm[5] = actual sequence
+ */
+.section #mme9097_query_buffer_write
+ parm $r2 /* parm[0]: LSB of end value */
+ parm $r3 /* parm[1]: MSB of end value */
+ parm $r4 /* parm[2]: LSB of start value */
+ parm $r5 maddr 0x16c2 /* QUERY_SEQUENCE */ /* parm[3]: MSB of start value */
+ parm $r6 /* parm[4]: desired sequence */
+ parm $r7 /* parm[5]: actual sequence */
+ mov $r6 (sub $r7 $r6) /* actual - desired */
+ mov $r6 (sbb 0x0 0x0) /* if there was underflow, not reached yet */
+ braz annul $r6 #qbw_ready /* no underflow: go compute and write the result */
+ exit
+qbw_ready:
+ mov $r2 (sub $r2 $r4) /* low word of (end - start) */
+ braz $r1 #qbw_postclamp /* $r1 (arg, the clamp value) == 0: unclamped; no annul here, presumably so the following sbb still runs for the unclamped path which sends $r3 below -- confirm against MME branch semantics */
+ mov $r3 (sbb $r3 $r5) /* high word of (end - start), with borrow */
+ branz annul $r3 #qbw_clamp /* non-zero high word: clamp */
+ mov $r4 (sub $r1 $r2) /* clamp - low word ... */
+ mov $r4 (sbb 0x0 0x0) /* ... keep only whether that underflowed */
+ braz annul $r4 #qbw_postclamp /* no underflow: low word is within the clamp */
+qbw_clamp:
+ mov $r2 $r1 /* result = clamp value */
+qbw_postclamp:
+ send $r2 /* first (or only) result word */
+ mov $r4 0x1000
+ branz annul $r1 #qbw_done /* clamped: just one written value */
+ send (extrinsrt 0x0 $r4 0x0 0x10 0x10)
+ maddr 0x16c2 /* QUERY_SEQUENCE */
+ send $r3 /* second result word (high word of the difference) */
+qbw_done:
+ exit send (extrinsrt 0x0 $r4 0x0 0x10 0x10)
+ nop
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
index bf8625e0584..ecadf7e4d29 100644
--- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
@@ -332,3 +332,36 @@ uint32_t mme9097_draw_arrays_indirect_count[] = {
0xfffef837,
0xfffdc027,
};
+
+uint32_t mme9097_query_buffer_write[] = {
+ 0x00000201,
+ 0x00000301,
+/* 0x000a: qbw_ready */
+ 0x00000401,
+ 0x05b08551,
+/* 0x0011: qbw_clamp */
+/* 0x0012: qbw_postclamp */
+ 0x00000601,
+ 0x00000701,
+/* 0x0018: qbw_done */
+ 0x0005be10,
+ 0x00060610,
+ 0x0000b027,
+ 0x00000091,
+ 0x00051210,
+ 0x0001c807,
+ 0x00075b10,
+ 0x00011837,
+ 0x00048c10,
+ 0x00060410,
+ 0x0000a027,
+ 0x00000a11,
+ 0x00001041,
+ 0x04000411,
+ 0x00010837,
+ 0x84010042,
+ 0x05b08021,
+ 0x00001841,
+ 0x840100c2,
+ 0x00000011,
+};
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
index 162661ff2a7..547b8f5d309 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -56,6 +56,7 @@ static void
nvc0_memory_barrier(struct pipe_context *pipe, unsigned flags)
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
int i, s;
if (flags & PIPE_BARRIER_MAPPED_BUFFER) {
@@ -90,6 +91,9 @@ nvc0_memory_barrier(struct pipe_context *pipe, unsigned flags)
}
}
}
+ if (flags & PIPE_BARRIER_SHADER_BUFFER) {
+ IMMED_NVC0(push, NVC0_3D(MEM_BARRIER), 0x1011);
+ }
}
static void
@@ -122,6 +126,10 @@ nvc0_context_unreference_resources(struct nvc0_context *nvc0)
pipe_surface_reference(&nvc0->surfaces[s][i], NULL);
}
+ for (s = 0; s < 6; ++s)
+ for (i = 0; i < NVC0_MAX_BUFFERS; ++i)
+ pipe_resource_reference(&nvc0->buffers[s][i].buffer, NULL);
+
for (i = 0; i < nvc0->num_tfbbufs; ++i)
pipe_so_target_reference(&nvc0->tfbbuf[i], NULL);
@@ -180,10 +188,9 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
int ref)
{
struct nvc0_context *nvc0 = nvc0_context(&ctx->pipe);
- unsigned bind = res->bind ? res->bind : PIPE_BIND_VERTEX_BUFFER;
unsigned s, i;
- if (bind & PIPE_BIND_RENDER_TARGET) {
+ if (res->bind & PIPE_BIND_RENDER_TARGET) {
for (i = 0; i < nvc0->framebuffer.nr_cbufs; ++i) {
if (nvc0->framebuffer.cbufs[i] &&
nvc0->framebuffer.cbufs[i]->texture == res) {
@@ -194,7 +201,7 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
}
}
}
- if (bind & PIPE_BIND_DEPTH_STENCIL) {
+ if (res->bind & PIPE_BIND_DEPTH_STENCIL) {
if (nvc0->framebuffer.zsbuf &&
nvc0->framebuffer.zsbuf->texture == res) {
nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
@@ -204,12 +211,7 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
}
}
- if (bind & (PIPE_BIND_VERTEX_BUFFER |
- PIPE_BIND_INDEX_BUFFER |
- PIPE_BIND_CONSTANT_BUFFER |
- PIPE_BIND_STREAM_OUTPUT |
- PIPE_BIND_COMMAND_ARGS_BUFFER |
- PIPE_BIND_SAMPLER_VIEW)) {
+ if (res->target == PIPE_BUFFER) {
for (i = 0; i < nvc0->num_vtxbufs; ++i) {
if (nvc0->vtxbuf[i].buffer == res) {
nvc0->dirty |= NVC0_NEW_ARRAYS;
@@ -253,6 +255,18 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
}
}
}
+
+ for (s = 0; s < 5; ++s) {
+ for (i = 0; i < NVC0_MAX_BUFFERS; ++i) {
+ if (nvc0->buffers[s][i].buffer == res) {
+ nvc0->buffers_dirty[s] |= 1 << i;
+ nvc0->dirty |= NVC0_NEW_BUFFERS;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_BUF);
+ if (!--ref)
+ return ref;
+ }
+ }
+ }
}
return ref;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 12195489691..4ab2ac41183 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -56,6 +56,7 @@
#define NVC0_NEW_SURFACES (1 << 23)
#define NVC0_NEW_MIN_SAMPLES (1 << 24)
#define NVC0_NEW_TESSFACTOR (1 << 25)
+#define NVC0_NEW_BUFFERS (1 << 26)
#define NVC0_NEW_CP_PROGRAM (1 << 0)
#define NVC0_NEW_CP_SURFACES (1 << 1)
@@ -73,9 +74,10 @@
#define NVC0_BIND_CB(s, i) (164 + 16 * (s) + (i))
#define NVC0_BIND_TFB 244
#define NVC0_BIND_SUF 245
-#define NVC0_BIND_SCREEN 246
-#define NVC0_BIND_TLS 247
-#define NVC0_BIND_3D_COUNT 248
+#define NVC0_BIND_BUF 246
+#define NVC0_BIND_SCREEN 247
+#define NVC0_BIND_TLS 249
+#define NVC0_BIND_3D_COUNT 250
/* compute bufctx (during launch_grid) */
#define NVC0_BIND_CP_CB(i) ( 0 + (i))
@@ -187,10 +189,15 @@ struct nvc0_context {
struct nvc0_blitctx *blit;
+ /* NOTE: some of these surfaces may reference buffers */
struct pipe_surface *surfaces[2][NVC0_MAX_SURFACE_SLOTS];
uint16_t surfaces_dirty[2];
uint16_t surfaces_valid[2];
+ struct pipe_shader_buffer buffers[6][NVC0_MAX_BUFFERS];
+ uint32_t buffers_dirty[6];
+ uint32_t buffers_valid[6];
+
struct util_dynarray global_residents;
};
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
index 27c026b8b30..49e176cbd49 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
@@ -33,4 +33,6 @@
#define NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT 0x00003850
+#define NVC0_3D_MACRO_QUERY_BUFFER_WRITE 0x00003858
+
#endif /* __NVC0_MACROS_H__ */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index c3b53621630..93f211bd5fc 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -554,6 +554,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
}
info->io.resInfoCBSlot = 15;
info->io.sampleInfoBase = 256 + 128;
+ info->io.suInfoBase = 512;
info->io.msInfoCBSlot = 15;
info->io.msInfoBase = 0; /* TODO */
}
@@ -635,6 +636,8 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
}
*/
if (info->io.globalAccess)
+ prog->hdr[0] |= 1 << 26;
+ if (info->io.globalAccess & 0x2)
prog->hdr[0] |= 1 << 16;
if (info->io.fp64)
prog->hdr[0] |= 1 << 27;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index 7497317c419..d2acce7d5be 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -74,6 +74,24 @@ nvc0_get_query_result(struct pipe_context *pipe, struct pipe_query *pq,
}
+/* Context-level get_query_result_resource entry point: forwards the request
+ * to the query's own get_query_result_resource callback, passing all
+ * arguments through unchanged (pipe is converted with nvc0_context()).
+ * If the query's funcs table has no such callback, this asserts in debug
+ * builds and otherwise returns without doing anything.
+ */
static void
+nvc0_get_query_result_resource(struct pipe_context *pipe,
+ struct pipe_query *pq,
+ boolean wait,
+ enum pipe_query_value_type result_type,
+ int index,
+ struct pipe_resource *resource,
+ unsigned offset)
+{
+ struct nvc0_query *q = nvc0_query(pq);
+ if (!q->funcs->get_query_result_resource) {
+ assert(!"Unexpected lack of get_query_result_resource");
+ return;
+ }
+ q->funcs->get_query_result_resource(nvc0_context(pipe), q, wait, result_type,
+ index, resource, offset);
+}
+
+static void
nvc0_render_condition(struct pipe_context *pipe,
struct pipe_query *pq,
boolean condition, uint mode)
@@ -129,7 +147,7 @@ nvc0_render_condition(struct pipe_context *pipe,
}
if (wait)
- nvc0_hw_query_fifo_wait(push, q);
+ nvc0_hw_query_fifo_wait(nvc0, q);
PUSH_SPACE(push, 7);
PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
@@ -262,6 +280,7 @@ nvc0_init_query_functions(struct nvc0_context *nvc0)
pipe->begin_query = nvc0_begin_query;
pipe->end_query = nvc0_end_query;
pipe->get_query_result = nvc0_get_query_result;
+ pipe->get_query_result_resource = nvc0_get_query_result_resource;
pipe->render_condition = nvc0_render_condition;
nvc0->cond_condmode = NVC0_3D_COND_MODE_ALWAYS;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.h b/src/gallium/drivers/nouveau/nvc0/nvc0_query.h
index c46361c31aa..a887b220557 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.h
@@ -14,6 +14,13 @@ struct nvc0_query_funcs {
void (*end_query)(struct nvc0_context *, struct nvc0_query *);
boolean (*get_query_result)(struct nvc0_context *, struct nvc0_query *,
boolean, union pipe_query_result *);
+ void (*get_query_result_resource)(struct nvc0_context *nvc0,
+ struct nvc0_query *q,
+ boolean wait,
+ enum pipe_query_value_type result_type,
+ int index,
+ struct pipe_resource *resource,
+ unsigned offset);
};
struct nvc0_query {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
index 1bed0162baf..62385884137 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
@@ -358,11 +358,119 @@ nvc0_hw_get_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
return true;
}
+/* Write the result (or, for index == -1, the availability) of a hardware
+ * query into a buffer resource at the given byte offset.
+ *
+ * The availability path pushes one or two 32-bit words through push_cb.
+ * The result path emits a QUERY_BUFFER_WRITE macro call with 7 parameters:
+ * a clamp value chosen from the query/result type, the two query values
+ * (each pushed as a 64-bit pair), and a sequence/address pair that is zeroed
+ * when the result is already known to be ready or a FIFO wait was emitted.
+ */
+static void
+nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
+ struct nvc0_query *q,
+ boolean wait,
+ enum pipe_query_value_type result_type,
+ int index,
+ struct pipe_resource *resource,
+ unsigned offset)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_hw_query *hq = nvc0_hw_query(q);
+ struct nv04_resource *buf = nv04_resource(resource);
+ unsigned stride;
+
+ assert(!hq->funcs || !hq->funcs->get_query_result);
+
+ if (index == -1) {
+ /* TODO: Use a macro to write the availability of the query */
+ if (hq->state != NVC0_HW_QUERY_STATE_READY)
+ nvc0_hw_query_update(nvc0->screen->base.client, q);
+ uint32_t ready[2] = {hq->state == NVC0_HW_QUERY_STATE_READY};
+ nvc0->base.push_cb(&nvc0->base, buf, offset,
+ result_type >= PIPE_QUERY_TYPE_I64 ? 2 : 1,
+ ready);
+ return;
+ }
+
+ /* If the fence guarding this query has not been emitted, that makes a lot
+ * of the following logic more complicated.
+ */
+ if (hq->is64bit && hq->fence->state < NOUVEAU_FENCE_STATE_EMITTED)
+ nouveau_fence_emit(hq->fence);
+
+ /* We either need to compute a 32- or 64-bit difference between 2 values,
+ * and then store the result as either a 32- or 64-bit value. As such let's
+ * treat all inputs as 64-bit (and just push an extra 0 for the 32-bit
+ * ones), and have one macro that clamps result to i32, u32, or just
+ * outputs the difference (no need to worry about 64-bit clamping).
+ */
+ if (hq->state != NVC0_HW_QUERY_STATE_READY)
+ nvc0_hw_query_update(nvc0->screen->base.client, q);
+
+ if (wait && hq->state != NVC0_HW_QUERY_STATE_READY)
+ nvc0_hw_query_fifo_wait(nvc0, q);
+
+ nouveau_pushbuf_space(push, 16, 2, 0);
+ PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR);
+ BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, buf->address + offset);
+ PUSH_DATA (push, buf->address + offset);
+ BEGIN_1IC0(push, NVC0_3D(MACRO_QUERY_BUFFER_WRITE), 7);
+ /* macro parameter 1: clamp value selected by query type / result type */
+ if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE) /* XXX what if 64-bit? */
+ PUSH_DATA(push, 0x00000001);
+ else if (result_type == PIPE_QUERY_TYPE_I32)
+ PUSH_DATA(push, 0x7fffffff);
+ else if (result_type == PIPE_QUERY_TYPE_U32)
+ PUSH_DATA(push, 0xffffffff);
+ else
+ PUSH_DATA(push, 0x00000000);
+
+ /* stride = distance, in 16-byte query slots, between the two values that
+ * are pushed for a 64-bit query (see the offsets used below)
+ */
+ switch (q->type) {
+ case PIPE_QUERY_SO_STATISTICS:
+ stride = 2;
+ break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ stride = 12;
+ break;
+ default:
+ assert(index == 0);
+ stride = 1;
+ break;
+ }
+
+ /* macro parameters 2-5: the two query values, each as a 64-bit pair */
+ if (hq->is64bit) {
+ nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 * index,
+ 8 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+ nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 * (index + stride),
+ 8 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+ } else {
+ /* NOTE(review): index is not used on the 32-bit path - confirm intended */
+ nouveau_pushbuf_data(push, hq->bo, hq->offset + 4,
+ 4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+ PUSH_DATA(push, 0);
+ nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 + 4,
+ 4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+ PUSH_DATA(push, 0);
+ }
+
+ /* macro parameters 6-7: expected sequence number and the value currently
+ * stored in memory; both zero when no readiness check is needed
+ */
+ if (wait || hq->state == NVC0_HW_QUERY_STATE_READY) {
+ PUSH_DATA(push, 0);
+ PUSH_DATA(push, 0);
+ } else if (hq->is64bit) {
+ PUSH_DATA(push, hq->fence->sequence);
+ nouveau_pushbuf_data(push, nvc0->screen->fence.bo, 0,
+ 4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+ } else {
+ PUSH_DATA(push, hq->sequence);
+ nouveau_pushbuf_data(push, hq->bo, hq->offset,
+ 4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+ }
+
+ /* attach the current fence to the destination buffer as both its fence
+ * and its write fence
+ */
+ if (buf->mm) {
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
+ }
+}
+
+/* Function table for hardware queries, wiring each nvc0_query_funcs hook to
+ * its nvc0_hw_* implementation.
+ */
static const struct nvc0_query_funcs hw_query_funcs = {
.destroy_query = nvc0_hw_destroy_query,
.begin_query = nvc0_hw_begin_query,
.end_query = nvc0_hw_end_query,
.get_query_result = nvc0_hw_get_query_result,
+ .get_query_result_resource = nvc0_hw_get_query_result_resource,
};
struct nvc0_query *
@@ -476,8 +584,9 @@ nvc0_hw_query_pushbuf_submit(struct nouveau_pushbuf *push,
}
void
-nvc0_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nvc0_query *q)
+nvc0_hw_query_fifo_wait(struct nvc0_context *nvc0, struct nvc0_query *q)
{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_hw_query *hq = nvc0_hw_query(q);
unsigned offset = hq->offset;
@@ -486,9 +595,15 @@ nvc0_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nvc0_query *q)
PUSH_SPACE(push, 5);
PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
BEGIN_NVC0(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
- PUSH_DATAh(push, hq->bo->offset + offset);
- PUSH_DATA (push, hq->bo->offset + offset);
- PUSH_DATA (push, hq->sequence);
+ if (hq->is64bit) {
+ PUSH_DATAh(push, nvc0->screen->fence.bo->offset);
+ PUSH_DATA (push, nvc0->screen->fence.bo->offset);
+ PUSH_DATA (push, hq->fence->sequence);
+ } else {
+ PUSH_DATAh(push, hq->bo->offset + offset);
+ PUSH_DATA (push, hq->bo->offset + offset);
+ PUSH_DATA (push, hq->sequence);
+ }
PUSH_DATA (push, (1 << 12) |
NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h
index 3701eb7100f..8225755d85e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h
@@ -51,6 +51,6 @@ void
nvc0_hw_query_pushbuf_submit(struct nouveau_pushbuf *, struct nvc0_query *,
unsigned);
void
-nvc0_hw_query_fifo_wait(struct nouveau_pushbuf *, struct nvc0_query *);
+nvc0_hw_query_fifo_wait(struct nvc0_context *, struct nvc0_query *);
#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 84dbd69b8a5..d368fda707d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -111,6 +111,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return 256;
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
return 1; /* 256 for binding as RT, but that's not possible in GL */
+ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+ return 16;
case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
return NOUVEAU_MIN_BUFFER_MAP_ALIGN;
case PIPE_CAP_MAX_VIEWPORTS:
@@ -189,6 +191,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_MULTI_DRAW_INDIRECT:
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
@@ -212,10 +215,12 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
- case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
case PIPE_CAP_VENDOR_ID:
@@ -322,8 +327,9 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
- case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
return 0;
+ case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ return NVC0_MAX_BUFFERS;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
return 16; /* would be 32 in linked (OpenGL-style) mode */
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
@@ -676,8 +682,9 @@ nvc0_screen_create(struct nouveau_device *dev)
push->rsvd_kick = 5;
screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER |
+ PIPE_BIND_SHADER_BUFFER |
PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER |
- PIPE_BIND_COMMAND_ARGS_BUFFER;
+ PIPE_BIND_COMMAND_ARGS_BUFFER | PIPE_BIND_QUERY_BUFFER;
screen->base.sysmem_bindings |=
PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;
@@ -891,9 +898,9 @@ nvc0_screen_create(struct nouveau_device *dev)
/* TIC and TSC entries for each unit (nve4+ only) */
/* auxiliary constants (6 user clip planes, base instance id) */
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
- PUSH_DATA (push, 512);
- PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (i << 9));
- PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (i << 9));
+ PUSH_DATA (push, 1024);
+ PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (i << 10));
+ PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (i << 10));
BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
PUSH_DATA (push, (15 << 4) | 1);
if (screen->eng3d->oclass >= NVE4_3D_CLASS) {
@@ -913,8 +920,8 @@ nvc0_screen_create(struct nouveau_device *dev)
/* return { 0.0, 0.0, 0.0, 0.0 } for out-of-bounds vtxbuf access */
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 256);
- PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
- PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
+ PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
+ PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
BEGIN_1IC0(push, NVC0_3D(CB_POS), 5);
PUSH_DATA (push, 0);
PUSH_DATAf(push, 0.0f);
@@ -922,8 +929,8 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATAf(push, 0.0f);
PUSH_DATAf(push, 0.0f);
BEGIN_NVC0(push, NVC0_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
- PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
- PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
+ PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
+ PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
if (screen->base.drm->version >= 0x01000101) {
ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
@@ -953,8 +960,12 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATA (push, screen->tls->size);
BEGIN_NVC0(push, NVC0_3D(WARP_TEMP_ALLOC), 1);
PUSH_DATA (push, 0);
+ /* Reduce likelihood of collision with real buffers by placing the hole at
+ * the top of the 4G area. This will have to be dealt with for real
+ * eventually by blocking off that area from the VM.
+ */
BEGIN_NVC0(push, NVC0_3D(LOCAL_BASE), 1);
- PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0xff << 24);
if (screen->eng3d->oclass < GM107_3D_CLASS) {
ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 17, 1 << 20, NULL,
@@ -1039,6 +1050,7 @@ nvc0_screen_create(struct nouveau_device *dev)
MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect);
MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count);
MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count);
+ MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write);
BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1);
PUSH_DATA (push, 1);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
index 8b73102b98b..1a56177815c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
@@ -22,6 +22,8 @@
#define NVC0_MAX_VIEWPORTS 16
+#define NVC0_MAX_BUFFERS 32
+
struct nvc0_context;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
index dc02b011bdf..382a18ef153 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
@@ -316,7 +316,7 @@ nvc0_tfb_validate(struct nvc0_context *nvc0)
continue;
if (!targ->clean)
- nvc0_hw_query_fifo_wait(push, nvc0_query(targ->pq));
+ nvc0_hw_query_fifo_wait(nvc0, nvc0_query(targ->pq));
nouveau_pushbuf_space(push, 0, 0, 1);
BEGIN_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 5);
PUSH_DATA (push, 1);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index 24a6c222dd5..cf3d3497c78 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -1243,11 +1243,50 @@ nvc0_set_shader_images(struct pipe_context *pipe, unsigned shader,
unsigned start_slot, unsigned count,
struct pipe_image_view **views)
{
-#if 0
- nvc0_bind_surfaces_range(nvc0_context(pipe), 0, start, nr, views);
+}
+
+/* Bind, or unbind when pbuffers is NULL, the shader buffer slots
+ * [start, start + nr) of shader stage t.
+ *
+ * For every slot the offset, size and resource reference are copied into
+ * nvc0->buffers[t], the matching bit of buffers_valid[t] is set or cleared
+ * depending on whether a resource is present, and the whole range is marked
+ * in buffers_dirty[t]. The NVC0_BIND_BUF bufctx bin is reset afterwards.
+ */
+static void
+nvc0_bind_buffers_range(struct nvc0_context *nvc0, const unsigned t,
+ unsigned start, unsigned nr,
+ struct pipe_shader_buffer *pbuffers)
+{
+ const unsigned end = start + nr;
+ /* nr can cover all 32 slots, and shifting a 32-bit value by 32 is
+ * undefined, so build the full mask explicitly in that case; use unsigned
+ * constants so bit 31 never overflows a signed int
+ */
+ const unsigned mask = (nr >= 32 ? ~0u : ((1u << nr) - 1)) << start;
+ unsigned i;
+
+ assert(t < 5);
+
+ if (pbuffers) {
+ for (i = start; i < end; ++i) {
+ const unsigned p = i - start;
+ if (pbuffers[p].buffer)
+ nvc0->buffers_valid[t] |= (1u << i);
+ else
+ nvc0->buffers_valid[t] &= ~(1u << i);
+ nvc0->buffers[t][i].buffer_offset = pbuffers[p].buffer_offset;
+ nvc0->buffers[t][i].buffer_size = pbuffers[p].buffer_size;
+ pipe_resource_reference(&nvc0->buffers[t][i].buffer, pbuffers[p].buffer);
+ }
+ } else {
+ /* unbind: drop every reference in the range and clear its valid bits */
+ for (i = start; i < end; ++i)
+ pipe_resource_reference(&nvc0->buffers[t][i].buffer, NULL);
+ nvc0->buffers_valid[t] &= ~mask;
+ }
+ nvc0->buffers_dirty[t] |= mask;
+
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_BUF);
+}
+
+/* pipe_context::set_shader_buffers entry point: maps the gallium shader type
+ * to the driver's stage index, rebinds the requested slot range and flags
+ * NVC0_NEW_BUFFERS in the context's dirty state.
+ */
+static void
+nvc0_set_shader_buffers(struct pipe_context *pipe,
+ unsigned shader,
+ unsigned start, unsigned nr,
+ struct pipe_shader_buffer *buffers)
+{
+ const unsigned s = nvc0_shader_stage(shader);
+ nvc0_bind_buffers_range(nvc0_context(pipe), s, start, nr, buffers);
- nvc0_context(pipe)->dirty |= NVC0_NEW_SURFACES;
-#endif
+ nvc0_context(pipe)->dirty |= NVC0_NEW_BUFFERS;
}
static inline void
@@ -1377,6 +1416,7 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
pipe->set_global_binding = nvc0_set_global_bindings;
pipe->set_compute_resources = nvc0_set_compute_resources;
pipe->set_shader_images = nvc0_set_shader_images;
+ pipe->set_shader_buffers = nvc0_set_shader_buffers;
nvc0->sample_mask = ~0;
nvc0->min_samples = 1;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index b02a590c375..c17223a1b2b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -183,9 +183,9 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
ms = 1 << ms_mode;
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
- PUSH_DATA (push, 512);
- PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 9));
- PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 9));
+ PUSH_DATA (push, 1024);
+ PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 10));
+ PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 10));
BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms);
PUSH_DATA (push, 256 + 128);
for (i = 0; i < ms; i++) {
@@ -317,9 +317,9 @@ nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s)
struct nouveau_bo *bo = nvc0->screen->uniform_bo;
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
- PUSH_DATA (push, 512);
- PUSH_DATAh(push, bo->offset + (5 << 16) + (s << 9));
- PUSH_DATA (push, bo->offset + (5 << 16) + (s << 9));
+ PUSH_DATA (push, 1024);
+ PUSH_DATAh(push, bo->offset + (5 << 16) + (s << 10));
+ PUSH_DATA (push, bo->offset + (5 << 16) + (s << 10));
BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1);
PUSH_DATA (push, 256);
PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4);
@@ -471,6 +471,39 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
}
+/* Upload the shader buffer table for each of the 5 shader stages.
+ *
+ * For stage s the 1024-byte constant buffer at
+ * uniform_bo->offset + (5 << 16) + (s << 10) is selected and, starting at
+ * CB_POS 512, four words are written per buffer slot: address low word,
+ * address high word, size, and 0. Unbound slots get four zero words. Every
+ * bound resource is also referenced RDWR in bufctx_3d.
+ *
+ * NOTE(review): no PUSH_SPACE is reserved in this function for the
+ * 5 + 4 * NVC0_MAX_BUFFERS words emitted per stage - confirm the caller
+ * guarantees enough pushbuf room.
+ */
static void
+nvc0_validate_buffers(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ int i, s;
+
+ for (s = 0; s < 5; s++) {
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ PUSH_DATA (push, 1024);
+ PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (s << 10));
+ PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (s << 10));
+ BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
+ PUSH_DATA (push, 512);
+ for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
+ if (nvc0->buffers[s][i].buffer) {
+ struct nv04_resource *res =
+ nv04_resource(nvc0->buffers[s][i].buffer);
+ /* low word first, then high word - the reverse of the usual
+ * PUSH_DATAh/PUSH_DATA order used for method addresses
+ */
+ PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset);
+ PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset);
+ PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
+ PUSH_DATA (push, 0);
+ BCTX_REFN(nvc0->bufctx_3d, BUF, res, RDWR);
+ } else {
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ }
+ }
+ }
+
+}
+
+static void
nvc0_validate_sample_mask(struct nvc0_context *nvc0)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
@@ -663,6 +696,7 @@ static struct state_validate {
{ nve4_set_tex_handles, NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS },
{ nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS },
{ nvc0_validate_surfaces, NVC0_NEW_SURFACES },
+ { nvc0_validate_buffers, NVC0_NEW_BUFFERS },
{ nvc0_idxbuf_validate, NVC0_NEW_IDXBUF },
{ nvc0_tfb_validate, NVC0_NEW_TFB_TARGETS | NVC0_NEW_GMTYPROG },
{ nvc0_validate_min_samples, NVC0_NEW_MIN_SAMPLES },
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index 4e43c4e99fd..71726d1aa59 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -357,27 +357,132 @@ nvc0_clear_render_target(struct pipe_context *pipe,
}
+/* Fill [offset, offset + size) of a buffer with a repeating data pattern by
+ * pushing the pattern inline through the M2MF engine.
+ *
+ * The fill is emitted in chunks of at most NV04_PFIFO_MAX_PACKET_LEN words,
+ * each chunk holding a whole number of patterns. The loop stops early if
+ * PUSH_SPACE fails.
+ *
+ * NOTE(review): data_size is assumed to be a non-zero multiple of 4 and size
+ * a multiple of data_size; otherwise nr can become 0 and the loop would not
+ * make progress - confirm against callers.
+ */
static void
-nvc0_clear_buffer_cpu(struct pipe_context *pipe,
- struct pipe_resource *res,
- unsigned offset, unsigned size,
- const void *data, int data_size)
+nvc0_clear_buffer_push_nvc0(struct pipe_context *pipe,
+ struct pipe_resource *res,
+ unsigned offset, unsigned size,
+ const void *data, int data_size)
{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nv04_resource *buf = nv04_resource(res);
- struct pipe_transfer *pt;
- struct pipe_box box;
- unsigned elements, i;
+ unsigned i;
- elements = size / data_size;
+ nouveau_bufctx_refn(nvc0->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR);
+ nouveau_pushbuf_bufctx(push, nvc0->bufctx);
+ nouveau_pushbuf_validate(push);
- u_box_1d(offset, size, &box);
+ /* count = remaining 32-bit words; data_words = words per pattern */
+ unsigned count = (size + 3) / 4;
+ unsigned data_words = data_size / 4;
- uint8_t *map = buf->vtbl->transfer_map(pipe, res, 0, PIPE_TRANSFER_WRITE,
- &box, &pt);
+ while (count) {
+ /* nr_data = whole patterns in this chunk; nr = words in this chunk */
+ unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / data_words;
+ unsigned nr = nr_data * data_words;
- for (i = 0; i < elements; ++i)
- memcpy(&map[i*data_size], data, data_size);
+ if (!PUSH_SPACE(push, nr + 9))
+ break;
- buf->vtbl->transfer_unmap(pipe, pt);
+ BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
+ PUSH_DATAh(push, buf->address + offset);
+ PUSH_DATA (push, buf->address + offset);
+ BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, MIN2(size, nr * 4));
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1);
+ PUSH_DATA (push, 0x100111);
+
+ /* must not be interrupted (trap on QUERY fence, 0x50 works however) */
+ BEGIN_NIC0(push, NVC0_M2MF(DATA), nr);
+ for (i = 0; i < nr_data; i++)
+ PUSH_DATAp(push, data, data_words);
+
+ count -= nr;
+ offset += nr * 4;
+ size -= nr * 4;
+ }
+
+ /* attach the current fence to the buffer as both fence and write fence */
+ if (buf->mm) {
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
+ }
+
+ nouveau_bufctx_reset(nvc0->bufctx, 0);
+}
+
+/* Fill [offset, offset + size) of a buffer with a repeating data pattern by
+ * pushing the pattern inline through the NVE4 P2MF upload methods.
+ *
+ * The fill is emitted in chunks of at most NV04_PFIFO_MAX_PACKET_LEN words,
+ * each chunk holding a whole number of patterns. The loop stops early if
+ * PUSH_SPACE fails.
+ *
+ * NOTE(review): data_size is assumed to be a non-zero multiple of 4 and size
+ * a multiple of data_size; otherwise nr can become 0 and the loop would not
+ * make progress - confirm against callers.
+ */
+static void
+nvc0_clear_buffer_push_nve4(struct pipe_context *pipe,
+ struct pipe_resource *res,
+ unsigned offset, unsigned size,
+ const void *data, int data_size)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nv04_resource *buf = nv04_resource(res);
+ unsigned i;
+
+ nouveau_bufctx_refn(nvc0->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR);
+ nouveau_pushbuf_bufctx(push, nvc0->bufctx);
+ nouveau_pushbuf_validate(push);
+
+ /* count = remaining 32-bit words; data_words = words per pattern */
+ unsigned count = (size + 3) / 4;
+ unsigned data_words = data_size / 4;
+
+ while (count) {
+ /* nr_data = whole patterns in this chunk; nr = words in this chunk */
+ unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / data_words;
+ unsigned nr = nr_data * data_words;
+
+ if (!PUSH_SPACE(push, nr + 10))
+ break;
+
+ BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, buf->address + offset);
+ PUSH_DATA (push, buf->address + offset);
+ BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, MIN2(size, nr * 4));
+ PUSH_DATA (push, 1);
+ /* must not be interrupted (trap on QUERY fence, 0x50 works however) */
+ BEGIN_1IC0(push, NVE4_P2MF(UPLOAD_EXEC), nr + 1);
+ PUSH_DATA (push, 0x1001);
+ for (i = 0; i < nr_data; i++)
+ PUSH_DATAp(push, data, data_words);
+
+ count -= nr;
+ offset += nr * 4;
+ size -= nr * 4;
+ }
+
+ /* attach the current fence to the buffer as both fence and write fence */
+ if (buf->mm) {
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
+ }
+
+ nouveau_bufctx_reset(nvc0->bufctx, 0);
+}
+
+/* Fill a buffer range with a repeating pattern via inline pushbuf data.
+ *
+ * 1-byte and 2-byte patterns are first replicated into a 32-bit word so the
+ * per-chip helpers only ever see patterns of at least 4 bytes. The helper is
+ * chosen by 3D class: the nvc0 variant below NVE4_3D_CLASS, the nve4 variant
+ * otherwise.
+ */
+static void
+nvc0_clear_buffer_push(struct pipe_context *pipe,
+ struct pipe_resource *res,
+ unsigned offset, unsigned size,
+ const void *data, int data_size)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ unsigned tmp;
+
+ if (data_size == 1) {
+ /* replicate the byte into all four bytes of tmp */
+ tmp = *(unsigned char *)data;
+ tmp = (tmp << 24) | (tmp << 16) | (tmp << 8) | tmp;
+ data = &tmp;
+ data_size = 4;
+ } else if (data_size == 2) {
+ /* replicate the 16-bit value into both halves of tmp */
+ tmp = *(unsigned short *)data;
+ tmp = (tmp << 16) | tmp;
+ data = &tmp;
+ data_size = 4;
+ }
+
+ if (nvc0->screen->base.class_3d < NVE4_3D_CLASS)
+ nvc0_clear_buffer_push_nvc0(pipe, res, offset, size, data, data_size);
+ else
+ nvc0_clear_buffer_push_nve4(pipe, res, offset, size, data, data_size);
}
static void
@@ -402,10 +507,8 @@ nvc0_clear_buffer(struct pipe_context *pipe,
memcpy(&color.ui, data, 16);
break;
case 12:
- /* This doesn't work, RGB32 is not a valid RT format.
- * dst_fmt = PIPE_FORMAT_R32G32B32_UINT;
- * memcpy(&color.ui, data, 12);
- * memset(&color.ui[3], 0, 4);
+ /* RGB32 is not a valid RT format. This will be handled by the pushbuf
+ * uploader.
*/
break;
case 8:
@@ -437,14 +540,26 @@ nvc0_clear_buffer(struct pipe_context *pipe,
assert(size % data_size == 0);
if (data_size == 12) {
- /* TODO: Find a way to do this with the GPU! */
- nvc0_clear_buffer_cpu(pipe, res, offset, size, data, data_size);
+ nvc0_clear_buffer_push(pipe, res, offset, size, data, data_size);
return;
}
+ if (offset & 0xff) {
+ unsigned fixup_size = MIN2(size, align(offset, 0x100) - offset);
+ assert(fixup_size % data_size == 0);
+ nvc0_clear_buffer_push(pipe, res, offset, fixup_size, data, data_size);
+ offset += fixup_size;
+ size -= fixup_size;
+ if (!size)
+ return;
+ }
+
elements = size / data_size;
height = (elements + 16383) / 16384;
width = elements / height;
+ if (height > 1)
+ width &= ~0xff;
+ assert(width > 0);
if (!PUSH_SPACE(push, 40))
return;
@@ -465,7 +580,7 @@ nvc0_clear_buffer(struct pipe_context *pipe,
BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(0)), 9);
PUSH_DATAh(push, buf->address + offset);
PUSH_DATA (push, buf->address + offset);
- PUSH_DATA (push, width * data_size);
+ PUSH_DATA (push, align(width * data_size, 0x100));
PUSH_DATA (push, height);
PUSH_DATA (push, nvc0_format_table[dst_fmt].rt);
PUSH_DATA (push, NVC0_3D_RT_TILE_MODE_LINEAR);
@@ -480,24 +595,20 @@ nvc0_clear_buffer(struct pipe_context *pipe,
IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c);
+ IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
+
+ if (buf->mm) {
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
+ }
+
if (width * height != elements) {
offset += width * height * data_size;
width = elements - width * height;
- height = 1;
-
- BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(0)), 4);
- PUSH_DATAh(push, buf->address + offset);
- PUSH_DATA (push, buf->address + offset);
- PUSH_DATA (push, width * data_size);
- PUSH_DATA (push, height);
-
- IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c);
+ nvc0_clear_buffer_push(pipe, res, offset, width * data_size,
+ data, data_size);
}
- IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
-
- nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
- nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
index 74090ce40a5..7223f5aecfb 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -515,12 +515,12 @@ nve4_set_tex_handles(struct nvc0_context *nvc0)
return;
address = nvc0->screen->uniform_bo->offset + (5 << 16);
- for (s = 0; s < 5; ++s, address += (1 << 9)) {
+ for (s = 0; s < 5; ++s, address += (1 << 10)) {
uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];
if (!dirty)
continue;
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
- PUSH_DATA (push, 512);
+ PUSH_DATA (push, 1024);
PUSH_DATAh(push, address);
PUSH_DATA (push, address);
do {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
index ad79d1cbb9c..44aed1adeeb 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -334,7 +334,7 @@ nvc0_validate_vertex_buffers(struct nvc0_context *nvc0)
b = ve->pipe.vertex_buffer_index;
vb = &nvc0->vtxbuf[b];
- if (!vb->buffer) {
+ if (nvc0->vbo_user & (1 << b)) {
if (!(nvc0->constant_vbos & (1 << b))) {
if (ve->pipe.instance_divisor) {
BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_DIVISOR(i)), 1);
@@ -352,13 +352,13 @@ nvc0_validate_vertex_buffers(struct nvc0_context *nvc0)
if (unlikely(ve->pipe.instance_divisor)) {
BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 4);
- PUSH_DATA (push, (1 << 12) | vb->stride);
+ PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
PUSH_DATAh(push, res->address + offset);
PUSH_DATA (push, res->address + offset);
PUSH_DATA (push, ve->pipe.instance_divisor);
} else {
BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 3);
- PUSH_DATA (push, (1 << 12) | vb->stride);
+ PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
PUSH_DATAh(push, res->address + offset);
PUSH_DATA (push, res->address + offset);
}
@@ -382,7 +382,7 @@ nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0)
unsigned b;
const uint32_t mask = nvc0->vbo_user;
- PUSH_SPACE(push, nvc0->num_vtxbufs * 8);
+ PUSH_SPACE(push, nvc0->num_vtxbufs * 8 + nvc0->vertex->num_elements);
for (b = 0; b < nvc0->num_vtxbufs; ++b) {
struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[b];
struct nv04_resource *buf;
@@ -395,6 +395,10 @@ nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0)
}
/* address/value set in nvc0_update_user_vbufs_shared */
continue;
+ } else if (!vb->buffer) {
+ /* there can be holes in the vertex buffer lists */
+ IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(b)), 0);
+ continue;
}
buf = nv04_resource(vb->buffer);
offset = vb->buffer_offset;
@@ -410,6 +414,12 @@ nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0)
BCTX_REFN(nvc0->bufctx_3d, VTX, buf, RD);
}
+ /* If there are more elements than buffers, we might not have unset
+ * fetching on the later elements.
+ */
+ for (; b < nvc0->vertex->num_elements; ++b)
+ IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(b)), 0);
+
if (nvc0->vbo_user)
nvc0_update_user_vbufs_shared(nvc0);
}
@@ -680,7 +690,7 @@ nvc0_draw_elements_inline_u32_short(struct nouveau_pushbuf *push,
if (count & 1) {
count--;
- PUSH_SPACE(push, 1);
+ PUSH_SPACE(push, 2);
BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
PUSH_DATA (push, *map++);
}
@@ -779,7 +789,7 @@ nvc0_draw_stream_output(struct nvc0_context *nvc0,
res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
PUSH_SPACE(push, 2);
IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
- nvc0_hw_query_fifo_wait(push, nvc0_query(so->pq));
+ nvc0_hw_query_fifo_wait(nvc0, nvc0_query(so->pq));
if (nvc0->screen->eng3d->oclass < GM107_3D_CLASS)
IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);
@@ -811,6 +821,8 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
unsigned size, macro, count = info->indirect_count, drawid = info->drawid;
uint32_t offset = buf->offset + info->indirect_offset;
+ PUSH_SPACE(push, 7);
+
/* must make FIFO wait for engines idle before continuing to process */
if ((buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr)) ||
(buf_count && buf_count->fence_wr &&
@@ -951,6 +963,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (info->mode == PIPE_PRIM_PATCHES &&
nvc0->state.patch_vertices != info->vertices_per_patch) {
nvc0->state.patch_vertices = info->vertices_per_patch;
+ PUSH_SPACE(push, 1);
IMMED_NVC0(push, NVC0_3D(PATCH_VERTICES), nvc0->state.patch_vertices);
}
@@ -958,6 +971,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
nvc0_state_validate(nvc0, ~0, 8);
if (nvc0->vertprog->vp.need_draw_parameters) {
+ PUSH_SPACE(push, 9);
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 512);
PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9));
@@ -979,6 +993,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
}
if (nvc0->cb_dirty) {
+ PUSH_SPACE(push, 1);
IMMED_NVC0(push, NVC0_3D(MEM_BARRIER), 0x1011);
nvc0->cb_dirty = false;
}
@@ -987,6 +1002,8 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (!nvc0->textures_coherent[s])
continue;
+ PUSH_SPACE(push, nvc0->num_textures[s] * 2);
+
for (int i = 0; i < nvc0->num_textures[s]; ++i) {
struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
if (!(nvc0->textures_coherent[s] & (1 << i)))
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 90c4f71a945..a2b7f87855d 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -210,6 +210,10 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
/* SWTCL-only features. */
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 20945ece155..2cf08897a8d 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -225,7 +225,7 @@ void *evergreen_create_compute_state(
}
}
#else
- memset(&shader->binary, 0, sizeof(shader->binary));
+ radeon_shader_binary_init(&shader->binary);
radeon_elf_read(code, header->num_bytes, &shader->binary);
r600_create_shader(&shader->bc, &shader->binary, &use_kill);
@@ -245,13 +245,31 @@ void *evergreen_create_compute_state(
return shader;
}
+/* Destroy a compute state object created by evergreen_create_compute_state.
+ *
+ * A NULL state is ignored. With OpenCL support and LLVM older than 3.6 the
+ * per-kernel LLVM modules, the kernel array and the LLVM context are
+ * released; with newer LLVM the shader binary and bytecode are cleaned up
+ * instead. The r600_pipe_compute structure itself is always freed.
+ */
-void evergreen_delete_compute_state(struct pipe_context *ctx, void* state)
+void evergreen_delete_compute_state(struct pipe_context *ctx_, void* state)
{
- struct r600_pipe_compute *shader = (struct r600_pipe_compute *)state;
+ struct r600_context *ctx = (struct r600_context *)ctx_;
+ COMPUTE_DBG(ctx->screen, "*** evergreen_delete_compute_state\n");
+ struct r600_pipe_compute *shader = state;
if (!shader)
return;
+#ifdef HAVE_OPENCL
+#if HAVE_LLVM < 0x0306
+ for (unsigned i = 0; i < shader->num_kernels; i++) {
+ struct r600_kernel *kernel = &shader->kernels[i];
+ /* dispose the module owned by this kernel; a bare "module" identifier
+ * is not declared anywhere in this function
+ */
+ LLVMDisposeModule(kernel->llvm_module);
+ }
+ FREE(shader->kernels);
+ LLVMContextDispose(shader->llvm_ctx);
+#else
+ radeon_shader_binary_clean(&shader->binary);
+ r600_destroy_shader(&shader->bc);
+
+ /* TODO destroy shader->code_bo, shader->const_bo
+ * we'll need something like r600_buffer_free */
+#endif
+#endif
FREE(shader);
}
@@ -349,7 +367,7 @@ static void evergreen_emit_direct_dispatch(
struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_pipe_compute *shader = rctx->cs_shader_state.shader;
unsigned num_waves;
- unsigned num_pipes = rctx->screen->b.info.r600_max_pipes;
+ unsigned num_pipes = rctx->screen->b.info.r600_max_quad_pipes;
unsigned wave_divisor = (16 * num_pipes);
int group_size = 1;
int grid_size = 1;
@@ -723,7 +741,7 @@ static void evergreen_set_global_binding(
* command stream by the start_cs_cmd atom. However, since the SET_CONTEXT_REG
* packet requires that the shader type bit be set, we must initialize all
* context registers needed for compute in this function. The registers
- * intialized by the start_cs_cmd atom can be found in evereen_state.c in the
+ * initialized by the start_cs_cmd atom can be found in evergreen_state.c in the
* functions evergreen_init_atom_start_cs or cayman_init_atom_start_cs depending
* on the GPU family.
*/
@@ -733,7 +751,7 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx)
int num_threads;
int num_stack_entries;
- /* since all required registers are initialised in the
+ /* since all required registers are initialized in the
* start_compute_cs_cmd atom, we can EMIT_EARLY here.
*/
r600_init_command_buffer(cb, 256);
@@ -818,7 +836,7 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx)
* R_008E28_SQ_STATIC_THREAD_MGMT3
*/
- /* XXX: We may need to adjust the thread and stack resouce
+ /* XXX: We may need to adjust the thread and stack resource
* values for 3D/compute interop */
r600_store_config_reg_seq(cb, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, 5);
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 9dfb84965cf..61d32c06671 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -772,7 +772,7 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
if (util_format_get_blocksize(pipe_format) >= 16)
non_disp_tiling = 1;
}
- nbanks = eg_num_banks(rscreen->b.tiling_info.num_banks);
+ nbanks = eg_num_banks(rscreen->b.info.r600_num_banks);
if (state->target == PIPE_TEXTURE_1D_ARRAY) {
height = 1;
@@ -986,7 +986,7 @@ void evergreen_init_color_surface_rat(struct r600_context *rctx,
unsigned block_size =
align(util_format_get_blocksize(pipe_buffer->format), 4);
unsigned pitch_alignment =
- MAX2(64, rctx->screen->b.tiling_info.group_bytes / block_size);
+ MAX2(64, rctx->screen->b.info.pipe_interleave_bytes / block_size);
unsigned pitch = align(pipe_buffer->width0, pitch_alignment);
/* XXX: This is copied from evergreen_init_color_surface(). I don't
@@ -1098,7 +1098,7 @@ void evergreen_init_color_surface(struct r600_context *rctx,
if (util_format_get_blocksize(surf->base.format) >= 16)
non_disp_tiling = 1;
}
- nbanks = eg_num_banks(rscreen->b.tiling_info.num_banks);
+ nbanks = eg_num_banks(rscreen->b.info.r600_num_banks);
desc = util_format_description(surf->base.format);
for (i = 0; i < 4; i++) {
if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
@@ -1253,7 +1253,7 @@ static void evergreen_init_depth_surface(struct r600_context *rctx,
macro_aspect = eg_macro_tile_aspect(macro_aspect);
bankw = eg_bank_wh(bankw);
bankh = eg_bank_wh(bankh);
- nbanks = eg_num_banks(rscreen->b.tiling_info.num_banks);
+ nbanks = eg_num_banks(rscreen->b.info.r600_num_banks);
offset >>= 8;
surf->db_z_info = S_028040_ARRAY_MODE(array_mode) |
@@ -3467,7 +3467,7 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
sub_cmd = EG_DMA_COPY_TILED;
lbpp = util_logbase2(bpp);
pitch_tile_max = ((pitch / bpp) / 8) - 1;
- nbanks = eg_num_banks(rctx->screen->b.tiling_info.num_banks);
+ nbanks = eg_num_banks(rctx->screen->b.info.r600_num_banks);
if (dst_mode == RADEON_SURF_MODE_LINEAR) {
/* T2L */
@@ -3670,9 +3670,9 @@ void evergreen_init_state_functions(struct r600_context *rctx)
unsigned id = 1;
unsigned i;
/* !!!
- * To avoid GPU lockup registers must be emited in a specific order
+ * To avoid GPU lockup registers must be emitted in a specific order
* (no kidding ...). The order below is important and have been
- * partialy infered from analyzing fglrx command stream.
+ * partially inferred from analyzing fglrx command stream.
*
* Don't reorder atom without carefully checking the effect (GPU lockup
* or piglit regression).
@@ -3793,7 +3793,7 @@ void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe
unsigned output_patch0_offset, perpatch_output_offset, lds_size;
uint32_t values[16];
unsigned num_waves;
- unsigned num_pipes = rctx->screen->b.info.r600_max_pipes;
+ unsigned num_pipes = rctx->screen->b.info.r600_max_quad_pipes;
unsigned wave_divisor = (16 * num_pipes);
*num_patches = 1;
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 0b78290295a..1629399d8fe 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -245,8 +245,8 @@ struct r600_bytecode {
unsigned ar_chan;
unsigned ar_handling;
unsigned r6xx_nop_after_rel_dst;
- bool index_loaded[2];
- unsigned index_reg[2]; /* indexing register CF_INDEX_[01] */
+ bool index_loaded[2];
+ unsigned index_reg[2]; /* indexing register CF_INDEX_[01] */
unsigned debug_id;
struct r600_isa* isa;
};
diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
index 8b91372f3ae..0fe7c74418d 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -848,6 +848,7 @@ LLVMModuleRef r600_tgsi_llvm(
lp_build_tgsi_llvm(bld_base, tokens);
+ LLVMBuildRetVoid(bld_base->base.gallivm->builder);
radeon_llvm_finalize_module(ctx);
return ctx->gallivm.module;
@@ -910,6 +911,11 @@ unsigned r600_create_shader(struct r600_bytecode *bc,
return 0;
}
+void r600_destroy_shader(struct r600_bytecode *bc)
+{
+ FREE(bc->bytecode);
+}
+
unsigned r600_llvm_compile(
LLVMModuleRef mod,
enum radeon_family family,
@@ -922,17 +928,14 @@ unsigned r600_llvm_compile(
struct radeon_shader_binary binary;
const char * gpu_family = r600_get_llvm_processor_name(family);
- memset(&binary, 0, sizeof(struct radeon_shader_binary));
+ radeon_shader_binary_init(&binary);
if (dump)
LLVMDumpModule(mod);
r = radeon_llvm_compile(mod, &binary, gpu_family, NULL, debug);
r = r600_create_shader(bc, &binary, use_kill);
- FREE(binary.code);
- FREE(binary.config);
- FREE(binary.rodata);
- FREE(binary.global_symbol_offsets);
+ radeon_shader_binary_clean(&binary);
return r;
}
diff --git a/src/gallium/drivers/r600/r600_llvm.h b/src/gallium/drivers/r600/r600_llvm.h
index f570b739fbe..3f7fc4bef7e 100644
--- a/src/gallium/drivers/r600/r600_llvm.h
+++ b/src/gallium/drivers/r600/r600_llvm.h
@@ -30,6 +30,8 @@ unsigned r600_create_shader(struct r600_bytecode *bc,
const struct radeon_shader_binary *binary,
boolean *use_kill);
+void r600_destroy_shader(struct r600_bytecode *bc);
+
void r600_shader_binary_read_config(const struct radeon_shader_binary *binary,
struct r600_bytecode *bc,
uint64_t symbol_offset,
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 9b0f31270df..9d378013be0 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -285,6 +285,8 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_INVALIDATE_BUFFER:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 1;
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
@@ -342,6 +344,9 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
/* kernel command checker support is also required */
return family >= CHIP_CEDAR && rscreen->b.info.drm_minor >= 41;
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ return family >= CHIP_CEDAR ? 0 : 1;
+
/* Unsupported features. */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
@@ -364,6 +369,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
return 0;
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
@@ -415,10 +421,10 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
/* Timer queries, present when the clock frequency is non zero. */
case PIPE_CAP_QUERY_TIME_ELAPSED:
- return rscreen->b.info.r600_clock_crystal_freq != 0;
+ return rscreen->b.info.clock_crystal_freq != 0;
case PIPE_CAP_QUERY_TIMESTAMP:
return rscreen->b.info.drm_minor >= 20 &&
- rscreen->b.info.r600_clock_crystal_freq != 0;
+ rscreen->b.info.clock_crystal_freq != 0;
case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
case PIPE_CAP_MIN_TEXEL_OFFSET:
diff --git a/src/gallium/drivers/r600/r600_uvd.c b/src/gallium/drivers/r600/r600_uvd.c
index 18d2b69afb0..0c928345773 100644
--- a/src/gallium/drivers/r600/r600_uvd.c
+++ b/src/gallium/drivers/r600/r600_uvd.c
@@ -160,7 +160,7 @@ static struct pb_buffer* r600_uvd_set_dtb(struct ruvd_msg *msg, struct vl_video_
struct r600_texture *chroma = (struct r600_texture *)buf->resources[1];
msg->body.decode.dt_field_mode = buf->base.interlaced;
- msg->body.decode.dt_surf_tile_config |= RUVD_NUM_BANKS(eg_num_banks(rscreen->b.tiling_info.num_banks));
+ msg->body.decode.dt_surf_tile_config |= RUVD_NUM_BANKS(eg_num_banks(rscreen->b.info.r600_num_banks));
ruvd_set_dt_surfaces(msg, &luma->surface, &chroma->surface);
diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
index c7984c47304..b384baa9237 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -181,7 +181,7 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
old_buf = res->buf;
res->buf = new_buf; /* should be atomic */
- if (rscreen->info.r600_virtual_address)
+ if (rscreen->info.has_virtual_memory)
res->gpu_address = rscreen->ws->buffer_get_virtual_address(res->buf);
else
res->gpu_address = 0;
@@ -511,7 +511,7 @@ r600_buffer_from_user_memory(struct pipe_screen *screen,
return NULL;
}
- if (rscreen->info.r600_virtual_address)
+ if (rscreen->info.has_virtual_memory)
rbuffer->gpu_address =
ws->buffer_get_virtual_address(rbuffer->buf);
else
diff --git a/src/gallium/drivers/radeon/r600_cs.h b/src/gallium/drivers/radeon/r600_cs.h
index caf7deef37c..ff5b055448a 100644
--- a/src/gallium/drivers/radeon/r600_cs.h
+++ b/src/gallium/drivers/radeon/r600_cs.h
@@ -60,7 +60,7 @@ static inline void r600_emit_reloc(struct r600_common_context *rctx,
enum radeon_bo_priority priority)
{
struct radeon_winsys_cs *cs = ring->cs;
- bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.r600_virtual_address;
+ bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.has_virtual_memory;
unsigned reloc = radeon_add_to_buffer_list(rctx, ring, rbo, usage, priority);
if (!has_vm) {
diff --git a/src/gallium/drivers/radeon/r600_perfcounter.c b/src/gallium/drivers/radeon/r600_perfcounter.c
index fad7bdec40a..f3529a1fe0f 100644
--- a/src/gallium/drivers/radeon/r600_perfcounter.c
+++ b/src/gallium/drivers/radeon/r600_perfcounter.c
@@ -33,10 +33,6 @@
/* Max counters per HW block */
#define R600_QUERY_MAX_COUNTERS 16
-static const char * const r600_pc_shader_suffix[] = {
- "", "_PS", "_VS", "_GS", "_ES", "_HS", "_LS", "_CS"
-};
-
static struct r600_perfcounter_block *
lookup_counter(struct r600_perfcounters *pc, unsigned index,
unsigned *base_gid, unsigned *sub_index)
@@ -92,6 +88,8 @@ struct r600_pc_counter {
unsigned stride;
};
+#define R600_PC_SHADERS_WINDOWING (1 << 31)
+
struct r600_query_pc {
struct r600_query_hw b;
@@ -246,32 +244,29 @@ static struct r600_pc_group *get_group_state(struct r600_common_screen *screen,
if (block->flags & R600_PC_BLOCK_SHADER) {
unsigned sub_gids = block->num_instances;
unsigned shader_id;
- unsigned shader_mask;
- unsigned query_shader_mask;
+ unsigned shaders;
+ unsigned query_shaders;
if (block->flags & R600_PC_BLOCK_SE_GROUPS)
sub_gids = sub_gids * screen->info.max_se;
shader_id = sub_gid / sub_gids;
sub_gid = sub_gid % sub_gids;
- if (shader_id == 0)
- shader_mask = R600_PC_SHADER_ALL;
- else
- shader_mask = 1 << (shader_id - 1);
+ shaders = screen->perfcounters->shader_type_bits[shader_id];
- query_shader_mask = query->shaders & R600_PC_SHADER_ALL;
- if (query_shader_mask && query_shader_mask != shader_mask) {
+ query_shaders = query->shaders & ~R600_PC_SHADERS_WINDOWING;
+ if (query_shaders && query_shaders != shaders) {
fprintf(stderr, "r600_perfcounter: incompatible shader groups\n");
FREE(group);
return NULL;
}
- query->shaders |= shader_mask;
+ query->shaders = shaders;
}
- if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED) {
+ if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED && !query->shaders) {
// A non-zero value in query->shaders ensures that the shader
// masking is reset unless the user explicitly requests one.
- query->shaders |= R600_PC_SHADER_WINDOWING;
+ query->shaders = R600_PC_SHADERS_WINDOWING;
}
if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
@@ -379,8 +374,8 @@ struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
}
if (query->shaders) {
- if ((query->shaders & R600_PC_SHADER_ALL) == 0)
- query->shaders |= R600_PC_SHADER_ALL;
+ if (query->shaders == R600_PC_SHADERS_WINDOWING)
+ query->shaders = 0xffffffff;
query->b.num_cs_dw_begin += pc->num_shaders_cs_dwords;
}
@@ -438,7 +433,7 @@ static boolean r600_init_block_names(struct r600_common_screen *screen,
if (block->flags & R600_PC_BLOCK_SE_GROUPS)
groups_se = screen->info.max_se;
if (block->flags & R600_PC_BLOCK_SHADER)
- groups_shader = ARRAY_SIZE(r600_pc_shader_suffix);
+ groups_shader = screen->perfcounters->num_shader_types;
namelen = strlen(block->basename);
block->group_name_stride = namelen + 1;
@@ -462,14 +457,15 @@ static boolean r600_init_block_names(struct r600_common_screen *screen,
groupname = block->group_names;
for (i = 0; i < groups_shader; ++i) {
- unsigned shaderlen = strlen(r600_pc_shader_suffix[i]);
+ const char *shader_suffix = screen->perfcounters->shader_type_suffixes[i];
+ unsigned shaderlen = strlen(shader_suffix);
for (j = 0; j < groups_se; ++j) {
for (k = 0; k < groups_instance; ++k) {
strcpy(groupname, block->basename);
p = groupname + namelen;
if (block->flags & R600_PC_BLOCK_SHADER) {
- strcpy(p, r600_pc_shader_suffix[i]);
+ strcpy(p, shader_suffix);
p += shaderlen;
}
@@ -626,7 +622,7 @@ void r600_perfcounters_add_block(struct r600_common_screen *rscreen,
if (block->flags & R600_PC_BLOCK_SE_GROUPS)
block->num_groups *= rscreen->info.max_se;
if (block->flags & R600_PC_BLOCK_SHADER)
- block->num_groups *= ARRAY_SIZE(r600_pc_shader_suffix);
+ block->num_groups *= pc->num_shader_types;
++pc->num_blocks;
pc->num_groups += block->num_groups;
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 4c066c14cd8..d75317b1cbe 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -49,6 +49,26 @@ struct r600_multi_fence {
};
/*
+ * shader binary helpers.
+ */
+void radeon_shader_binary_init(struct radeon_shader_binary *b)
+{
+ memset(b, 0, sizeof(*b));
+}
+
+void radeon_shader_binary_clean(struct radeon_shader_binary *b)
+{
+ if (!b)
+ return;
+ FREE(b->code);
+ FREE(b->config);
+ FREE(b->rodata);
+ FREE(b->global_symbol_offsets);
+ FREE(b->relocs);
+ FREE(b->disasm_string);
+}
+
+/*
* pipe_context
*/
@@ -251,7 +271,7 @@ bool r600_common_context_init(struct r600_common_context *rctx,
rctx->chip_class = rscreen->chip_class;
if (rscreen->chip_class >= CIK)
- rctx->max_db = MAX2(8, rscreen->info.r600_num_backends);
+ rctx->max_db = MAX2(8, rscreen->info.num_render_backends);
else if (rscreen->chip_class >= EVERGREEN)
rctx->max_db = 8;
else
@@ -295,7 +315,7 @@ bool r600_common_context_init(struct r600_common_context *rctx,
if (!rctx->ctx)
return false;
- if (rscreen->info.r600_has_dma && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) {
+ if (rscreen->info.has_sdma && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) {
rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA,
r600_flush_dma_ring,
rctx, NULL);
@@ -373,6 +393,7 @@ static const struct debug_named_value common_debug_options[] = {
{ "noir", DBG_NO_IR, "Don't print the LLVM IR"},
{ "notgsi", DBG_NO_TGSI, "Don't print the TGSI"},
{ "noasm", DBG_NO_ASM, "Don't print disassembled shaders"},
+ { "preoptir", DBG_PREOPT_IR, "Print the LLVM IR before initial optimizations" },
/* features */
{ "nodma", DBG_NO_ASYNC_DMA, "Disable asynchronous DMA" },
@@ -389,6 +410,7 @@ static const struct debug_named_value common_debug_options[] = {
{ "nodcc", DBG_NO_DCC, "Disable DCC." },
{ "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." },
{ "norbplus", DBG_NO_RB_PLUS, "Disable RB+ on Stoney." },
+ { "sisched", DBG_SI_SCHED, "Enable LLVM SI Machine Instruction Scheduler." },
DEBUG_NAMED_VALUE_END /* must be last */
};
@@ -698,7 +720,7 @@ static int r600_get_compute_param(struct pipe_screen *screen,
case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
if (ret) {
uint32_t *max_clock_frequency = ret;
- *max_clock_frequency = rscreen->info.max_sclk;
+ *max_clock_frequency = rscreen->info.max_shader_clock;
}
return sizeof(uint32_t);
@@ -734,7 +756,7 @@ static uint64_t r600_get_timestamp(struct pipe_screen *screen)
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
return 1000000 * rscreen->ws->query_value(rscreen->ws, RADEON_TIMESTAMP) /
- rscreen->info.r600_clock_crystal_freq;
+ rscreen->info.clock_crystal_freq;
}
static void r600_fence_reference(struct pipe_screen *screen,
@@ -778,116 +800,40 @@ static boolean r600_fence_finish(struct pipe_screen *screen,
return rws->fence_wait(rws, rfence->gfx, timeout);
}
-static bool r600_interpret_tiling(struct r600_common_screen *rscreen,
- uint32_t tiling_config)
+static void r600_query_memory_info(struct pipe_screen *screen,
+ struct pipe_memory_info *info)
{
- switch ((tiling_config & 0xe) >> 1) {
- case 0:
- rscreen->tiling_info.num_channels = 1;
- break;
- case 1:
- rscreen->tiling_info.num_channels = 2;
- break;
- case 2:
- rscreen->tiling_info.num_channels = 4;
- break;
- case 3:
- rscreen->tiling_info.num_channels = 8;
- break;
- default:
- return false;
- }
-
- switch ((tiling_config & 0x30) >> 4) {
- case 0:
- rscreen->tiling_info.num_banks = 4;
- break;
- case 1:
- rscreen->tiling_info.num_banks = 8;
- break;
- default:
- return false;
-
- }
- switch ((tiling_config & 0xc0) >> 6) {
- case 0:
- rscreen->tiling_info.group_bytes = 256;
- break;
- case 1:
- rscreen->tiling_info.group_bytes = 512;
- break;
- default:
- return false;
- }
- return true;
-}
-
-static bool evergreen_interpret_tiling(struct r600_common_screen *rscreen,
- uint32_t tiling_config)
-{
- switch (tiling_config & 0xf) {
- case 0:
- rscreen->tiling_info.num_channels = 1;
- break;
- case 1:
- rscreen->tiling_info.num_channels = 2;
- break;
- case 2:
- rscreen->tiling_info.num_channels = 4;
- break;
- case 3:
- rscreen->tiling_info.num_channels = 8;
- break;
- default:
- return false;
- }
-
- switch ((tiling_config & 0xf0) >> 4) {
- case 0:
- rscreen->tiling_info.num_banks = 4;
- break;
- case 1:
- rscreen->tiling_info.num_banks = 8;
- break;
- case 2:
- rscreen->tiling_info.num_banks = 16;
- break;
- default:
- return false;
- }
-
- switch ((tiling_config & 0xf00) >> 8) {
- case 0:
- rscreen->tiling_info.group_bytes = 256;
- break;
- case 1:
- rscreen->tiling_info.group_bytes = 512;
- break;
- default:
- return false;
- }
- return true;
-}
-
-static bool r600_init_tiling(struct r600_common_screen *rscreen)
-{
- uint32_t tiling_config = rscreen->info.r600_tiling_config;
-
- /* set default group bytes, overridden by tiling info ioctl */
- if (rscreen->chip_class <= R700) {
- rscreen->tiling_info.group_bytes = 256;
- } else {
- rscreen->tiling_info.group_bytes = 512;
- }
-
- if (!tiling_config)
- return true;
-
- if (rscreen->chip_class <= R700) {
- return r600_interpret_tiling(rscreen, tiling_config);
- } else {
- return evergreen_interpret_tiling(rscreen, tiling_config);
- }
+ struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+ struct radeon_winsys *ws = rscreen->ws;
+ unsigned vram_usage, gtt_usage;
+
+ info->total_device_memory = rscreen->info.vram_size / 1024;
+ info->total_staging_memory = rscreen->info.gart_size / 1024;
+
+ /* The real TTM memory usage is somewhat random, because:
+ *
+ * 1) TTM delays freeing memory, because it can only free it after
+ * fences expire.
+ *
+ * 2) The memory usage can be really low if big VRAM evictions are
+ * taking place, but the real usage is well above the size of VRAM.
+ *
+ * Instead, return statistics of this process.
+ */
+ vram_usage = ws->query_value(ws, RADEON_REQUESTED_VRAM_MEMORY) / 1024;
+ gtt_usage = ws->query_value(ws, RADEON_REQUESTED_GTT_MEMORY) / 1024;
+
+ info->avail_device_memory =
+ vram_usage <= info->total_device_memory ?
+ info->total_device_memory - vram_usage : 0;
+ info->avail_staging_memory =
+ gtt_usage <= info->total_staging_memory ?
+ info->total_staging_memory - gtt_usage : 0;
+
+ info->device_memory_evicted =
+ ws->query_value(ws, RADEON_NUM_BYTES_MOVED) / 1024;
+ /* Just return the number of evicted 64KB pages. */
+ info->nr_device_memory_evictions = info->device_memory_evicted / 64;
}
struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
@@ -929,6 +875,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
rscreen->b.fence_reference = r600_fence_reference;
rscreen->b.resource_destroy = u_resource_destroy_vtbl;
rscreen->b.resource_from_user_memory = r600_buffer_from_user_memory;
+ rscreen->b.query_memory_info = r600_query_memory_info;
if (rscreen->info.has_uvd) {
rscreen->b.get_video_param = rvid_get_video_param;
@@ -946,9 +893,6 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
rscreen->chip_class = rscreen->info.chip_class;
rscreen->debug_flags = debug_get_flags_option("R600_DEBUG", common_debug_options, 0);
- if (!r600_init_tiling(rscreen)) {
- return false;
- }
util_format_s3tc_init();
pipe_mutex_init(rscreen->aux_context_lock);
pipe_mutex_init(rscreen->gpu_load_mutex);
@@ -968,27 +912,34 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
if (rscreen->debug_flags & DBG_INFO) {
printf("pci_id = 0x%x\n", rscreen->info.pci_id);
- printf("family = %i\n", rscreen->info.family);
+ printf("family = %i (%s)\n", rscreen->info.family,
+ r600_get_chip_name(rscreen));
printf("chip_class = %i\n", rscreen->info.chip_class);
- printf("gart_size = %i MB\n", (int)(rscreen->info.gart_size >> 20));
- printf("vram_size = %i MB\n", (int)(rscreen->info.vram_size >> 20));
- printf("max_sclk = %i\n", rscreen->info.max_sclk);
+ printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.gart_size, 1024*1024));
+ printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_size, 1024*1024));
+ printf("has_virtual_memory = %i\n", rscreen->info.has_virtual_memory);
+ printf("gfx_ib_pad_with_type2 = %i\n", rscreen->info.gfx_ib_pad_with_type2);
+ printf("has_sdma = %i\n", rscreen->info.has_sdma);
+ printf("has_uvd = %i\n", rscreen->info.has_uvd);
+ printf("vce_fw_version = %i\n", rscreen->info.vce_fw_version);
+ printf("vce_harvest_config = %i\n", rscreen->info.vce_harvest_config);
+ printf("clock_crystal_freq = %i\n", rscreen->info.clock_crystal_freq);
+ printf("drm = %i.%i.%i\n", rscreen->info.drm_major,
+ rscreen->info.drm_minor, rscreen->info.drm_patchlevel);
+ printf("has_userptr = %i\n", rscreen->info.has_userptr);
+
+ printf("r600_max_quad_pipes = %i\n", rscreen->info.r600_max_quad_pipes);
+ printf("max_shader_clock = %i\n", rscreen->info.max_shader_clock);
printf("num_good_compute_units = %i\n", rscreen->info.num_good_compute_units);
printf("max_se = %i\n", rscreen->info.max_se);
printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se);
- printf("drm = %i.%i.%i\n", rscreen->info.drm_major,
- rscreen->info.drm_minor, rscreen->info.drm_patchlevel);
- printf("has_uvd = %i\n", rscreen->info.has_uvd);
- printf("vce_fw_version = %i\n", rscreen->info.vce_fw_version);
- printf("r600_num_backends = %i\n", rscreen->info.r600_num_backends);
- printf("r600_clock_crystal_freq = %i\n", rscreen->info.r600_clock_crystal_freq);
- printf("r600_tiling_config = 0x%x\n", rscreen->info.r600_tiling_config);
- printf("r600_num_tile_pipes = %i\n", rscreen->info.r600_num_tile_pipes);
- printf("r600_max_pipes = %i\n", rscreen->info.r600_max_pipes);
- printf("r600_virtual_address = %i\n", rscreen->info.r600_virtual_address);
- printf("r600_has_dma = %i\n", rscreen->info.r600_has_dma);
- printf("r600_backend_map = %i\n", rscreen->info.r600_backend_map);
- printf("r600_backend_map_valid = %i\n", rscreen->info.r600_backend_map_valid);
+
+ printf("r600_gb_backend_map = %i\n", rscreen->info.r600_gb_backend_map);
+ printf("r600_gb_backend_map_valid = %i\n", rscreen->info.r600_gb_backend_map_valid);
+ printf("r600_num_banks = %i\n", rscreen->info.r600_num_banks);
+ printf("num_render_backends = %i\n", rscreen->info.num_render_backends);
+ printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes);
+ printf("pipe_interleave_bytes = %i\n", rscreen->info.pipe_interleave_bytes);
printf("si_tile_mode_array_valid = %i\n", rscreen->info.si_tile_mode_array_valid);
printf("cik_macrotile_mode_array_valid = %i\n", rscreen->info.cik_macrotile_mode_array_valid);
}
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index d66e74f9254..e92df876c22 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -71,6 +71,7 @@
#define DBG_NO_IR (1 << 12)
#define DBG_NO_TGSI (1 << 13)
#define DBG_NO_ASM (1 << 14)
+#define DBG_PREOPT_IR (1 << 15)
/* Bits 21-31 are reserved for the r600g driver. */
/* features */
#define DBG_NO_ASYNC_DMA (1llu << 32)
@@ -87,6 +88,7 @@
#define DBG_NO_DCC (1llu << 43)
#define DBG_NO_DCC_CLEAR (1llu << 44)
#define DBG_NO_RB_PLUS (1llu << 45)
+#define DBG_SI_SCHED (1llu << 46)
#define R600_MAP_BUFFER_ALIGNMENT 64
@@ -129,6 +131,9 @@ struct radeon_shader_binary {
char *disasm_string;
};
+void radeon_shader_binary_init(struct radeon_shader_binary *b);
+void radeon_shader_binary_clean(struct radeon_shader_binary *b);
+
struct r600_resource {
struct u_resource b;
@@ -257,8 +262,6 @@ struct r600_surface {
unsigned spi_shader_col_format_alpha; /* SI+, alpha-to-coverage */
unsigned spi_shader_col_format_blend; /* SI+, blending without alpha. */
unsigned spi_shader_col_format_blend_alpha; /* SI+, blending with alpha. */
- unsigned sx_ps_downconvert; /* Stoney only */
- unsigned sx_blend_opt_epsilon; /* Stoney only */
struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */
@@ -278,19 +281,12 @@ struct r600_surface {
unsigned pa_su_poly_offset_db_fmt_cntl;
};
-struct r600_tiling_info {
- unsigned num_channels;
- unsigned num_banks;
- unsigned group_bytes;
-};
-
struct r600_common_screen {
struct pipe_screen b;
struct radeon_winsys *ws;
enum radeon_family family;
enum chip_class chip_class;
struct radeon_info info;
- struct r600_tiling_info tiling_info;
uint64_t debug_flags;
bool has_cp_dma;
bool has_streamout;
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 0aa19cd54fe..f8b62411722 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -100,6 +100,12 @@ static boolean r600_query_sw_begin(struct r600_common_context *rctx,
case R600_QUERY_NUM_SHADERS_CREATED:
query->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
break;
+ case R600_QUERY_GPIN_ASIC_ID:
+ case R600_QUERY_GPIN_NUM_SIMD:
+ case R600_QUERY_GPIN_NUM_RB:
+ case R600_QUERY_GPIN_NUM_SPI:
+ case R600_QUERY_GPIN_NUM_SE:
+ break;
default:
unreachable("r600_query_sw_begin: bad query type");
}
@@ -146,6 +152,12 @@ static void r600_query_sw_end(struct r600_common_context *rctx,
case R600_QUERY_NUM_SHADERS_CREATED:
query->end_result = p_atomic_read(&rctx->screen->num_shaders_created);
break;
+ case R600_QUERY_GPIN_ASIC_ID:
+ case R600_QUERY_GPIN_NUM_SIMD:
+ case R600_QUERY_GPIN_NUM_RB:
+ case R600_QUERY_GPIN_NUM_SPI:
+ case R600_QUERY_GPIN_NUM_SE:
+ break;
default:
unreachable("r600_query_sw_end: bad query type");
}
@@ -162,7 +174,7 @@ static boolean r600_query_sw_get_result(struct r600_common_context *rctx,
case PIPE_QUERY_TIMESTAMP_DISJOINT:
/* Convert from cycles per millisecond to cycles per second (Hz). */
result->timestamp_disjoint.frequency =
- (uint64_t)rctx->screen->info.r600_clock_crystal_freq * 1000;
+ (uint64_t)rctx->screen->info.clock_crystal_freq * 1000;
result->timestamp_disjoint.disjoint = FALSE;
return TRUE;
case PIPE_QUERY_GPU_FINISHED: {
@@ -171,6 +183,22 @@ static boolean r600_query_sw_get_result(struct r600_common_context *rctx,
wait ? PIPE_TIMEOUT_INFINITE : 0);
return result->b;
}
+
+ case R600_QUERY_GPIN_ASIC_ID:
+ result->u32 = 0;
+ return TRUE;
+ case R600_QUERY_GPIN_NUM_SIMD:
+ result->u32 = rctx->screen->info.num_good_compute_units;
+ return TRUE;
+ case R600_QUERY_GPIN_NUM_RB:
+ result->u32 = rctx->screen->info.num_render_backends;
+ return TRUE;
+ case R600_QUERY_GPIN_NUM_SPI:
+ result->u32 = 1; /* all supported chips have one SPI per SE */
+ return TRUE;
+ case R600_QUERY_GPIN_NUM_SE:
+ result->u32 = rctx->screen->info.max_se;
+ return TRUE;
}
result->u64 = query->end_result - query->begin_result;
@@ -908,7 +936,7 @@ boolean r600_query_hw_get_result(struct r600_common_context *rctx,
/* Convert the time to expected units. */
if (rquery->type == PIPE_QUERY_TIME_ELAPSED ||
rquery->type == PIPE_QUERY_TIMESTAMP) {
- result->u64 = (1000000 * result->u64) / rctx->screen->info.r600_clock_crystal_freq;
+ result->u64 = (1000000 * result->u64) / rctx->screen->info.clock_crystal_freq;
}
return TRUE;
}
@@ -1021,13 +1049,13 @@ void r600_query_init_backend_mask(struct r600_common_context *ctx)
struct radeon_winsys_cs *cs = ctx->gfx.cs;
struct r600_resource *buffer;
uint32_t *results;
- unsigned num_backends = ctx->screen->info.r600_num_backends;
+ unsigned num_backends = ctx->screen->info.num_render_backends;
unsigned i, mask = 0;
/* if backend_map query is supported by the kernel */
- if (ctx->screen->info.r600_backend_map_valid) {
- unsigned num_tile_pipes = ctx->screen->info.r600_num_tile_pipes;
- unsigned backend_map = ctx->screen->info.r600_backend_map;
+ if (ctx->screen->info.r600_gb_backend_map_valid) {
+ unsigned num_tile_pipes = ctx->screen->info.num_tile_pipes;
+ unsigned backend_map = ctx->screen->info.r600_gb_backend_map;
unsigned item_width, item_mask;
if (ctx->chip_class >= EVERGREEN) {
@@ -1096,15 +1124,21 @@ err:
return;
}
-#define X(name_, query_type_, type_, result_type_) \
+#define XFULL(name_, query_type_, type_, result_type_, group_id_) \
{ \
.name = name_, \
.query_type = R600_QUERY_##query_type_, \
.type = PIPE_DRIVER_QUERY_TYPE_##type_, \
.result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, \
- .group_id = ~(unsigned)0 \
+ .group_id = group_id_ \
}
+#define X(name_, query_type_, type_, result_type_) \
+ XFULL(name_, query_type_, type_, result_type_, ~(unsigned)0)
+
+#define XG(group_, name_, query_type_, type_, result_type_) \
+ XFULL(name_, query_type_, type_, result_type_, R600_QUERY_GROUP_##group_)
+
static struct pipe_driver_query_info r600_driver_query_list[] = {
X("num-compilations", NUM_COMPILATIONS, UINT64, CUMULATIVE),
X("num-shaders-created", NUM_SHADERS_CREATED, UINT64, CUMULATIVE),
@@ -1116,6 +1150,20 @@ static struct pipe_driver_query_info r600_driver_query_list[] = {
X("num-bytes-moved", NUM_BYTES_MOVED, BYTES, CUMULATIVE),
X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE),
X("GTT-usage", GTT_USAGE, BYTES, AVERAGE),
+
+ /* GPIN queries are for the benefit of old versions of GPUPerfStudio,
+ * which use it as a fallback path to detect the GPU type.
+ *
+ * Note: The names of these queries are significant for GPUPerfStudio
+ * (and possibly their order as well). */
+ XG(GPIN, "GPIN_000", GPIN_ASIC_ID, UINT, AVERAGE),
+ XG(GPIN, "GPIN_001", GPIN_NUM_SIMD, UINT, AVERAGE),
+ XG(GPIN, "GPIN_002", GPIN_NUM_RB, UINT, AVERAGE),
+ XG(GPIN, "GPIN_003", GPIN_NUM_SPI, UINT, AVERAGE),
+ XG(GPIN, "GPIN_004", GPIN_NUM_SE, UINT, AVERAGE),
+
+ /* The following queries must be at the end of the list because their
+ * availability is adjusted dynamically based on the DRM version. */
X("GPU-load", GPU_LOAD, UINT64, AVERAGE),
X("temperature", GPU_TEMPERATURE, UINT64, AVERAGE),
X("shader-clock", CURRENT_GPU_SCLK, HZ, AVERAGE),
@@ -1123,6 +1171,8 @@ static struct pipe_driver_query_info r600_driver_query_list[] = {
};
#undef X
+#undef XG
+#undef XFULL
static unsigned r600_get_num_queries(struct r600_common_screen *rscreen)
{
@@ -1167,16 +1217,40 @@ static int r600_get_driver_query_info(struct pipe_screen *screen,
break;
}
+ if (info->group_id != ~(unsigned)0 && rscreen->perfcounters)
+ info->group_id += rscreen->perfcounters->num_groups;
+
return 1;
}
+/* Report driver query groups: hardware perfcounter groups come first, then
+ * the software "GPIN" group; a NULL info returns the total group count.
+ * Note: Unfortunately, GPUPerfStudio hardcodes the order of hardware
+ * performance counter groups, so be careful when changing this and related functions. */
static int r600_get_driver_query_group_info(struct pipe_screen *screen,
unsigned index,
struct pipe_driver_query_group_info *info)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
+ unsigned num_pc_groups = 0; /* stays 0 when the screen has no perfcounters */
- return r600_get_perfcounter_group_info(rscreen, index, info);
+ if (rscreen->perfcounters)
+ num_pc_groups = rscreen->perfcounters->num_groups;
+
+ if (!info) /* NULL info: only the number of groups is requested */
+ return num_pc_groups + R600_NUM_SW_QUERY_GROUPS;
+
+ if (index < num_pc_groups) /* low indices belong to the hardware perfcounter groups */
+ return r600_get_perfcounter_group_info(rscreen, index, info);
+
+ index -= num_pc_groups; /* rebase to an index into the software groups */
+ if (index >= R600_NUM_SW_QUERY_GROUPS) /* past the last software group */
+ return 0;
+
+ info->name = "GPIN";
+ info->max_active_queries = 5; /* NOTE(review): presumably the five GPIN_00x list entries; keep in sync */
+ info->num_queries = 5;
+ return 1;
}
void r600_query_init(struct r600_common_context *rctx)
@@ -1189,7 +1263,7 @@ void r600_query_init(struct r600_common_context *rctx)
rctx->b.get_query_result = r600_get_query_result;
rctx->render_cond_atom.emit = r600_emit_query_predication;
- if (((struct r600_common_screen*)rctx->b.screen)->info.r600_num_backends > 0)
+ if (((struct r600_common_screen*)rctx->b.screen)->info.num_render_backends > 0)
rctx->b.render_condition = r600_render_condition;
LIST_INITHEAD(&rctx->active_nontimer_queries);
diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h
index e5a98bfe5bd..8b2c4e3fe93 100644
--- a/src/gallium/drivers/radeon/r600_query.h
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -54,8 +54,18 @@ struct r600_resource;
#define R600_QUERY_GPU_LOAD (PIPE_QUERY_DRIVER_SPECIFIC + 11)
#define R600_QUERY_NUM_COMPILATIONS (PIPE_QUERY_DRIVER_SPECIFIC + 12)
#define R600_QUERY_NUM_SHADERS_CREATED (PIPE_QUERY_DRIVER_SPECIFIC + 13)
+#define R600_QUERY_GPIN_ASIC_ID (PIPE_QUERY_DRIVER_SPECIFIC + 14)
+#define R600_QUERY_GPIN_NUM_SIMD (PIPE_QUERY_DRIVER_SPECIFIC + 15)
+#define R600_QUERY_GPIN_NUM_RB (PIPE_QUERY_DRIVER_SPECIFIC + 16)
+#define R600_QUERY_GPIN_NUM_SPI (PIPE_QUERY_DRIVER_SPECIFIC + 17)
+#define R600_QUERY_GPIN_NUM_SE (PIPE_QUERY_DRIVER_SPECIFIC + 18)
#define R600_QUERY_FIRST_PERFCOUNTER (PIPE_QUERY_DRIVER_SPECIFIC + 100)
+enum { /* Software query groups, numbered from 0 independently of perfcounter groups. */
+ R600_QUERY_GROUP_GPIN = 0,
+ R600_NUM_SW_QUERY_GROUPS /* count of the entries above */
+};
+
struct r600_query_ops {
void (*destroy)(struct r600_common_context *, struct r600_query *);
boolean (*begin)(struct r600_common_context *, struct r600_query *);
@@ -156,24 +166,6 @@ enum {
R600_PC_BLOCK_SHADER_WINDOWED = (1 << 4),
};
-/* Shader enable bits. Chosen to coincide with SQ_PERFCOUNTER_CTRL values */
-enum {
- R600_PC_SHADER_PS = (1 << 0),
- R600_PC_SHADER_VS = (1 << 1),
- R600_PC_SHADER_GS = (1 << 2),
- R600_PC_SHADER_ES = (1 << 3),
- R600_PC_SHADER_HS = (1 << 4),
- R600_PC_SHADER_LS = (1 << 5),
- R600_PC_SHADER_CS = (1 << 6),
-
- R600_PC_SHADER_ALL = R600_PC_SHADER_PS | R600_PC_SHADER_VS |
- R600_PC_SHADER_GS | R600_PC_SHADER_ES |
- R600_PC_SHADER_HS | R600_PC_SHADER_LS |
- R600_PC_SHADER_CS,
-
- R600_PC_SHADER_WINDOWING = (1 << 31),
-};
-
/* Describes a hardware block with performance counters. Multiple instances of
* each block, possibly per-SE, may exist on the chip. Depending on the block
* and on the user's configuration, we either
@@ -210,6 +202,10 @@ struct r600_perfcounters {
unsigned num_instance_cs_dwords;
unsigned num_shaders_cs_dwords;
+ unsigned num_shader_types;
+ const char * const *shader_type_suffixes;
+ const unsigned *shader_type_bits;
+
void (*get_size)(struct r600_perfcounter_block *,
unsigned count, unsigned *selectors,
unsigned *num_select_dw, unsigned *num_read_dw);
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index 7c4717d29fa..af206e43860 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -361,8 +361,8 @@ void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
unsigned cmask_tile_elements = cmask_tile_width * cmask_tile_height;
unsigned element_bits = 4;
unsigned cmask_cache_bits = 1024;
- unsigned num_pipes = rscreen->tiling_info.num_channels;
- unsigned pipe_interleave_bytes = rscreen->tiling_info.group_bytes;
+ unsigned num_pipes = rscreen->info.num_tile_pipes;
+ unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
unsigned elements_per_macro_tile = (cmask_cache_bits / element_bits) * num_pipes;
unsigned pixels_per_macro_tile = elements_per_macro_tile * cmask_tile_elements;
@@ -394,8 +394,8 @@ static void si_texture_get_cmask_info(struct r600_common_screen *rscreen,
struct r600_texture *rtex,
struct r600_cmask_info *out)
{
- unsigned pipe_interleave_bytes = rscreen->tiling_info.group_bytes;
- unsigned num_pipes = rscreen->tiling_info.num_channels;
+ unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
+ unsigned num_pipes = rscreen->info.num_tile_pipes;
unsigned cl_width, cl_height;
switch (num_pipes) {
@@ -515,7 +515,7 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
{
unsigned cl_width, cl_height, width, height;
unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align;
- unsigned num_pipes = rscreen->tiling_info.num_channels;
+ unsigned num_pipes = rscreen->info.num_tile_pipes;
if (rscreen->chip_class <= EVERGREEN &&
rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 26)
@@ -533,6 +533,10 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 38)
return 0;
+ /* Overalign HTILE on Stoney to fix piglit/depthstencil-render-miplevels 585. */
+ if (rscreen->family == CHIP_STONEY)
+ num_pipes = 4;
+
switch (num_pipes) {
case 1:
cl_width = 32;
@@ -565,7 +569,7 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
slice_elements = (width * height) / (8 * 8);
slice_bytes = slice_elements * 4;
- pipe_interleave_bytes = rscreen->tiling_info.group_bytes;
+ pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
base_align = num_pipes * pipe_interleave_bytes;
rtex->htile.pitch = width;
@@ -1212,10 +1216,30 @@ static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,
const struct pipe_surface *templ)
{
unsigned level = templ->u.tex.level;
+ unsigned width = u_minify(tex->width0, level);
+ unsigned height = u_minify(tex->height0, level);
+
+ if (tex->target != PIPE_BUFFER && templ->format != tex->format) {
+ const struct util_format_description *tex_desc
+ = util_format_description(tex->format);
+ const struct util_format_description *templ_desc
+ = util_format_description(templ->format);
+
+ assert(tex_desc->block.bits == templ_desc->block.bits);
+
+ /* Adjust size of surface if and only if the block width or
+ * height is changed. */
+ if (tex_desc->block.width != templ_desc->block.width ||
+ tex_desc->block.height != templ_desc->block.height) {
+ unsigned nblks_x = util_format_get_nblocksx(tex->format, width);
+ unsigned nblks_y = util_format_get_nblocksy(tex->format, height);
+
+ width = nblks_x * templ_desc->block.width;
+ height = nblks_y * templ_desc->block.height;
+ }
+ }
- return r600_create_surface_custom(pipe, tex, templ,
- u_minify(tex->width0, level),
- u_minify(tex->height0, level));
+ return r600_create_surface_custom(pipe, tex, templ, width, height);
}
static void r600_surface_destroy(struct pipe_context *pipe,
@@ -1388,7 +1412,6 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
return;
for (i = 0; i < fb->nr_cbufs; i++) {
- struct r600_surface *surf;
struct r600_texture *tex;
unsigned clear_bit = PIPE_CLEAR_COLOR0 << i;
@@ -1399,7 +1422,6 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
if (!(*buffers & clear_bit))
continue;
- surf = (struct r600_surface *)fb->cbufs[i];
tex = (struct r600_texture *)fb->cbufs[i]->texture;
/* 128-bit formats are unsupported */
@@ -1446,8 +1468,8 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
if (clear_words_needed)
tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
} else {
- /* RB+ doesn't work with CMASK fast clear. */
- if (surf->sx_ps_downconvert)
+ /* Stoney/RB+ doesn't work with CMASK fast clear. */
+ if (rctx->family == CHIP_STONEY)
continue;
/* ensure CMASK is enabled */
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 76be37625f3..f5e3f6af1a0 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -1452,6 +1452,74 @@ static void emit_minmax_int(const struct lp_build_tgsi_action *action,
emit_data->args[1], "");
}
+static void pk2h_fetch_args(struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{ /* Fetch the two PK2H inputs: channels X and Y, both fetched with index 0. */
+ emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
+ 0, TGSI_CHAN_X);
+ emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
+ 0, TGSI_CHAN_Y);
+}
+
+static void emit_pk2h(const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{ /* Pack args[0] and args[1] as two half floats into one integer (args[0] low, args[1] high). */
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ LLVMContextRef context = bld_base->base.gallivm->context;
+ struct lp_build_context *uint_bld = &bld_base->uint_bld;
+ LLVMTypeRef fp16, i16;
+ LLVMValueRef const16, comp[2];
+ unsigned i;
+
+ fp16 = LLVMHalfTypeInContext(context);
+ i16 = LLVMInt16TypeInContext(context);
+ const16 = lp_build_const_int32(uint_bld->gallivm, 16); /* shift amount for the upper half */
+ /* Narrow each input to half precision, reinterpret its bits as i16, then zero-extend. */
+ for (i = 0; i < 2; i++) {
+ comp[i] = LLVMBuildFPTrunc(builder, emit_data->args[i], fp16, "");
+ comp[i] = LLVMBuildBitCast(builder, comp[i], i16, "");
+ comp[i] = LLVMBuildZExt(builder, comp[i], uint_bld->elem_type, "");
+ }
+ /* Shift the second half left by 16 and OR it with the first. */
+ comp[1] = LLVMBuildShl(builder, comp[1], const16, "");
+ comp[0] = LLVMBuildOr(builder, comp[0], comp[1], "");
+
+ emit_data->output[emit_data->chan] = comp[0]; /* only the current channel's output is written */
+}
+
+static void up2h_fetch_args(struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{ /* Fetch the single UP2H input: channel X, fetched with index 0. */
+ emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
+ 0, TGSI_CHAN_X);
+}
+
+static void emit_up2h(const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{ /* Unpack the two half floats held in args[0] into output[0] (low 16 bits) and output[1] (bits 16 and up). */
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ LLVMContextRef context = bld_base->base.gallivm->context;
+ struct lp_build_context *uint_bld = &bld_base->uint_bld;
+ LLVMTypeRef fp16, i16;
+ LLVMValueRef const16, input, val;
+ unsigned i;
+
+ fp16 = LLVMHalfTypeInContext(context);
+ i16 = LLVMInt16TypeInContext(context);
+ const16 = lp_build_const_int32(uint_bld->gallivm, 16);
+ input = emit_data->args[0];
+ /* i == 0 uses the input as is; i == 1 first shifts it right by 16. */
+ for (i = 0; i < 2; i++) {
+ val = i == 1 ? LLVMBuildLShr(builder, input, const16, "") : input;
+ val = LLVMBuildTrunc(builder, val, i16, ""); /* keep the low 16 bits */
+ val = LLVMBuildBitCast(builder, val, fp16, ""); /* reinterpret the bits as a half float */
+ emit_data->output[i] =
+ LLVMBuildFPExt(builder, val, bld_base->base.elem_type, "");
+ }
+}
+
void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
{
struct lp_type type;
@@ -1581,6 +1649,8 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_UMSB].emit = emit_umsb;
bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or;
+ bld_base->op_actions[TGSI_OPCODE_PK2H].fetch_args = pk2h_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_PK2H].emit = emit_pk2h;
bld_base->op_actions[TGSI_OPCODE_POPC].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32";
bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem;
@@ -1618,6 +1688,8 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
+ bld_base->op_actions[TGSI_OPCODE_UP2H].fetch_args = up2h_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_UP2H].emit = emit_up2h;
}
void radeon_llvm_create_func(struct radeon_llvm_context * ctx,
@@ -1638,11 +1710,9 @@ void radeon_llvm_create_func(struct radeon_llvm_context * ctx,
void radeon_llvm_finalize_module(struct radeon_llvm_context * ctx)
{
struct gallivm_state * gallivm = ctx->soa.bld_base.base.gallivm;
- /* End the main function with Return*/
- LLVMBuildRetVoid(gallivm->builder);
/* Create the pass manager */
- ctx->gallivm.passmgr = LLVMCreateFunctionPassManagerForModule(
+ gallivm->passmgr = LLVMCreateFunctionPassManagerForModule(
gallivm->module);
/* This pass should eliminate all the load and store instructions */
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index 2e5caa67d10..7329ceedf04 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -245,46 +245,49 @@ struct radeon_winsys_cs {
};
struct radeon_info {
+ /* Device info. */
uint32_t pci_id;
enum radeon_family family;
enum chip_class chip_class;
uint64_t gart_size;
uint64_t vram_size;
- uint32_t max_sclk;
- uint32_t num_good_compute_units;
- uint32_t max_se;
- uint32_t max_sh_per_se;
+ boolean has_virtual_memory;
+ bool gfx_ib_pad_with_type2;
+ boolean has_sdma;
+ boolean has_uvd;
+ uint32_t vce_fw_version;
+ uint32_t vce_harvest_config;
+ uint32_t clock_crystal_freq; /* divisor in timestamp/time-elapsed result conversion; 0 disables the timer query caps */
+ /* Kernel info. */
uint32_t drm_major; /* version */
uint32_t drm_minor;
uint32_t drm_patchlevel;
-
- boolean has_uvd;
- uint32_t vce_fw_version;
boolean has_userptr;
- bool gfx_ib_pad_with_type2;
+ /* Shader cores. */
+ uint32_t r600_max_quad_pipes; /* wave size / 16 */
+ uint32_t max_shader_clock;
+ uint32_t num_good_compute_units;
+ uint32_t max_se; /* shader engines */
+ uint32_t max_sh_per_se; /* shader arrays per shader engine */
+
+ /* Render backends (color + depth blocks). */
uint32_t r300_num_gb_pipes;
uint32_t r300_num_z_pipes;
-
- uint32_t r600_num_backends;
- uint32_t r600_clock_crystal_freq;
- uint32_t r600_tiling_config;
- uint32_t r600_num_tile_pipes;
- uint32_t r600_max_pipes;
- boolean r600_virtual_address;
- boolean r600_has_dma;
-
- uint32_t r600_backend_map;
- boolean r600_backend_map_valid;
-
+ uint32_t r600_gb_backend_map; /* R600 harvest config */
+ boolean r600_gb_backend_map_valid;
+ uint32_t r600_num_banks;
+ uint32_t num_render_backends; /* r600_query_init only installs render_condition when this is > 0 */
+ uint32_t num_tile_pipes; /* pipe count from PIPE_CONFIG */
+ uint32_t pipe_interleave_bytes;
+ uint32_t enabled_rb_mask; /* GCN harvest config */
+
+ /* Tile modes. */
boolean si_tile_mode_array_valid;
uint32_t si_tile_mode_array[32];
- uint32_t si_backend_enabled_mask;
-
boolean cik_macrotile_mode_array_valid;
uint32_t cik_macrotile_mode_array[16];
- uint32_t vce_harvest_config;
};
enum radeon_feature_id {
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c
index 105a1b2a878..76913914b38 100644
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -308,7 +308,7 @@ void cik_sdma_copy(struct pipe_context *ctx,
}
mtilew = (8 * rsrc->surface.bankw *
- sctx->screen->b.tiling_info.num_channels) *
+ sctx->screen->b.info.num_tile_pipes) *
rsrc->surface.mtilea;
assert(!(mtilew & (mtilew - 1)));
mtileh = (8 * rsrc->surface.bankh * num_banks) /
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 6ef6eeec178..825fbb181ba 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -461,9 +461,6 @@ static void si_delete_compute_state(struct pipe_context *ctx, void* state){
LLVMContextDispose(program->llvm_ctx);
}
#else
- FREE(program->shader.binary.config);
- FREE(program->shader.binary.rodata);
- FREE(program->shader.binary.global_symbol_offsets);
si_shader_destroy(&program->shader);
#endif
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index baa02293c41..d60c4515625 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -177,7 +177,7 @@ void si_begin_new_cs(struct si_context *ctx)
si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs);
si_mark_atom_dirty(ctx, &ctx->msaa_config);
si_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
- si_mark_atom_dirty(ctx, &ctx->cb_target_mask);
+ si_mark_atom_dirty(ctx, &ctx->cb_render_state);
si_mark_atom_dirty(ctx, &ctx->blend_color.atom);
si_mark_atom_dirty(ctx, &ctx->db_render_state);
si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c
index 7ee1daee7bf..24855e4e6f2 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -56,6 +56,8 @@ enum si_pc_reg_layout {
/* Registers are laid out in decreasing rather than increasing order. */
SI_PC_REG_REVERSE = 4,
+
+ SI_PC_FAKE = 8,
};
struct si_pc_block_base {
@@ -79,6 +81,23 @@ struct si_pc_block {
unsigned instances;
};
+/* Name suffix per shader type. The order is chosen to be compatible with
+ * GPUPerfStudio's hardcoding of performance counter group IDs.
+ */
+static const char * const si_pc_shader_type_suffixes[] = {
+ "", "_ES", "_GS", "_VS", "_PS", "_LS", "_HS", "_CS" /* same order as si_pc_shader_type_bits */
+};
+
+static const unsigned si_pc_shader_type_bits[] = { /* enable bits per shader type; its ARRAY_SIZE becomes pc->num_shader_types */
+ 0x7f, /* NOTE(review): presumably all seven stage-enable bits together (the "" suffix) - confirm */
+ S_036780_ES_EN(1),
+ S_036780_GS_EN(1),
+ S_036780_VS_EN(1),
+ S_036780_PS_EN(1),
+ S_036780_LS_EN(1),
+ S_036780_HS_EN(1),
+ S_036780_CS_EN(1),
+};
static struct si_pc_block_base cik_CB = {
.name = "CB",
@@ -308,56 +327,80 @@ static struct si_pc_block_base cik_WD = {
.counter0_lo = R_034200_WD_PERFCOUNTER0_LO,
};
+static struct si_pc_block_base cik_MC = { /* "MC" block description with no register fields set */
+ .name = "MC",
+ .num_counters = 4,
+ /* SI_PC_FAKE: select emission is skipped and reads store an immediate 0. */
+ .layout = SI_PC_FAKE,
+};
+
+static struct si_pc_block_base cik_SRBM = { /* "SRBM" block description with no register fields set */
+ .name = "SRBM",
+ .num_counters = 2,
+ /* SI_PC_FAKE: select emission is skipped and reads store an immediate 0. */
+ .layout = SI_PC_FAKE,
+};
+
/* Both the number of instances and selectors varies between chips of the same
* class. We only differentiate by class here and simply expose the maximum
* number over all chips in a class.
+ *
+ * Unfortunately, GPUPerfStudio uses the order of performance counter groups
+ * blindly once it believes it has identified the hardware, so the order of
+ * blocks here matters.
*/
static struct si_pc_block groups_CIK[] = {
{ &cik_CB, 226, 4 },
- { &cik_CPC, 22 },
{ &cik_CPF, 17 },
- { &cik_CPG, 46 },
{ &cik_DB, 257, 4 },
- { &cik_GDS, 121 },
{ &cik_GRBM, 34 },
{ &cik_GRBMSE, 15 },
- { &cik_IA, 22 },
- { &cik_PA_SC, 395 },
{ &cik_PA_SU, 153 },
+ { &cik_PA_SC, 395 },
{ &cik_SPI, 186 },
{ &cik_SQ, 252 },
{ &cik_SX, 32 },
{ &cik_TA, 111, 11 },
{ &cik_TCA, 39, 2 },
{ &cik_TCC, 160, 16 },
- { &cik_TCP, 154, 11 },
{ &cik_TD, 55, 11 },
+ { &cik_TCP, 154, 11 },
+ { &cik_GDS, 121 },
{ &cik_VGT, 140 },
+ { &cik_IA, 22 },
+ { &cik_MC, 22 }, /* placeholder block (SI_PC_FAKE layout) */
+ { &cik_SRBM, 19 }, /* placeholder block (SI_PC_FAKE layout) */
{ &cik_WD, 22 },
+ { &cik_CPG, 46 },
+ { &cik_CPC, 22 },
+
};
static struct si_pc_block groups_VI[] = {
{ &cik_CB, 396, 4 },
- { &cik_CPC, 24 },
{ &cik_CPF, 19 },
- { &cik_CPG, 48 },
{ &cik_DB, 257, 4 },
- { &cik_GDS, 121 },
{ &cik_GRBM, 34 },
{ &cik_GRBMSE, 15 },
- { &cik_IA, 24 },
- { &cik_PA_SC, 397 },
{ &cik_PA_SU, 153 },
+ { &cik_PA_SC, 397 },
{ &cik_SPI, 197 },
{ &cik_SQ, 273 },
{ &cik_SX, 34 },
{ &cik_TA, 119, 16 },
{ &cik_TCA, 35, 2 },
{ &cik_TCC, 192, 16 },
- { &cik_TCP, 180, 16 },
{ &cik_TD, 55, 16 },
+ { &cik_TCP, 180, 16 },
+ { &cik_GDS, 121 },
{ &cik_VGT, 147 },
+ { &cik_IA, 24 },
+ { &cik_MC, 22 }, /* placeholder block (SI_PC_FAKE layout) */
+ { &cik_SRBM, 27 }, /* placeholder block (SI_PC_FAKE layout) */
{ &cik_WD, 37 },
+ { &cik_CPG, 48 },
+ { &cik_CPC, 24 },
+
};
static void si_pc_get_size(struct r600_perfcounter_block *group,
@@ -368,7 +411,9 @@ static void si_pc_get_size(struct r600_perfcounter_block *group,
struct si_pc_block_base *regs = sigroup->b;
unsigned layout_multi = regs->layout & SI_PC_MULTI_MASK;
- if (layout_multi == SI_PC_MULTI_BLOCK) {
+ if (regs->layout & SI_PC_FAKE) {
+ *num_select_dw = 0;
+ } else if (layout_multi == SI_PC_MULTI_BLOCK) {
if (count < regs->num_multi)
*num_select_dw = 2 * (count + 2) + regs->num_prelude;
else
@@ -431,6 +476,9 @@ static void si_pc_emit_select(struct r600_common_context *ctx,
assert(count <= regs->num_counters);
+ if (regs->layout & SI_PC_FAKE)
+ return;
+
if (layout_multi == SI_PC_MULTI_BLOCK) {
assert(!(regs->layout & SI_PC_REG_REVERSE));
@@ -590,22 +638,35 @@ static void si_pc_emit_read(struct r600_common_context *ctx,
unsigned reg = regs->counter0_lo;
unsigned reg_delta = 8;
- if (regs->layout & SI_PC_REG_REVERSE)
- reg_delta = -reg_delta;
+ if (!(regs->layout & SI_PC_FAKE)) {
+ if (regs->layout & SI_PC_REG_REVERSE)
+ reg_delta = -reg_delta;
- for (idx = 0; idx < count; ++idx) {
- if (regs->counters)
- reg = regs->counters[idx];
-
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) |
- COPY_DATA_DST_SEL(COPY_DATA_MEM));
- radeon_emit(cs, reg >> 2);
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- va += 4;
- reg += reg_delta;
+ for (idx = 0; idx < count; ++idx) {
+ if (regs->counters)
+ reg = regs->counters[idx];
+
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) |
+ COPY_DATA_DST_SEL(COPY_DATA_MEM));
+ radeon_emit(cs, reg >> 2);
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ va += 4;
+ reg += reg_delta;
+ }
+ } else {
+ for (idx = 0; idx < count; ++idx) {
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) |
+ COPY_DATA_DST_SEL(COPY_DATA_MEM));
+ radeon_emit(cs, 0); /* immediate */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ va += 4;
+ }
}
}
@@ -656,6 +717,10 @@ void si_init_perfcounters(struct si_screen *screen)
pc->num_stop_cs_dwords += 6;
}
+ pc->num_shader_types = ARRAY_SIZE(si_pc_shader_type_bits);
+ pc->shader_type_suffixes = si_pc_shader_type_suffixes;
+ pc->shader_type_bits = si_pc_shader_type_bits;
+
pc->get_size = si_pc_get_size;
pc->emit_instance = si_pc_emit_instance;
pc->emit_shaders = si_pc_emit_shaders;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 0c1ae90f9da..61ce976c32c 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -215,7 +215,11 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
r600_target = radeon_llvm_get_r600_target(triple);
sctx->tm = LLVMCreateTargetMachine(r600_target, triple,
r600_get_llvm_processor_name(sscreen->b.family),
- "+DumpCode,+vgpr-spilling",
+#if HAVE_LLVM >= 0x0308
+ sscreen->b.debug_flags & DBG_SI_SCHED ?
+ "+DumpCode,+vgpr-spilling,+si-scheduler" :
+#endif
+ "+DumpCode,+vgpr-spilling",
LLVMCodeGenLevelDefault,
LLVMRelocDefault,
LLVMCodeModelDefault);
@@ -304,6 +308,8 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
case PIPE_CAP_INVALIDATE_BUFFER:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 1;
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
@@ -329,12 +335,18 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
return 4;
+ case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+ return HAVE_LLVM >= 0x0306;
+
case PIPE_CAP_GLSL_FEATURE_LEVEL:
return HAVE_LLVM >= 0x0307 ? 410 : 330;
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
return MIN2(sscreen->b.info.vram_size, 0xFFFFFFFF);
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ return 0;
+
/* Unsupported features. */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
@@ -344,12 +356,12 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_CLEAR_TEXTURE:
case PIPE_CAP_DRAW_PARAMETERS:
- case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
case PIPE_CAP_MULTI_DRAW_INDIRECT:
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
return 0;
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
@@ -399,7 +411,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
/* Timer queries, present when the clock frequency is non zero. */
case PIPE_CAP_QUERY_TIMESTAMP:
case PIPE_CAP_QUERY_TIME_ELAPSED:
- return sscreen->b.info.r600_clock_crystal_freq != 0;
+ return sscreen->b.info.clock_crystal_freq != 0;
case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
case PIPE_CAP_MIN_TEXEL_OFFSET:
@@ -541,57 +553,6 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
r600_destroy_common_screen(&sscreen->b);
}
-#define SI_TILE_MODE_COLOR_2D_8BPP 14
-
-/* Initialize pipe config. This is especially important for GPUs
- * with 16 pipes and more where it's initialized incorrectly by
- * the TILING_CONFIG ioctl. */
-static bool si_initialize_pipe_config(struct si_screen *sscreen)
-{
- unsigned mode2d;
-
- /* This is okay, because there can be no 2D tiling without
- * the tile mode array, so we won't need the pipe config.
- * Return "success".
- */
- if (!sscreen->b.info.si_tile_mode_array_valid)
- return true;
-
- /* The same index is used for the 2D mode on CIK too. */
- mode2d = sscreen->b.info.si_tile_mode_array[SI_TILE_MODE_COLOR_2D_8BPP];
-
- switch (G_009910_PIPE_CONFIG(mode2d)) {
- case V_02803C_ADDR_SURF_P2:
- sscreen->b.tiling_info.num_channels = 2;
- break;
- case V_02803C_X_ADDR_SURF_P4_8X16:
- case V_02803C_X_ADDR_SURF_P4_16X16:
- case V_02803C_X_ADDR_SURF_P4_16X32:
- case V_02803C_X_ADDR_SURF_P4_32X32:
- sscreen->b.tiling_info.num_channels = 4;
- break;
- case V_02803C_X_ADDR_SURF_P8_16X16_8X16:
- case V_02803C_X_ADDR_SURF_P8_16X32_8X16:
- case V_02803C_X_ADDR_SURF_P8_32X32_8X16:
- case V_02803C_X_ADDR_SURF_P8_16X32_16X16:
- case V_02803C_X_ADDR_SURF_P8_32X32_16X16:
- case V_02803C_X_ADDR_SURF_P8_32X32_16X32:
- case V_02803C_X_ADDR_SURF_P8_32X64_32X32:
- sscreen->b.tiling_info.num_channels = 8;
- break;
- case V_02803C_X_ADDR_SURF_P16_32X32_8X16:
- case V_02803C_X_ADDR_SURF_P16_32X32_16X16:
- sscreen->b.tiling_info.num_channels = 16;
- break;
- default:
- assert(0);
- fprintf(stderr, "radeonsi: Unknown pipe config %i.\n",
- G_009910_PIPE_CONFIG(mode2d));
- return false;
- }
- return true;
-}
-
static bool si_init_gs_info(struct si_screen *sscreen)
{
switch (sscreen->b.family) {
@@ -636,7 +597,6 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
sscreen->b.b.resource_create = r600_resource_create_common;
if (!r600_common_screen_init(&sscreen->b, ws) ||
- !si_initialize_pipe_config(sscreen) ||
!si_init_gs_info(sscreen)) {
FREE(sscreen);
return NULL;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index e2725fe3679..48947442757 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -193,7 +193,7 @@ struct si_context {
struct r600_atom db_render_state;
struct r600_atom msaa_config;
struct si_sample_mask sample_mask;
- struct r600_atom cb_target_mask;
+ struct r600_atom cb_render_state;
struct si_blend_color blend_color;
struct r600_atom clip_regs;
struct si_clip_state clip_state;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 94c1129c88d..d9ed6b234e0 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4074,7 +4074,7 @@ void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
si_shader_dump_disassembly(&shader->binary, debug);
si_shader_dump_stats(sscreen, &shader->config,
- shader->selector->info.num_inputs,
+ shader->selector ? shader->selector->info.num_inputs : 0,
shader->binary.code_size, debug, processor);
}
@@ -4092,7 +4092,7 @@ int si_compile_llvm(struct si_screen *sscreen,
if (r600_can_dump_shader(&sscreen->b, processor)) {
fprintf(stderr, "radeonsi: Compiling shader %d\n", count);
- if (!(sscreen->b.debug_flags & DBG_NO_IR))
+ if (!(sscreen->b.debug_flags & (DBG_NO_IR | DBG_PREOPT_IR)))
LLVMDumpModule(mod);
}
@@ -4177,6 +4177,13 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen,
si_llvm_export_vs(bld_base, outputs, gsinfo->num_outputs);
+ LLVMBuildRetVoid(bld_base->base.gallivm->builder);
+
+ /* Dump LLVM IR before any optimization passes */
+ if (sscreen->b.debug_flags & DBG_PREOPT_IR &&
+ r600_can_dump_shader(&sscreen->b, TGSI_PROCESSOR_GEOMETRY))
+ LLVMDumpModule(bld_base->base.gallivm->module);
+
radeon_llvm_finalize_module(&si_shader_ctx->radeon_bld);
if (dump)
@@ -4383,9 +4390,16 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
goto out;
}
+ LLVMBuildRetVoid(bld_base->base.gallivm->builder);
+ mod = bld_base->base.gallivm->module;
+
+ /* Dump LLVM IR before any optimization passes */
+ if (sscreen->b.debug_flags & DBG_PREOPT_IR &&
+ r600_can_dump_shader(&sscreen->b, si_shader_ctx.type))
+ LLVMDumpModule(mod);
+
radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);
- mod = bld_base->base.gallivm->module;
r = si_compile_llvm(sscreen, &shader->binary, &shader->config, tm,
mod, debug, si_shader_ctx.type);
if (r) {
@@ -4423,14 +4437,6 @@ out:
return r;
}
-void si_shader_destroy_binary(struct radeon_shader_binary *binary)
-{
- FREE(binary->code);
- FREE(binary->rodata);
- FREE(binary->relocs);
- FREE(binary->disasm_string);
-}
-
void si_shader_destroy(struct si_shader *shader)
{
if (shader->gs_copy_shader) {
@@ -4442,5 +4448,6 @@ void si_shader_destroy(struct si_shader *shader)
r600_resource_reference(&shader->scratch_bo, NULL);
r600_resource_reference(&shader->bo, NULL);
- si_shader_destroy_binary(&shader->binary);
+
+ radeon_shader_binary_clean(&shader->binary);
}
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index c1512078a18..98bdb890a45 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -345,7 +345,6 @@ int si_compile_llvm(struct si_screen *sscreen,
struct pipe_debug_callback *debug,
unsigned processor);
void si_shader_destroy(struct si_shader *shader);
-void si_shader_destroy_binary(struct radeon_shader_binary *binary);
unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 9e0ccfc5dde..bf780777b50 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -97,7 +97,7 @@ uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex)
}
/* The old way. */
- switch (sscreen->b.tiling_info.num_banks) {
+ switch (sscreen->b.info.r600_num_banks) {
case 2:
return V_02803C_ADDR_SURF_2_BANK;
case 4:
@@ -189,14 +189,14 @@ unsigned cik_db_pipe_config(struct si_screen *sscreen, unsigned tile_mode)
/* This is probably broken for a lot of chips, but it's only used
* if the kernel cannot return the tile mode array for CIK. */
- switch (sscreen->b.info.r600_num_tile_pipes) {
+ switch (sscreen->b.info.num_tile_pipes) {
case 16:
return V_02803C_X_ADDR_SURF_P16_32X32_16X16;
case 8:
return V_02803C_X_ADDR_SURF_P8_32X32_16X16;
case 4:
default:
- if (sscreen->b.info.r600_num_backends == 4)
+ if (sscreen->b.info.num_render_backends == 4)
return V_02803C_X_ADDR_SURF_P4_16X16;
else
return V_02803C_X_ADDR_SURF_P4_8X16;
@@ -238,7 +238,8 @@ static unsigned si_pack_float_12p4(float x)
/*
* Inferred framebuffer and blender state.
*
- * One of the reasons this must be derived from the framebuffer state is that:
+ * One of the reasons CB_TARGET_MASK must be derived from the framebuffer state
+ * is that:
* - The blend state mask is 0xf most of the time.
* - The COLOR1 format isn't INVALID because of possible dual-source blending,
* so COLOR1 is enabled pretty much all the time.
@@ -246,18 +247,18 @@ static unsigned si_pack_float_12p4(float x)
*
* Another reason is to avoid a hang with dual source blending.
*/
-static void si_emit_cb_target_mask(struct si_context *sctx, struct r600_atom *atom)
+static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *atom)
{
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
struct si_state_blend *blend = sctx->queued.named.blend;
- uint32_t mask = 0, i;
+ uint32_t cb_target_mask = 0, i;
for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++)
if (sctx->framebuffer.state.cbufs[i])
- mask |= 0xf << (4*i);
+ cb_target_mask |= 0xf << (4*i);
if (blend)
- mask &= blend->cb_target_mask;
+ cb_target_mask &= blend->cb_target_mask;
/* Avoid a hang that happens when dual source blending is enabled
* but there are not enough color outputs. This is undefined behavior,
@@ -268,9 +269,146 @@ static void si_emit_cb_target_mask(struct si_context *sctx, struct r600_atom *at
if (blend && blend->dual_src_blend &&
sctx->ps_shader.cso &&
(sctx->ps_shader.cso->info.colors_written & 0x3) != 0x3)
- mask = 0;
+ cb_target_mask = 0;
- radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, mask);
+ radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, cb_target_mask);
+
+ /* STONEY-specific register settings. */
+ if (sctx->b.family == CHIP_STONEY) {
+ unsigned spi_shader_col_format =
+ sctx->ps_shader.cso ?
+ sctx->ps_shader.current->key.ps.spi_shader_col_format : 0;
+ unsigned sx_ps_downconvert = 0;
+ unsigned sx_blend_opt_epsilon = 0;
+ unsigned sx_blend_opt_control = 0;
+
+ for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
+ struct r600_surface *surf =
+ (struct r600_surface*)sctx->framebuffer.state.cbufs[i];
+ unsigned format, swap, spi_format, colormask;
+ bool has_alpha, has_rgb;
+
+ if (!surf)
+ continue;
+
+ format = G_028C70_FORMAT(surf->cb_color_info);
+ swap = G_028C70_COMP_SWAP(surf->cb_color_info);
+ spi_format = (spi_shader_col_format >> (i * 4)) & 0xf;
+ colormask = (cb_target_mask >> (i * 4)) & 0xf;
+
+ /* Set if RGB and A are present. */
+ has_alpha = !G_028C74_FORCE_DST_ALPHA_1(surf->cb_color_attrib);
+
+ if (format == V_028C70_COLOR_8 ||
+ format == V_028C70_COLOR_16 ||
+ format == V_028C70_COLOR_32)
+ has_rgb = !has_alpha;
+ else
+ has_rgb = true;
+
+ /* Check the colormask and export format. */
+ if (!(colormask & (PIPE_MASK_RGBA & ~PIPE_MASK_A)))
+ has_rgb = false;
+ if (!(colormask & PIPE_MASK_A))
+ has_alpha = false;
+
+ if (spi_format == V_028714_SPI_SHADER_ZERO) {
+ has_rgb = false;
+ has_alpha = false;
+ }
+
+ /* Disable value checking for disabled channels. */
+ if (!has_rgb)
+ sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
+ if (!has_alpha)
+ sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);
+
+ /* Enable down-conversion for 32bpp and smaller formats. */
+ switch (format) {
+ case V_028C70_COLOR_8:
+ case V_028C70_COLOR_8_8:
+ case V_028C70_COLOR_8_8_8_8:
+ /* For 1 and 2-channel formats, use the superset thereof. */
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR ||
+ spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
+ spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4);
+ sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4);
+ }
+ break;
+
+ case V_028C70_COLOR_5_6_5:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4);
+ sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4);
+ }
+ break;
+
+ case V_028C70_COLOR_1_5_5_5:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4);
+ sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4);
+ }
+ break;
+
+ case V_028C70_COLOR_4_4_4_4:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4);
+ sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4);
+ }
+ break;
+
+ case V_028C70_COLOR_32:
+ if (swap == V_0280A0_SWAP_STD &&
+ spi_format == V_028714_SPI_SHADER_32_R)
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
+ else if (swap == V_0280A0_SWAP_ALT_REV &&
+ spi_format == V_028714_SPI_SHADER_32_AR)
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4);
+ break;
+
+ case V_028C70_COLOR_16:
+ case V_028C70_COLOR_16_16:
+ /* For 1-channel formats, use the superset thereof. */
+ if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR ||
+ spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
+ spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
+ spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
+ if (swap == V_0280A0_SWAP_STD ||
+ swap == V_0280A0_SWAP_STD_REV)
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);
+ else
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4);
+ }
+ break;
+
+ case V_028C70_COLOR_10_11_11:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4);
+ sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT << (i * 4);
+ }
+ break;
+
+ case V_028C70_COLOR_2_10_10_10:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4);
+ sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4);
+ }
+ break;
+ }
+ }
+
+ if (sctx->screen->b.debug_flags & DBG_NO_RB_PLUS) {
+ sx_ps_downconvert = 0;
+ sx_blend_opt_epsilon = 0;
+ sx_blend_opt_control = 0;
+ }
+
+ radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3);
+ radeon_emit(cs, sx_ps_downconvert); /* R_028754_SX_PS_DOWNCONVERT */
+ radeon_emit(cs, sx_blend_opt_epsilon); /* R_028758_SX_BLEND_OPT_EPSILON */
+ radeon_emit(cs, sx_blend_opt_control); /* R_02875C_SX_BLEND_OPT_CONTROL */
+ }
}
/*
@@ -390,6 +528,36 @@ static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha)
}
}
+/**
+ * Get rid of DST in the blend factors by commuting the operands:
+ * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
+ */
+static void si_blend_remove_dst(unsigned *func, unsigned *src_factor,
+ unsigned *dst_factor, unsigned expected_dst,
+ unsigned replacement_src)
+{
+ if (*src_factor == expected_dst &&
+ *dst_factor == PIPE_BLENDFACTOR_ZERO) {
+ *src_factor = PIPE_BLENDFACTOR_ZERO;
+ *dst_factor = replacement_src;
+
+ /* Commuting the operands requires reversing subtractions. */
+ if (*func == PIPE_BLEND_SUBTRACT)
+ *func = PIPE_BLEND_REVERSE_SUBTRACT;
+ else if (*func == PIPE_BLEND_REVERSE_SUBTRACT)
+ *func = PIPE_BLEND_SUBTRACT;
+ }
+}
+
+static bool si_blend_factor_uses_dst(unsigned factor)
+{
+ return factor == PIPE_BLENDFACTOR_DST_COLOR ||
+ factor == PIPE_BLENDFACTOR_DST_ALPHA ||
+ factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+ factor == PIPE_BLENDFACTOR_INV_DST_ALPHA ||
+ factor == PIPE_BLENDFACTOR_INV_DST_COLOR;
+}
+
static void *si_create_blend_state_mode(struct pipe_context *ctx,
const struct pipe_blend_state *state,
unsigned mode)
@@ -397,7 +565,7 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
struct si_context *sctx = (struct si_context*)ctx;
struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend);
struct si_pm4_state *pm4 = &blend->pm4;
-
+ uint32_t sx_mrt_blend_opt[8] = {0};
uint32_t color_control = 0;
if (!blend)
@@ -435,12 +603,17 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
unsigned srcA = state->rt[j].alpha_src_factor;
unsigned dstA = state->rt[j].alpha_dst_factor;
+ unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt;
unsigned blend_cntl = 0;
+ sx_mrt_blend_opt[i] =
+ S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
+ S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
+
if (!state->rt[j].colormask)
continue;
- /* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */
+ /* cb_render_state will disable unused ones */
blend->cb_target_mask |= state->rt[j].colormask << (4 * i);
if (!state->rt[j].blend_enable) {
@@ -448,6 +621,50 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
continue;
}
+ /* Blending optimizations for Stoney.
+ * These transformations don't change the behavior.
+ *
+ * First, get rid of DST in the blend factors:
+ * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
+ */
+ si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB,
+ PIPE_BLENDFACTOR_DST_COLOR,
+ PIPE_BLENDFACTOR_SRC_COLOR);
+ si_blend_remove_dst(&eqA, &srcA, &dstA,
+ PIPE_BLENDFACTOR_DST_COLOR,
+ PIPE_BLENDFACTOR_SRC_COLOR);
+ si_blend_remove_dst(&eqA, &srcA, &dstA,
+ PIPE_BLENDFACTOR_DST_ALPHA,
+ PIPE_BLENDFACTOR_SRC_ALPHA);
+
+ /* Look up the ideal settings from tables. */
+ srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false);
+ dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false);
+ srcA_opt = si_translate_blend_opt_factor(srcA, true);
+ dstA_opt = si_translate_blend_opt_factor(dstA, true);
+
+ /* Handle interdependencies. */
+ if (si_blend_factor_uses_dst(srcRGB))
+ dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
+ if (si_blend_factor_uses_dst(srcA))
+ dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
+
+ if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE &&
+ (dstRGB == PIPE_BLENDFACTOR_ZERO ||
+ dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE))
+ dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
+
+ /* Set the final value. */
+ sx_mrt_blend_opt[i] =
+ S_028760_COLOR_SRC_OPT(srcRGB_opt) |
+ S_028760_COLOR_DST_OPT(dstRGB_opt) |
+ S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) |
+ S_028760_ALPHA_SRC_OPT(srcA_opt) |
+ S_028760_ALPHA_DST_OPT(dstA_opt) |
+ S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA));
+
+ /* Set blend state. */
blend_cntl |= S_028780_ENABLE(1);
blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB));
blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB));
@@ -480,41 +697,13 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
}
if (sctx->b.family == CHIP_STONEY) {
- uint32_t sx_blend_opt_control = 0;
-
- for (int i = 0; i < 8; i++) {
- const int j = state->independent_blend_enable ? i : 0;
-
- /* TODO: We can also set this if the surface doesn't contain RGB. */
- if (!state->rt[j].blend_enable ||
- !(state->rt[j].colormask & (PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B)))
- sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (4 * i);
-
- /* TODO: We can also set this if the surface doesn't contain alpha. */
- if (!state->rt[j].blend_enable ||
- !(state->rt[j].colormask & PIPE_MASK_A))
- sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (4 * i);
-
- if (!state->rt[j].blend_enable) {
- si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
- S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
- S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED));
- continue;
- }
-
+ for (int i = 0; i < 8; i++)
si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
- S_028760_COLOR_SRC_OPT(si_translate_blend_opt_factor(state->rt[j].rgb_src_factor, false)) |
- S_028760_COLOR_DST_OPT(si_translate_blend_opt_factor(state->rt[j].rgb_dst_factor, false)) |
- S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(state->rt[j].rgb_func)) |
- S_028760_ALPHA_SRC_OPT(si_translate_blend_opt_factor(state->rt[j].alpha_src_factor, true)) |
- S_028760_ALPHA_DST_OPT(si_translate_blend_opt_factor(state->rt[j].alpha_dst_factor, true)) |
- S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(state->rt[j].alpha_func)));
- }
-
- si_pm4_set_reg(pm4, R_02875C_SX_BLEND_OPT_CONTROL, sx_blend_opt_control);
+ sx_mrt_blend_opt[i]);
- /* RB+ doesn't work with dual source blending */
- if (blend->dual_src_blend)
+ /* RB+ doesn't work with dual source blending, logic op, and RESOLVE. */
+ if (blend->dual_src_blend || state->logicop_enable ||
+ mode == V_028808_CB_RESOLVE)
color_control |= S_028808_DISABLE_DUAL_QUAD(1);
}
@@ -532,7 +721,7 @@ static void si_bind_blend_state(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state);
- si_mark_atom_dirty(sctx, &sctx->cb_target_mask);
+ si_mark_atom_dirty(sctx, &sctx->cb_render_state);
}
static void si_delete_blend_state(struct pipe_context *ctx, void *state)
@@ -2097,8 +2286,10 @@ static void si_initialize_color_surface(struct si_context *sctx,
color_pitch = S_028C64_TILE_MAX(pitch);
+ /* Intensity is implemented as Red, so treat it that way. */
color_attrib = S_028C74_TILE_MODE_INDEX(tile_mode_index) |
- S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1);
+ S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1 ||
+ util_format_is_intensity(surf->base.format));
if (rtex->resource.b.b.nr_samples > 1) {
unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);
@@ -2169,61 +2360,6 @@ static void si_initialize_color_surface(struct si_context *sctx,
/* Determine pixel shader export format */
si_choose_spi_color_formats(surf, format, swap, ntype, rtex->is_depth);
- if (sctx->b.family == CHIP_STONEY &&
- !(sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)) {
- switch (desc->channel[0].size) {
- case 32:
- if (desc->nr_channels == 1) {
- if (swap == V_0280A0_SWAP_STD)
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_R;
- else if (swap == V_0280A0_SWAP_ALT_REV)
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_A;
- }
- break;
- case 16:
- /* For 1-channel formats, use the superset thereof. */
- if (desc->nr_channels <= 2) {
- if (swap == V_0280A0_SWAP_STD ||
- swap == V_0280A0_SWAP_STD_REV)
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_16_16_GR;
- else
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_16_16_AR;
- }
- break;
- case 11:
- if (desc->nr_channels == 3) {
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_10_11_11;
- surf->sx_blend_opt_epsilon = V_028758_11BIT_FORMAT;
- }
- break;
- case 10:
- if (desc->nr_channels == 4) {
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_2_10_10_10;
- surf->sx_blend_opt_epsilon = V_028758_10BIT_FORMAT;
- }
- break;
- case 8:
- /* For 1 and 2-channel formats, use the superset thereof. */
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_8_8_8_8;
- surf->sx_blend_opt_epsilon = V_028758_8BIT_FORMAT;
- break;
- case 5:
- if (desc->nr_channels == 3) {
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_5_6_5;
- surf->sx_blend_opt_epsilon = V_028758_6BIT_FORMAT;
- } else if (desc->nr_channels == 4) {
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_1_5_5_5;
- surf->sx_blend_opt_epsilon = V_028758_5BIT_FORMAT;
- }
- break;
- case 4:
- /* For 1 nad 2-channel formats, use the superset thereof. */
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_4_4_4_4;
- surf->sx_blend_opt_epsilon = V_028758_4BIT_FORMAT;
- break;
- }
- }
-
surf->color_initialized = true;
}
@@ -2459,7 +2595,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
}
si_update_poly_offset_state(sctx);
- si_mark_atom_dirty(sctx, &sctx->cb_target_mask);
+ si_mark_atom_dirty(sctx, &sctx->cb_render_state);
si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
if (sctx->framebuffer.nr_samples != old_nr_samples) {
@@ -2512,8 +2648,6 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
unsigned i, nr_cbufs = state->nr_cbufs;
struct r600_texture *tex = NULL;
struct r600_surface *cb = NULL;
- uint32_t sx_ps_downconvert = 0;
- uint32_t sx_blend_opt_epsilon = 0;
/* Colorbuffers. */
for (i = 0; i < nr_cbufs; i++) {
@@ -2564,29 +2698,18 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
if (sctx->b.chip_class >= VI)
radeon_emit(cs, cb->cb_dcc_base); /* R_028C94_CB_COLOR0_DCC_BASE */
-
- sx_ps_downconvert |= cb->sx_ps_downconvert << (4 * i);
- sx_blend_opt_epsilon |= cb->sx_blend_opt_epsilon << (4 * i);
}
/* set CB_COLOR1_INFO for possible dual-src blending */
if (i == 1 && state->cbufs[0] &&
sctx->framebuffer.dirty_cbufs & (1 << 0)) {
radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
cb->cb_color_info | tex->cb_color_info);
- sx_ps_downconvert |= cb->sx_ps_downconvert << (4 * i);
- sx_blend_opt_epsilon |= cb->sx_blend_opt_epsilon << (4 * i);
i++;
}
for (; i < 8 ; i++)
if (sctx->framebuffer.dirty_cbufs & (1 << i))
radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
- if (sctx->b.family == CHIP_STONEY) {
- radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 2);
- radeon_emit(cs, sx_ps_downconvert); /* R_028754_SX_PS_DOWNCONVERT */
- radeon_emit(cs, sx_blend_opt_epsilon); /* R_028758_SX_BLEND_OPT_EPSILON */
- }
-
/* ZS buffer. */
if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
@@ -3374,7 +3497,7 @@ void si_init_state_functions(struct si_context *sctx)
si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state);
si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config);
si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask);
- si_init_atom(sctx, &sctx->cb_target_mask, &sctx->atoms.s.cb_target_mask, si_emit_cb_target_mask);
+ si_init_atom(sctx, &sctx->cb_render_state, &sctx->atoms.s.cb_render_state, si_emit_cb_render_state);
si_init_atom(sctx, &sctx->blend_color.atom, &sctx->atoms.s.blend_color, si_emit_blend_color);
si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs);
si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state);
@@ -3449,8 +3572,8 @@ si_write_harvested_raster_configs(struct si_context *sctx,
{
unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1);
unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1);
- unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
- unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16);
+ unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask;
+ unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16);
unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2);
unsigned rb_per_se = num_rb / num_se;
unsigned se_mask[4];
@@ -3579,8 +3702,8 @@ si_write_harvested_raster_configs(struct si_context *sctx,
static void si_init_config(struct si_context *sctx)
{
struct si_screen *sscreen = sctx->screen;
- unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16);
- unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
+ unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16);
+ unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask;
unsigned raster_config, raster_config_1;
uint64_t border_color_va = sctx->border_color_buffer->gpu_address;
struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index be3488e6dba..507f45938ce 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -124,7 +124,7 @@ union si_state_atoms {
struct r600_atom *db_render_state;
struct r600_atom *msaa_config;
struct r600_atom *sample_mask;
- struct r600_atom *cb_target_mask;
+ struct r600_atom *cb_render_state;
struct r600_atom *blend_color;
struct r600_atom *clip_regs;
struct r600_atom *clip_state;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 36174eb5a94..bbef429edc5 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -705,23 +705,21 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
}
/* Select the hw shader variant depending on the current state. */
-static int si_shader_select(struct pipe_context *ctx,
- struct si_shader_ctx_state *state)
+static int si_shader_select_with_key(struct pipe_context *ctx,
+ struct si_shader_ctx_state *state,
+ union si_shader_key *key)
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_shader_selector *sel = state->cso;
struct si_shader *current = state->current;
- union si_shader_key key;
struct si_shader *iter, *shader = NULL;
int r;
- si_shader_selector_key(ctx, sel, &key);
-
/* Check if we don't need to change anything.
* This path is also used for most shaders that don't need multiple
* variants, it will cost just a computation of the key and this
* test. */
- if (likely(current && memcmp(&current->key, &key, sizeof(key)) == 0))
+ if (likely(current && memcmp(&current->key, key, sizeof(*key)) == 0))
return 0;
pipe_mutex_lock(sel->mutex);
@@ -730,7 +728,7 @@ static int si_shader_select(struct pipe_context *ctx,
for (iter = sel->first_variant; iter; iter = iter->next_variant) {
/* Don't check the "current" shader. We checked it above. */
if (current != iter &&
- memcmp(&iter->key, &key, sizeof(key)) == 0) {
+ memcmp(&iter->key, key, sizeof(*key)) == 0) {
state->current = iter;
pipe_mutex_unlock(sel->mutex);
return 0;
@@ -744,7 +742,7 @@ static int si_shader_select(struct pipe_context *ctx,
return -ENOMEM;
}
shader->selector = sel;
- shader->key = key;
+ shader->key = *key;
r = si_shader_create(sctx->screen, sctx->tm, shader, &sctx->b.debug);
if (unlikely(r)) {
@@ -768,6 +766,15 @@ static int si_shader_select(struct pipe_context *ctx,
return 0;
}
+static int si_shader_select(struct pipe_context *ctx,
+ struct si_shader_ctx_state *state)
+{
+ union si_shader_key key;
+
+ si_shader_selector_key(ctx, state->cso, &key);
+ return si_shader_select_with_key(ctx, state, &key);
+}
+
static void *si_create_shader_selector(struct pipe_context *ctx,
const struct pipe_shader_state *state)
{
@@ -888,8 +895,27 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
/* Pre-compilation. */
if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
struct si_shader_ctx_state state = {sel};
+ union si_shader_key key;
- if (si_shader_select(ctx, &state)) {
+ memset(&key, 0, sizeof(key));
+
+ /* Set reasonable defaults, so that the shader key doesn't
+ * cause any code to be eliminated.
+ */
+ switch (sel->type) {
+ case PIPE_SHADER_TESS_CTRL:
+ key.tcs.prim_mode = PIPE_PRIM_TRIANGLES;
+ break;
+ case PIPE_SHADER_FRAGMENT:
+ key.ps.alpha_func = PIPE_FUNC_ALWAYS;
+ for (i = 0; i < 8; i++)
+ if (sel->info.colors_written & (1 << i))
+ key.ps.spi_shader_col_format |=
+ V_028710_SPI_SHADER_FP16_ABGR << (i * 4);
+ break;
+ }
+
+ if (si_shader_select_with_key(ctx, &state, &key)) {
fprintf(stderr, "radeonsi: can't create a shader\n");
tgsi_free_tokens(sel->tokens);
FREE(sel);
@@ -1001,7 +1027,7 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
sctx->ps_shader.cso = sel;
sctx->ps_shader.current = sel ? sel->first_variant : NULL;
- si_mark_atom_dirty(sctx, &sctx->cb_target_mask);
+ si_mark_atom_dirty(sctx, &sctx->cb_render_state);
}
static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
@@ -1726,6 +1752,9 @@ bool si_update_shaders(struct si_context *sctx)
si_mark_atom_dirty(sctx, &sctx->spi_ps_input);
}
+ if (sctx->b.family == CHIP_STONEY && si_pm4_state_changed(sctx, ps))
+ si_mark_atom_dirty(sctx, &sctx->cb_render_state);
+
if (sctx->ps_db_shader_control != db_shader_control) {
sctx->ps_db_shader_control = db_shader_control;
si_mark_atom_dirty(sctx, &sctx->db_render_state);
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 3bc580899d4..097ffe6f920 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -179,6 +179,8 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
return 1;
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
return 1;
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ return 0;
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
return 65536;
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
@@ -261,6 +263,9 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
}
/* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index 8d04222a0cd..d5405f8eacf 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -358,6 +358,8 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
return 64;
@@ -396,6 +398,8 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_DRAW_PARAMETERS:
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
return 0;
}
diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index 6e703f76499..4d03fe1ee0b 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -1578,6 +1578,45 @@ static void trace_context_set_tess_state(struct pipe_context *_context,
}
+static void trace_context_set_shader_buffers(struct pipe_context *_context,
+ unsigned shader,
+ unsigned start, unsigned nr,
+ struct pipe_shader_buffer *buffers)
+{
+ struct trace_context *tr_context = trace_context(_context);
+ struct pipe_context *context = tr_context->pipe;
+ struct pipe_shader_buffer *_buffers = NULL;
+
+ trace_dump_call_begin("pipe_context", "set_shader_buffers");
+ trace_dump_arg(ptr, context);
+ trace_dump_arg(uint, shader);
+ trace_dump_arg(uint, start);
+ trace_dump_arg_begin("buffers");
+ trace_dump_struct_array(shader_buffer, buffers, nr);
+ trace_dump_arg_end();
+ trace_dump_call_end();
+
+ if (buffers) {
+ int i;
+
+ _buffers = MALLOC(nr * sizeof(struct pipe_shader_buffer));
+ if (!_buffers)
+ return;
+
+ for (i = 0; i < nr; i++) {
+ _buffers[i] = buffers[i];
+ _buffers[i].buffer = trace_resource_unwrap(
+ tr_context, _buffers[i].buffer);
+ }
+ }
+
+ context->set_shader_buffers(context, shader, start, nr, _buffers);
+
+ if (_buffers)
+ FREE(_buffers);
+}
+
+
static const struct debug_named_value rbug_blocker_flags[] = {
{"before", 1, NULL},
{"after", 2, NULL},
@@ -1675,6 +1714,7 @@ trace_context_create(struct trace_screen *tr_scr,
TR_CTX_INIT(texture_barrier);
TR_CTX_INIT(memory_barrier);
TR_CTX_INIT(set_tess_state);
+ TR_CTX_INIT(set_shader_buffers);
TR_CTX_INIT(transfer_map);
TR_CTX_INIT(transfer_unmap);
diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c
index 54f022a8ab6..cfbf53cf767 100644
--- a/src/gallium/drivers/trace/tr_dump_state.c
+++ b/src/gallium/drivers/trace/tr_dump_state.c
@@ -688,6 +688,24 @@ void trace_dump_constant_buffer(const struct pipe_constant_buffer *state)
}
+void trace_dump_shader_buffer(const struct pipe_shader_buffer *state)
+{
+ if (!trace_dumping_enabled_locked())
+ return;
+
+ if(!state) {
+ trace_dump_null();
+ return;
+ }
+
+ trace_dump_struct_begin("pipe_shader_buffer");
+ trace_dump_member(resource_ptr, state, buffer);
+ trace_dump_member(uint, state, buffer_offset);
+ trace_dump_member(uint, state, buffer_size);
+ trace_dump_struct_end();
+}
+
+
void trace_dump_draw_info(const struct pipe_draw_info *state)
{
if (!trace_dumping_enabled_locked())
diff --git a/src/gallium/drivers/trace/tr_dump_state.h b/src/gallium/drivers/trace/tr_dump_state.h
index 117b3c75e87..4f4ade155bc 100644
--- a/src/gallium/drivers/trace/tr_dump_state.h
+++ b/src/gallium/drivers/trace/tr_dump_state.h
@@ -78,6 +78,8 @@ void trace_dump_vertex_element(const struct pipe_vertex_element *state);
void trace_dump_constant_buffer(const struct pipe_constant_buffer *state);
+void trace_dump_shader_buffer(const struct pipe_shader_buffer *buffer);
+
void trace_dump_draw_info(const struct pipe_draw_info *state);
void trace_dump_blit_info(const struct pipe_blit_info *);
diff --git a/src/gallium/drivers/vc4/vc4_job.c b/src/gallium/drivers/vc4/vc4_job.c
index 5d071ec862f..41660f6ac4d 100644
--- a/src/gallium/drivers/vc4/vc4_job.c
+++ b/src/gallium/drivers/vc4/vc4_job.c
@@ -245,10 +245,19 @@ vc4_job_submit(struct vc4_context *vc4)
fprintf(stderr, "Draw call returned %s. "
"Expect corruption.\n", strerror(errno));
warned = true;
+ } else if (!ret) {
+ vc4->last_emit_seqno = submit.seqno;
}
}
- vc4->last_emit_seqno = submit.seqno;
+ if (vc4->last_emit_seqno - vc4->screen->finished_seqno > 5) {
+ if (!vc4_wait_seqno(vc4->screen,
+ vc4->last_emit_seqno - 5,
+ PIPE_TIMEOUT_INFINITE,
+ "job throttling")) {
+ fprintf(stderr, "Job throttling failed\n");
+ }
+ }
if (vc4_debug & VC4_DEBUG_ALWAYS_SYNC) {
if (!vc4_wait_seqno(vc4->screen, vc4->last_emit_seqno,
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
index 08c2dad8406..b19d31af6ac 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -127,6 +127,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
/* Unsupported features. */
case PIPE_CAP_ANISOTROPIC_FILTER:
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
case PIPE_CAP_CUBE_MAP_ARRAY:
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
@@ -199,6 +200,9 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
/* Stream output. */
diff --git a/src/gallium/drivers/virgl/virgl_screen.c b/src/gallium/drivers/virgl/virgl_screen.c
index fb2e5670ef0..18263e91e6a 100644
--- a/src/gallium/drivers/virgl/virgl_screen.c
+++ b/src/gallium/drivers/virgl/virgl_screen.c
@@ -169,6 +169,8 @@ virgl_get_param(struct pipe_screen *screen, enum pipe_cap param)
return vscreen->caps.caps.v1.max_tbo_size > 0;
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
return 0;
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ return 0;
case PIPE_CAP_CUBE_MAP_ARRAY:
return vscreen->caps.caps.v1.bset.cube_map_array;
case PIPE_CAP_TEXTURE_MULTISAMPLE:
@@ -228,6 +230,8 @@ virgl_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
return 0;
case PIPE_CAP_VENDOR_ID:
return 0x1af4;
@@ -557,6 +561,7 @@ virgl_create_screen(struct virgl_winsys *vws)
vws->get_caps(vws, &screen->caps);
+ screen->refcnt = 1;
util_format_s3tc_init();
return &screen->base;
diff --git a/src/gallium/drivers/virgl/virgl_screen.h b/src/gallium/drivers/virgl/virgl_screen.h
index 52e72ca4958..8cac38d7e96 100644
--- a/src/gallium/drivers/virgl/virgl_screen.h
+++ b/src/gallium/drivers/virgl/virgl_screen.h
@@ -28,6 +28,12 @@
struct virgl_screen {
struct pipe_screen base;
+
+ int refcnt;
+
+ /* place for winsys to stash its own stuff: */
+ void *winsys_priv;
+
struct virgl_winsys *vws;
struct virgl_drm_caps caps;
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index f69a75be50e..6c95b7b2178 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -150,6 +150,28 @@ struct pipe_context {
struct pipe_query *q,
boolean wait,
union pipe_query_result *result);
+
+ /**
+ * Get results of a query, storing into resource. Note that this may not
+ * be used with batch queries.
+ *
+ * \param wait if true, this query will block until the result is ready
+ * \param result_type the type of the value being stored:
+ * \param index for queries that return multiple pieces of data, which
+ * item of that data to store (e.g. for
+ * PIPE_QUERY_PIPELINE_STATISTICS).
+ * When the index is -1, instead of the value of the query
+ * the driver should write a 1/0 to the appropriate
+ * location, with 1 meaning that the query result is available.
+ */
+ void (*get_query_result_resource)(struct pipe_context *pipe,
+ struct pipe_query *q,
+ boolean wait,
+ enum pipe_query_value_type result_type,
+ int index,
+ struct pipe_resource *resource,
+ unsigned offset);
+
/*@}*/
/**
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index b46187bc8a1..800f16cd250 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -352,6 +352,8 @@ enum pipe_flush_flags
* Flags for pipe_context::memory_barrier.
*/
#define PIPE_BARRIER_MAPPED_BUFFER (1 << 0)
+#define PIPE_BARRIER_SHADER_BUFFER (1 << 1)
+#define PIPE_BARRIER_QUERY_BUFFER (1 << 2)
/**
* Resource binding flags -- state tracker must specify in advance all
@@ -375,6 +377,7 @@ enum pipe_flush_flags
#define PIPE_BIND_SHADER_IMAGE (1 << 15) /* set_shader_images */
#define PIPE_BIND_COMPUTE_RESOURCE (1 << 16) /* set_compute_resources */
#define PIPE_BIND_COMMAND_ARGS_BUFFER (1 << 17) /* pipe_draw_info.indirect */
+#define PIPE_BIND_QUERY_BUFFER (1 << 18) /* get_query_result_resource */
/**
* The first two flags above were previously part of the amorphous
@@ -588,6 +591,7 @@ enum pipe_cap
PIPE_CAP_CUBE_MAP_ARRAY,
PIPE_CAP_TEXTURE_BUFFER_OBJECTS,
PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT,
+ PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY,
PIPE_CAP_TGSI_TEXCOORD,
PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER,
PIPE_CAP_QUERY_PIPELINE_STATISTICS,
@@ -645,6 +649,9 @@ enum pipe_cap
PIPE_CAP_INVALIDATE_BUFFER,
PIPE_CAP_GENERATE_MIPMAP,
PIPE_CAP_STRING_MARKER,
+ PIPE_CAP_SURFACE_REINTERPRET_BLOCKS,
+ PIPE_CAP_QUERY_BUFFER_OBJECT,
+ PIPE_CAP_QUERY_MEMORY_INFO,
};
#define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0)
@@ -837,6 +844,14 @@ union pipe_query_result
union pipe_numeric_type_union batch[1];
};
+enum pipe_query_value_type
+{
+ PIPE_QUERY_TYPE_I32,
+ PIPE_QUERY_TYPE_U32,
+ PIPE_QUERY_TYPE_I64,
+ PIPE_QUERY_TYPE_U64,
+};
+
union pipe_color_union
{
float f[4];
diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h
index f868d71db23..211bc2440f9 100644
--- a/src/gallium/include/pipe/p_screen.h
+++ b/src/gallium/include/pipe/p_screen.h
@@ -57,6 +57,7 @@ struct pipe_resource;
struct pipe_surface;
struct pipe_transfer;
struct pipe_box;
+struct pipe_memory_info;
/**
@@ -260,6 +261,11 @@ struct pipe_screen {
unsigned index,
struct pipe_driver_query_group_info *info);
+ /**
+ * Query information about memory usage.
+ */
+ void (*query_memory_info)(struct pipe_screen *screen,
+ struct pipe_memory_info *info);
};
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index f300207d4dd..6539017b77c 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -420,7 +420,7 @@ struct tgsi_property_data {
#define TGSI_OPCODE_FSLT 110
#define TGSI_OPCODE_FSNE 111
- /* gap */
+#define TGSI_OPCODE_MEMBAR 112
#define TGSI_OPCODE_CALLNZ 113
/* gap */
#define TGSI_OPCODE_BREAKC 115
@@ -744,6 +744,11 @@ struct tgsi_instruction_memory
unsigned Padding : 29;
};
+#define TGSI_MEMBAR_SHADER_BUFFER (1 << 0)
+#define TGSI_MEMBAR_ATOMIC_BUFFER (1 << 1)
+#define TGSI_MEMBAR_SHADER_IMAGE (1 << 2)
+#define TGSI_MEMBAR_SHARED (1 << 3)
+#define TGSI_MEMBAR_THREAD_GROUP (1 << 4)
#ifdef __cplusplus
}
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
index 2e4d2830199..ed62a33ad72 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -720,6 +720,19 @@ struct pipe_debug_callback
void *data;
};
+/**
+ * Information about memory usage. All sizes are in kilobytes.
+ */
+struct pipe_memory_info
+{
+ unsigned total_device_memory; /**< size of device memory, e.g. VRAM */
+ unsigned avail_device_memory; /**< free device memory at the moment */
+ unsigned total_staging_memory; /**< size of staging memory, e.g. GART */
+ unsigned avail_staging_memory; /**< free staging memory at the moment */
+ unsigned device_memory_evicted; /**< size of memory evicted (monotonic counter) */
+ unsigned nr_device_memory_evictions; /**< # of evictions (monotonic counter) */
+};
+
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/state_trackers/nine/Makefile.sources b/src/gallium/state_trackers/nine/Makefile.sources
index 99b623a5b59..8d178d4b18f 100644
--- a/src/gallium/state_trackers/nine/Makefile.sources
+++ b/src/gallium/state_trackers/nine/Makefile.sources
@@ -5,6 +5,8 @@ C_SOURCES := \
authenticatedchannel9.h \
basetexture9.c \
basetexture9.h \
+ buffer9.c \
+ buffer9.h \
cryptosession9.c \
cryptosession9.h \
cubetexture9.c \
diff --git a/src/gallium/state_trackers/nine/adapter9.c b/src/gallium/state_trackers/nine/adapter9.c
index 69e0fa25961..8428b1bd7eb 100644
--- a/src/gallium/state_trackers/nine/adapter9.c
+++ b/src/gallium/state_trackers/nine/adapter9.c
@@ -563,7 +563,7 @@ NineAdapter9_GetDeviceCaps( struct NineAdapter9 *This,
D3DPIPECAP(INDEP_BLEND_ENABLE, D3DPMISCCAPS_INDEPENDENTWRITEMASKS) |
/*D3DPMISCCAPS_PERSTAGECONSTANT |*/ /* TODO */
/*D3DPMISCCAPS_POSTBLENDSRGBCONVERT |*/ /* TODO */
- D3DPMISCCAPS_FOGANDSPECULARALPHA |
+ D3DPMISCCAPS_FOGANDSPECULARALPHA | /* Note: documentation of the flag is wrong */
D3DPIPECAP(BLEND_EQUATION_SEPARATE, D3DPMISCCAPS_SEPARATEALPHABLEND) |
D3DPIPECAP(MIXED_COLORBUFFER_FORMATS, D3DPMISCCAPS_MRTINDEPENDENTBITDEPTHS) |
D3DPMISCCAPS_MRTPOSTPIXELSHADERBLENDING |
@@ -618,7 +618,8 @@ NineAdapter9_GetDeviceCaps( struct NineAdapter9 *This,
pCaps->DestBlendCaps = pCaps->SrcBlendCaps;
- pCaps->AlphaCmpCaps = D3DPCMPCAPS_LESS |
+ pCaps->AlphaCmpCaps = D3DPCMPCAPS_NEVER |
+ D3DPCMPCAPS_LESS |
D3DPCMPCAPS_EQUAL |
D3DPCMPCAPS_LESSEQUAL |
D3DPCMPCAPS_GREATER |
@@ -980,7 +981,8 @@ NineAdapter9_CreateDevice( struct NineAdapter9 *This,
hr = NineDevice9_new(screen, &params, &caps, pPresentationParameters,
pD3D9, pPresentationGroup, This->ctx, FALSE, NULL,
- (struct NineDevice9 **)ppReturnedDeviceInterface);
+ (struct NineDevice9 **)ppReturnedDeviceInterface,
+ minor);
if (FAILED(hr)) {
DBG("Failed to create device.\n");
return hr;
@@ -1041,7 +1043,8 @@ NineAdapter9_CreateDeviceEx( struct NineAdapter9 *This,
hr = NineDevice9Ex_new(screen, &params, &caps, pPresentationParameters,
pFullscreenDisplayMode,
pD3D9Ex, pPresentationGroup, This->ctx,
- (struct NineDevice9Ex **)ppReturnedDeviceInterface);
+ (struct NineDevice9Ex **)ppReturnedDeviceInterface,
+ minor);
if (FAILED(hr)) {
DBG("Failed to create device.\n");
return hr;
diff --git a/src/gallium/state_trackers/nine/basetexture9.c b/src/gallium/state_trackers/nine/basetexture9.c
index d13138b7d5c..7a0959a8f3e 100644
--- a/src/gallium/state_trackers/nine/basetexture9.c
+++ b/src/gallium/state_trackers/nine/basetexture9.c
@@ -319,7 +319,7 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 *This )
if (tex->dirty_box.width) {
for (l = min_level_dirty; l <= last_level; ++l) {
- u_box_minify_2d(&box, &tex->dirty_box, l);
+ u_box_minify_3d(&box, &tex->dirty_box, l);
NineVolume9_UploadSelf(tex->volumes[l], &box);
}
memset(&tex->dirty_box, 0, sizeof(tex->dirty_box));
diff --git a/src/gallium/state_trackers/nine/buffer9.c b/src/gallium/state_trackers/nine/buffer9.c
new file mode 100644
index 00000000000..b4b91ec2a02
--- /dev/null
+++ b/src/gallium/state_trackers/nine/buffer9.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright 2011 Joakim Sindholt <[email protected]>
+ * Copyright 2015 Patrick Rudolph <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "buffer9.h"
+#include "device9.h"
+#include "nine_helpers.h"
+#include "nine_pipe.h"
+
+#include "pipe/p_screen.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_format.h"
+#include "util/u_box.h"
+
+#define DBG_CHANNEL (DBG_INDEXBUFFER|DBG_VERTEXBUFFER)
+
+HRESULT
+NineBuffer9_ctor( struct NineBuffer9 *This,
+ struct NineUnknownParams *pParams,
+ D3DRESOURCETYPE Type,
+ DWORD Usage,
+ UINT Size,
+ D3DPOOL Pool )
+{
+ struct pipe_resource *info = &This->base.info;
+ HRESULT hr;
+
+ DBG("This=%p Size=0x%x Usage=%x Pool=%u\n", This, Size, Usage, Pool);
+
+ user_assert(Pool != D3DPOOL_SCRATCH, D3DERR_INVALIDCALL);
+
+ This->maps = MALLOC(sizeof(struct pipe_transfer *));
+ if (!This->maps)
+ return E_OUTOFMEMORY;
+ This->nmaps = 0;
+ This->maxmaps = 1;
+ This->size = Size;
+
+ This->pipe = pParams->device->pipe;
+
+ info->screen = pParams->device->screen;
+ info->target = PIPE_BUFFER;
+ info->format = PIPE_FORMAT_R8_UNORM;
+ info->width0 = Size;
+ info->flags = 0;
+
+ info->bind = PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_TRANSFER_WRITE;
+ if (!(Usage & D3DUSAGE_WRITEONLY))
+ info->bind |= PIPE_BIND_TRANSFER_READ;
+
+ info->usage = PIPE_USAGE_DEFAULT;
+ if (Usage & D3DUSAGE_DYNAMIC)
+ info->usage = PIPE_USAGE_STREAM;
+ else if (Pool == D3DPOOL_SYSTEMMEM)
+ info->usage = PIPE_USAGE_STAGING;
+
+ /* if (pDesc->Usage & D3DUSAGE_DONOTCLIP) { } */
+ /* if (pDesc->Usage & D3DUSAGE_NONSECURE) { } */
+ /* if (pDesc->Usage & D3DUSAGE_NPATCHES) { } */
+ /* if (pDesc->Usage & D3DUSAGE_POINTS) { } */
+ /* if (pDesc->Usage & D3DUSAGE_RTPATCHES) { } */
+ if (Usage & D3DUSAGE_SOFTWAREPROCESSING)
+ DBG("Application asked for Software Vertex Processing, "
+ "but this is unimplemented\n");
+ /* if (pDesc->Usage & D3DUSAGE_TEXTAPI) { } */
+
+ info->height0 = 1;
+ info->depth0 = 1;
+ info->array_size = 1;
+ info->last_level = 0;
+ info->nr_samples = 0;
+
+ hr = NineResource9_ctor(&This->base, pParams, NULL, TRUE,
+ Type, Pool, Usage);
+ return hr;
+}
+
+void
+NineBuffer9_dtor( struct NineBuffer9 *This )
+{
+ if (This->maps) {
+ while (This->nmaps) {
+ NineBuffer9_Unlock(This);
+ }
+ FREE(This->maps);
+ }
+
+ NineResource9_dtor(&This->base);
+}
+
+struct pipe_resource *
+NineBuffer9_GetResource( struct NineBuffer9 *This )
+{
+ return NineResource9_GetResource(&This->base);
+}
+
+HRESULT WINAPI
+NineBuffer9_Lock( struct NineBuffer9 *This,
+ UINT OffsetToLock,
+ UINT SizeToLock,
+ void **ppbData,
+ DWORD Flags )
+{
+ struct pipe_box box;
+ void *data;
+ unsigned usage = d3dlock_buffer_to_pipe_transfer_usage(Flags);
+
+ DBG("This=%p(pipe=%p) OffsetToLock=0x%x, SizeToLock=0x%x, Flags=0x%x\n",
+ This, This->base.resource,
+ OffsetToLock, SizeToLock, Flags);
+
+ user_assert(ppbData, E_POINTER);
+ user_assert(!(Flags & ~(D3DLOCK_DISCARD |
+ D3DLOCK_DONOTWAIT |
+ D3DLOCK_NO_DIRTY_UPDATE |
+ D3DLOCK_NOSYSLOCK |
+ D3DLOCK_READONLY |
+ D3DLOCK_NOOVERWRITE)), D3DERR_INVALIDCALL);
+
+ if (This->nmaps == This->maxmaps) {
+ struct pipe_transfer **newmaps =
+ REALLOC(This->maps, sizeof(struct pipe_transfer *)*This->maxmaps,
+ sizeof(struct pipe_transfer *)*(This->maxmaps << 1));
+ if (newmaps == NULL)
+ return E_OUTOFMEMORY;
+
+ This->maxmaps <<= 1;
+ This->maps = newmaps;
+ }
+
+ if (SizeToLock == 0) {
+ SizeToLock = This->size - OffsetToLock;
+ user_warn(OffsetToLock != 0);
+ }
+
+ u_box_1d(OffsetToLock, SizeToLock, &box);
+
+ data = This->pipe->transfer_map(This->pipe, This->base.resource, 0,
+ usage, &box, &This->maps[This->nmaps]);
+
+ if (!data) {
+ DBG("pipe::transfer_map failed\n"
+ " usage = %x\n"
+ " box.x = %u\n"
+ " box.width = %u\n",
+ usage, box.x, box.width);
+ /* not sure what to return, msdn suggests this */
+ if (Flags & D3DLOCK_DONOTWAIT)
+ return D3DERR_WASSTILLDRAWING;
+ return D3DERR_INVALIDCALL;
+ }
+
+ DBG("returning pointer %p\n", data);
+ This->nmaps++;
+ *ppbData = data;
+
+ return D3D_OK;
+}
+
+HRESULT WINAPI
+NineBuffer9_Unlock( struct NineBuffer9 *This )
+{
+ DBG("This=%p\n", This);
+
+ user_assert(This->nmaps > 0, D3DERR_INVALIDCALL);
+ This->pipe->transfer_unmap(This->pipe, This->maps[--(This->nmaps)]);
+ return D3D_OK;
+}
diff --git a/src/gallium/state_trackers/nine/buffer9.h b/src/gallium/state_trackers/nine/buffer9.h
new file mode 100644
index 00000000000..1afd9a996ea
--- /dev/null
+++ b/src/gallium/state_trackers/nine/buffer9.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2011 Joakim Sindholt <[email protected]>
+ * Copyright 2015 Patrick Rudolph <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef _NINE_BUFFER9_H_
+#define _NINE_BUFFER9_H_
+
+#include "resource9.h"
+
+struct pipe_screen;
+struct pipe_context;
+struct pipe_transfer;
+
+struct NineBuffer9
+{
+ struct NineResource9 base;
+
+ /* G3D */
+ struct pipe_context *pipe;
+ struct pipe_transfer **maps;
+ int nmaps, maxmaps;
+ UINT size;
+};
+static inline struct NineBuffer9 *
+NineBuffer9( void *data )
+{
+ return (struct NineBuffer9 *)data;
+}
+
+HRESULT
+NineBuffer9_ctor( struct NineBuffer9 *This,
+ struct NineUnknownParams *pParams,
+ D3DRESOURCETYPE Type,
+ DWORD Usage,
+ UINT Size,
+ D3DPOOL Pool );
+
+void
+NineBuffer9_dtor( struct NineBuffer9 *This );
+
+struct pipe_resource *
+NineBuffer9_GetResource( struct NineBuffer9 *This );
+
+HRESULT WINAPI
+NineBuffer9_Lock( struct NineBuffer9 *This,
+ UINT OffsetToLock,
+ UINT SizeToLock,
+ void **ppbData,
+ DWORD Flags );
+
+HRESULT WINAPI
+NineBuffer9_Unlock( struct NineBuffer9 *This );
+
+#endif /* _NINE_BUFFER9_H_ */
diff --git a/src/gallium/state_trackers/nine/cubetexture9.c b/src/gallium/state_trackers/nine/cubetexture9.c
index abba2637946..460cc853942 100644
--- a/src/gallium/state_trackers/nine/cubetexture9.c
+++ b/src/gallium/state_trackers/nine/cubetexture9.c
@@ -181,7 +181,7 @@ NineCubeTexture9_dtor( struct NineCubeTexture9 *This )
}
if (This->managed_buffer)
- FREE(This->managed_buffer);
+ align_free(This->managed_buffer);
NineBaseTexture9_dtor(&This->base);
}
diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c
index 0be83658928..475ef96788e 100644
--- a/src/gallium/state_trackers/nine/device9.c
+++ b/src/gallium/state_trackers/nine/device9.c
@@ -38,6 +38,7 @@
#include "nine_pipe.h"
#include "nine_ff.h"
#include "nine_dump.h"
+#include "nine_limits.h"
#include "pipe/p_screen.h"
#include "pipe/p_context.h"
@@ -81,7 +82,7 @@ static void nine_setup_fpu(void)
#endif
-static void
+void
NineDevice9_SetDefaultState( struct NineDevice9 *This, boolean is_reset )
{
struct NineSurface9 *refSurf = NULL;
@@ -112,8 +113,10 @@ NineDevice9_SetDefaultState( struct NineDevice9 *This, boolean is_reset )
This->state.scissor.maxy = refSurf->desc.Height;
}
- if (This->nswapchains && This->swapchains[0]->params.EnableAutoDepthStencil)
+ if (This->nswapchains && This->swapchains[0]->params.EnableAutoDepthStencil) {
This->state.rs[D3DRS_ZENABLE] = TRUE;
+ This->state.rs_advertised[D3DRS_ZENABLE] = TRUE;
+ }
if (This->state.rs[D3DRS_ZENABLE])
NineDevice9_SetDepthStencilSurface(
This, (IDirect3DSurface9 *)This->swapchains[0]->zsbuf);
@@ -131,7 +134,8 @@ NineDevice9_ctor( struct NineDevice9 *This,
ID3DPresentGroup *pPresentationGroup,
struct d3dadapter9_context *pCTX,
boolean ex,
- D3DDISPLAYMODEEX *pFullscreenDisplayMode )
+ D3DDISPLAYMODEEX *pFullscreenDisplayMode,
+ int minorVersionNum )
{
unsigned i;
HRESULT hr = NineUnknown_ctor(&This->base, pParams);
@@ -152,6 +156,8 @@ NineDevice9_ctor( struct NineDevice9 *This,
This->params = *pCreationParameters;
This->ex = ex;
This->present = pPresentationGroup;
+ This->minor_version_num = minorVersionNum;
+
IDirect3D9_AddRef(This->d3d9);
ID3DPresentGroup_AddRef(This->present);
@@ -172,6 +178,19 @@ NineDevice9_ctor( struct NineDevice9 *This,
/* Create first, it messes up our state. */
This->hud = hud_create(This->pipe, This->cso); /* NULL result is fine */
+ /* Available memory counter. Updated only for allocations with this device
+ * instance. This is the Win 7 behavior.
+ * Win XP shares this counter across multiple devices. */
+ This->available_texture_mem = This->screen->get_param(This->screen, PIPE_CAP_VIDEO_MEMORY);
+ if (This->available_texture_mem < 4096)
+ This->available_texture_mem <<= 20;
+ else
+ This->available_texture_mem = UINT_MAX;
+ /* We cap texture memory usage to 80% of what is reported free initially.
+ * This helps get closer to Windows behaviour. For example, VertexBuffer
+ * allocation still succeeds when texture allocation fails. */
+ This->available_texture_limit = This->available_texture_mem * 20LL / 100LL;
+
/* create implicit swapchains */
This->nswapchains = ID3DPresentGroup_GetMultiheadCount(This->present);
This->swapchains = CALLOC(This->nswapchains,
@@ -460,7 +479,8 @@ NineDevice9_dtor( struct NineDevice9 *This )
if (This->swapchains) {
for (i = 0; i < This->nswapchains; ++i)
- NineUnknown_Unbind(NineUnknown(This->swapchains[i]));
+ if (This->swapchains[i])
+ NineUnknown_Unbind(NineUnknown(This->swapchains[i]));
FREE(This->swapchains);
}
@@ -523,17 +543,20 @@ NineDevice9_ResumeRecording( struct NineDevice9 *This )
HRESULT WINAPI
NineDevice9_TestCooperativeLevel( struct NineDevice9 *This )
{
- return D3D_OK; /* TODO */
+ if (NineSwapChain9_GetOccluded(This->swapchains[0])) {
+ This->device_needs_reset = TRUE;
+ return D3DERR_DEVICELOST;
+ } else if (This->device_needs_reset) {
+ return D3DERR_DEVICENOTRESET;
+ }
+
+ return D3D_OK;
}
UINT WINAPI
NineDevice9_GetAvailableTextureMem( struct NineDevice9 *This )
{
- const unsigned mem = This->screen->get_param(This->screen, PIPE_CAP_VIDEO_MEMORY);
- if (mem < 4096)
- return mem << 20;
- else
- return UINT_MAX;
+ return This->available_texture_mem;
}
HRESULT WINAPI
@@ -606,6 +629,7 @@ NineDevice9_SetCursorProperties( struct NineDevice9 *This,
"pCursorBitmap=%p\n", This, XHotSpot, YHotSpot, pCursorBitmap);
user_assert(pCursorBitmap, D3DERR_INVALIDCALL);
+ user_assert(surf->desc.Format == D3DFMT_A8R8G8B8, D3DERR_INVALIDCALL);
if (This->swapchains[0]->params.Windowed) {
This->cursor.w = MIN2(surf->desc.Width, 32);
@@ -709,6 +733,11 @@ NineDevice9_CreateAdditionalSwapChain( struct NineDevice9 *This,
This, pPresentationParameters, pSwapChain);
user_assert(pPresentationParameters, D3DERR_INVALIDCALL);
+ user_assert(tmplt->params.Windowed && pPresentationParameters->Windowed, D3DERR_INVALIDCALL);
+
+ /* TODO: this deserves more tests */
+ if (!pPresentationParameters->hDeviceWindow)
+ pPresentationParameters->hDeviceWindow = This->params.hFocusWindow;
hr = ID3DPresentGroup_CreateAdditionalPresent(This->present, pPresentationParameters, &present);
@@ -757,11 +786,16 @@ NineDevice9_Reset( struct NineDevice9 *This,
DBG("This=%p pPresentationParameters=%p\n", This, pPresentationParameters);
+ if (NineSwapChain9_GetOccluded(This->swapchains[0])) {
+ This->device_needs_reset = TRUE;
+ return D3DERR_DEVICELOST;
+ }
+
for (i = 0; i < This->nswapchains; ++i) {
D3DPRESENT_PARAMETERS *params = &pPresentationParameters[i];
hr = NineSwapChain9_Resize(This->swapchains[i], params, NULL);
if (hr != D3D_OK)
- return hr;
+ break;
}
nine_pipe_context_clear(This);
@@ -772,6 +806,7 @@ NineDevice9_Reset( struct NineDevice9 *This,
This, 0, (IDirect3DSurface9 *)This->swapchains[0]->buffers[0]);
/* XXX: better use GetBackBuffer here ? */
+ This->device_needs_reset = (hr != D3D_OK);
return hr;
}
@@ -806,6 +841,8 @@ NineDevice9_GetBackBuffer( struct NineDevice9 *This,
IDirect3DSurface9 **ppBackBuffer )
{
user_assert(ppBackBuffer != NULL, D3DERR_INVALIDCALL);
+ /* return NULL on error */
+ *ppBackBuffer = NULL;
user_assert(iSwapChain < This->nswapchains, D3DERR_INVALIDCALL);
return NineSwapChain9_GetBackBuffer(This->swapchains[iSwapChain],
@@ -1455,7 +1492,7 @@ NineDevice9_StretchRect( struct NineDevice9 *This,
struct NineSurface9 *src = NineSurface9(pSourceSurface);
struct pipe_resource *dst_res = NineSurface9_GetResource(dst);
struct pipe_resource *src_res = NineSurface9_GetResource(src);
- const boolean zs = util_format_is_depth_or_stencil(dst_res->format);
+ boolean zs;
struct pipe_blit_info blit;
boolean scaled, clamped, ms, flip_x = FALSE, flip_y = FALSE;
@@ -1470,6 +1507,9 @@ NineDevice9_StretchRect( struct NineDevice9 *This,
DBG("pDestRect=(%u,%u)-(%u,%u)\n", pDestRect->left, pDestRect->top,
pDestRect->right, pDestRect->bottom);
+ user_assert(dst->base.pool == D3DPOOL_DEFAULT &&
+ src->base.pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL);
+ zs = util_format_is_depth_or_stencil(dst_res->format);
user_assert(!zs || !This->in_scene, D3DERR_INVALIDCALL);
user_assert(!zs || !pSourceRect ||
(pSourceRect->left == 0 &&
@@ -1493,8 +1533,6 @@ NineDevice9_StretchRect( struct NineDevice9 *This,
src_res->nr_samples,
PIPE_BIND_SAMPLER_VIEW),
D3DERR_INVALIDCALL);
- user_assert(dst->base.pool == D3DPOOL_DEFAULT &&
- src->base.pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL);
/* We might want to permit these, but wine thinks we shouldn't. */
user_assert(!pDestRect ||
@@ -1668,6 +1706,8 @@ NineDevice9_ColorFill( struct NineDevice9 *This,
user_assert((surf->base.usage & D3DUSAGE_RENDERTARGET) ||
NineSurface9_IsOffscreenPlain(surf), D3DERR_INVALIDCALL);
+ user_assert(surf->desc.Format != D3DFMT_NULL, D3D_OK);
+
if (pRect) {
x = pRect->left;
y = pRect->top;
@@ -1884,15 +1924,18 @@ NineDevice9_Clear( struct NineDevice9 *This,
Count = 0;
#endif
+ nine_update_state_framebuffer_clear(This);
+
if (Flags & D3DCLEAR_TARGET) bufs |= PIPE_CLEAR_COLOR;
- if (Flags & D3DCLEAR_ZBUFFER) bufs |= PIPE_CLEAR_DEPTH;
- if (Flags & D3DCLEAR_STENCIL) bufs |= PIPE_CLEAR_STENCIL;
+ /* Ignore Z buffer if not bound */
+ if (This->state.fb.zsbuf != NULL) {
+ if (Flags & D3DCLEAR_ZBUFFER) bufs |= PIPE_CLEAR_DEPTH;
+ if (Flags & D3DCLEAR_STENCIL) bufs |= PIPE_CLEAR_STENCIL;
+ }
if (!bufs)
return D3D_OK;
d3dcolor_to_pipe_color_union(&rgba, Color);
- nine_update_state_framebuffer(This);
-
rect.x1 = This->state.viewport.X;
rect.y1 = This->state.viewport.Y;
rect.x2 = This->state.viewport.Width + rect.x1;
@@ -1935,7 +1978,6 @@ NineDevice9_Clear( struct NineDevice9 *This,
/* Case we clear depth buffer (and eventually rt too).
* depth buffer size is always >= rt size. Compare to clear region */
((bufs & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) &&
- This->state.fb.zsbuf != NULL &&
rect.x2 >= zsbuf_surf->desc.Width &&
rect.y2 >= zsbuf_surf->desc.Height))) {
DBG("Clear fast path\n");
@@ -2342,8 +2384,15 @@ NineDevice9_SetRenderState( struct NineDevice9 *This,
DBG("This=%p State=%u(%s) Value=%08x\n", This,
State, nine_d3drs_to_string(State), Value);
+ user_assert(State < D3DRS_COUNT, D3DERR_INVALIDCALL);
+
+ if (state->rs_advertised[State] == Value && likely(!This->is_recording))
+ return D3D_OK;
+
+ state->rs_advertised[State] = Value;
+
/* Amd hacks (equivalent to GL extensions) */
- if (State == D3DRS_POINTSIZE) {
+ if (unlikely(State == D3DRS_POINTSIZE)) {
if (Value == RESZ_CODE)
return NineDevice9_ResolveZ(This);
@@ -2356,20 +2405,17 @@ NineDevice9_SetRenderState( struct NineDevice9 *This,
}
/* NV hack */
- if (State == D3DRS_ADAPTIVETESS_Y &&
- (Value == D3DFMT_ATOC || (Value == D3DFMT_UNKNOWN && state->rs[NINED3DRS_ALPHACOVERAGE]))) {
+ if (unlikely(State == D3DRS_ADAPTIVETESS_Y)) {
+ if (Value == D3DFMT_ATOC || (Value == D3DFMT_UNKNOWN && state->rs[NINED3DRS_ALPHACOVERAGE])) {
state->rs[NINED3DRS_ALPHACOVERAGE] = (Value == D3DFMT_ATOC);
state->changed.group |= NINE_STATE_BLEND;
return D3D_OK;
+ }
}
- user_assert(State < Elements(state->rs), D3DERR_INVALIDCALL);
-
- if (likely(state->rs[State] != Value) || unlikely(This->is_recording)) {
- state->rs[State] = Value;
- state->changed.rs[State / 32] |= 1 << (State % 32);
- state->changed.group |= nine_render_state_group[State];
- }
+ state->rs[State] = nine_fix_render_state_value(State, Value);
+ state->changed.rs[State / 32] |= 1 << (State % 32);
+ state->changed.group |= nine_render_state_group[State];
return D3D_OK;
}
@@ -2379,9 +2425,9 @@ NineDevice9_GetRenderState( struct NineDevice9 *This,
D3DRENDERSTATETYPE State,
DWORD *pValue )
{
- user_assert(State < Elements(This->state.rs), D3DERR_INVALIDCALL);
+ user_assert(State < D3DRS_COUNT, D3DERR_INVALIDCALL);
- *pValue = This->state.rs[State];
+ *pValue = This->state.rs_advertised[State];
return D3D_OK;
}
@@ -3122,7 +3168,7 @@ NineDevice9_ProcessVertices( struct NineDevice9 *This,
buffer_offset = 0;
} else {
/* SO matches vertex declaration */
- resource = dst->base.resource;
+ resource = NineVertexBuffer9_GetResource(dst);
buffer_offset = DestIndex * vs->so->stride[0];
}
target = This->pipe->create_stream_output_target(This->pipe, resource,
@@ -3184,13 +3230,21 @@ NineDevice9_SetVertexDeclaration( struct NineDevice9 *This,
IDirect3DVertexDeclaration9 *pDecl )
{
struct nine_state *state = This->update;
+ BOOL was_programmable_vs = This->state.programmable_vs;
DBG("This=%p pDecl=%p\n", This, pDecl);
if (likely(!This->is_recording) && state->vdecl == NineVertexDeclaration9(pDecl))
return D3D_OK;
+
nine_bind(&state->vdecl, pDecl);
+ This->state.programmable_vs = This->state.vs && !(This->state.vdecl && This->state.vdecl->position_t);
+ if (likely(!This->is_recording) && was_programmable_vs != This->state.programmable_vs) {
+ state->commit |= NINE_STATE_COMMIT_CONST_VS;
+ state->changed.group |= NINE_STATE_VS;
+ }
+
state->changed.group |= NINE_STATE_VDECL;
return D3D_OK;
@@ -3262,18 +3316,21 @@ NineDevice9_SetVertexShader( struct NineDevice9 *This,
IDirect3DVertexShader9 *pShader )
{
struct nine_state *state = This->update;
+ BOOL was_programmable_vs = This->state.programmable_vs;
DBG("This=%p pShader=%p\n", This, pShader);
if (!This->is_recording && state->vs == (struct NineVertexShader9*)pShader)
return D3D_OK;
+ nine_bind(&state->vs, pShader);
+
+ This->state.programmable_vs = This->state.vs && !(This->state.vdecl && This->state.vdecl->position_t);
+
/* ff -> non-ff: commit back non-ff constants */
- if (!state->vs && pShader)
+ if (!was_programmable_vs && This->state.programmable_vs)
state->commit |= NINE_STATE_COMMIT_CONST_VS;
- nine_bind(&state->vs, pShader);
-
state->changed.group |= NINE_STATE_VS;
return D3D_OK;
@@ -3499,7 +3556,8 @@ NineDevice9_SetStreamSource( struct NineDevice9 *This,
state->vtxbuf[i].stride = Stride;
state->vtxbuf[i].buffer_offset = OffsetInBytes;
}
- state->vtxbuf[i].buffer = pStreamData ? pVBuf9->base.resource : NULL;
+ pipe_resource_reference(&state->vtxbuf[i].buffer,
+ pStreamData ? NineVertexBuffer9_GetResource(pVBuf9) : NULL);
return D3D_OK;
}
@@ -3542,6 +3600,9 @@ NineDevice9_SetStreamSourceFreq( struct NineDevice9 *This,
(Setting & D3DSTREAMSOURCE_INDEXEDDATA)), D3DERR_INVALIDCALL);
user_assert(Setting, D3DERR_INVALIDCALL);
+ if (likely(!This->is_recording) && state->stream_freq[StreamNumber] == Setting)
+ return D3D_OK;
+
state->stream_freq[StreamNumber] = Setting;
if (Setting & D3DSTREAMSOURCE_INSTANCEDATA)
@@ -3549,7 +3610,9 @@ NineDevice9_SetStreamSourceFreq( struct NineDevice9 *This,
else
state->stream_instancedata_mask &= ~(1 << StreamNumber);
- state->changed.stream_freq |= 1 << StreamNumber;
+ state->changed.stream_freq |= 1 << StreamNumber; /* Used for stateblocks */
+ if (StreamNumber != 0)
+ state->changed.group |= NINE_STATE_STREAMFREQ;
return D3D_OK;
}
@@ -4013,7 +4076,8 @@ NineDevice9_new( struct pipe_screen *pScreen,
struct d3dadapter9_context *pCTX,
boolean ex,
D3DDISPLAYMODEEX *pFullscreenDisplayMode,
- struct NineDevice9 **ppOut )
+ struct NineDevice9 **ppOut,
+ int minorVersionNum )
{
BOOL lock;
lock = !!(pCreationParameters->BehaviorFlags & D3DCREATE_MULTITHREADED);
@@ -4021,5 +4085,5 @@ NineDevice9_new( struct pipe_screen *pScreen,
NINE_NEW(Device9, ppOut, lock, /* args */
pScreen, pCreationParameters, pCaps,
pPresentationParameters, pD3D9, pPresentationGroup, pCTX,
- ex, pFullscreenDisplayMode);
+ ex, pFullscreenDisplayMode, minorVersionNum );
}
diff --git a/src/gallium/state_trackers/nine/device9.h b/src/gallium/state_trackers/nine/device9.h
index cbc1e61f5db..34edf0cfa48 100644
--- a/src/gallium/state_trackers/nine/device9.h
+++ b/src/gallium/state_trackers/nine/device9.h
@@ -137,6 +137,10 @@ struct NineDevice9
/* dummy vbo (containing 0 0 0 0) to bind if vertex shader input
* is not bound to anything by the vertex declaration */
struct pipe_resource *dummy_vbo;
+ BOOL device_needs_reset;
+ int minor_version_num;
+ long long available_texture_mem;
+ long long available_texture_limit;
};
static inline struct NineDevice9 *
NineDevice9( void *data )
@@ -154,7 +158,8 @@ NineDevice9_new( struct pipe_screen *pScreen,
struct d3dadapter9_context *pCTX,
boolean ex,
D3DDISPLAYMODEEX *pFullscreenDisplayMode,
- struct NineDevice9 **ppOut );
+ struct NineDevice9 **ppOut,
+ int minorVersionNum );
HRESULT
NineDevice9_ctor( struct NineDevice9 *This,
@@ -167,12 +172,15 @@ NineDevice9_ctor( struct NineDevice9 *This,
ID3DPresentGroup *pPresentationGroup,
struct d3dadapter9_context *pCTX,
boolean ex,
- D3DDISPLAYMODEEX *pFullscreenDisplayMode );
+ D3DDISPLAYMODEEX *pFullscreenDisplayMode,
+ int minorVersionNum );
void
NineDevice9_dtor( struct NineDevice9 *This );
/*** Nine private ***/
+void
+NineDevice9_SetDefaultState( struct NineDevice9 *This, boolean is_reset );
struct pipe_screen *
NineDevice9_GetScreen( struct NineDevice9 *This );
diff --git a/src/gallium/state_trackers/nine/device9ex.c b/src/gallium/state_trackers/nine/device9ex.c
index fe8aa9b2704..11244b1bedf 100644
--- a/src/gallium/state_trackers/nine/device9ex.c
+++ b/src/gallium/state_trackers/nine/device9ex.c
@@ -20,7 +20,9 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
+#include "device9.h"
#include "device9ex.h"
+#include "nine_pipe.h"
#include "swapchain9ex.h"
#include "nine_helpers.h"
@@ -37,7 +39,8 @@ NineDevice9Ex_ctor( struct NineDevice9Ex *This,
D3DDISPLAYMODEEX *pFullscreenDisplayMode,
IDirect3D9Ex *pD3D9Ex,
ID3DPresentGroup *pPresentationGroup,
- struct d3dadapter9_context *pCTX )
+ struct d3dadapter9_context *pCTX,
+ int minorVersionNum )
{
DBG("This=%p pParams=%p pScreen=%p pCreationParameters=%p pCaps=%p "
"pPresentationParameters=%p pFullscreenDisplayMode=%p "
@@ -50,7 +53,7 @@ NineDevice9Ex_ctor( struct NineDevice9Ex *This,
pScreen, pCreationParameters, pCaps,
pPresentationParameters,
(IDirect3D9 *)pD3D9Ex, pPresentationGroup, pCTX,
- TRUE, pFullscreenDisplayMode);
+ TRUE, pFullscreenDisplayMode, minorVersionNum);
}
static void
@@ -158,6 +161,14 @@ NineDevice9Ex_CheckDeviceState( struct NineDevice9Ex *This,
DBG("This=%p hDestinationWindow=%p\n",
This, hDestinationWindow);
+ user_assert(!This->base.swapchains[0]->params.Windowed, D3D_OK);
+
+ if (This->base.params.hFocusWindow == hDestinationWindow) {
+ if (NineSwapChain9_GetOccluded(This->base.swapchains[0]))
+ return S_PRESENT_OCCLUDED;
+ } else if(!NineSwapChain9_GetOccluded(This->base.swapchains[0])) {
+ return S_PRESENT_OCCLUDED;
+ }
/* TODO: handle the other return values */
return D3D_OK;
}
@@ -221,12 +232,37 @@ NineDevice9Ex_ResetEx( struct NineDevice9Ex *This,
if (pFullscreenDisplayMode) mode = &(pFullscreenDisplayMode[i]);
hr = NineSwapChain9_Resize(This->base.swapchains[i], params, mode);
if (FAILED(hr))
- return (hr == D3DERR_OUTOFVIDEOMEMORY) ? hr : D3DERR_DEVICELOST;
+ break;
}
NineDevice9_SetRenderTarget(
(struct NineDevice9 *)This, 0, (IDirect3DSurface9 *)This->base.swapchains[0]->buffers[0]);
+ return hr;
+}
+
+/* Reset entry installed in the IDirect3DDevice9Ex vtable in place of
+ * NineDevice9_Reset.
+ *
+ * Resizes each swapchain with the matching element of
+ * pPresentationParameters (NULL is passed as the display mode), then clears
+ * the pipe context and the device state and reapplies the default state.
+ *
+ * Returns the HRESULT of the last NineSwapChain9_Resize call (D3D_OK if the
+ * loop body never ran). Note that the state reset and the render target
+ * rebinding below are executed even when a resize failed. */
+HRESULT WINAPI
+NineDevice9Ex_Reset( struct NineDevice9Ex *This,
+ D3DPRESENT_PARAMETERS *pPresentationParameters )
+{
+ HRESULT hr = D3D_OK;
+ unsigned i;
+
+ DBG("This=%p pPresentationParameters=%p\n", This, pPresentationParameters);
+
+ /* pPresentationParameters is indexed per swapchain; stop at the first
+ * swapchain that fails to resize */
+ for (i = 0; i < This->base.nswapchains; ++i) {
+ D3DPRESENT_PARAMETERS *params = &pPresentationParameters[i];
+ hr = NineSwapChain9_Resize(This->base.swapchains[i], params, NULL);
+ if (FAILED(hr))
+ break;
+ }
+
+ nine_pipe_context_clear((struct NineDevice9 *)This);
+ nine_state_clear(&This->base.state, TRUE);
+
+ NineDevice9_SetDefaultState((struct NineDevice9 *)This, TRUE);
+ /* bind buffers[0] of swapchain 0 as render target 0 */
+ NineDevice9_SetRenderTarget(
+ (struct NineDevice9 *)This, 0, (IDirect3DSurface9 *)This->base.swapchains[0]->buffers[0]);
return hr;
}
@@ -248,11 +284,18 @@ NineDevice9Ex_GetDisplayModeEx( struct NineDevice9Ex *This,
return NineSwapChain9Ex_GetDisplayModeEx(swapchain, pMode, pRotation);
}
+/* TestCooperativeLevel entry installed in the IDirect3DDevice9Ex vtable in
+ * place of NineDevice9_TestCooperativeLevel: reports D3D_OK unconditionally,
+ * without inspecting any device state. */
+HRESULT WINAPI
+NineDevice9Ex_TestCooperativeLevel( struct NineDevice9Ex *This )
+{
+ return D3D_OK;
+}
+
+
IDirect3DDevice9ExVtbl NineDevice9Ex_vtable = {
(void *)NineUnknown_QueryInterface,
(void *)NineUnknown_AddRef,
(void *)NineUnknown_Release,
- (void *)NineDevice9_TestCooperativeLevel,
+ (void *)NineDevice9Ex_TestCooperativeLevel,
(void *)NineDevice9_GetAvailableTextureMem,
(void *)NineDevice9_EvictManagedResources,
(void *)NineDevice9_GetDirect3D,
@@ -265,7 +308,7 @@ IDirect3DDevice9ExVtbl NineDevice9Ex_vtable = {
(void *)NineDevice9_CreateAdditionalSwapChain,
(void *)NineDevice9_GetSwapChain,
(void *)NineDevice9_GetNumberOfSwapChains,
- (void *)NineDevice9_Reset,
+ (void *)NineDevice9Ex_Reset,
(void *)NineDevice9_Present,
(void *)NineDevice9_GetBackBuffer,
(void *)NineDevice9_GetRasterStatus,
@@ -401,13 +444,14 @@ NineDevice9Ex_new( struct pipe_screen *pScreen,
IDirect3D9Ex *pD3D9Ex,
ID3DPresentGroup *pPresentationGroup,
struct d3dadapter9_context *pCTX,
- struct NineDevice9Ex **ppOut )
+ struct NineDevice9Ex **ppOut,
+ int minorVersionNum )
{
BOOL lock;
lock = !!(pCreationParameters->BehaviorFlags & D3DCREATE_MULTITHREADED);
NINE_NEW(Device9Ex, ppOut, lock,
pScreen, pCreationParameters, pCaps, pPresentationParameters,
- pFullscreenDisplayMode, pD3D9Ex, pPresentationGroup, pCTX);
+ pFullscreenDisplayMode, pD3D9Ex, pPresentationGroup, pCTX, minorVersionNum );
}
diff --git a/src/gallium/state_trackers/nine/device9ex.h b/src/gallium/state_trackers/nine/device9ex.h
index 8375622d8a1..1c7e57e0974 100644
--- a/src/gallium/state_trackers/nine/device9ex.h
+++ b/src/gallium/state_trackers/nine/device9ex.h
@@ -44,7 +44,8 @@ NineDevice9Ex_new( struct pipe_screen *pScreen,
IDirect3D9Ex *pD3D9Ex,
ID3DPresentGroup *pPresentationGroup,
struct d3dadapter9_context *pCTX,
- struct NineDevice9Ex **ppOut );
+ struct NineDevice9Ex **ppOut,
+ int minorVersionNum );
HRESULT WINAPI
NineDevice9Ex_SetConvolutionMonoKernel( struct NineDevice9Ex *This,
@@ -73,6 +74,13 @@ NineDevice9Ex_PresentEx( struct NineDevice9Ex *This,
DWORD dwFlags );
HRESULT WINAPI
+NineDevice9Ex_Present( struct NineDevice9Ex *This,
+ const RECT *pSourceRect,
+ const RECT *pDestRect,
+ HWND hDestWindowOverride,
+ const RGNDATA *pDirtyRegion );
+
+HRESULT WINAPI
NineDevice9Ex_GetGPUThreadPriority( struct NineDevice9Ex *This,
INT *pPriority );
@@ -141,9 +149,16 @@ NineDevice9Ex_ResetEx( struct NineDevice9Ex *This,
D3DDISPLAYMODEEX *pFullscreenDisplayMode );
HRESULT WINAPI
+NineDevice9Ex_Reset( struct NineDevice9Ex *This,
+ D3DPRESENT_PARAMETERS *pPresentationParameters );
+
+HRESULT WINAPI
NineDevice9Ex_GetDisplayModeEx( struct NineDevice9Ex *This,
UINT iSwapChain,
D3DDISPLAYMODEEX *pMode,
D3DDISPLAYROTATION *pRotation );
+HRESULT WINAPI
+NineDevice9Ex_TestCooperativeLevel( struct NineDevice9Ex *This );
+
#endif /* _NINE_DEVICE9EX_H_ */
diff --git a/src/gallium/state_trackers/nine/guid.c b/src/gallium/state_trackers/nine/guid.c
index 5034feb4d71..5e63d2f6629 100644
--- a/src/gallium/state_trackers/nine/guid.c
+++ b/src/gallium/state_trackers/nine/guid.c
@@ -20,6 +20,7 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
+#include <stdio.h>
#include "guid.h"
const GUID IID_IUnknown = { 0x00000000, 0x0000, 0x0000, { 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46 } };
@@ -64,3 +65,20 @@ GUID_equal( const GUID *a,
}
return TRUE;
}
+
+/* Formats the GUID pointed to by id into guid_str as
+ * "{XXXXXXXX,XXXX,XXXX,XXXXXXXXXXXXXXXX}" (uppercase hex digits) and returns
+ * guid_str.
+ *
+ * The write is unbounded (sprintf). With the minimum field widths the output
+ * is 37 characters plus the terminating NUL, so guid_str must provide at
+ * least 38 bytes -- more if a field can exceed its width.
+ * NOTE(review): presumably impossible for 32/16/16/8-bit GUID members;
+ * confirm against the GUID typedef. */
+char* GUID_sprintf(char *guid_str, REFGUID id) {
+ sprintf( guid_str,
+ "{%08X,%04X,%04X,%02X%02X%02X%02X%02X%02X%02X%02X}",
+ id->Data1,
+ id->Data2,
+ id->Data3,
+ id->Data4[0],
+ id->Data4[1],
+ id->Data4[2],
+ id->Data4[3],
+ id->Data4[4],
+ id->Data4[5],
+ id->Data4[6],
+ id->Data4[7]);
+ return guid_str;
+}
diff --git a/src/gallium/state_trackers/nine/guid.h b/src/gallium/state_trackers/nine/guid.h
index 1f9ff009ad8..af8f081bfb5 100644
--- a/src/gallium/state_trackers/nine/guid.h
+++ b/src/gallium/state_trackers/nine/guid.h
@@ -33,4 +33,8 @@ boolean
GUID_equal( const GUID *a,
const GUID *b );
+char*
+GUID_sprintf( char *guid_str,
+ REFGUID id );
+
#endif /* _NINE_GUID_H_ */
diff --git a/src/gallium/state_trackers/nine/indexbuffer9.c b/src/gallium/state_trackers/nine/indexbuffer9.c
index 860313b7f7e..401fe75e95f 100644
--- a/src/gallium/state_trackers/nine/indexbuffer9.c
+++ b/src/gallium/state_trackers/nine/indexbuffer9.c
@@ -40,52 +40,17 @@ NineIndexBuffer9_ctor( struct NineIndexBuffer9 *This,
struct NineUnknownParams *pParams,
D3DINDEXBUFFER_DESC *pDesc )
{
- struct pipe_resource *info = &This->base.info;
HRESULT hr;
DBG("This=%p pParams=%p pDesc=%p Usage=%s\n",
This, pParams, pDesc, nine_D3DUSAGE_to_str(pDesc->Usage));
- This->pipe = pParams->device->pipe;
-
- info->screen = pParams->device->screen;
- info->target = PIPE_BUFFER;
- info->format = PIPE_FORMAT_R8_UNORM;
- info->width0 = pDesc->Size;
- info->flags = 0;
-
- info->bind = PIPE_BIND_INDEX_BUFFER | PIPE_BIND_TRANSFER_WRITE;
- if (!(pDesc->Usage & D3DUSAGE_WRITEONLY))
- info->bind |= PIPE_BIND_TRANSFER_READ;
-
- info->usage = PIPE_USAGE_DEFAULT;
- if (pDesc->Usage & D3DUSAGE_DYNAMIC)
- info->usage = PIPE_USAGE_STREAM;
- if (pDesc->Pool == D3DPOOL_SYSTEMMEM)
- info->usage = PIPE_USAGE_STAGING;
-
- /* if (pDesc->Usage & D3DUSAGE_DONOTCLIP) { } */
- /* if (pDesc->Usage & D3DUSAGE_NONSECURE) { } */
- /* if (pDesc->Usage & D3DUSAGE_NPATCHES) { } */
- /* if (pDesc->Usage & D3DUSAGE_POINTS) { } */
- /* if (pDesc->Usage & D3DUSAGE_RTPATCHES) { } */
- if (pDesc->Usage & D3DUSAGE_SOFTWAREPROCESSING)
- DBG("Application asked for Software Vertex Processing, "
- "but this is unimplemented\n");
-
- info->height0 = 1;
- info->depth0 = 1;
- info->array_size = 1;
- info->last_level = 0;
- info->nr_samples = 0;
-
- hr = NineResource9_ctor(&This->base, pParams, NULL, TRUE, D3DRTYPE_INDEXBUFFER,
- pDesc->Pool, pDesc->Usage);
+ hr = NineBuffer9_ctor(&This->base, pParams, D3DRTYPE_INDEXBUFFER,
+ pDesc->Usage, pDesc->Size, pDesc->Pool);
if (FAILED(hr))
return hr;
- This->buffer.buffer = This->base.resource;
+ This->buffer.buffer = NineIndexBuffer9_GetResource(This);
This->buffer.offset = 0;
- This->map_count = 0;
switch (pDesc->Format) {
case D3DFMT_INDEX16: This->buffer.index_size = 2; break;
@@ -105,9 +70,7 @@ NineIndexBuffer9_ctor( struct NineIndexBuffer9 *This,
void
NineIndexBuffer9_dtor( struct NineIndexBuffer9 *This )
{
- if (This->transfer) { NineIndexBuffer9_Unlock(This); }
-
- NineResource9_dtor(&This->base);
+ NineBuffer9_dtor(&This->base);
}
const struct pipe_index_buffer *
@@ -116,6 +79,12 @@ NineIndexBuffer9_GetBuffer( struct NineIndexBuffer9 *This )
return &This->buffer;
}
+/* Returns the pipe_resource of the NineBuffer9 base object that backs this
+ * index buffer (forwards to NineBuffer9_GetResource). */
+struct pipe_resource *
+NineIndexBuffer9_GetResource( struct NineIndexBuffer9 *This )
+{
+ return NineBuffer9_GetResource(&This->base);
+}
+
HRESULT WINAPI
NineIndexBuffer9_Lock( struct NineIndexBuffer9 *This,
UINT OffsetToLock,
@@ -123,59 +92,13 @@ NineIndexBuffer9_Lock( struct NineIndexBuffer9 *This,
void **ppbData,
DWORD Flags )
{
- struct pipe_box box;
- void *data;
- UINT count;
- const unsigned usage = d3dlock_buffer_to_pipe_transfer_usage(Flags);
-
- DBG("This=%p OffsetToLock=%u SizeToLock=%u ppbData=%p Flags=%i "
- "transfer=%p map_count=%u\n", This, OffsetToLock,
- SizeToLock, ppbData, Flags, This->transfer, This->map_count);
-
- count = ++This->map_count;
-
- if (SizeToLock == 0) {
- SizeToLock = This->desc.Size - OffsetToLock;
- user_warn(OffsetToLock != 0);
- }
-
- u_box_1d(OffsetToLock, SizeToLock, &box);
-
- if (unlikely(count != 1)) {
- DBG("Lock has been called on already locked buffer."
- "Unmapping before mapping again.");
- This->pipe->transfer_unmap(This->pipe, This->transfer);
- }
- data = This->pipe->transfer_map(This->pipe, This->base.resource, 0,
- usage, &box, &This->transfer);
- if (!This->transfer) {
- DBG("pipe::transfer_map failed\n"
- " usage = %u\n"
- " box.x = %u\n"
- " box.width = %u\n",
- usage, box.x, box.width);
- }
- *ppbData = data;
- DBG("Returning memory at %p at address %p\n", *ppbData, ppbData);
-
- return D3D_OK;
+ return NineBuffer9_Lock(&This->base, OffsetToLock, SizeToLock, ppbData, Flags);
}
HRESULT WINAPI
NineIndexBuffer9_Unlock( struct NineIndexBuffer9 *This )
{
- DBG("This=%p\n", This);
- if (!This->map_count) {
- DBG("Unmap called without a previous map call.\n");
- return D3D_OK;
- }
- if (--This->map_count) {
- DBG("Ignoring unmap.\n");
- return D3D_OK;
- }
- This->pipe->transfer_unmap(This->pipe, This->transfer);
- This->transfer = NULL;
- return D3D_OK;
+ return NineBuffer9_Unlock(&This->base);
}
HRESULT WINAPI
diff --git a/src/gallium/state_trackers/nine/indexbuffer9.h b/src/gallium/state_trackers/nine/indexbuffer9.h
index f10578f47ba..f3274b71224 100644
--- a/src/gallium/state_trackers/nine/indexbuffer9.h
+++ b/src/gallium/state_trackers/nine/indexbuffer9.h
@@ -24,7 +24,7 @@
#define _NINE_INDEXBUFFER9_H_
#include "resource9.h"
-
+#include "buffer9.h"
#include "pipe/p_state.h"
struct pipe_screen;
@@ -35,13 +35,10 @@ struct NineDevice9;
struct NineIndexBuffer9
{
- struct NineResource9 base;
+ struct NineBuffer9 base;
/* g3d stuff */
- struct pipe_context *pipe;
struct pipe_index_buffer buffer;
- struct pipe_transfer *transfer;
- UINT map_count;
D3DINDEXBUFFER_DESC desc;
};
@@ -69,6 +66,8 @@ NineIndexBuffer9_dtor( struct NineIndexBuffer9 *This );
const struct pipe_index_buffer *
NineIndexBuffer9_GetBuffer( struct NineIndexBuffer9 *This );
+struct pipe_resource *
+NineIndexBuffer9_GetResource( struct NineIndexBuffer9 *This );
/*** Direct3D public ***/
HRESULT WINAPI
diff --git a/src/gallium/state_trackers/nine/nine_ff.c b/src/gallium/state_trackers/nine/nine_ff.c
index 0feaeab7330..a5466a7bdd4 100644
--- a/src/gallium/state_trackers/nine/nine_ff.c
+++ b/src/gallium/state_trackers/nine/nine_ff.c
@@ -58,7 +58,8 @@ struct nine_ff_vs_key
uint32_t color0in_one : 1;
uint32_t color1in_one : 1;
uint32_t fog : 1;
- uint32_t pad1 : 7;
+ uint32_t specular_enable : 1;
+ uint32_t pad1 : 6;
uint32_t tc_dim_input: 16; /* 8 * 2 bits */
uint32_t pad2 : 16;
uint32_t tc_dim_output: 24; /* 8 * 3 bits */
@@ -466,6 +467,10 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
ureg_MAD(ureg, tmp, vs->aInd, ureg_imm1f(ureg, 4.0f), ureg_imm1f(ureg, 224.0f));
ureg_ARL(ureg, AR, ureg_src(tmp));
}
+
+ ureg_MOV(ureg, r[2], ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f));
+ ureg_MOV(ureg, r[3], ureg_imm4f(ureg, 1.0f, 1.0f, 1.0f, 1.0f));
+
for (i = 0; i < key->vertexblend; ++i) {
for (c = 0; c < 4; ++c) {
cWM[c] = ureg_src_register(TGSI_FILE_CONSTANT, (224 + i * 4) * !key->vertexblend_indexed + c);
@@ -473,22 +478,27 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
cWM[c] = ureg_src_indirect(cWM[c], ureg_scalar(ureg_src(AR), i));
}
/* multiply by WORLD(index) */
- ureg_MUL(ureg, r[0], _XXXX(vs->aVtx), cWM[0]);
- ureg_MAD(ureg, r[0], _YYYY(vs->aVtx), cWM[1], ureg_src(r[0]));
- ureg_MAD(ureg, r[0], _ZZZZ(vs->aVtx), cWM[2], ureg_src(r[0]));
- ureg_MAD(ureg, r[0], _WWWW(vs->aVtx), cWM[3], ureg_src(r[0]));
-
- /* accumulate weighted position value */
- if (i)
- ureg_MAD(ureg, r[2], ureg_src(r[0]), ureg_scalar(vs->aWgt, i), ureg_src(r[2]));
- else
- ureg_MUL(ureg, r[2], ureg_src(r[0]), ureg_scalar(vs->aWgt, 0));
+ ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), cWM[0]);
+ ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), cWM[1], ureg_src(tmp));
+ ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), cWM[2], ureg_src(tmp));
+ ureg_MAD(ureg, tmp, _WWWW(vs->aVtx), cWM[3], ureg_src(tmp));
+
+ if (i < (key->vertexblend - 1)) {
+ /* accumulate weighted position value */
+ ureg_MAD(ureg, r[2], ureg_src(tmp), ureg_scalar(vs->aWgt, i), ureg_src(r[2]));
+ /* subtract weighted position value for last value */
+ ureg_SUB(ureg, r[3], ureg_src(r[3]), ureg_scalar(vs->aWgt, i));
+ }
}
+
+ /* the last weighted position is always 1 - sum_of_previous_weights */
+ ureg_MAD(ureg, r[2], ureg_src(tmp), ureg_scalar(ureg_src(r[3]), key->vertexblend - 1), ureg_src(r[2]));
+
/* multiply by VIEW_PROJ */
- ureg_MUL(ureg, r[0], _X(r[2]), _CONST(8));
- ureg_MAD(ureg, r[0], _Y(r[2]), _CONST(9), ureg_src(r[0]));
- ureg_MAD(ureg, r[0], _Z(r[2]), _CONST(10), ureg_src(r[0]));
- ureg_MAD(ureg, oPos, _W(r[2]), _CONST(11), ureg_src(r[0]));
+ ureg_MUL(ureg, tmp, _X(r[2]), _CONST(8));
+ ureg_MAD(ureg, tmp, _Y(r[2]), _CONST(9), ureg_src(tmp));
+ ureg_MAD(ureg, tmp, _Z(r[2]), _CONST(10), ureg_src(tmp));
+ ureg_MAD(ureg, oPos, _W(r[2]), _CONST(11), ureg_src(tmp));
if (need_rVtx)
vs->aVtx = ureg_src(r[2]);
@@ -515,10 +525,10 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
ureg_MOV(ureg, oPos, ureg_src(tmp));
} else {
/* position = vertex * WORLD_VIEW_PROJ */
- ureg_MUL(ureg, r[0], _XXXX(vs->aVtx), _CONST(0));
- ureg_MAD(ureg, r[0], _YYYY(vs->aVtx), _CONST(1), ureg_src(r[0]));
- ureg_MAD(ureg, r[0], _ZZZZ(vs->aVtx), _CONST(2), ureg_src(r[0]));
- ureg_MAD(ureg, oPos, _WWWW(vs->aVtx), _CONST(3), ureg_src(r[0]));
+ ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(0));
+ ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(1), ureg_src(tmp));
+ ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(2), ureg_src(tmp));
+ ureg_MAD(ureg, oPos, _WWWW(vs->aVtx), _CONST(3), ureg_src(tmp));
}
if (need_rVtx) {
@@ -746,12 +756,10 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
{
/* hitDir = light.position - eyeVtx
* d = length(hitDir)
- * hitDir /= d
*/
ureg_SUB(ureg, rHit, cLPos, ureg_src(rVtx));
ureg_DP3(ureg, tmp_x, ureg_src(rHit), ureg_src(rHit));
ureg_RSQ(ureg, tmp_y, _X(tmp));
- ureg_MUL(ureg, rHit, ureg_src(rHit), _Y(tmp)); /* normalize */
ureg_MUL(ureg, tmp_x, _X(tmp), _Y(tmp)); /* length */
/* att = 1.0 / (light.att0 + (light.att1 + light.att2 * d) * d) */
@@ -765,6 +773,9 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
ureg_ENDIF(ureg);
+ /* normalize hitDir */
+ ureg_normalize3(ureg, rHit, ureg_src(rHit), tmp);
+
/* if (SPOT light) */
ureg_SEQ(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_SPOT));
ureg_IF(ureg, _X(tmp), &label[l++]);
@@ -799,9 +810,9 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
/* midVec = normalize(hitDir + eyeDir) */
if (key->localviewer) {
ureg_normalize3(ureg, rMid, ureg_src(rVtx), tmp);
- ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_negate(ureg_src(rMid)));
+ ureg_SUB(ureg, rMid, ureg_src(rHit), ureg_src(rMid));
} else {
- ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, 1.0f));
+ ureg_SUB(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, 1.0f));
}
ureg_normalize3(ureg, rMid, ureg_src(rMid), tmp);
ureg_DP3(ureg, ureg_saturate(tmp_y), ureg_src(rNrm), ureg_src(rMid));
@@ -849,7 +860,14 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), vs->mtlA, ureg_src(tmp), vs->mtlE);
ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W ), vs->mtlA, vs->mtlE);
}
- ureg_MAD(ureg, oCol[0], ureg_src(rD), vs->mtlD, ureg_src(tmp));
+
+ if (key->specular_enable) {
+ /* add oCol[1] to oCol[0] */
+ ureg_MAD(ureg, tmp, ureg_src(rD), vs->mtlD, ureg_src(tmp));
+ ureg_MAD(ureg, oCol[0], ureg_src(rS), vs->mtlS, ureg_src(tmp));
+ } else {
+ ureg_MAD(ureg, oCol[0], ureg_src(rD), vs->mtlD, ureg_src(tmp));
+ }
ureg_MUL(ureg, oCol[1], ureg_src(rS), vs->mtlS);
} else
/* COLOR */
@@ -1012,10 +1030,10 @@ ps_get_ts_arg(struct ps_build_ctx *ps, unsigned ta)
reg = (ps->stage.index == ps->stage.index_pre_mod) ? ureg_src(ps->rMod) : ps->rCurSrc;
break;
case D3DTA_DIFFUSE:
- reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_PERSPECTIVE);
+ reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR);
break;
case D3DTA_SPECULAR:
- reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_PERSPECTIVE);
+ reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
break;
case D3DTA_TEMP:
reg = ps->rTmpSrc;
@@ -1222,7 +1240,7 @@ nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key)
ps.ureg = ureg;
ps.stage.index_pre_mod = -1;
- ps.vC[0] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_PERSPECTIVE);
+ ps.vC[0] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR);
/* Declare all TEMPs we might need, serious drivers have a register allocator. */
for (i = 0; i < Elements(ps.r); ++i)
@@ -1241,7 +1259,7 @@ nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key)
if (key->ts[s].colorarg0 == D3DTA_SPECULAR ||
key->ts[s].colorarg1 == D3DTA_SPECULAR ||
key->ts[s].colorarg2 == D3DTA_SPECULAR)
- ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_PERSPECTIVE);
+ ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
if (key->ts[s].colorarg0 == D3DTA_TEXTURE ||
key->ts[s].colorarg1 == D3DTA_TEXTURE ||
@@ -1258,7 +1276,7 @@ nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key)
if (key->ts[s].alphaarg0 == D3DTA_SPECULAR ||
key->ts[s].alphaarg1 == D3DTA_SPECULAR ||
key->ts[s].alphaarg2 == D3DTA_SPECULAR)
- ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_PERSPECTIVE);
+ ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
if (key->ts[s].alphaarg0 == D3DTA_TEXTURE ||
key->ts[s].alphaarg1 == D3DTA_TEXTURE ||
@@ -1269,7 +1287,7 @@ nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key)
}
}
if (key->specular)
- ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_PERSPECTIVE);
+ ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
oCol = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
@@ -1500,6 +1518,9 @@ nine_ff_get_vs(struct NineDevice9 *device)
if (key.fog_mode)
key.fog_range = !key.position_t && state->rs[D3DRS_RANGEFOGENABLE];
+ key.localviewer = !!state->rs[D3DRS_LOCALVIEWER];
+ key.specular_enable = !!state->rs[D3DRS_SPECULARENABLE];
+
if (state->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {
key.vertexblend_indexed = !!state->rs[D3DRS_INDEXEDVERTEXBLENDENABLE];
@@ -1847,7 +1868,7 @@ nine_ff_update(struct NineDevice9 *device)
DBG("vs=%p ps=%p\n", device->state.vs, device->state.ps);
/* NOTE: the only reference belongs to the hash table */
- if (!device->state.vs) {
+ if (!state->programmable_vs) {
device->ff.vs = nine_ff_get_vs(device);
device->state.changed.group |= NINE_STATE_VS;
}
@@ -1856,7 +1877,7 @@ nine_ff_update(struct NineDevice9 *device)
device->state.changed.group |= NINE_STATE_PS;
}
- if (!device->state.vs) {
+ if (!state->programmable_vs) {
nine_ff_load_vs_transforms(device);
nine_ff_load_tex_matrices(device);
nine_ff_load_lights(device);
diff --git a/src/gallium/state_trackers/nine/nine_limits.h b/src/gallium/state_trackers/nine/nine_limits.h
new file mode 100644
index 00000000000..ef1ed2566ba
--- /dev/null
+++ b/src/gallium/state_trackers/nine/nine_limits.h
@@ -0,0 +1,211 @@
+/*
+ * Copyright 2015 Axel Davy <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef _NINE_LIMITS_H_
+#define _NINE_LIMITS_H_
+
+#include "assert.h"
+#include "d3d9types.h"
+
+// Behaviour codes stored in nine_state_behaviour.state_value_behaviour.
+// state can be any value
+#define NINE_STATE_NO_LIMIT 0
+// value is clamped to min if below min, and to max if above max
+#define NINE_STATE_CLAMP 1
+// boolean: 0 -> false; any other value -> true
+#define NINE_STATE_BOOL 2
+// a mask is applied on the value
+#define NINE_STATE_MASK 3
+// if outside a range, state value is changed to a default value
+#define NINE_STATE_RANGE_DEF_VAL 4
+
+/* Describes how the value of one render state is sanitized.
+ * state_value_behaviour selects which member of u (if any) is meaningful. */
+struct nine_state_behaviour {
+ /* one of the NINE_STATE_* behaviour codes */
+ unsigned state_value_behaviour;
+ union {
+ /* NINE_STATE_CLAMP: inclusive bounds the value is clamped to */
+ struct {
+ unsigned min;
+ unsigned max;
+ } clamp;
+ /* NINE_STATE_MASK: bits kept in the value */
+ unsigned mask;
+ /* NINE_STATE_RANGE_DEF_VAL: inclusive valid range and the value
+ * substituted when the input lies outside of it */
+ struct {
+ unsigned min;
+ unsigned max;
+ unsigned default_val;
+ } range_def_val;
+ } u;
+};
+
+/* Designated-initializer helpers for a table indexed by D3DRS_* value.
+ * o is the render state name without its D3DRS_ prefix. */
+
+/* D3DRS_<o>: value is left untouched */
+#define __NO_LIMIT_RS(o) \
+ [D3DRS_##o] = {NINE_STATE_NO_LIMIT}
+
+/* D3DRS_<o>: value is clamped to [m, M] */
+#define __CLAMP_RS(o, m, M) \
+ [D3DRS_##o] = {NINE_STATE_CLAMP, {.clamp = {m, M}}}
+
+/* D3DRS_<o>: value is normalized to 0 or 1 */
+#define __BOOLEAN_RS(o) \
+ [D3DRS_##o] = {NINE_STATE_BOOL}
+
+/* D3DRS_<o>: value is ANDed with m */
+#define __MASK_RS(o, m) \
+ [D3DRS_##o] = {NINE_STATE_MASK, {.mask = m}}
+
+/* D3DRS_<o>: value outside [m, M] is replaced by d */
+#define __RANGE_DEF_VAL_RS(o, m, M, d) \
+ [D3DRS_##o] = {NINE_STATE_RANGE_DEF_VAL, {.range_def_val = {m, M, d}}}
+
+/* D3DRS_<o>: behaviour not settled yet. The m and M arguments are NOT used:
+ * the entry expands to NINE_STATE_NO_LIMIT, so the value is left untouched. */
+#define __TO_DETERMINE_RS(o, m, M) \
+ [D3DRS_##o] = {NINE_STATE_NO_LIMIT}
+
+/* Sanitizing rule for each render state, indexed by D3DRS_* value through
+ * designated initializers. Indices without an explicit entry are
+ * zero-initialized, which is NINE_STATE_NO_LIMIT. Entries written with
+ * __TO_DETERMINE_RS are also NINE_STATE_NO_LIMIT; their range arguments are
+ * informational only. */
+static const struct nine_state_behaviour
+render_state_limits_table[D3DRS_BLENDOPALPHA + 1] = {
+ __TO_DETERMINE_RS(ZENABLE, 0, 3),
+ __TO_DETERMINE_RS(FILLMODE, 1, 3),
+ __CLAMP_RS(SHADEMODE, 1, 3),
+ __BOOLEAN_RS(ZWRITEENABLE),
+ __BOOLEAN_RS(ALPHATESTENABLE),
+ __BOOLEAN_RS(LASTPIXEL),
+ __RANGE_DEF_VAL_RS(SRCBLEND, 1, 17, D3DBLEND_ZERO),
+ __RANGE_DEF_VAL_RS(DESTBLEND, 1, 17, D3DBLEND_ZERO),
+ __CLAMP_RS(CULLMODE, 1, 3),
+ __CLAMP_RS(ZFUNC, 1, 8),
+ __MASK_RS(ALPHAREF, 0x000000FF),
+ __CLAMP_RS(ALPHAFUNC, 1, 8),
+ __BOOLEAN_RS(DITHERENABLE),
+ __BOOLEAN_RS(ALPHABLENDENABLE),
+ __BOOLEAN_RS(FOGENABLE),
+ __BOOLEAN_RS(SPECULARENABLE),
+ __NO_LIMIT_RS(FOGCOLOR),
+ __MASK_RS(FOGTABLEMODE, 0x00000007),
+ __NO_LIMIT_RS(FOGSTART), /* a bit more complex than that, let's ignore it */
+ __NO_LIMIT_RS(FOGEND),
+ __NO_LIMIT_RS(FOGDENSITY), /* actually should be between 0.0 and 1.0 */
+ __BOOLEAN_RS(RANGEFOGENABLE),
+ __BOOLEAN_RS(STENCILENABLE),
+ __CLAMP_RS(STENCILFAIL, 1, 8),
+ __CLAMP_RS(STENCILZFAIL, 1, 8),
+ __CLAMP_RS(STENCILPASS, 1, 8),
+ __CLAMP_RS(STENCILFUNC, 1, 8),
+ __NO_LIMIT_RS(STENCILREF),
+ __NO_LIMIT_RS(STENCILMASK),
+ __NO_LIMIT_RS(STENCILWRITEMASK),
+ __NO_LIMIT_RS(TEXTUREFACTOR),
+ __TO_DETERMINE_RS(WRAP0, 0, 15),
+ __TO_DETERMINE_RS(WRAP1, 0, 15),
+ __TO_DETERMINE_RS(WRAP2, 0, 15),
+ __TO_DETERMINE_RS(WRAP3, 0, 15),
+ __TO_DETERMINE_RS(WRAP4, 0, 15),
+ __TO_DETERMINE_RS(WRAP5, 0, 15),
+ __TO_DETERMINE_RS(WRAP6, 0, 15),
+ __TO_DETERMINE_RS(WRAP7, 0, 15),
+ __BOOLEAN_RS(CLIPPING),
+ __BOOLEAN_RS(LIGHTING),
+ __NO_LIMIT_RS(AMBIENT),
+ __MASK_RS(FOGVERTEXMODE, 0x00000007),
+ __BOOLEAN_RS(COLORVERTEX),
+ __BOOLEAN_RS(LOCALVIEWER),
+ __BOOLEAN_RS(NORMALIZENORMALS),
+ __TO_DETERMINE_RS(DIFFUSEMATERIALSOURCE, 0, 2),
+ __TO_DETERMINE_RS(SPECULARMATERIALSOURCE, 0, 2),
+ __TO_DETERMINE_RS(AMBIENTMATERIALSOURCE, 0, 2),
+ __TO_DETERMINE_RS(EMISSIVEMATERIALSOURCE, 0, 2),
+ __TO_DETERMINE_RS(VERTEXBLEND, 0, 256), /* values between 4 and 254 -both included- are forbidden too */
+ __NO_LIMIT_RS(CLIPPLANEENABLE), /* expected check seems complex */
+ __TO_DETERMINE_RS(POINTSIZE, 0, 0xFFFFFFFF),
+ __TO_DETERMINE_RS(POINTSIZE_MIN, 0, 0x7FFFFFFF), /* float >= 0.0 */
+ __BOOLEAN_RS(POINTSPRITEENABLE),
+ __BOOLEAN_RS(POINTSCALEENABLE),
+ __TO_DETERMINE_RS(POINTSCALE_A, 0, 0x7FFFFFFF), /* float >= 0.0 */
+ __TO_DETERMINE_RS(POINTSCALE_B, 0, 0x7FFFFFFF), /* float >= 0.0 */
+ __TO_DETERMINE_RS(POINTSCALE_C, 0, 0x7FFFFFFF), /* float >= 0.0 */
+ __BOOLEAN_RS(MULTISAMPLEANTIALIAS),
+ __NO_LIMIT_RS(MULTISAMPLEMASK),
+ __TO_DETERMINE_RS(PATCHEDGESTYLE, 0, 1),
+ __TO_DETERMINE_RS(DEBUGMONITORTOKEN, 0, 1),
+ __TO_DETERMINE_RS(POINTSIZE_MAX, 0, 0x7FFFFFFF), /* check more complex than that */
+ __BOOLEAN_RS(INDEXEDVERTEXBLENDENABLE),
+ __TO_DETERMINE_RS(COLORWRITEENABLE, 0, 15),
+ __NO_LIMIT_RS(TWEENFACTOR),
+ __CLAMP_RS(BLENDOP, 1, 5),
+ __TO_DETERMINE_RS(POSITIONDEGREE, 1, 5), /* can actually be only 1 or 5 */
+ __TO_DETERMINE_RS(NORMALDEGREE, 1, 2),
+ __BOOLEAN_RS(SCISSORTESTENABLE),
+ __NO_LIMIT_RS(SLOPESCALEDEPTHBIAS),
+ __BOOLEAN_RS(ANTIALIASEDLINEENABLE),
+ __NO_LIMIT_RS(MINTESSELLATIONLEVEL),
+ __NO_LIMIT_RS(MAXTESSELLATIONLEVEL),
+ __NO_LIMIT_RS(ADAPTIVETESS_X),
+ __NO_LIMIT_RS(ADAPTIVETESS_Y),
+ __NO_LIMIT_RS(ADAPTIVETESS_Z),
+ __NO_LIMIT_RS(ADAPTIVETESS_W),
+ __BOOLEAN_RS(ENABLEADAPTIVETESSELLATION),
+ __BOOLEAN_RS(TWOSIDEDSTENCILMODE),
+ __CLAMP_RS(CCW_STENCILFAIL, 1, 8),
+ __CLAMP_RS(CCW_STENCILZFAIL, 1, 8),
+ __CLAMP_RS(CCW_STENCILPASS, 1, 8),
+ __CLAMP_RS(CCW_STENCILFUNC, 1, 8),
+ __TO_DETERMINE_RS(COLORWRITEENABLE1, 0, 15),
+ __TO_DETERMINE_RS(COLORWRITEENABLE2, 0, 15),
+ __TO_DETERMINE_RS(COLORWRITEENABLE3, 0, 15),
+ __NO_LIMIT_RS(BLENDFACTOR),
+ __BOOLEAN_RS(SRGBWRITEENABLE),
+ __NO_LIMIT_RS(DEPTHBIAS),
+ __TO_DETERMINE_RS(WRAP8, 0, 15),
+ __TO_DETERMINE_RS(WRAP9, 0, 15),
+ __TO_DETERMINE_RS(WRAP10, 0, 15),
+ __TO_DETERMINE_RS(WRAP11, 0, 15),
+ __TO_DETERMINE_RS(WRAP12, 0, 15),
+ __TO_DETERMINE_RS(WRAP13, 0, 15),
+ __TO_DETERMINE_RS(WRAP14, 0, 15),
+ __TO_DETERMINE_RS(WRAP15, 0, 15),
+ __BOOLEAN_RS(SEPARATEALPHABLENDENABLE),
+ __RANGE_DEF_VAL_RS(SRCBLENDALPHA, 1, 17, D3DBLEND_ZERO),
+ __RANGE_DEF_VAL_RS(DESTBLENDALPHA, 1, 17, D3DBLEND_ZERO),
+ __CLAMP_RS(BLENDOPALPHA, 1, 5)
+};
+
+/* Sanitizes the value requested for render state State according to the
+ * rule recorded for it in render_state_limits_table:
+ *   NINE_STATE_NO_LIMIT       - Value is returned unchanged
+ *   NINE_STATE_CLAMP          - Value is clamped to [min, max]
+ *   NINE_STATE_BOOL           - any non-zero Value becomes 1
+ *   NINE_STATE_MASK           - Value is ANDed with the mask
+ *   NINE_STATE_RANGE_DEF_VAL  - Value outside [min, max] becomes default_val
+ *
+ * A State that has no slot in the table is treated as having no limit and
+ * Value is returned unchanged, rather than reading past the end of the table.
+ *
+ * Returns the sanitized value. */
+static inline DWORD
+nine_fix_render_state_value(D3DRENDERSTATETYPE State,
+                            DWORD Value)
+{
+    const struct nine_state_behaviour *behaviour;
+
+    /* The cast also rejects negative values should the enum be signed. */
+    if ((unsigned)State >= sizeof(render_state_limits_table) /
+                           sizeof(render_state_limits_table[0]))
+        return Value;
+
+    behaviour = &render_state_limits_table[State];
+
+    switch (behaviour->state_value_behaviour) {
+    case NINE_STATE_CLAMP:
+        if (Value < behaviour->u.clamp.min)
+            Value = behaviour->u.clamp.min;
+        else if (Value > behaviour->u.clamp.max)
+            Value = behaviour->u.clamp.max;
+        break;
+    case NINE_STATE_BOOL:
+        Value = Value ? 1 : 0;
+        break;
+    case NINE_STATE_MASK:
+        Value = Value & behaviour->u.mask;
+        break;
+    case NINE_STATE_RANGE_DEF_VAL:
+        if (Value < behaviour->u.range_def_val.min ||
+            Value > behaviour->u.range_def_val.max)
+            Value = behaviour->u.range_def_val.default_val;
+        break;
+    case NINE_STATE_NO_LIMIT:
+    default:
+        break;
+    }
+
+    return Value;
+}
+
+#endif /* _NINE_LIMITS_H_ */
diff --git a/src/gallium/state_trackers/nine/nine_pdata.h b/src/gallium/state_trackers/nine/nine_pdata.h
index 7bdd702cfbb..0e9a2aa7160 100644
--- a/src/gallium/state_trackers/nine/nine_pdata.h
+++ b/src/gallium/state_trackers/nine/nine_pdata.h
@@ -5,6 +5,7 @@
struct pheader
{
boolean unknown;
+ GUID guid;
DWORD size;
char data[1];
};
diff --git a/src/gallium/state_trackers/nine/nine_pipe.c b/src/gallium/state_trackers/nine/nine_pipe.c
index 2be30f7e097..27a10d64473 100644
--- a/src/gallium/state_trackers/nine/nine_pipe.c
+++ b/src/gallium/state_trackers/nine/nine_pipe.c
@@ -181,6 +181,7 @@ nine_convert_blend_state(struct pipe_blend_state *blend_state, const DWORD *rs)
}
nine_convert_blend_state_fixup(&blend, rs); /* for BOTH[INV]SRCALPHA */
}
+
blend.rt[0].colormask = rs[D3DRS_COLORWRITEENABLE];
if (rs[D3DRS_COLORWRITEENABLE1] != rs[D3DRS_COLORWRITEENABLE] ||
@@ -222,8 +223,8 @@ nine_convert_sampler_state(struct cso_context *ctx, int idx, const DWORD *ss)
samp.wrap_s = d3dtextureaddress_to_pipe_tex_wrap(ss[D3DSAMP_ADDRESSU]);
samp.wrap_t = d3dtextureaddress_to_pipe_tex_wrap(ss[D3DSAMP_ADDRESSV]);
samp.wrap_r = d3dtextureaddress_to_pipe_tex_wrap(ss[D3DSAMP_ADDRESSW]);
- samp.min_img_filter = ss[D3DSAMP_MINFILTER] == D3DTEXF_POINT ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR;
- samp.mag_img_filter = ss[D3DSAMP_MAGFILTER] == D3DTEXF_POINT ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR;
+ samp.min_img_filter = (ss[D3DSAMP_MINFILTER] == D3DTEXF_POINT && !ss[NINED3DSAMP_SHADOW]) ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR;
+ samp.mag_img_filter = (ss[D3DSAMP_MAGFILTER] == D3DTEXF_POINT && !ss[NINED3DSAMP_SHADOW]) ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR;
if (ss[D3DSAMP_MINFILTER] == D3DTEXF_ANISOTROPIC ||
ss[D3DSAMP_MAGFILTER] == D3DTEXF_ANISOTROPIC)
samp.max_anisotropy = ss[D3DSAMP_MAXANISOTROPY];
@@ -265,7 +266,7 @@ nine_pipe_context_clear(struct NineDevice9 *This)
const enum pipe_format nine_d3d9_to_pipe_format_map[120] =
{
[D3DFMT_UNKNOWN] = PIPE_FORMAT_NONE,
- [D3DFMT_R8G8B8] = PIPE_FORMAT_NONE,
+ [D3DFMT_R8G8B8] = PIPE_FORMAT_R8G8B8_UNORM,
[D3DFMT_A8R8G8B8] = PIPE_FORMAT_B8G8R8A8_UNORM,
[D3DFMT_X8R8G8B8] = PIPE_FORMAT_B8G8R8X8_UNORM,
[D3DFMT_R5G6B5] = PIPE_FORMAT_B5G6R5_UNORM,
@@ -323,8 +324,8 @@ const enum pipe_format nine_d3d9_to_pipe_format_map[120] =
const D3DFORMAT nine_pipe_to_d3d9_format_map[PIPE_FORMAT_COUNT] =
{
[PIPE_FORMAT_NONE] = D3DFMT_UNKNOWN,
-
-/* [PIPE_FORMAT_B8G8R8_UNORM] = D3DFMT_R8G8B8, */
+ /* TODO: rename PIPE_FORMAT_R8G8B8_UNORM to PIPE_FORMAT_B8G8R8_UNORM */
+ [PIPE_FORMAT_R8G8B8_UNORM] = D3DFMT_R8G8B8,
[PIPE_FORMAT_B8G8R8A8_UNORM] = D3DFMT_A8R8G8B8,
[PIPE_FORMAT_B8G8R8X8_UNORM] = D3DFMT_X8R8G8B8,
[PIPE_FORMAT_B5G6R5_UNORM] = D3DFMT_R5G6B5,
diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c
index ed431738abc..a7a7da27903 100644
--- a/src/gallium/state_trackers/nine/nine_shader.c
+++ b/src/gallium/state_trackers/nine/nine_shader.c
@@ -852,7 +852,12 @@ tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
/* the address register (vs only) must be
* assigned before use */
assert(!ureg_dst_is_undef(tx->regs.a0));
- ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
+ /* Round down (floor) for vs1.1 (contrary to the doc), else
+ * round to nearest */
+ if (tx->version.major < 2 && tx->version.minor < 2)
+ ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0));
+ else
+ ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
src = ureg_src(tx->regs.address);
} else {
if (tx->version.major < 2 && tx->version.minor < 4) {
@@ -870,9 +875,12 @@ tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
} else {
if (tx->version.major < 3) {
assert(!param->rel);
- src = ureg_DECL_fs_input(tx->ureg, TGSI_SEMANTIC_COLOR,
- param->idx,
- TGSI_INTERPOLATE_PERSPECTIVE);
+ src = ureg_DECL_fs_input_cyl_centroid(
+ ureg, TGSI_SEMANTIC_COLOR, param->idx,
+ TGSI_INTERPOLATE_COLOR, 0,
+ tx->info->force_color_in_centroid ?
+ TGSI_INTERPOLATE_LOC_CENTROID : 0,
+ 0, 1);
} else {
assert(!param->rel); /* TODO */
assert(param->idx < Elements(tx->regs.v));
@@ -1163,12 +1171,9 @@ _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
assert(!param->rel);
tx->info->rt_mask |= 1 << param->idx;
if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
- /* ps < 3: oCol[0] will have fog blending afterward
- * vs < 3: oD1.w (D3DPMISCCAPS_FOGANDSPECULARALPHA) set to 0 even if set */
+ /* ps < 3: oCol[0] will have fog blending afterward */
if (!IS_VS && tx->version.major < 3 && param->idx == 0) {
tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
- } else if (IS_VS && tx->version.major < 3 && param->idx == 1) {
- tx->regs.oCol[1] = ureg_DECL_temporary(tx->ureg);
} else {
tx->regs.oCol[param->idx] =
ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
@@ -1543,25 +1548,6 @@ DECL_SPECIAL(CALLNZ)
return D3D_OK;
}
-DECL_SPECIAL(MOV_vs1x)
-{
- if (tx->insn.dst[0].file == D3DSPR_ADDR) {
- /* Implementation note: We don't write directly
- * to the addr register, but to an intermediate
- * float register.
- * Contrary to the doc, when writing to ADDR here,
- * the rounding is not to nearest, but to lowest
- * (wine test).
- * Since we use ARR next, substract 0.5. */
- ureg_SUB(tx->ureg,
- tx_dst_param(tx, &tx->insn.dst[0]),
- tx_src_param(tx, &tx->insn.src[0]),
- ureg_imm1f(tx->ureg, 0.5f));
- return D3D_OK;
- }
- return NineTranslateInstruction_Generic(tx);
-}
-
DECL_SPECIAL(LOOP)
{
struct ureg_program *ureg = tx->ureg;
@@ -1978,6 +1964,7 @@ nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
return TGSI_INTERPOLATE_LINEAR;
case TGSI_SEMANTIC_BCOLOR:
case TGSI_SEMANTIC_COLOR:
+ return TGSI_INTERPOLATE_COLOR;
case TGSI_SEMANTIC_FOG:
case TGSI_SEMANTIC_GENERIC:
case TGSI_SEMANTIC_TEXCOORD:
@@ -2058,13 +2045,17 @@ DECL_SPECIAL(DCL)
}
} else {
if (is_input && tx->version.major >= 3) {
+ unsigned interp_location = 0;
/* SM3 only, SM2 input semantic determined by file */
assert(sem.reg.idx < Elements(tx->regs.v));
+ if (sem.reg.mod & NINED3DSPDM_CENTROID ||
+ (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid))
+ interp_location = TGSI_INTERPOLATE_LOC_CENTROID;
tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
ureg, tgsi.Name, tgsi.Index,
nine_tgsi_to_interp_mode(&tgsi),
0, /* cylwrap */
- sem.reg.mod & NINED3DSPDM_CENTROID, 0, 1);
+ interp_location, 0, 1);
} else
if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
/* FragColor or FragDepth */
@@ -2736,8 +2727,7 @@ DECL_SPECIAL(COMMENT)
struct sm1_op_info inst_table[] =
{
_OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, NULL), /* 0 */
- _OPI(MOV, MOV, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, SPECIAL(MOV_vs1x)),
- _OPI(MOV, MOV, V(2,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
+ _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
_OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
_OPI(SUB, SUB, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 3 */
_OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
@@ -3426,13 +3416,6 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
}
- /* vs < 3: oD1.w (D3DPMISCCAPS_FOGANDSPECULARALPHA) set to 0 even if set */
- if (IS_VS && tx->version.major < 3 && !ureg_dst_is_undef(tx->regs.oCol[1])) {
- struct ureg_dst dst = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 1);
- ureg_MOV(tx->ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oCol[1]));
- ureg_MOV(tx->ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 0.0f));
- }
-
if (info->position_t)
ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
diff --git a/src/gallium/state_trackers/nine/nine_shader.h b/src/gallium/state_trackers/nine/nine_shader.h
index 41577ac572b..1fe0c4bd182 100644
--- a/src/gallium/state_trackers/nine/nine_shader.h
+++ b/src/gallium/state_trackers/nine/nine_shader.h
@@ -61,6 +61,7 @@ struct nine_shader_info
uint8_t fog_enable;
uint8_t fog_mode;
+ uint8_t force_color_in_centroid;
uint16_t projected; /* ps 1.1 to 1.3 */
unsigned const_i_base; /* in vec4 (16 byte) units */
diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c
index aee31622088..6f94e378984 100644
--- a/src/gallium/state_trackers/nine/nine_state.c
+++ b/src/gallium/state_trackers/nine/nine_state.c
@@ -367,14 +367,14 @@ prepare_vs(struct NineDevice9 *device, uint8_t shader_changed)
uint32_t changed_group = 0;
int has_key_changed = 0;
- if (likely(vs))
+ if (likely(state->programmable_vs))
has_key_changed = NineVertexShader9_UpdateKey(vs, state);
if (!shader_changed && !has_key_changed)
return 0;
/* likely because we dislike FF */
- if (likely(vs)) {
+ if (likely(state->programmable_vs)) {
state->cso.vs = NineVertexShader9_GetVariant(vs);
} else {
vs = device->ff.vs;
@@ -427,8 +427,8 @@ prepare_ps(struct NineDevice9 *device, uint8_t shader_changed)
/* State preparation + State commit */
-static uint32_t
-update_framebuffer(struct NineDevice9 *device)
+static void
+update_framebuffer(struct NineDevice9 *device, bool is_clear)
{
struct pipe_context *pipe = device->pipe;
struct nine_state *state = &device->state;
@@ -438,7 +438,8 @@ update_framebuffer(struct NineDevice9 *device)
unsigned w = rt0->desc.Width;
unsigned h = rt0->desc.Height;
D3DMULTISAMPLE_TYPE nr_samples = rt0->desc.MultiSampleType;
- unsigned mask = state->ps ? state->ps->rt_mask : 1;
+ unsigned ps_mask = state->ps ? state->ps->rt_mask : 1;
+ unsigned mask = is_clear ? 0xf : ps_mask;
const int sRGB = state->rs[D3DRS_SRGBWRITEENABLE] ? 1 : 0;
DBG("\n");
@@ -498,13 +499,13 @@ update_framebuffer(struct NineDevice9 *device)
pipe->set_framebuffer_state(pipe, fb); /* XXX: cso ? */
- return state->changed.group;
+ if (is_clear && state->rt_mask == ps_mask)
+ state->changed.group &= ~NINE_STATE_FB;
}
static void
update_viewport(struct NineDevice9 *device)
{
- struct pipe_context *pipe = device->pipe;
const D3DVIEWPORT9 *vport = &device->state.viewport;
struct pipe_viewport_state pvport;
@@ -543,7 +544,7 @@ update_viewport(struct NineDevice9 *device)
pvport.translate[1] -= 1.0f / 128.0f;
}
- pipe->set_viewport_states(pipe, 0, 1, &pvport);
+ cso_set_viewport(device->cso, &pvport);
}
/* Loop through VS inputs and pick the vertex elements with the declared
@@ -567,7 +568,7 @@ update_vertex_elements(struct NineDevice9 *device)
state->stream_usage_mask = 0;
memset(vdecl_index_map, -1, 16);
memset(used_streams, 0, device->caps.MaxStreams);
- vs = device->state.vs ? device->state.vs : device->ff.vs;
+ vs = state->programmable_vs ? device->state.vs : device->ff.vs;
if (vdecl) {
for (n = 0; n < vs->num_inputs; ++n) {
@@ -761,7 +762,7 @@ update_textures_and_samplers(struct NineDevice9 *device)
cso_single_sampler_done(device->cso, PIPE_SHADER_FRAGMENT);
commit_samplers = FALSE;
- sampler_mask = state->vs ? state->vs->sampler_mask : 0;
+ sampler_mask = state->programmable_vs ? state->vs->sampler_mask : 0;
state->bound_samplers_mask_vs = 0;
for (num_textures = 0, i = 0; i < NINE_MAX_SAMPLERS_VS; ++i) {
const unsigned s = NINE_SAMPLER_VS(i);
@@ -854,7 +855,7 @@ commit_vs_constants(struct NineDevice9 *device)
{
struct pipe_context *pipe = device->pipe;
- if (unlikely(!device->state.vs))
+ if (unlikely(!device->state.programmable_vs))
pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs_ff);
else
pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs);
@@ -913,7 +914,8 @@ commit_ps(struct NineDevice9 *device)
NINE_STATE_DSA | \
NINE_STATE_VIEWPORT | \
NINE_STATE_VDECL | \
- NINE_STATE_IDXBUF)
+ NINE_STATE_IDXBUF | \
+ NINE_STATE_STREAMFREQ)
#define NINE_STATE_RARE \
(NINE_STATE_SCISSOR | \
@@ -934,16 +936,14 @@ validate_textures(struct NineDevice9 *device)
}
void
-nine_update_state_framebuffer(struct NineDevice9 *device)
+nine_update_state_framebuffer_clear(struct NineDevice9 *device)
{
struct nine_state *state = &device->state;
validate_textures(device);
if (state->changed.group & NINE_STATE_FB)
- update_framebuffer(device);
-
- state->changed.group &= ~NINE_STATE_FB;
+ update_framebuffer(device, TRUE);
}
boolean
@@ -964,7 +964,7 @@ nine_update_state(struct NineDevice9 *device)
validate_textures(device); /* may clobber state */
/* ff_update may change VS/PS dirty bits */
- if (unlikely(!state->vs || !state->ps))
+ if (unlikely(!state->programmable_vs || !state->ps))
nine_ff_update(device);
group = state->changed.group;
@@ -977,15 +977,14 @@ nine_update_state(struct NineDevice9 *device)
if (group & (NINE_STATE_COMMON | NINE_STATE_VS)) {
if (group & NINE_STATE_FB)
- group |= update_framebuffer(device); /* may set NINE_STATE_RASTERIZER */
+ update_framebuffer(device, FALSE);
if (group & NINE_STATE_BLEND)
prepare_blend(device);
if (group & NINE_STATE_DSA)
prepare_dsa(device);
if (group & NINE_STATE_VIEWPORT)
update_viewport(device);
- if ((group & (NINE_STATE_VDECL | NINE_STATE_VS)) ||
- state->changed.stream_freq & ~1)
+ if (group & (NINE_STATE_VDECL | NINE_STATE_VS | NINE_STATE_STREAMFREQ))
update_vertex_elements(device);
if (group & NINE_STATE_IDXBUF)
commit_index_buffer(device);
@@ -997,12 +996,12 @@ nine_update_state(struct NineDevice9 *device)
if (group & (NINE_STATE_TEXTURE | NINE_STATE_SAMPLER))
update_textures_and_samplers(device);
if (device->prefer_user_constbuf) {
- if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS)) && state->vs)
+ if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS)) && state->programmable_vs)
prepare_vs_constants_userbuf(device);
if ((group & (NINE_STATE_PS_CONST | NINE_STATE_PS)) && state->ps)
prepare_ps_constants_userbuf(device);
} else {
- if ((group & NINE_STATE_VS_CONST) && state->vs)
+ if ((group & NINE_STATE_VS_CONST) && state->programmable_vs)
upload_constants(device, PIPE_SHADER_VERTEX);
if ((group & NINE_STATE_PS_CONST) && state->ps)
upload_constants(device, PIPE_SHADER_FRAGMENT);
@@ -1262,6 +1261,8 @@ nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps,
*/
state->rs[D3DRS_POINTSIZE_MAX] = fui(caps->MaxPointSize);
+ memcpy(state->rs_advertised, state->rs, sizeof(state->rs));
+
/* Set changed flags to initialize driver.
*/
state->changed.group = NINE_STATE_ALL;
@@ -1314,8 +1315,10 @@ nine_state_clear(struct nine_state *state, const boolean device)
nine_bind(&state->vs, NULL);
nine_bind(&state->ps, NULL);
nine_bind(&state->vdecl, NULL);
- for (i = 0; i < PIPE_MAX_ATTRIBS; ++i)
+ for (i = 0; i < PIPE_MAX_ATTRIBS; ++i) {
nine_bind(&state->stream[i], NULL);
+ pipe_resource_reference(&state->vtxbuf[i].buffer, NULL);
+ }
nine_bind(&state->idxbuf, NULL);
for (i = 0; i < NINE_MAX_SAMPLERS; ++i) {
if (device &&
diff --git a/src/gallium/state_trackers/nine/nine_state.h b/src/gallium/state_trackers/nine/nine_state.h
index b34da70ef48..a4ec4e3b63a 100644
--- a/src/gallium/state_trackers/nine/nine_state.h
+++ b/src/gallium/state_trackers/nine/nine_state.h
@@ -61,23 +61,24 @@
#define NINE_STATE_SAMPLER (1 << 11)
#define NINE_STATE_VDECL (1 << 12)
#define NINE_STATE_IDXBUF (1 << 13)
-#define NINE_STATE_PRIM (1 << 14)
-#define NINE_STATE_MATERIAL (1 << 15)
-#define NINE_STATE_BLEND_COLOR (1 << 16)
-#define NINE_STATE_STENCIL_REF (1 << 17)
-#define NINE_STATE_SAMPLE_MASK (1 << 18)
-#define NINE_STATE_FF (0x1f << 19)
-#define NINE_STATE_FF_VS (0x17 << 19)
-#define NINE_STATE_FF_PS (0x18 << 19)
-#define NINE_STATE_FF_LIGHTING (1 << 19)
-#define NINE_STATE_FF_MATERIAL (1 << 20)
-#define NINE_STATE_FF_VSTRANSF (1 << 21)
-#define NINE_STATE_FF_PSSTAGES (1 << 22)
-#define NINE_STATE_FF_OTHER (1 << 23)
-#define NINE_STATE_FOG_SHADER (1 << 24)
-#define NINE_STATE_PS1X_SHADER (1 << 25)
-#define NINE_STATE_ALL 0x3ffffff
-#define NINE_STATE_UNHANDLED (1 << 26)
+#define NINE_STATE_STREAMFREQ (1 << 14)
+#define NINE_STATE_PRIM (1 << 15)
+#define NINE_STATE_MATERIAL (1 << 16)
+#define NINE_STATE_BLEND_COLOR (1 << 17)
+#define NINE_STATE_STENCIL_REF (1 << 18)
+#define NINE_STATE_SAMPLE_MASK (1 << 19)
+#define NINE_STATE_FF (0x1f << 20)
+#define NINE_STATE_FF_VS (0x17 << 20)
+#define NINE_STATE_FF_PS (0x18 << 20)
+#define NINE_STATE_FF_LIGHTING (1 << 20)
+#define NINE_STATE_FF_MATERIAL (1 << 21)
+#define NINE_STATE_FF_VSTRANSF (1 << 22)
+#define NINE_STATE_FF_PSSTAGES (1 << 23)
+#define NINE_STATE_FF_OTHER (1 << 24)
+#define NINE_STATE_FOG_SHADER (1 << 25)
+#define NINE_STATE_PS1X_SHADER (1 << 26)
+#define NINE_STATE_ALL 0x7ffffff
+#define NINE_STATE_UNHANDLED (1 << 27)
#define NINE_STATE_COMMIT_DSA (1 << 0)
#define NINE_STATE_COMMIT_RASTERIZER (1 << 1)
@@ -152,6 +153,7 @@ struct nine_state
int vs_const_i[NINE_MAX_CONST_I][4];
BOOL vs_const_b[NINE_MAX_CONST_B];
float *vs_lconstf_temp;
+ BOOL programmable_vs;
struct NinePixelShader9 *ps;
float *ps_const_f;
@@ -179,6 +181,7 @@ struct nine_state
uint8_t rt_mask;
DWORD rs[NINED3DRS_COUNT];
+ DWORD rs_advertised[NINED3DRS_COUNT]; /* the ones apps get with GetRenderState */
struct NineBaseTexture9 *texture[NINE_MAX_SAMPLERS]; /* PS, DMAP, VS */
@@ -236,7 +239,7 @@ extern const uint32_t nine_render_states_vertex[(NINED3DRS_COUNT + 31) / 32];
struct NineDevice9;
-void nine_update_state_framebuffer(struct NineDevice9 *);
+void nine_update_state_framebuffer_clear(struct NineDevice9 *);
boolean nine_update_state(struct NineDevice9 *);
void nine_state_restore_non_cso(struct NineDevice9 *device);
diff --git a/src/gallium/state_trackers/nine/pixelshader9.c b/src/gallium/state_trackers/nine/pixelshader9.c
index 42bc349c2cc..00be67f8955 100644
--- a/src/gallium/state_trackers/nine/pixelshader9.c
+++ b/src/gallium/state_trackers/nine/pixelshader9.c
@@ -160,6 +160,7 @@ NinePixelShader9_GetVariant( struct NinePixelShader9 *This )
info.sampler_ps1xtypes = key;
info.fog_enable = device->state.rs[D3DRS_FOGENABLE];
info.fog_mode = device->state.rs[D3DRS_FOGTABLEMODE];
+ info.force_color_in_centroid = key >> 34 & 1;
info.projected = (key >> 48) & 0xffff;
hr = nine_translate_shader(This->base.device, &info);
diff --git a/src/gallium/state_trackers/nine/pixelshader9.h b/src/gallium/state_trackers/nine/pixelshader9.h
index e09009f6621..6b431813a81 100644
--- a/src/gallium/state_trackers/nine/pixelshader9.h
+++ b/src/gallium/state_trackers/nine/pixelshader9.h
@@ -28,6 +28,7 @@
#include "nine_state.h"
#include "basetexture9.h"
#include "nine_ff.h"
+#include "surface9.h"
struct nine_lconstf;
@@ -92,6 +93,10 @@ NinePixelShader9_UpdateKey( struct NinePixelShader9 *ps,
key |= ((uint64_t)state->rs[D3DRS_FOGTABLEMODE]) << 33;
}
+ /* centroid interpolation automatically used for color ps inputs */
+ if (state->rt[0]->desc.MultiSampleType > 1)
+ key |= ((uint64_t)1) << 34;
+
if (unlikely(ps->byte_code.version < 0x14)) {
projected = nine_ff_get_projected_key(state);
key |= ((uint64_t) projected) << 48;
diff --git a/src/gallium/state_trackers/nine/resource9.c b/src/gallium/state_trackers/nine/resource9.c
index 6d915338b24..b929c50a83c 100644
--- a/src/gallium/state_trackers/nine/resource9.c
+++ b/src/gallium/state_trackers/nine/resource9.c
@@ -29,12 +29,12 @@
#include "util/u_hash_table.h"
#include "util/u_inlines.h"
+#include "util/u_resource.h"
#include "nine_pdata.h"
#define DBG_CHANNEL DBG_RESOURCE
-
HRESULT
NineResource9_ctor( struct NineResource9 *This,
struct NineUnknownParams *pParams,
@@ -62,6 +62,33 @@ NineResource9_ctor( struct NineResource9 *This,
if (Allocate) {
assert(!initResource);
+
+ /* On Windows it is possible allocation fails when
+ * IDirect3DDevice9::GetAvailableTextureMem() still reports
+ * enough free space.
+ *
+ * Some games allocate surfaces
+ * in a loop until they receive D3DERR_OUTOFVIDEOMEMORY to measure
+ * the available texture memory size.
+ *
+ * We are not using the driver's VRAM statistics because:
+ * * This would add overhead to each resource allocation.
+ * * Freeing memory is lazy and takes some time, but applications
+ * expect the memory counter to change immediately after allocating
+ * or freeing memory.
+ *
+ * Vertex buffers and index buffers are not accounted for!
+ */
+ if (This->info.target != PIPE_BUFFER) {
+ This->size = util_resource_size(&This->info);
+
+ This->base.device->available_texture_mem -= This->size;
+ if (This->base.device->available_texture_mem <=
+ This->base.device->available_texture_limit) {
+ return D3DERR_OUTOFVIDEOMEMORY;
+ }
+ }
+
DBG("(%p) Creating pipe_resource.\n", This);
This->resource = screen->resource_create(screen, &This->info);
if (!This->resource)
@@ -92,6 +119,10 @@ NineResource9_dtor( struct NineResource9 *This )
* still hold a reference. */
pipe_resource_reference(&This->resource, NULL);
+ /* NOTE: size is 0, unless something has actually been allocated */
+ if (This->base.device)
+ This->base.device->available_texture_mem += This->size;
+
NineUnknown_dtor(&This->base);
}
@@ -117,9 +148,10 @@ NineResource9_SetPrivateData( struct NineResource9 *This,
enum pipe_error err;
struct pheader *header;
const void *user_data = pData;
+ char guid_str[64];
- DBG("This=%p refguid=%p pData=%p SizeOfData=%u Flags=%x\n",
- This, refguid, pData, SizeOfData, Flags);
+ DBG("This=%p GUID=%s pData=%p SizeOfData=%u Flags=%x\n",
+ This, GUID_sprintf(guid_str, refguid), pData, SizeOfData, Flags);
if (Flags & D3DSPD_IUNKNOWN)
user_assert(SizeOfData == sizeof(IUnknown *), D3DERR_INVALIDCALL);
@@ -141,8 +173,9 @@ NineResource9_SetPrivateData( struct NineResource9 *This,
header->size = SizeOfData;
memcpy(header->data, user_data, header->size);
+ memcpy(&header->guid, refguid, sizeof(header->guid));
- err = util_hash_table_set(This->pdata, refguid, header);
+ err = util_hash_table_set(This->pdata, &header->guid, header);
if (err == PIPE_OK) {
if (header->unknown) { IUnknown_AddRef(*(IUnknown **)header->data); }
return D3D_OK;
@@ -162,9 +195,10 @@ NineResource9_GetPrivateData( struct NineResource9 *This,
{
struct pheader *header;
DWORD sizeofdata;
+ char guid_str[64];
- DBG("This=%p refguid=%p pData=%p pSizeOfData=%p\n",
- This, refguid, pData, pSizeOfData);
+ DBG("This=%p GUID=%s pData=%p pSizeOfData=%p\n",
+ This, GUID_sprintf(guid_str, refguid), pData, pSizeOfData);
header = util_hash_table_get(This->pdata, refguid);
if (!header) { return D3DERR_NOTFOUND; }
@@ -191,8 +225,9 @@ NineResource9_FreePrivateData( struct NineResource9 *This,
REFGUID refguid )
{
struct pheader *header;
+ char guid_str[64];
- DBG("This=%p refguid=%p\n", This, refguid);
+ DBG("This=%p GUID=%s\n", This, GUID_sprintf(guid_str, refguid));
header = util_hash_table_get(This->pdata, refguid);
if (!header)
diff --git a/src/gallium/state_trackers/nine/resource9.h b/src/gallium/state_trackers/nine/resource9.h
index 906f90806ce..8122257b7a7 100644
--- a/src/gallium/state_trackers/nine/resource9.h
+++ b/src/gallium/state_trackers/nine/resource9.h
@@ -45,6 +45,8 @@ struct NineResource9
/* for [GS]etPrivateData/FreePrivateData */
struct util_hash_table *pdata;
+
+ long long size;
};
static inline struct NineResource9 *
NineResource9( void *data )
diff --git a/src/gallium/state_trackers/nine/stateblock9.c b/src/gallium/state_trackers/nine/stateblock9.c
index 6d6e1be0b7f..0d1a04b657a 100644
--- a/src/gallium/state_trackers/nine/stateblock9.c
+++ b/src/gallium/state_trackers/nine/stateblock9.c
@@ -24,6 +24,7 @@
#include "device9.h"
#include "basetexture9.h"
#include "nine_helpers.h"
+#include "vertexdeclaration9.h"
#define DBG_CHANNEL DBG_STATEBLOCK
@@ -179,6 +180,7 @@ nine_state_copy_common(struct nine_state *dst,
const int r = ffs(m) - 1;
m &= ~(1 << r);
dst->rs[i * 32 + r] = src->rs[i * 32 + r];
+ dst->rs_advertised[i * 32 + r] = src->rs_advertised[i * 32 + r];
}
}
@@ -223,7 +225,7 @@ nine_state_copy_common(struct nine_state *dst,
nine_bind(&dst->stream[i], src->stream[i]);
if (src->stream[i]) {
dst->vtxbuf[i].buffer_offset = src->vtxbuf[i].buffer_offset;
- dst->vtxbuf[i].buffer = src->vtxbuf[i].buffer;
+ pipe_resource_reference(&dst->vtxbuf[i].buffer, src->vtxbuf[i].buffer);
dst->vtxbuf[i].stride = src->vtxbuf[i].stride;
}
}
@@ -269,6 +271,10 @@ nine_state_copy_common(struct nine_state *dst,
dst->ff.light = REALLOC(dst->ff.light,
dst->ff.num_lights * sizeof(D3DLIGHT9),
mask->ff.num_lights * sizeof(D3DLIGHT9));
+ for (i = dst->ff.num_lights; i < mask->ff.num_lights; ++i) {
+ memset(&dst->ff.light[i], 0, sizeof(D3DLIGHT9));
+ dst->ff.light[i].Type = (D3DLIGHTTYPE)NINED3DLIGHT_INVALID;
+ }
dst->ff.num_lights = mask->ff.num_lights;
}
for (i = 0; i < mask->ff.num_lights; ++i)
@@ -353,6 +359,7 @@ nine_state_copy_common_all(struct nine_state *dst,
/* Render states. */
memcpy(dst->rs, src->rs, sizeof(dst->rs));
+ memcpy(dst->rs_advertised, src->rs_advertised, sizeof(dst->rs_advertised));
if (apply)
memcpy(dst->changed.rs, src->changed.rs, sizeof(dst->changed.rs));
@@ -377,7 +384,7 @@ nine_state_copy_common_all(struct nine_state *dst,
nine_bind(&dst->stream[i], src->stream[i]);
if (src->stream[i]) {
dst->vtxbuf[i].buffer_offset = src->vtxbuf[i].buffer_offset;
- dst->vtxbuf[i].buffer = src->vtxbuf[i].buffer;
+ pipe_resource_reference(&dst->vtxbuf[i].buffer, src->vtxbuf[i].buffer);
dst->vtxbuf[i].stride = src->vtxbuf[i].stride;
}
dst->stream_freq[i] = src->stream_freq[i];
@@ -486,7 +493,10 @@ NineStateBlock9_Apply( struct NineStateBlock9 *This )
nine_state_copy_common(dst, src, src, TRUE, pool);
if ((src->changed.group & NINE_STATE_VDECL) && src->vdecl)
- nine_bind(&dst->vdecl, src->vdecl);
+ NineDevice9_SetVertexDeclaration(This->base.device, (IDirect3DVertexDeclaration9 *)src->vdecl);
+
+ /* Recomputing it is needed if we changed vs but not vdecl */
+ dst->programmable_vs = dst->vs && !(dst->vdecl && dst->vdecl->position_t);
/* Textures */
if (src->changed.texture) {
diff --git a/src/gallium/state_trackers/nine/surface9.c b/src/gallium/state_trackers/nine/surface9.c
index 14c1ce927ad..f88b75c3dd7 100644
--- a/src/gallium/state_trackers/nine/surface9.c
+++ b/src/gallium/state_trackers/nine/surface9.c
@@ -56,6 +56,9 @@ NineSurface9_ctor( struct NineSurface9 *This,
D3DSURFACE_DESC *pDesc )
{
HRESULT hr;
+ union pipe_color_union rgba = {0};
+ struct pipe_surface *surf;
+ struct pipe_context *pipe = pParams->device->pipe;
DBG("This=%p pDevice=%p pResource=%p Level=%u Layer=%u pDesc=%p\n",
This, pParams->device, pResource, Level, Layer, pDesc);
@@ -140,6 +143,12 @@ NineSurface9_ctor( struct NineSurface9 *This,
if (pResource && NineSurface9_IsOffscreenPlain(This))
pResource->flags |= NINE_RESOURCE_FLAG_LOCKABLE;
+ /* TODO: investigate what else exactly needs to be cleared */
+ if (This->base.resource && (pDesc->Usage & D3DUSAGE_RENDERTARGET)) {
+ surf = NineSurface9_GetSurface(This, 0);
+ pipe->clear_render_target(pipe, surf, &rgba, 0, 0, pDesc->Width, pDesc->Height);
+ }
+
NineSurface9_Dump(This);
return D3D_OK;
@@ -156,7 +165,7 @@ NineSurface9_dtor( struct NineSurface9 *This )
/* Release system memory when we have to manage it (no parent) */
if (!This->base.base.container && This->data)
- FREE(This->data);
+ align_free(This->data);
NineResource9_dtor(&This->base);
}
@@ -348,7 +357,7 @@ NineSurface9_LockRect( struct NineSurface9 *This,
D3DERR_INVALIDCALL);
if (pRect && This->desc.Pool == D3DPOOL_DEFAULT &&
- compressed_format (This->desc.Format)) {
+ util_format_is_compressed(This->base.info.format)) {
const unsigned w = util_format_get_blockwidth(This->base.info.format);
const unsigned h = util_format_get_blockheight(This->base.info.format);
user_assert((pRect->left == 0 && pRect->right == This->desc.Width &&
@@ -384,8 +393,8 @@ NineSurface9_LockRect( struct NineSurface9 *This,
* and bpp 8, and the app has a workaround to work with the fact
* that it is actually compressed. */
if (is_ATI1_ATI2(This->base.info.format)) {
- pLockedRect->Pitch = This->desc.Height;
- pLockedRect->pBits = This->data + box.y * This->desc.Height + box.x;
+ pLockedRect->Pitch = This->desc.Width;
+ pLockedRect->pBits = This->data + box.y * This->desc.Width + box.x;
} else {
pLockedRect->Pitch = This->stride;
pLockedRect->pBits = NineSurface9_GetSystemMemPointer(This,
diff --git a/src/gallium/state_trackers/nine/swapchain9.c b/src/gallium/state_trackers/nine/swapchain9.c
index 3b1a7a4493c..82d4173fbb2 100644
--- a/src/gallium/state_trackers/nine/swapchain9.c
+++ b/src/gallium/state_trackers/nine/swapchain9.c
@@ -118,6 +118,14 @@ NineSwapChain9_Resize( struct NineSwapChain9 *This,
DBG("This=%p pParams=%p\n", This, pParams);
user_assert(pParams != NULL, E_POINTER);
+ user_assert(pParams->SwapEffect, D3DERR_INVALIDCALL);
+ user_assert((pParams->SwapEffect != D3DSWAPEFFECT_COPY) ||
+ (pParams->BackBufferCount <= 1), D3DERR_INVALIDCALL);
+ user_assert(pDevice->ex || pParams->BackBufferCount <= 3, D3DERR_INVALIDCALL);
+ user_assert(pDevice->ex ||
+ (pParams->SwapEffect == D3DSWAPEFFECT_FLIP) ||
+ (pParams->SwapEffect == D3DSWAPEFFECT_COPY) ||
+ (pParams->SwapEffect == D3DSWAPEFFECT_DISCARD), D3DERR_INVALIDCALL);
DBG("pParams(%p):\n"
"BackBufferWidth: %u\n"
@@ -145,11 +153,6 @@ NineSwapChain9_Resize( struct NineSwapChain9 *This,
pParams->FullScreen_RefreshRateInHz,
pParams->PresentationInterval);
- if (pParams->SwapEffect == D3DSWAPEFFECT_COPY &&
- pParams->BackBufferCount > 1) {
- pParams->BackBufferCount = 1;
- }
-
if (pParams->BackBufferCount > 3) {
pParams->BackBufferCount = 3;
}
@@ -713,6 +716,10 @@ present( struct NineSwapChain9 *This,
This->pipe->blit(This->pipe, &blit);
}
+ /* The resource we present has to resolve fast clears
+ * if needed (and other things) */
+ This->pipe->flush_resource(This->pipe, resource);
+
if (This->params.SwapEffect != D3DSWAPEFFECT_DISCARD)
handle_draw_cursor_and_hud(This, resource);
@@ -738,12 +745,6 @@ bypass_rendering:
return D3DERR_WASSTILLDRAWING;
}
- if (This->present_buffers)
- resource = This->present_buffers[0];
- else
- resource = This->buffers[0]->base.resource;
- This->pipe->flush_resource(This->pipe, resource);
-
if (!This->enable_threadpool) {
This->tasks[0]=NULL;
fence = swap_fences_pop_front(This);
@@ -786,6 +787,19 @@ NineSwapChain9_Present( struct NineSwapChain9 *This,
if (hr == D3DERR_WASSTILLDRAWING)
return hr;
+ if (This->base.device->ex) {
+ if (NineSwapChain9_GetOccluded(This)) {
+ return S_PRESENT_OCCLUDED;
+ }
+ } else {
+ if (NineSwapChain9_GetOccluded(This)) {
+ This->base.device->device_needs_reset = TRUE;
+ }
+ if (This->base.device->device_needs_reset) {
+ return D3DERR_DEVICELOST;
+ }
+ }
+
switch (This->params.SwapEffect) {
case D3DSWAPEFFECT_FLIP:
UNTESTED(4);
@@ -840,7 +854,6 @@ NineSwapChain9_Present( struct NineSwapChain9 *This,
ID3DPresent_WaitBufferReleased(This->present, This->present_handles[0]);
This->base.device->state.changed.group |= NINE_STATE_FB;
- nine_update_state_framebuffer(This->base.device);
return hr;
}
@@ -907,8 +920,9 @@ NineSwapChain9_GetBackBuffer( struct NineSwapChain9 *This,
DBG("GetBackBuffer: This=%p iBackBuffer=%d Type=%d ppBackBuffer=%p\n",
This, iBackBuffer, Type, ppBackBuffer);
(void)user_error(Type == D3DBACKBUFFER_TYPE_MONO);
+ /* don't touch ppBackBuffer on error */
+ user_assert(ppBackBuffer != NULL, D3DERR_INVALIDCALL);
user_assert(iBackBuffer < This->params.BackBufferCount, D3DERR_INVALIDCALL);
- user_assert(ppBackBuffer != NULL, E_POINTER);
NineUnknown_AddRef(NineUnknown(This->buffers[iBackBuffer]));
*ppBackBuffer = (IDirect3DSurface9 *)This->buffers[iBackBuffer];
@@ -990,3 +1004,13 @@ NineSwapChain9_new( struct NineDevice9 *pDevice,
implicit, pPresent, pPresentationParameters,
pCTX, hFocusWindow, NULL);
}
+
+BOOL
+NineSwapChain9_GetOccluded( struct NineSwapChain9 *This )
+{
+ if (This->base.device->minor_version_num > 0) {
+ return ID3DPresent_GetWindowOccluded(This->present);
+ }
+
+ return FALSE;
+}
diff --git a/src/gallium/state_trackers/nine/swapchain9.h b/src/gallium/state_trackers/nine/swapchain9.h
index 5e48dde5004..4bd74f7b6ec 100644
--- a/src/gallium/state_trackers/nine/swapchain9.h
+++ b/src/gallium/state_trackers/nine/swapchain9.h
@@ -139,4 +139,7 @@ HRESULT WINAPI
NineSwapChain9_GetPresentParameters( struct NineSwapChain9 *This,
D3DPRESENT_PARAMETERS *pPresentationParameters );
+BOOL
+NineSwapChain9_GetOccluded( struct NineSwapChain9 *This );
+
#endif /* _NINE_SWAPCHAIN9_H_ */
diff --git a/src/gallium/state_trackers/nine/texture9.c b/src/gallium/state_trackers/nine/texture9.c
index bc325c1335e..ada08cea90a 100644
--- a/src/gallium/state_trackers/nine/texture9.c
+++ b/src/gallium/state_trackers/nine/texture9.c
@@ -235,7 +235,7 @@ NineTexture9_dtor( struct NineTexture9 *This )
}
if (This->managed_buffer)
- FREE(This->managed_buffer);
+ align_free(This->managed_buffer);
NineBaseTexture9_dtor(&This->base);
}
diff --git a/src/gallium/state_trackers/nine/vertexbuffer9.c b/src/gallium/state_trackers/nine/vertexbuffer9.c
index 8e2eaaf8ff9..10311b428fe 100644
--- a/src/gallium/state_trackers/nine/vertexbuffer9.c
+++ b/src/gallium/state_trackers/nine/vertexbuffer9.c
@@ -39,56 +39,13 @@ NineVertexBuffer9_ctor( struct NineVertexBuffer9 *This,
struct NineUnknownParams *pParams,
D3DVERTEXBUFFER_DESC *pDesc )
{
- struct pipe_resource *info = &This->base.info;
HRESULT hr;
DBG("This=%p Size=0x%x Usage=%x Pool=%u\n", This,
pDesc->Size, pDesc->Usage, pDesc->Pool);
- user_assert(pDesc->Pool != D3DPOOL_SCRATCH, D3DERR_INVALIDCALL);
-
- This->maps = MALLOC(sizeof(struct pipe_transfer *));
- if (!This->maps)
- return E_OUTOFMEMORY;
- This->nmaps = 0;
- This->maxmaps = 1;
-
- This->pipe = pParams->device->pipe;
-
- info->screen = pParams->device->screen;
- info->target = PIPE_BUFFER;
- info->format = PIPE_FORMAT_R8_UNORM;
- info->width0 = pDesc->Size;
- info->flags = 0;
-
- info->bind = PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_TRANSFER_WRITE;
- if (!(pDesc->Usage & D3DUSAGE_WRITEONLY))
- info->bind |= PIPE_BIND_TRANSFER_READ;
-
- info->usage = PIPE_USAGE_DEFAULT;
- if (pDesc->Usage & D3DUSAGE_DYNAMIC)
- info->usage = PIPE_USAGE_STREAM;
- if (pDesc->Pool == D3DPOOL_SYSTEMMEM)
- info->usage = PIPE_USAGE_STAGING;
-
- /* if (pDesc->Usage & D3DUSAGE_DONOTCLIP) { } */
- /* if (pDesc->Usage & D3DUSAGE_NONSECURE) { } */
- /* if (pDesc->Usage & D3DUSAGE_NPATCHES) { } */
- /* if (pDesc->Usage & D3DUSAGE_POINTS) { } */
- /* if (pDesc->Usage & D3DUSAGE_RTPATCHES) { } */
- if (pDesc->Usage & D3DUSAGE_SOFTWAREPROCESSING)
- DBG("Application asked for Software Vertex Processing, "
- "but this is unimplemented\n");
- /* if (pDesc->Usage & D3DUSAGE_TEXTAPI) { } */
-
- info->height0 = 1;
- info->depth0 = 1;
- info->array_size = 1;
- info->last_level = 0;
- info->nr_samples = 0;
-
- hr = NineResource9_ctor(&This->base, pParams, NULL, TRUE,
- D3DRTYPE_VERTEXBUFFER, pDesc->Pool, pDesc->Usage);
+ hr = NineBuffer9_ctor(&This->base, pParams, D3DRTYPE_VERTEXBUFFER,
+ pDesc->Usage, pDesc->Size, pDesc->Pool);
if (FAILED(hr))
return hr;
@@ -102,85 +59,29 @@ NineVertexBuffer9_ctor( struct NineVertexBuffer9 *This,
void
NineVertexBuffer9_dtor( struct NineVertexBuffer9 *This )
{
- if (This->maps) {
- while (This->nmaps) {
- NineVertexBuffer9_Unlock(This);
- }
- FREE(This->maps);
- }
-
- NineResource9_dtor(&This->base);
+ NineBuffer9_dtor(&This->base);
+}
+
+struct pipe_resource *
+NineVertexBuffer9_GetResource( struct NineVertexBuffer9 *This )
+{
+ return NineBuffer9_GetResource(&This->base);
}
HRESULT WINAPI
NineVertexBuffer9_Lock( struct NineVertexBuffer9 *This,
- UINT OffsetToLock,
- UINT SizeToLock,
- void **ppbData,
- DWORD Flags )
+ UINT OffsetToLock,
+ UINT SizeToLock,
+ void **ppbData,
+ DWORD Flags )
{
- struct pipe_box box;
- void *data;
- const unsigned usage = d3dlock_buffer_to_pipe_transfer_usage(Flags);
-
- DBG("This=%p(pipe=%p) OffsetToLock=0x%x, SizeToLock=0x%x, Flags=0x%x\n",
- This, This->base.resource,
- OffsetToLock, SizeToLock, Flags);
-
- user_assert(ppbData, E_POINTER);
- user_assert(!(Flags & ~(D3DLOCK_DISCARD |
- D3DLOCK_DONOTWAIT |
- D3DLOCK_NO_DIRTY_UPDATE |
- D3DLOCK_NOSYSLOCK |
- D3DLOCK_READONLY |
- D3DLOCK_NOOVERWRITE)), D3DERR_INVALIDCALL);
-
- if (This->nmaps == This->maxmaps) {
- struct pipe_transfer **newmaps =
- REALLOC(This->maps, sizeof(struct pipe_transfer *)*This->maxmaps,
- sizeof(struct pipe_transfer *)*(This->maxmaps << 1));
- if (newmaps == NULL)
- return E_OUTOFMEMORY;
-
- This->maxmaps <<= 1;
- This->maps = newmaps;
- }
-
- if (SizeToLock == 0) {
- SizeToLock = This->desc.Size - OffsetToLock;
- user_warn(OffsetToLock != 0);
- }
-
- u_box_1d(OffsetToLock, SizeToLock, &box);
-
- data = This->pipe->transfer_map(This->pipe, This->base.resource, 0,
- usage, &box, &This->maps[This->nmaps]);
- if (!data) {
- DBG("pipe::transfer_map failed\n"
- " usage = %x\n"
- " box.x = %u\n"
- " box.width = %u\n",
- usage, box.x, box.width);
- /* not sure what to return, msdn suggests this */
- if (Flags & D3DLOCK_DONOTWAIT)
- return D3DERR_WASSTILLDRAWING;
- return D3DERR_INVALIDCALL;
- }
-
- This->nmaps++;
- *ppbData = data;
-
- return D3D_OK;
+ return NineBuffer9_Lock(&This->base, OffsetToLock, SizeToLock, ppbData, Flags);
}
HRESULT WINAPI
NineVertexBuffer9_Unlock( struct NineVertexBuffer9 *This )
{
- DBG("This=%p\n", This);
-
- user_assert(This->nmaps > 0, D3DERR_INVALIDCALL);
- This->pipe->transfer_unmap(This->pipe, This->maps[--(This->nmaps)]);
- return D3D_OK;
+ return NineBuffer9_Unlock(&This->base);
}
HRESULT WINAPI
diff --git a/src/gallium/state_trackers/nine/vertexbuffer9.h b/src/gallium/state_trackers/nine/vertexbuffer9.h
index 6174de4df08..859402b925b 100644
--- a/src/gallium/state_trackers/nine/vertexbuffer9.h
+++ b/src/gallium/state_trackers/nine/vertexbuffer9.h
@@ -22,8 +22,8 @@
#ifndef _NINE_VERTEXBUFFER9_H_
#define _NINE_VERTEXBUFFER9_H_
-
#include "resource9.h"
+#include "buffer9.h"
struct pipe_screen;
struct pipe_context;
@@ -31,13 +31,10 @@ struct pipe_transfer;
struct NineVertexBuffer9
{
- struct NineResource9 base;
+ struct NineBuffer9 base;
/* G3D */
struct pipe_context *pipe;
- struct pipe_transfer **maps;
- int nmaps, maxmaps;
-
D3DVERTEXBUFFER_DESC desc;
};
static inline struct NineVertexBuffer9 *
@@ -58,6 +55,12 @@ NineVertexBuffer9_ctor( struct NineVertexBuffer9 *This,
void
NineVertexBuffer9_dtor( struct NineVertexBuffer9 *This );
+/*** Nine private ***/
+
+struct pipe_resource *
+NineVertexBuffer9_GetResource( struct NineVertexBuffer9 *This );
+
+/*** Direct3D public ***/
HRESULT WINAPI
NineVertexBuffer9_Lock( struct NineVertexBuffer9 *This,
diff --git a/src/gallium/state_trackers/nine/vertexdeclaration9.c b/src/gallium/state_trackers/nine/vertexdeclaration9.c
index 2047b91abc4..36c594b5be3 100644
--- a/src/gallium/state_trackers/nine/vertexdeclaration9.c
+++ b/src/gallium/state_trackers/nine/vertexdeclaration9.c
@@ -174,24 +174,24 @@ NineVertexDeclaration9_ctor( struct NineVertexDeclaration9 *This,
const D3DVERTEXELEMENT9 *pElements )
{
const D3DCAPS9 *caps;
- unsigned i;
-
+ unsigned i, nelems;
DBG("This=%p pParams=%p pElements=%p\n", This, pParams, pElements);
- HRESULT hr = NineUnknown_ctor(&This->base, pParams);
- if (FAILED(hr)) { return hr; }
-
/* wine */
- for (This->nelems = 0;
- pElements[This->nelems].Stream != 0xFF;
- ++This->nelems) {
- user_assert(pElements[This->nelems].Type != D3DDECLTYPE_UNUSED, E_FAIL);
- user_assert(!(pElements[This->nelems].Offset & 3), E_FAIL);
+ for (nelems = 0;
+ pElements[nelems].Stream != 0xFF;
+ ++nelems) {
+ user_assert(pElements[nelems].Type != D3DDECLTYPE_UNUSED, E_FAIL);
+ user_assert(!(pElements[nelems].Offset & 3), E_FAIL);
}
- caps = NineDevice9_GetCaps(This->base.device);
- user_assert(This->nelems <= caps->MaxStreams, D3DERR_INVALIDCALL);
+ caps = NineDevice9_GetCaps(pParams->device);
+ user_assert(nelems <= caps->MaxStreams, D3DERR_INVALIDCALL);
+ HRESULT hr = NineUnknown_ctor(&This->base, pParams);
+ if (FAILED(hr)) { return hr; }
+
+ This->nelems = nelems;
This->decls = CALLOC(This->nelems+1, sizeof(D3DVERTEXELEMENT9));
This->elems = CALLOC(This->nelems, sizeof(struct pipe_vertex_element));
This->usage_map = CALLOC(This->nelems, sizeof(uint16_t));
@@ -203,6 +203,9 @@ NineVertexDeclaration9_ctor( struct NineVertexDeclaration9 *This,
This->decls[i].UsageIndex);
This->usage_map[i] = usage;
+ if (This->decls[i].Usage == D3DDECLUSAGE_POSITIONT)
+ This->position_t = TRUE;
+
This->elems[i].src_offset = This->decls[i].Offset;
This->elems[i].instance_divisor = 0;
This->elems[i].vertex_buffer_index = This->decls[i].Stream;
diff --git a/src/gallium/state_trackers/nine/vertexdeclaration9.h b/src/gallium/state_trackers/nine/vertexdeclaration9.h
index 655bcfbf165..e39f259440f 100644
--- a/src/gallium/state_trackers/nine/vertexdeclaration9.h
+++ b/src/gallium/state_trackers/nine/vertexdeclaration9.h
@@ -46,6 +46,8 @@ struct NineVertexDeclaration9
D3DVERTEXELEMENT9 *decls;
DWORD fvf;
+
+ BOOL position_t;
};
static inline struct NineVertexDeclaration9 *
NineVertexDeclaration9( void *data )
diff --git a/src/gallium/state_trackers/nine/volume9.c b/src/gallium/state_trackers/nine/volume9.c
index 0b9005685a9..f6988923caa 100644
--- a/src/gallium/state_trackers/nine/volume9.c
+++ b/src/gallium/state_trackers/nine/volume9.c
@@ -136,7 +136,7 @@ NineVolume9_dtor( struct NineVolume9 *This )
NineVolume9_UnlockBox(This);
if (This->data)
- FREE(This->data);
+ align_free(This->data);
pipe_resource_reference(&This->resource, NULL);
@@ -264,6 +264,13 @@ NineVolume9_LockBox( struct NineVolume9 *This,
usage |= PIPE_TRANSFER_DONTBLOCK;
if (pBox) {
+ user_assert(pBox->Right > pBox->Left, D3DERR_INVALIDCALL);
+ user_assert(pBox->Bottom > pBox->Top, D3DERR_INVALIDCALL);
+ user_assert(pBox->Back > pBox->Front, D3DERR_INVALIDCALL);
+ user_assert(pBox->Right <= This->desc.Width, D3DERR_INVALIDCALL);
+ user_assert(pBox->Bottom <= This->desc.Height, D3DERR_INVALIDCALL);
+ user_assert(pBox->Back <= This->desc.Depth, D3DERR_INVALIDCALL);
+
d3dbox_to_pipe_box(&box, pBox);
if (u_box_clip_2d(&box, &box, This->desc.Width, This->desc.Height) < 0) {
DBG("Locked volume intersection empty.\n");
diff --git a/src/gallium/state_trackers/omx/vid_dec_h264.c b/src/gallium/state_trackers/omx/vid_dec_h264.c
index f66ed896e62..b4536828909 100644
--- a/src/gallium/state_trackers/omx/vid_dec_h264.c
+++ b/src/gallium/state_trackers/omx/vid_dec_h264.c
@@ -35,6 +35,7 @@
#include "util/u_memory.h"
#include "util/u_video.h"
#include "vl/vl_rbsp.h"
+#include "vl/vl_zscan.h"
#include "entrypoint.h"
#include "vid_dec.h"
@@ -205,6 +206,7 @@ static void scaling_list(struct vl_rbsp *rbsp, uint8_t *scalingList, unsigned si
const uint8_t *defaultList, const uint8_t *fallbackList)
{
unsigned lastScale = 8, nextScale = 8;
+ const int *list;
unsigned i;
/* (pic|seq)_scaling_list_present_flag[i] */
@@ -214,6 +216,7 @@ static void scaling_list(struct vl_rbsp *rbsp, uint8_t *scalingList, unsigned si
return;
}
+ list = (sizeOfScalingList == 16) ? vl_zscan_normal_16 : vl_zscan_normal;
for (i = 0; i < sizeOfScalingList; ++i ) {
if (nextScale != 0) {
@@ -224,8 +227,8 @@ static void scaling_list(struct vl_rbsp *rbsp, uint8_t *scalingList, unsigned si
return;
}
}
- scalingList[i] = nextScale == 0 ? lastScale : nextScale;
- lastScale = scalingList[i];
+ scalingList[list[i]] = nextScale == 0 ? lastScale : nextScale;
+ lastScale = scalingList[list[i]];
}
}
diff --git a/src/gallium/targets/d3dadapter9/drm.c b/src/gallium/targets/d3dadapter9/drm.c
index 5cd1ba7815c..233db8ae372 100644
--- a/src/gallium/targets/d3dadapter9/drm.c
+++ b/src/gallium/targets/d3dadapter9/drm.c
@@ -53,22 +53,29 @@ DRI_CONF_BEGIN
DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_1)
DRI_CONF_SECTION_END
DRI_CONF_SECTION_NINE
+ DRI_CONF_NINE_OVERRIDEVENDOR(-1)
DRI_CONF_NINE_THROTTLE(-2)
DRI_CONF_NINE_THREADSUBMIT("false")
DRI_CONF_SECTION_END
DRI_CONF_END;
-/* define fallback value here: NVIDIA GeForce GTX 970 */
-#define FALLBACK_NAME "NV124"
-#define FALLBACK_DEVID 0x13C2
-#define FALLBACK_VENID 0x10de
+struct fallback_card_config {
+ const char *name;
+ unsigned vendor_id;
+ unsigned device_id;
+} fallback_cards[] = {
+ {"NV124", 0x10de, 0x13C2}, /* NVIDIA GeForce GTX 970 */
+ {"HAWAII", 0x1002, 0x67b1}, /* AMD Radeon R9 290 */
+ {"Haswell Mobile", 0x8086, 0x13C2}, /* Intel Haswell Mobile */
+ {"SVGA3D", 0x15ad, 0x0405}, /* VMware SVGA 3D */
+};
/* prototypes */
void
d3d_match_vendor_id( D3DADAPTER_IDENTIFIER9* drvid,
- unsigned fallback_ven,
- unsigned fallback_dev,
- const char* fallback_name );
+ unsigned fallback_ven,
+ unsigned fallback_dev,
+ const char* fallback_name );
void d3d_fill_driver_version(D3DADAPTER_IDENTIFIER9* drvid);
@@ -118,9 +125,9 @@ get_bus_info( int fd,
*subsysid = 0;
*revision = 0;
} else {
- DBG("Unable to detect card. Faking %s\n", FALLBACK_NAME);
- *vendorid = FALLBACK_VENID;
- *deviceid = FALLBACK_DEVID;
+ DBG("Unable to detect card. Faking %s\n", fallback_cards[0].name);
+ *vendorid = fallback_cards[0].vendor_id;
+ *deviceid = fallback_cards[0].device_id;
*subsysid = 0;
*revision = 0;
}
@@ -128,8 +135,10 @@ get_bus_info( int fd,
static inline void
read_descriptor( struct d3dadapter9_context *ctx,
- int fd )
+ int fd, int override_vendorid )
{
+ unsigned i;
+ BOOL found;
D3DADAPTER_IDENTIFIER9 *drvid = &ctx->identifier;
memset(drvid, 0, sizeof(*drvid));
@@ -140,9 +149,30 @@ read_descriptor( struct d3dadapter9_context *ctx,
strncpy(drvid->Description, ctx->hal->get_name(ctx->hal),
sizeof(drvid->Description));
+ if (override_vendorid > 0) {
+ found = FALSE;
+ /* fill in device_id and card name for fake vendor */
+ for (i = 0; i < sizeof(fallback_cards)/sizeof(fallback_cards[0]); i++) {
+ if (fallback_cards[i].vendor_id == override_vendorid) {
+ DBG("Faking card '%s' vendor 0x%04x, device 0x%04x\n",
+ fallback_cards[i].name,
+ fallback_cards[i].vendor_id,
+ fallback_cards[i].device_id);
+ drvid->VendorId = fallback_cards[i].vendor_id;
+ drvid->DeviceId = fallback_cards[i].device_id;
+ strncpy(drvid->Description, fallback_cards[i].name,
+ sizeof(drvid->Description));
+ found = TRUE;
+ break;
+ }
+ }
+ if (!found) {
+ DBG("Unknown fake vendor 0x%04x! Using detected vendor !\n", override_vendorid);
+ }
+ }
/* choose fall-back vendor if necessary to allow
* the following functions to return sane results */
- d3d_match_vendor_id(drvid, FALLBACK_VENID, FALLBACK_DEVID, FALLBACK_NAME);
+ d3d_match_vendor_id(drvid, fallback_cards[0].vendor_id, fallback_cards[0].device_id, fallback_cards[0].name);
/* fill in driver name and version info */
d3d_fill_driver_version(drvid);
/* override Description field with Windows like names */
@@ -177,6 +207,7 @@ drm_create_adapter( int fd,
driOptionCache defaultInitOptions;
driOptionCache userInitOptions;
int throttling_value_user = -2;
+ int override_vendorid = -1;
if (!ctx) { return E_OUTOFMEMORY; }
@@ -247,6 +278,10 @@ drm_create_adapter( int fd,
"You should not expect any benefit.");
}
+ if (driCheckOption(&userInitOptions, "override_vendorid", DRI_INT)) {
+ override_vendorid = driQueryOptioni(&userInitOptions, "override_vendorid");
+ }
+
driDestroyOptionCache(&userInitOptions);
driDestroyOptionInfo(&defaultInitOptions);
@@ -260,7 +295,7 @@ drm_create_adapter( int fd,
}
/* read out PCI info */
- read_descriptor(&ctx->base, fd);
+ read_descriptor(&ctx->base, fd, override_vendorid);
/* create and return new ID3DAdapter9 */
hr = NineAdapter9_new(&ctx->base, (struct NineAdapter9 **)ppAdapter);
diff --git a/src/gallium/targets/dri/Android.mk b/src/gallium/targets/dri/Android.mk
index d4030852943..2a7738e6979 100644
--- a/src/gallium/targets/dri/Android.mk
+++ b/src/gallium/targets/dri/Android.mk
@@ -94,7 +94,7 @@ gallium_DRIVERS += libmesa_winsys_vc4 libmesa_pipe_vc4
endif
ifneq ($(filter virgl,$(MESA_GPU_DRIVERS)),)
LOCAL_CFLAGS += -DGALLIUM_VIRGL
-gallium_DRIVERS += libmesa_winsys_virgl libmesa_pipe_virgl
+gallium_DRIVERS += libmesa_winsys_virgl libmesa_winsys_virgl_vtest libmesa_pipe_virgl
endif
ifneq ($(filter vmwgfx,$(MESA_GPU_DRIVERS)),)
gallium_DRIVERS += libmesa_winsys_svga libmesa_pipe_svga
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 30a1aa8d6ba..59a801b1426 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -288,16 +288,17 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
request.alloc_size = size;
request.phys_alignment = alignment;
- if (initial_domain & RADEON_DOMAIN_VRAM) {
+ if (initial_domain & RADEON_DOMAIN_VRAM)
request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
- if (flags & RADEON_FLAG_CPU_ACCESS)
- request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
- }
- if (initial_domain & RADEON_DOMAIN_GTT) {
+ if (initial_domain & RADEON_DOMAIN_GTT)
request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
- if (flags & RADEON_FLAG_GTT_WC)
- request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
- }
+
+ if (flags & RADEON_FLAG_CPU_ACCESS)
+ request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ if (flags & RADEON_FLAG_NO_CPU_ACCESS)
+ request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+ if (flags & RADEON_FLAG_GTT_WC)
+ request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
if (r) {
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index 7393a1d1eb4..dab27dfba96 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -68,7 +68,6 @@ static unsigned cik_get_num_tile_pipes(struct amdgpu_gpu_info *info)
switch (CIK__GB_TILE_MODE__PIPE_CONFIG(mode2d)) {
case CIK__PIPE_CONFIG__ADDR_SURF_P2:
- default:
return 2;
case CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16:
case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16:
@@ -86,23 +85,13 @@ static unsigned cik_get_num_tile_pipes(struct amdgpu_gpu_info *info)
case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16:
case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16:
return 16;
+ default:
+ fprintf(stderr, "Invalid CIK pipe configuration, assuming P2\n");
+ assert(!"this should never occur");
+ return 2;
}
}
-/* Convert Sea Islands register values GB_ADDR_CFG and MC_ADDR_CFG
- * into GB_TILING_CONFIG register which is only present on R600-R700. */
-static unsigned r600_get_gb_tiling_config(struct amdgpu_gpu_info *info)
-{
- unsigned num_pipes = info->gb_addr_cfg & 0x7;
- unsigned num_banks = info->mc_arb_ramcfg & 0x3;
- unsigned pipe_interleave_bytes = (info->gb_addr_cfg >> 4) & 0x7;
- unsigned row_size = (info->gb_addr_cfg >> 28) & 0x3;
-
- return num_pipes | (num_banks << 4) |
- (pipe_interleave_bytes << 8) |
- (row_size << 12);
-}
-
/* Helper function to do the ioctls needed for setup and init. */
static boolean do_winsys_init(struct amdgpu_winsys *ws)
{
@@ -251,20 +240,19 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)
ws->info.gart_size = gtt.heap_size;
ws->info.vram_size = vram.heap_size;
/* convert the shader clock from KHz to MHz */
- ws->info.max_sclk = ws->amdinfo.max_engine_clk / 1000;
+ ws->info.max_shader_clock = ws->amdinfo.max_engine_clk / 1000;
ws->info.max_se = ws->amdinfo.num_shader_engines;
ws->info.max_sh_per_se = ws->amdinfo.num_shader_arrays_per_engine;
ws->info.has_uvd = uvd.available_rings != 0;
ws->info.vce_fw_version =
vce.available_rings ? vce_version : 0;
ws->info.has_userptr = TRUE;
- ws->info.r600_num_backends = ws->amdinfo.rb_pipes;
- ws->info.r600_clock_crystal_freq = ws->amdinfo.gpu_counter_freq;
- ws->info.r600_tiling_config = r600_get_gb_tiling_config(&ws->amdinfo);
- ws->info.r600_num_tile_pipes = cik_get_num_tile_pipes(&ws->amdinfo);
- ws->info.r600_max_pipes = ws->amdinfo.max_quad_shader_pipes; /* TODO: is this correct? */
- ws->info.r600_virtual_address = TRUE;
- ws->info.r600_has_dma = dma.available_rings != 0;
+ ws->info.num_render_backends = ws->amdinfo.rb_pipes;
+ ws->info.clock_crystal_freq = ws->amdinfo.gpu_counter_freq;
+ ws->info.num_tile_pipes = cik_get_num_tile_pipes(&ws->amdinfo);
+ ws->info.pipe_interleave_bytes = 256 << ((ws->amdinfo.gb_addr_cfg >> 4) & 0x7);
+ ws->info.has_virtual_memory = TRUE;
+ ws->info.has_sdma = dma.available_rings != 0;
/* Get the number of good compute units. */
ws->info.num_good_compute_units = 0;
@@ -276,7 +264,7 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)
memcpy(ws->info.si_tile_mode_array, ws->amdinfo.gb_tile_mode,
sizeof(ws->amdinfo.gb_tile_mode));
ws->info.si_tile_mode_array_valid = TRUE;
- ws->info.si_backend_enabled_mask = ws->amdinfo.enabled_rb_pipes_mask;
+ ws->info.enabled_rb_mask = ws->amdinfo.enabled_rb_pipes_mask;
memcpy(ws->info.cik_macrotile_mode_array, ws->amdinfo.gb_macro_tile_mode,
sizeof(ws->amdinfo.gb_macro_tile_mode));
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 3ec6a065c7d..7e9ed0ca0fe 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -281,7 +281,7 @@ void radeon_bo_destroy(struct pb_buffer *_buf)
if (bo->ptr)
os_munmap(bo->ptr, bo->base.size);
- if (rws->info.r600_virtual_address) {
+ if (rws->info.has_virtual_memory) {
if (rws->va_unmap_working) {
struct drm_radeon_gem_va va;
@@ -552,7 +552,7 @@ static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
pipe_mutex_init(bo->map_mutex);
pb_cache_init_entry(&rws->bo_cache, &bo->cache_entry, &bo->base);
- if (rws->info.r600_virtual_address) {
+ if (rws->info.has_virtual_memory) {
struct drm_radeon_gem_va va;
bo->va = radeon_bomgr_find_va(rws, size, alignment);
@@ -834,7 +834,7 @@ static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
pipe_mutex_unlock(ws->bo_handles_mutex);
- if (ws->info.r600_virtual_address) {
+ if (ws->info.has_virtual_memory) {
struct drm_radeon_gem_va va;
bo->va = radeon_bomgr_find_va(ws, bo->base.size, 1 << 20);
@@ -966,7 +966,7 @@ done:
if (stride)
*stride = whandle->stride;
- if (ws->info.r600_virtual_address && !bo->va) {
+ if (ws->info.has_virtual_memory && !bo->va) {
struct drm_radeon_gem_va va;
bo->va = radeon_bomgr_find_va(ws, bo->base.size, 1 << 20);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index 085071c381c..155a13008a4 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -283,7 +283,7 @@ static unsigned radeon_add_buffer(struct radeon_drm_cs *cs,
* This doesn't have to be done if virtual memory is enabled,
* because there is no offset patching with virtual memory.
*/
- if (cs->base.ring_type != RING_DMA || cs->ws->info.r600_virtual_address) {
+ if (cs->base.ring_type != RING_DMA || cs->ws->info.has_virtual_memory) {
return i;
}
}
@@ -540,7 +540,7 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
cs->cst->flags[0] = 0;
cs->cst->flags[1] = RADEON_CS_RING_DMA;
cs->cst->cs.num_chunks = 3;
- if (cs->ws->info.r600_virtual_address) {
+ if (cs->ws->info.has_virtual_memory) {
cs->cst->flags[0] |= RADEON_CS_USE_VM;
}
break;
@@ -567,7 +567,7 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
cs->cst->cs.num_chunks = 3;
}
- if (cs->ws->info.r600_virtual_address) {
+ if (cs->ws->info.has_virtual_memory) {
cs->cst->flags[0] |= RADEON_CS_USE_VM;
cs->cst->cs.num_chunks = 3;
}
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 8a1ed3ae08c..35dc7e69dcf 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -298,10 +298,10 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
}
/* Check for dma */
- ws->info.r600_has_dma = FALSE;
+ ws->info.has_sdma = FALSE;
/* DMA is disabled on R700. There is IB corruption and hangs. */
if (ws->info.chip_class >= EVERGREEN && ws->info.drm_minor >= 27) {
- ws->info.r600_has_dma = TRUE;
+ ws->info.has_sdma = TRUE;
}
/* Check for UVD and VCE */
@@ -351,11 +351,11 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
/* Get max clock frequency info and convert it to MHz */
radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SCLK, NULL,
- &ws->info.max_sclk);
- ws->info.max_sclk /= 1000;
+ &ws->info.max_shader_clock);
+ ws->info.max_shader_clock /= 1000;
radeon_get_drm_value(ws->fd, RADEON_INFO_SI_BACKEND_ENABLED_MASK, NULL,
- &ws->info.si_backend_enabled_mask);
+ &ws->info.enabled_rb_mask);
ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
@@ -372,51 +372,72 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
return FALSE;
}
else if (ws->gen >= DRV_R600) {
+ uint32_t tiling_config = 0;
+
if (ws->info.drm_minor >= 9 &&
!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BACKENDS,
"num backends",
- &ws->info.r600_num_backends))
+ &ws->info.num_render_backends))
return FALSE;
/* get the GPU counter frequency, failure is not fatal */
radeon_get_drm_value(ws->fd, RADEON_INFO_CLOCK_CRYSTAL_FREQ, NULL,
- &ws->info.r600_clock_crystal_freq);
+ &ws->info.clock_crystal_freq);
radeon_get_drm_value(ws->fd, RADEON_INFO_TILING_CONFIG, NULL,
- &ws->info.r600_tiling_config);
+ &tiling_config);
+
+ ws->info.r600_num_banks =
+ ws->info.chip_class >= EVERGREEN ?
+ 4 << ((tiling_config & 0xf0) >> 4) :
+ 4 << ((tiling_config & 0x30) >> 4);
+
+ ws->info.pipe_interleave_bytes =
+ ws->info.chip_class >= EVERGREEN ?
+ 256 << ((tiling_config & 0xf00) >> 8) :
+ 256 << ((tiling_config & 0xc0) >> 6);
+
+ if (!ws->info.pipe_interleave_bytes)
+ ws->info.pipe_interleave_bytes =
+ ws->info.chip_class >= EVERGREEN ? 512 : 256;
if (ws->info.drm_minor >= 11) {
radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_TILE_PIPES, NULL,
- &ws->info.r600_num_tile_pipes);
+ &ws->info.num_tile_pipes);
if (radeon_get_drm_value(ws->fd, RADEON_INFO_BACKEND_MAP, NULL,
- &ws->info.r600_backend_map))
- ws->info.r600_backend_map_valid = TRUE;
+ &ws->info.r600_gb_backend_map))
+ ws->info.r600_gb_backend_map_valid = TRUE;
+ } else {
+ ws->info.num_tile_pipes =
+ ws->info.chip_class >= EVERGREEN ?
+ 1 << (tiling_config & 0xf) :
+ 1 << ((tiling_config & 0xe) >> 1);
}
- ws->info.r600_virtual_address = FALSE;
+ ws->info.has_virtual_memory = FALSE;
if (ws->info.drm_minor >= 13) {
uint32_t ib_vm_max_size;
- ws->info.r600_virtual_address = TRUE;
+ ws->info.has_virtual_memory = TRUE;
if (!radeon_get_drm_value(ws->fd, RADEON_INFO_VA_START, NULL,
&ws->va_start))
- ws->info.r600_virtual_address = FALSE;
+ ws->info.has_virtual_memory = FALSE;
if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL,
&ib_vm_max_size))
- ws->info.r600_virtual_address = FALSE;
+ ws->info.has_virtual_memory = FALSE;
radeon_get_drm_value(ws->fd, RADEON_INFO_VA_UNMAP_WORKING, NULL,
&ws->va_unmap_working);
}
if (ws->gen == DRV_R600 && !debug_get_bool_option("RADEON_VA", FALSE))
- ws->info.r600_virtual_address = FALSE;
+ ws->info.has_virtual_memory = FALSE;
}
/* Get max pipes, this is only needed for compute shaders. All evergreen+
* chips have at least 2 pipes, so we use 2 as a default. */
- ws->info.r600_max_pipes = 2;
+ ws->info.r600_max_quad_pipes = 2;
radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_PIPES, NULL,
- &ws->info.r600_max_pipes);
+ &ws->info.r600_max_quad_pipes);
/* All GPUs have at least one compute unit */
ws->info.num_good_compute_units = 1;
@@ -742,7 +763,7 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
ws->fd = dup(fd);
if (!do_winsys_init(ws))
- goto fail;
+ goto fail1;
pb_cache_init(&ws->bo_cache, 500000, 2.0f, 0,
MIN2(ws->info.vram_size, ws->info.gart_size),
@@ -812,8 +833,9 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
return &ws->base;
fail:
- pipe_mutex_unlock(fd_tab_mutex);
pb_cache_deinit(&ws->bo_cache);
+fail1:
+ pipe_mutex_unlock(fd_tab_mutex);
if (ws->surf_man)
radeon_surface_manager_free(ws->surf_man);
if (ws->fd >= 0)
diff --git a/src/gallium/winsys/virgl/drm/virgl_drm_public.h b/src/gallium/winsys/virgl/drm/virgl_drm_public.h
index be01021ca9a..f70f0e50448 100644
--- a/src/gallium/winsys/virgl/drm/virgl_drm_public.h
+++ b/src/gallium/winsys/virgl/drm/virgl_drm_public.h
@@ -23,8 +23,8 @@
#ifndef VIRGL_DRM_PUBLIC_H
#define VIRGL_DRM_PUBLIC_H
-struct virgl_winsys;
+struct pipe_screen;
-struct virgl_winsys *virgl_drm_winsys_create(int drmFD);
+struct pipe_screen *virgl_drm_screen_create(int fd);
#endif
diff --git a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
index b5d4435e5e6..ba009882ec2 100644
--- a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
+++ b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
@@ -25,6 +25,7 @@
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
+#include <sys/stat.h>
#include "os/os_mman.h"
#include "os/os_time.h"
@@ -33,6 +34,8 @@
#include "util/u_hash_table.h"
#include "util/u_inlines.h"
#include "state_tracker/drm_driver.h"
+#include "virgl/virgl_screen.h"
+#include "virgl/virgl_public.h"
#include <xf86drm.h>
#include "virtgpu_drm.h"
@@ -50,10 +53,17 @@ static void virgl_hw_res_destroy(struct virgl_drm_winsys *qdws,
{
struct drm_gem_close args;
- if (res->name) {
+ if (res->flinked) {
+ pipe_mutex_lock(qdws->bo_handles_mutex);
+ util_hash_table_remove(qdws->bo_names,
+ (void *)(uintptr_t)res->flink);
+ pipe_mutex_unlock(qdws->bo_handles_mutex);
+ }
+
+ if (res->bo_handle) {
pipe_mutex_lock(qdws->bo_handles_mutex);
util_hash_table_remove(qdws->bo_handles,
- (void *)(uintptr_t)res->name);
+ (void *)(uintptr_t)res->bo_handle);
pipe_mutex_unlock(qdws->bo_handles_mutex);
}
@@ -109,6 +119,7 @@ virgl_drm_winsys_destroy(struct virgl_winsys *qws)
virgl_cache_flush(qdws);
util_hash_table_destroy(qdws->bo_handles);
+ util_hash_table_destroy(qdws->bo_names);
pipe_mutex_destroy(qdws->bo_handles_mutex);
pipe_mutex_destroy(qdws->mutex);
@@ -367,11 +378,12 @@ virgl_drm_winsys_resource_create_handle(struct virgl_winsys *qws,
struct drm_gem_open open_arg = {};
struct drm_virtgpu_resource_info info_arg = {};
struct virgl_hw_res *res;
+ uint32_t handle = whandle->handle;
pipe_mutex_lock(qdws->bo_handles_mutex);
if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
- res = util_hash_table_get(qdws->bo_handles, (void*)(uintptr_t)whandle->handle);
+ res = util_hash_table_get(qdws->bo_names, (void*)(uintptr_t)handle);
if (res) {
struct virgl_hw_res *r = NULL;
virgl_drm_resource_reference(qdws, &r, res);
@@ -379,21 +391,31 @@ virgl_drm_winsys_resource_create_handle(struct virgl_winsys *qws,
}
}
- res = CALLOC_STRUCT(virgl_hw_res);
- if (!res)
- goto done;
-
if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
int r;
- uint32_t handle;
r = drmPrimeFDToHandle(qdws->fd, whandle->handle, &handle);
if (r) {
- FREE(res);
res = NULL;
goto done;
}
+ }
+
+ res = util_hash_table_get(qdws->bo_handles, (void*)(uintptr_t)handle);
+ fprintf(stderr, "resource %p for handle %d, pfd=%d\n", res, handle, whandle->handle);
+ if (res) {
+ struct virgl_hw_res *r = NULL;
+ virgl_drm_resource_reference(qdws, &r, res);
+ goto done;
+ }
+
+ res = CALLOC_STRUCT(virgl_hw_res);
+ if (!res)
+ goto done;
+
+ if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
res->bo_handle = handle;
} else {
+ fprintf(stderr, "gem open handle %d\n", handle);
memset(&open_arg, 0, sizeof(open_arg));
open_arg.name = whandle->handle;
if (drmIoctl(qdws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) {
@@ -403,7 +425,7 @@ virgl_drm_winsys_resource_create_handle(struct virgl_winsys *qws,
}
res->bo_handle = open_arg.handle;
}
- res->name = whandle->handle;
+ res->name = handle;
memset(&info_arg, 0, sizeof(info_arg));
info_arg.bo_handle = res->bo_handle;
@@ -422,7 +444,7 @@ virgl_drm_winsys_resource_create_handle(struct virgl_winsys *qws,
pipe_reference_init(&res->reference, 1);
res->num_cs_references = 0;
- util_hash_table_set(qdws->bo_handles, (void *)(uintptr_t)whandle->handle, res);
+ util_hash_table_set(qdws->bo_handles, (void *)(uintptr_t)handle, res);
done:
pipe_mutex_unlock(qdws->bo_handles_mutex);
@@ -452,7 +474,7 @@ static boolean virgl_drm_winsys_resource_get_handle(struct virgl_winsys *qws,
res->flink = flink.name;
pipe_mutex_lock(qdws->bo_handles_mutex);
- util_hash_table_set(qdws->bo_handles, (void *)(uintptr_t)res->flink, res);
+ util_hash_table_set(qdws->bo_names, (void *)(uintptr_t)res->flink, res);
pipe_mutex_unlock(qdws->bo_handles_mutex);
}
whandle->handle = res->flink;
@@ -732,7 +754,7 @@ static void virgl_fence_reference(struct virgl_winsys *vws,
}
-struct virgl_winsys *
+static struct virgl_winsys *
virgl_drm_winsys_create(int drmFD)
{
struct virgl_drm_winsys *qdws;
@@ -748,6 +770,7 @@ virgl_drm_winsys_create(int drmFD)
pipe_mutex_init(qdws->mutex);
pipe_mutex_init(qdws->bo_handles_mutex);
qdws->bo_handles = util_hash_table_create(handle_hash, handle_compare);
+ qdws->bo_names = util_hash_table_create(handle_hash, handle_compare);
qdws->base.destroy = virgl_drm_winsys_destroy;
qdws->base.transfer_put = virgl_bo_transfer_put;
@@ -772,3 +795,87 @@ virgl_drm_winsys_create(int drmFD)
return &qdws->base;
}
+
+static struct util_hash_table *fd_tab = NULL;
+pipe_static_mutex(virgl_screen_mutex);
+
+static void
+virgl_drm_screen_destroy(struct pipe_screen *pscreen)
+{
+ struct virgl_screen *screen = virgl_screen(pscreen);
+ boolean destroy;
+
+ pipe_mutex_lock(virgl_screen_mutex);
+ destroy = --screen->refcnt == 0;
+ if (destroy) {
+ int fd = virgl_drm_winsys(screen->vws)->fd;
+ util_hash_table_remove(fd_tab, intptr_to_pointer(fd));
+ }
+ pipe_mutex_unlock(virgl_screen_mutex);
+
+ if (destroy) {
+ pscreen->destroy = screen->winsys_priv;
+ pscreen->destroy(pscreen);
+ }
+}
+
+static unsigned hash_fd(void *key)
+{
+ int fd = pointer_to_intptr(key);
+ struct stat stat;
+ fstat(fd, &stat);
+
+ return stat.st_dev ^ stat.st_ino ^ stat.st_rdev;
+}
+
+static int compare_fd(void *key1, void *key2)
+{
+ int fd1 = pointer_to_intptr(key1);
+ int fd2 = pointer_to_intptr(key2);
+ struct stat stat1, stat2;
+ fstat(fd1, &stat1);
+ fstat(fd2, &stat2);
+
+ return stat1.st_dev != stat2.st_dev ||
+ stat1.st_ino != stat2.st_ino ||
+ stat1.st_rdev != stat2.st_rdev;
+}
+
+struct pipe_screen *
+virgl_drm_screen_create(int fd)
+{
+ struct pipe_screen *pscreen = NULL;
+
+ pipe_mutex_lock(virgl_screen_mutex);
+ if (!fd_tab) {
+ fd_tab = util_hash_table_create(hash_fd, compare_fd);
+ if (!fd_tab)
+ goto unlock;
+ }
+
+ pscreen = util_hash_table_get(fd_tab, intptr_to_pointer(fd));
+ if (pscreen) {
+ virgl_screen(pscreen)->refcnt++;
+ } else {
+ struct virgl_winsys *vws;
+ int dup_fd = dup(fd);
+
+ vws = virgl_drm_winsys_create(dup_fd);
+
+ pscreen = virgl_create_screen(vws);
+ if (pscreen) {
+ util_hash_table_set(fd_tab, intptr_to_pointer(dup_fd), pscreen);
+
+ /* Bit of a hack, to avoid circular linkage dependency,
+ * ie. pipe driver having to call in to winsys, we
+ * override the pipe drivers screen->destroy():
+ */
+ virgl_screen(pscreen)->winsys_priv = pscreen->destroy;
+ pscreen->destroy = virgl_drm_screen_destroy;
+ }
+ }
+
+unlock:
+ pipe_mutex_unlock(virgl_screen_mutex);
+ return pscreen;
+}
diff --git a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h
index da85ff87d2a..ffd7658ca81 100644
--- a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h
+++ b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h
@@ -62,6 +62,7 @@ struct virgl_drm_winsys
pipe_mutex mutex;
struct util_hash_table *bo_handles;
+ struct util_hash_table *bo_names;
pipe_mutex bo_handles_mutex;
};
diff --git a/src/gallium/winsys/virgl/vtest/Android.mk b/src/gallium/winsys/virgl/vtest/Android.mk
new file mode 100644
index 00000000000..3e084e44ceb
--- /dev/null
+++ b/src/gallium/winsys/virgl/vtest/Android.mk
@@ -0,0 +1,33 @@
+# Copyright (C) 2014 Emil Velikov <[email protected]>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_PATH := $(call my-dir)
+
+# get C_SOURCES
+include $(LOCAL_PATH)/Makefile.sources
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(C_SOURCES)
+
+LOCAL_MODULE := libmesa_winsys_virgl_vtest
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/src/glx/dri2_glx.c b/src/glx/dri2_glx.c
index 651915aed71..77103492a4f 100644
--- a/src/glx/dri2_glx.c
+++ b/src/glx/dri2_glx.c
@@ -1102,9 +1102,14 @@ dri2BindExtensions(struct dri2_screen *psc, struct glx_display * priv,
__glXEnableDirectExtension(&psc->base, "GLX_ARB_create_context");
__glXEnableDirectExtension(&psc->base, "GLX_ARB_create_context_profile");
- if ((mask & (1 << __DRI_API_GLES2)) != 0)
- __glXEnableDirectExtension(&psc->base,
- "GLX_EXT_create_context_es2_profile");
+ if ((mask & ((1 << __DRI_API_GLES) |
+ (1 << __DRI_API_GLES2) |
+ (1 << __DRI_API_GLES3))) != 0) {
+ __glXEnableDirectExtension(&psc->base,
+ "GLX_EXT_create_context_es_profile");
+ __glXEnableDirectExtension(&psc->base,
+ "GLX_EXT_create_context_es2_profile");
+ }
}
for (i = 0; extensions[i]; i++) {
diff --git a/src/glx/dri3_glx.c b/src/glx/dri3_glx.c
index 8bdbb9caf56..6054ffc3dc1 100644
--- a/src/glx/dri3_glx.c
+++ b/src/glx/dri3_glx.c
@@ -665,9 +665,14 @@ dri3_bind_extensions(struct dri3_screen *psc, struct glx_display * priv,
__glXEnableDirectExtension(&psc->base, "GLX_ARB_create_context");
__glXEnableDirectExtension(&psc->base, "GLX_ARB_create_context_profile");
- if ((mask & (1 << __DRI_API_GLES2)) != 0)
+ if ((mask & ((1 << __DRI_API_GLES) |
+ (1 << __DRI_API_GLES2) |
+ (1 << __DRI_API_GLES3))) != 0) {
+ __glXEnableDirectExtension(&psc->base,
+ "GLX_EXT_create_context_es_profile");
__glXEnableDirectExtension(&psc->base,
"GLX_EXT_create_context_es2_profile");
+ }
for (i = 0; extensions[i]; i++) {
/* when on a different gpu than the server, the server pixmaps
diff --git a/src/glx/dri_common.c b/src/glx/dri_common.c
index 8a56385c4bd..6728d38fa0a 100644
--- a/src/glx/dri_common.c
+++ b/src/glx/dri_common.c
@@ -547,9 +547,18 @@ dri2_convert_glx_attribs(unsigned num_attribs, const uint32_t *attribs,
case GLX_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB:
*api = __DRI_API_OPENGL;
break;
- case GLX_CONTEXT_ES2_PROFILE_BIT_EXT:
- *api = __DRI_API_GLES2;
- break;
+ case GLX_CONTEXT_ES_PROFILE_BIT_EXT:
+ if (*major_ver >= 3)
+ *api = __DRI_API_GLES3;
+ else if (*major_ver == 2 && *minor_ver == 0)
+ *api = __DRI_API_GLES2;
+ else if (*major_ver == 1 && *minor_ver < 2)
+ *api = __DRI_API_GLES;
+ else {
+ *error = __DRI_CTX_ERROR_BAD_API;
+ return false;
+ }
+ break;
default:
*error = __DRI_CTX_ERROR_BAD_API;
return false;
@@ -580,19 +589,6 @@ dri2_convert_glx_attribs(unsigned num_attribs, const uint32_t *attribs,
return false;
}
- /* The GLX_EXT_create_context_es2_profile spec says:
- *
- * "... If the version requested is 2.0, and the
- * GLX_CONTEXT_ES2_PROFILE_BIT_EXT bit is set in the
- * GLX_CONTEXT_PROFILE_MASK_ARB attribute (see below), then the context
- * returned will implement OpenGL ES 2.0. This is the only way in which
- * an implementation may request an OpenGL ES 2.0 context."
- */
- if (*api == __DRI_API_GLES2 && (*major_ver != 2 || *minor_ver != 0)) {
- *error = __DRI_CTX_ERROR_BAD_API;
- return false;
- }
-
*error = __DRI_CTX_ERROR_SUCCESS;
return true;
}
diff --git a/src/glx/drisw_glx.c b/src/glx/drisw_glx.c
index 76cc3214b7b..241ac7f6d2c 100644
--- a/src/glx/drisw_glx.c
+++ b/src/glx/drisw_glx.c
@@ -623,9 +623,11 @@ driswBindExtensions(struct drisw_screen *psc, const __DRIextension **extensions)
__glXEnableDirectExtension(&psc->base, "GLX_ARB_create_context");
__glXEnableDirectExtension(&psc->base, "GLX_ARB_create_context_profile");
- /* DRISW version >= 2 implies support for OpenGL ES 2.0.
+ /* DRISW version >= 2 implies support for OpenGL ES.
*/
__glXEnableDirectExtension(&psc->base,
+ "GLX_EXT_create_context_es_profile");
+ __glXEnableDirectExtension(&psc->base,
"GLX_EXT_create_context_es2_profile");
}
diff --git a/src/glx/glxextensions.c b/src/glx/glxextensions.c
index 3b29aef1234..22b078ce484 100644
--- a/src/glx/glxextensions.c
+++ b/src/glx/glxextensions.c
@@ -146,6 +146,7 @@ static const struct extension_info known_glx_extensions[] = {
{ GLX(EXT_fbconfig_packed_float), VER(0,0), Y, Y, N, N },
{ GLX(EXT_framebuffer_sRGB), VER(0,0), Y, Y, N, N },
{ GLX(EXT_create_context_es2_profile), VER(0,0), Y, N, N, N },
+ { GLX(EXT_create_context_es_profile), VER(0,0), Y, N, N, N },
{ GLX(MESA_copy_sub_buffer), VER(0,0), Y, N, N, N },
{ GLX(MESA_multithread_makecurrent),VER(0,0), Y, N, Y, N },
{ GLX(MESA_query_renderer), VER(0,0), Y, N, N, Y },
diff --git a/src/glx/glxextensions.h b/src/glx/glxextensions.h
index 3a9bc823052..906b3fc16c0 100644
--- a/src/glx/glxextensions.h
+++ b/src/glx/glxextensions.h
@@ -45,6 +45,7 @@ enum
EXT_import_context_bit,
EXT_framebuffer_sRGB_bit,
EXT_fbconfig_packed_float_bit,
+ EXT_create_context_es_profile_bit,
EXT_create_context_es2_profile_bit,
MESA_copy_sub_buffer_bit,
MESA_depth_float_bit,
diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
index d7ab3bff4df..db98ac05fd9 100644
--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -8247,7 +8247,14 @@
<xi:include href="ARB_multi_bind.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
-<!-- ARB extensions 148 - 153 -->
+<category name="GL_ARB_query_buffer_object" number="148">
+ <enum name="QUERY_RESULT_NO_WAIT" value="0x9194"/>
+ <enum name="QUERY_BUFFER" value="0x9192"/>
+ <enum name="QUERY_BUFFER_BINDING" value="0x9193"/>
+ <enum name="QUERY_BUFFER_BARRIER_BIT" value="0x00008000"/>
+</category>
+
+<!-- ARB extensions 149 - 153 -->
<xi:include href="ARB_indirect_parameters.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
@@ -12661,6 +12668,12 @@
<enum name="FRAMEBUFFER_SRGB_CAPABLE_EXT" value="0x8DBA"/>
</category>
+<category name="GL_ATI_meminfo" number="359">
+ <enum name="VBO_FREE_MEMORY_ATI" value="0x87FB" />
+ <enum name="TEXTURE_FREE_MEMORY_ATI" value="0x87FC" />
+ <enum name="RENDERBUFFER_FREE_MEMORY_ATI" value="0x87FD" />
+</category>
+
<xi:include href="AMD_performance_monitor.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
<category name="GL_APPLE_texture_range" number="367">
@@ -12714,6 +12727,14 @@
<enum name="EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD" value="0x9160"/>
</category>
+<category name="GL_NVX_gpu_memory_info" number="438">
+ <enum name="GPU_MEMORY_INFO_DEDICATED_VIDMEM_NVX" value="0x9047" />
+ <enum name="GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX" value="0x9048" />
+ <enum name="GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX" value="0x9049" />
+ <enum name="GPU_MEMORY_INFO_EVICTION_COUNT_NVX" value="0x904A" />
+ <enum name="GPU_MEMORY_INFO_EVICTED_MEMORY_NVX" value="0x904B" />
+</category>
+
<xi:include href="INTEL_performance_query.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
<category name="GL_EXT_polygon_offset_clamp" number="460">
diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index 7af8becd607..ffe560faa3d 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -377,6 +377,7 @@ VBO_FILES = \
vbo/vbo_exec_eval.c \
vbo/vbo_exec.h \
vbo/vbo.h \
+ vbo/vbo_minmax_index.c \
vbo/vbo_noop.c \
vbo/vbo_noop.h \
vbo/vbo_primitive_restart.c \
@@ -393,6 +394,7 @@ VBO_FILES = \
STATETRACKER_FILES = \
state_tracker/st_atom_array.c \
+ state_tracker/st_atom_atomicbuf.c \
state_tracker/st_atom_blend.c \
state_tracker/st_atom.c \
state_tracker/st_atom_clip.c \
@@ -409,6 +411,7 @@ STATETRACKER_FILES = \
state_tracker/st_atom_shader.c \
state_tracker/st_atom_shader.h \
state_tracker/st_atom_stipple.c \
+ state_tracker/st_atom_storagebuf.c \
state_tracker/st_atom_tess.c \
state_tracker/st_atom_texture.c \
state_tracker/st_atom_viewport.c \
diff --git a/src/mesa/drivers/dri/common/xmlpool/t_options.h b/src/mesa/drivers/dri/common/xmlpool/t_options.h
index 55e926b239e..e5cbc465871 100644
--- a/src/mesa/drivers/dri/common/xmlpool/t_options.h
+++ b/src/mesa/drivers/dri/common/xmlpool/t_options.h
@@ -363,3 +363,8 @@ DRI_CONF_OPT_END
DRI_CONF_OPT_BEGIN_B(thread_submit, def) \
DRI_CONF_DESC(en,gettext("Use an additional thread to submit buffers.")) \
DRI_CONF_OPT_END
+
+#define DRI_CONF_NINE_OVERRIDEVENDOR(def) \
+DRI_CONF_OPT_BEGIN(override_vendorid, int, def) \
+ DRI_CONF_DESC(en,"Define the vendor_id to report. This allows faking another hardware vendor.") \
+DRI_CONF_OPT_END
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c b/src/mesa/drivers/dri/i965/brw_compiler.c
index 0401e397031..00e44af2f8d 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.c
+++ b/src/mesa/drivers/dri/i965/brw_compiler.c
@@ -23,7 +23,7 @@
#include "brw_compiler.h"
#include "brw_context.h"
-#include "nir.h"
+#include "compiler/nir/nir.h"
#include "main/errors.h"
#include "util/debug.h"
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 1032e5a8175..44d2fe4d9e4 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -167,6 +167,19 @@ intel_viewport(struct gl_context *ctx)
}
static void
+intel_update_framebuffer(struct gl_context *ctx,
+                         struct gl_framebuffer *fb)
+{
+   /* Refresh the derived default sample count: take the NumSamples value
+    * stored in the framebuffer's default geometry and store the result of
+    * intel_quantize_num_samples() for this context's screen in _NumSamples.
+    */
+   fb->DefaultGeometry._NumSamples =
+      intel_quantize_num_samples(brw_context(ctx)->intelScreen,
+                                 fb->DefaultGeometry.NumSamples);
+}
+
+static void
intel_update_state(struct gl_context * ctx, GLuint new_state)
{
struct brw_context *brw = brw_context(ctx);
@@ -245,6 +258,12 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
}
_mesa_lock_context_textures(ctx);
+
+ if (new_state & _NEW_BUFFERS) {
+ intel_update_framebuffer(ctx, ctx->DrawBuffer);
+ if (ctx->DrawBuffer != ctx->ReadBuffer)
+ intel_update_framebuffer(ctx, ctx->ReadBuffer);
+ }
}
#define flushFront(screen) ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
index 994c699bb5a..d7a1456bce0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
@@ -268,7 +268,7 @@ fs_visitor::opt_combine_constants()
qsort(table.imm, table.len, sizeof(struct imm), compare);
/* Insert MOVs to load the constant values into GRFs. */
- fs_reg reg(VGRF, alloc.allocate(dispatch_width / 8));
+ fs_reg reg(VGRF, alloc.allocate(1));
reg.stride = 0;
for (int i = 0; i < table.len; i++) {
struct imm *imm = &table.imm[i];
@@ -284,8 +284,8 @@ fs_visitor::opt_combine_constants()
imm->subreg_offset = reg.subreg_offset;
reg.subreg_offset += sizeof(float);
- if ((unsigned)reg.subreg_offset == dispatch_width * sizeof(float)) {
- reg.nr = alloc.allocate(dispatch_width / 8);
+ if ((unsigned)reg.subreg_offset == 8 * sizeof(float)) {
+ reg.nr = alloc.allocate(1);
reg.subreg_offset = 0;
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 6c3a8d70677..cd7f3fe851a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1144,16 +1144,16 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
inst->predicate = BRW_PREDICATE_NORMAL;
break;
- case nir_op_extract_ubyte:
- case nir_op_extract_ibyte: {
+ case nir_op_extract_u8:
+ case nir_op_extract_i8: {
nir_const_value *byte = nir_src_as_const_value(instr->src[1].src);
bld.emit(SHADER_OPCODE_EXTRACT_BYTE,
result, op[0], brw_imm_ud(byte->u[0]));
break;
}
- case nir_op_extract_uword:
- case nir_op_extract_iword: {
+ case nir_op_extract_u16:
+ case nir_op_extract_i16: {
nir_const_value *word = nir_src_as_const_value(instr->src[1].src);
bld.emit(SHADER_OPCODE_EXTRACT_WORD,
result, op[0], brw_imm_ud(word->u[0]));
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 6b9bfcf0b85..c1690ad45c3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -939,7 +939,7 @@ fs_visitor::emit_barrier()
/* Clear the message payload */
pbld.MOV(payload, brw_imm_ud(0u));
- /* Copy bits 27:24 of r0.2 (barrier id) to the message payload reg.2 */
+ /* Copy the barrier id from r0.2 to the message payload reg.2 */
fs_reg r0_2 = fs_reg(retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD));
pbld.AND(component(payload, 2), r0_2, brw_imm_ud(barrier_id_mask));
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index c6f0b0d8a2a..6bd992882b8 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -254,8 +254,8 @@ try_constant_propagate(const struct brw_device_info *devinfo,
static bool
try_copy_propagate(const struct brw_device_info *devinfo,
- vec4_instruction *inst,
- int arg, struct copy_entry *entry)
+ vec4_instruction *inst, int arg,
+ struct copy_entry *entry, int attributes_per_reg)
{
/* Build up the value we are propagating as if it were the source of a
* single MOV
@@ -320,7 +320,8 @@ try_copy_propagate(const struct brw_device_info *devinfo,
unsigned composed_swizzle = brw_compose_swizzle(inst->src[arg].swizzle,
value.swizzle);
if (inst->is_3src() &&
- value.file == UNIFORM &&
+ (value.file == UNIFORM ||
+ (value.file == ATTR && attributes_per_reg != 1)) &&
!brw_is_single_value_swizzle(composed_swizzle))
return false;
@@ -395,6 +396,11 @@ try_copy_propagate(const struct brw_device_info *devinfo,
bool
vec4_visitor::opt_copy_propagation(bool do_constant_prop)
{
+ /* If we are in dual instanced or single mode, then attributes are going
+ * to be interleaved, so one register contains two attribute slots.
+ */
+ const int attributes_per_reg =
+ prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2;
bool progress = false;
struct copy_entry entries[alloc.total_size];
@@ -465,7 +471,7 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop)
if (do_constant_prop && try_constant_propagate(devinfo, inst, i, &entry))
progress = true;
- if (try_copy_propagate(devinfo, inst, i, &entry))
+ if (try_copy_propagate(devinfo, inst, i, &entry, attributes_per_reg))
progress = true;
}
diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c
index 904950dfa07..0df25d2557c 100644
--- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
@@ -210,7 +210,7 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
{
const unsigned depth = max_layer - min_layer;
struct intel_mipmap_tree *aux_mt = NULL;
- uint32_t aux_mode = 0;
+ uint32_t aux_mode = GEN8_SURFACE_AUX_MODE_NONE;
uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
unsigned tiling_mode, pitch;
@@ -425,7 +425,7 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
struct intel_mipmap_tree *mt = irb->mt;
struct intel_mipmap_tree *aux_mt = NULL;
- uint32_t aux_mode = 0;
+ uint32_t aux_mode = GEN8_SURFACE_AUX_MODE_NONE;
unsigned width = mt->logical_width0;
unsigned height = mt->logical_height0;
unsigned pitch = mt->pitch;
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index 8ede1f06e4e..de1aba44c1b 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -32,6 +32,7 @@
#include <stdbool.h>
#include <inttypes.h> /* for PRId64 macro */
+#include "util/debug.h"
#include "glheader.h"
#include "enums.h"
#include "hash.h"
@@ -120,6 +121,10 @@ get_buffer_target(struct gl_context *ctx, GLenum target)
return &ctx->CopyReadBuffer;
case GL_COPY_WRITE_BUFFER:
return &ctx->CopyWriteBuffer;
+ case GL_QUERY_BUFFER:
+ if (_mesa_has_ARB_query_buffer_object(ctx))
+ return &ctx->QueryBuffer;
+ break;
case GL_DRAW_INDIRECT_BUFFER:
if ((ctx->API == API_OPENGL_CORE &&
ctx->Extensions.ARB_draw_indirect) ||
@@ -458,6 +463,7 @@ _mesa_delete_buffer_object(struct gl_context *ctx,
{
(void) ctx;
+ vbo_delete_minmax_cache(bufObj);
_mesa_align_free(bufObj->Data);
/* assign strange values here to help w/ debugging */
@@ -520,6 +526,24 @@ _mesa_reference_buffer_object_(struct gl_context *ctx,
/**
+ * Get the value of MESA_NO_MINMAX_CACHE.
+ *
+ * The environment variable is evaluated with env_var_as_boolean() (default
+ * false) on the first call only; the result is kept in a function-local
+ * static and returned unchanged by every later call.
+ *
+ * NOTE(review): the first-call initialisation is not synchronised; confirm
+ * that concurrent first calls are acceptable for the callers.
+ *
+ * \return the boolean env_var_as_boolean() produced for
+ *         MESA_NO_MINMAX_CACHE
+ */
+static bool
+get_no_minmax_cache(void)
+{
+   static bool queried = false;
+   static bool disable = false;
+
+   if (!queried) {
+      disable = env_var_as_boolean("MESA_NO_MINMAX_CACHE", false);
+      queried = true;
+   }
+
+   return disable;
+}
+
+
+/**
* Initialize a buffer object to default values.
*/
void
@@ -532,6 +556,9 @@ _mesa_initialize_buffer_object(struct gl_context *ctx,
obj->RefCount = 1;
obj->Name = name;
obj->Usage = GL_STATIC_DRAW_ARB;
+
+ if (get_no_minmax_cache())
+ obj->UsageHistory |= USAGE_DISABLE_MINMAX_CACHE;
}
@@ -877,6 +904,9 @@ _mesa_init_buffer_objects( struct gl_context *ctx )
_mesa_reference_buffer_object(ctx, &ctx->DispatchIndirectBuffer,
ctx->Shared->NullBufferObj);
+ _mesa_reference_buffer_object(ctx, &ctx->QueryBuffer,
+ ctx->Shared->NullBufferObj);
+
for (i = 0; i < MAX_COMBINED_UNIFORM_BUFFERS; i++) {
_mesa_reference_buffer_object(ctx,
&ctx->UniformBufferBindings[i].BufferObject,
@@ -925,6 +955,8 @@ _mesa_free_buffer_objects( struct gl_context *ctx )
_mesa_reference_buffer_object(ctx, &ctx->DispatchIndirectBuffer, NULL);
+ _mesa_reference_buffer_object(ctx, &ctx->QueryBuffer, NULL);
+
for (i = 0; i < MAX_COMBINED_UNIFORM_BUFFERS; i++) {
_mesa_reference_buffer_object(ctx,
&ctx->UniformBufferBindings[i].BufferObject,
@@ -1014,6 +1046,15 @@ bind_buffer_object(struct gl_context *ctx, GLenum target, GLuint buffer)
return;
}
+ /* record usage history */
+ switch (target) {
+ case GL_PIXEL_PACK_BUFFER:
+ newBufObj->UsageHistory |= USAGE_PIXEL_PACK_BUFFER;
+ break;
+ default:
+ break;
+ }
+
/* bind new buffer */
_mesa_reference_buffer_object(ctx, bindTarget, newBufObj);
}
@@ -1348,6 +1389,11 @@ _mesa_DeleteBuffers(GLsizei n, const GLuint *ids)
_mesa_BindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, 0);
}
+ /* unbind query buffer binding point */
+ if (ctx->QueryBuffer == bufObj) {
+ _mesa_BindBuffer(GL_QUERY_BUFFER, 0);
+ }
+
/* The ID is immediately freed for re-use */
_mesa_HashRemove(ctx->Shared->BufferObjects, ids[i]);
/* Make sure we do not run into the classic ABA problem on bind.
@@ -1519,6 +1565,7 @@ _mesa_buffer_storage(struct gl_context *ctx, struct gl_buffer_object *bufObj,
bufObj->Written = GL_TRUE;
bufObj->Immutable = GL_TRUE;
+ bufObj->MinMaxCacheDirty = true;
assert(ctx->Driver.BufferData);
if (!ctx->Driver.BufferData(ctx, target, size, data, GL_DYNAMIC_DRAW,
@@ -1632,6 +1679,7 @@ _mesa_buffer_data(struct gl_context *ctx, struct gl_buffer_object *bufObj,
FLUSH_VERTICES(ctx, _NEW_BUFFER_OBJECT);
bufObj->Written = GL_TRUE;
+ bufObj->MinMaxCacheDirty = true;
#ifdef VBO_DEBUG
printf("glBufferDataARB(%u, sz %ld, from %p, usage 0x%x)\n",
@@ -1744,6 +1792,7 @@ _mesa_buffer_sub_data(struct gl_context *ctx, struct gl_buffer_object *bufObj,
}
bufObj->Written = GL_TRUE;
+ bufObj->MinMaxCacheDirty = true;
assert(ctx->Driver.BufferSubData);
ctx->Driver.BufferSubData(ctx, offset, size, data, bufObj);
@@ -1859,12 +1908,16 @@ _mesa_clear_buffer_sub_data(struct gl_context *ctx,
return;
}
+ /* Bail early. Negative size has already been checked. */
+ if (size == 0)
+ return;
+
+ bufObj->MinMaxCacheDirty = true;
+
if (data == NULL) {
/* clear to zeros, per the spec */
- if (size > 0) {
- ctx->Driver.ClearBufferSubData(ctx, offset, size,
- NULL, clearValueSize, bufObj);
- }
+ ctx->Driver.ClearBufferSubData(ctx, offset, size,
+ NULL, clearValueSize, bufObj);
return;
}
@@ -1873,10 +1926,8 @@ _mesa_clear_buffer_sub_data(struct gl_context *ctx,
return;
}
- if (size > 0) {
- ctx->Driver.ClearBufferSubData(ctx, offset, size,
- clearValue, clearValueSize, bufObj);
- }
+ ctx->Driver.ClearBufferSubData(ctx, offset, size,
+ clearValue, clearValueSize, bufObj);
}
void GLAPIENTRY
@@ -2276,6 +2327,8 @@ _mesa_copy_buffer_sub_data(struct gl_context *ctx,
}
}
+ dst->MinMaxCacheDirty = true;
+
ctx->Driver.CopyBufferSubData(ctx, src, dst, readOffset, writeOffset, size);
}
@@ -2480,8 +2533,10 @@ _mesa_map_buffer_range(struct gl_context *ctx,
assert(bufObj->Mappings[MAP_USER].AccessFlags == access);
}
- if (access & GL_MAP_WRITE_BIT)
+ if (access & GL_MAP_WRITE_BIT) {
bufObj->Written = GL_TRUE;
+ bufObj->MinMaxCacheDirty = true;
+ }
#ifdef VBO_DEBUG
if (strstr(func, "Range") == NULL) { /* If not MapRange */
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index d4378e51159..19ef3042548 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -48,6 +48,7 @@ struct gl_shader;
struct gl_shader_program;
struct gl_texture_image;
struct gl_texture_object;
+struct gl_memory_info;
/* GL_ARB_vertex_buffer_object */
/* Modifies GL_MAP_UNSYNCHRONIZED_BIT to allow driver to fail (return
@@ -726,6 +727,15 @@ struct dd_function_table {
void (*EndQuery)(struct gl_context *ctx, struct gl_query_object *q);
void (*CheckQuery)(struct gl_context *ctx, struct gl_query_object *q);
void (*WaitQuery)(struct gl_context *ctx, struct gl_query_object *q);
+ /*
+ * \pname the value requested to be written (GL_QUERY_RESULT, etc)
+ * \ptype the type of the value requested to be written:
+ * GL_UNSIGNED_INT, GL_UNSIGNED_INT64_ARB,
+ * GL_INT, GL_INT64_ARB
+ */
+ void (*StoreQueryResult)(struct gl_context *ctx, struct gl_query_object *q,
+ struct gl_buffer_object *buf, intptr_t offset,
+ GLenum pname, GLenum ptype);
/*@}*/
/**
@@ -939,6 +949,13 @@ struct dd_function_table {
void (*DispatchCompute)(struct gl_context *ctx, const GLuint *num_groups);
void (*DispatchComputeIndirect)(struct gl_context *ctx, GLintptr indirect);
/*@}*/
+
+ /**
+ * Query information about memory. Device memory is e.g. VRAM. Staging
+ * memory is e.g. GART. All sizes are in kilobytes.
+ */
+ void (*QueryMemoryInfo)(struct gl_context *ctx,
+ struct gl_memory_info *info);
};
diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index 11f4482f8d2..ded6f2c06dc 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -88,6 +88,7 @@ EXT(ARB_point_parameters , EXT_point_parameters
EXT(ARB_point_sprite , ARB_point_sprite , GLL, GLC, x , x , 2003)
EXT(ARB_program_interface_query , dummy_true , GLL, GLC, x , x , 2012)
EXT(ARB_provoking_vertex , EXT_provoking_vertex , GLL, GLC, x , x , 2009)
+EXT(ARB_query_buffer_object , ARB_query_buffer_object , GLL, GLC, x , x , 2013)
EXT(ARB_robustness , dummy_true , GLL, GLC, x , x , 2010)
EXT(ARB_sample_shading , ARB_sample_shading , GLL, GLC, x , x , 2009)
EXT(ARB_sampler_objects , dummy_true , GLL, GLC, x , x , 2009)
@@ -165,6 +166,7 @@ EXT(ARB_window_pos , dummy_true
EXT(ATI_blend_equation_separate , EXT_blend_equation_separate , GLL, GLC, x , x , 2003)
EXT(ATI_draw_buffers , dummy_true , GLL, x , x , x , 2002)
EXT(ATI_fragment_shader , ATI_fragment_shader , GLL, x , x , x , 2001)
+EXT(ATI_meminfo , ATI_meminfo , GLL, GLC, x , x , 2009)
EXT(ATI_separate_stencil , ATI_separate_stencil , GLL, x , x , x , 2006)
EXT(ATI_texture_compression_3dc , ATI_texture_compression_3dc , GLL, x , x , x , 2004)
EXT(ATI_texture_env_combine3 , ATI_texture_env_combine3 , GLL, x , x , x , 2002)
@@ -291,6 +293,7 @@ EXT(NV_texture_barrier , NV_texture_barrier
EXT(NV_texture_env_combine4 , NV_texture_env_combine4 , GLL, x , x , x , 1999)
EXT(NV_texture_rectangle , NV_texture_rectangle , GLL, x , x , x , 2000)
EXT(NV_vdpau_interop , NV_vdpau_interop , GLL, GLC, x , x , 2010)
+EXT(NVX_gpu_memory_info , NVX_gpu_memory_info , GLL, GLC, x , x , 2013)
EXT(OES_EGL_image , OES_EGL_image , GLL, GLC, ES1, ES2, 2006) /* FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */
EXT(OES_EGL_image_external , OES_EGL_image_external , x , x , ES1, ES2, 2010)
@@ -311,6 +314,7 @@ EXT(OES_element_index_uint , dummy_true
EXT(OES_fbo_render_mipmap , dummy_true , x , x , ES1, ES2, 2005)
EXT(OES_fixed_point , dummy_true , x , x , ES1, x , 2002)
EXT(OES_framebuffer_object , dummy_true , x , x , ES1, x , 2005)
+EXT(OES_geometry_point_size , OES_geometry_shader , x , x , x , 31, 2015)
EXT(OES_geometry_shader , OES_geometry_shader , x , x , x , 31, 2015)
EXT(OES_get_program_binary , dummy_true , x , x , x , ES2, 2008)
EXT(OES_mapbuffer , dummy_true , x , x , ES1, ES2, 2005)
diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 3be216da234..2d4acb35bd6 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -1414,6 +1414,9 @@ framebuffer_parameteri(struct gl_context *ctx, struct gl_framebuffer *fb,
_mesa_error(ctx, GL_INVALID_ENUM,
"%s(pname=0x%x)", func, pname);
}
+
+ invalidate_framebuffer(fb);
+ ctx->NewState |= _NEW_BUFFERS;
}
void GLAPIENTRY
diff --git a/src/mesa/main/format_parser.py b/src/mesa/main/format_parser.py
index 799b14f0b1c..a29f20754a8 100755
--- a/src/mesa/main/format_parser.py
+++ b/src/mesa/main/format_parser.py
@@ -532,7 +532,7 @@ def _parse_channels(fields, layout, colorspace, swizzle):
return channels
def parse(filename):
- """Parse a format descrition in CSV format.
+ """Parse a format description in CSV format.
This function parses the given CSV file and returns an iterable of
channels."""
diff --git a/src/mesa/main/framebuffer.h b/src/mesa/main/framebuffer.h
index bfc8a0836e7..fa434d447ae 100644
--- a/src/mesa/main/framebuffer.h
+++ b/src/mesa/main/framebuffer.h
@@ -26,7 +26,7 @@
#ifndef FRAMEBUFFER_H
#define FRAMEBUFFER_H
-#include "glheader.h"
+#include "mtypes.h"
struct gl_config;
struct gl_context;
@@ -97,7 +97,8 @@ static inline GLuint
_mesa_geometric_samples(const struct gl_framebuffer *buffer)
{
return buffer->_HasAttachments ?
- buffer->Visual.samples : buffer->DefaultGeometry.NumSamples;
+ buffer->Visual.samples :
+ buffer->DefaultGeometry._NumSamples;
}
static inline GLuint
diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index 95cb18c8ee8..8453a922549 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -147,11 +147,14 @@ enum value_extra {
EXTRA_VALID_CLIP_DISTANCE,
EXTRA_FLUSH_CURRENT,
EXTRA_GLSL_130,
- EXTRA_EXT_UBO_GS4,
- EXTRA_EXT_ATOMICS_GS4,
- EXTRA_EXT_SHADER_IMAGE_GS4,
+ EXTRA_EXT_UBO_GS,
+ EXTRA_EXT_ATOMICS_GS,
+ EXTRA_EXT_SHADER_IMAGE_GS,
EXTRA_EXT_ATOMICS_TESS,
EXTRA_EXT_SHADER_IMAGE_TESS,
+ EXTRA_EXT_SSBO_GS,
+ EXTRA_EXT_FB_NO_ATTACH_GS,
+ EXTRA_EXT_ES_GS,
};
#define NO_EXTRA NULL
@@ -308,7 +311,7 @@ static const int extra_ARB_transform_feedback2_api_es3[] = {
};
static const int extra_ARB_uniform_buffer_object_and_geometry_shader[] = {
- EXTRA_EXT_UBO_GS4,
+ EXTRA_EXT_UBO_GS,
EXTRA_END
};
@@ -343,12 +346,12 @@ static const int extra_EXT_texture_array_es3[] = {
};
static const int extra_ARB_shader_atomic_counters_and_geometry_shader[] = {
- EXTRA_EXT_ATOMICS_GS4,
+ EXTRA_EXT_ATOMICS_GS,
EXTRA_END
};
static const int extra_ARB_shader_image_load_store_and_geometry_shader[] = {
- EXTRA_EXT_SHADER_IMAGE_GS4,
+ EXTRA_EXT_SHADER_IMAGE_GS,
EXTRA_END
};
@@ -375,6 +378,28 @@ static const int extra_ARB_shader_storage_buffer_object_es31[] = {
EXTRA_END
};
+static const int extra_ARB_shader_storage_buffer_object_and_geometry_shader[] = {
+ EXTRA_EXT_SSBO_GS,
+ EXTRA_END
+};
+
+static const int extra_ARB_framebuffer_no_attachments_and_geometry_shader[] = {
+ EXTRA_EXT_FB_NO_ATTACH_GS,
+ EXTRA_END
+};
+
+static const int extra_ARB_viewport_array_or_oes_geometry_shader[] = {
+ EXT(ARB_viewport_array),
+ EXTRA_EXT_ES_GS,
+ EXTRA_END
+};
+
+static const int extra_ARB_gpu_shader5_or_oes_geometry_shader[] = {
+ EXT(ARB_gpu_shader5),
+ EXTRA_EXT_ES_GS,
+ EXTRA_END
+};
+
EXTRA_EXT(ARB_texture_cube_map);
EXTRA_EXT(EXT_texture_array);
EXTRA_EXT(NV_fog_distance);
@@ -414,6 +439,7 @@ EXTRA_EXT(ARB_shader_image_load_store);
EXTRA_EXT(ARB_viewport_array);
EXTRA_EXT(ARB_compute_shader);
EXTRA_EXT(ARB_gpu_shader5);
+EXTRA_EXT(ARB_query_buffer_object);
EXTRA_EXT2(ARB_transform_feedback3, ARB_gpu_shader5);
EXTRA_EXT(INTEL_performance_query);
EXTRA_EXT(ARB_explicit_uniform_location);
@@ -424,6 +450,8 @@ EXTRA_EXT(ARB_tessellation_shader);
EXTRA_EXT(ARB_shader_subroutine);
EXTRA_EXT(ARB_shader_storage_buffer_object);
EXTRA_EXT(ARB_indirect_parameters);
+EXTRA_EXT(ATI_meminfo);
+EXTRA_EXT(NVX_gpu_memory_info);
static const int
extra_ARB_color_buffer_float_or_glcore[] = {
@@ -455,6 +483,12 @@ static const int extra_gl32_es3[] = {
EXTRA_END,
};
+static const int extra_version_32_OES_geometry_shader[] = {
+ EXTRA_VERSION_32,
+ EXTRA_EXT_ES_GS,
+ EXTRA_END
+};
+
static const int extra_gl40_ARB_sample_shading[] = {
EXTRA_VERSION_40,
EXT(ARB_sample_shading),
@@ -1006,6 +1040,10 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu
case GL_SHADER_STORAGE_BUFFER_BINDING:
v->value_int = ctx->ShaderStorageBuffer->Name;
break;
+ /* GL_ARB_query_buffer_object */
+ case GL_QUERY_BUFFER_BINDING:
+ v->value_int = ctx->QueryBuffer->Name;
+ break;
/* GL_ARB_timer_query */
case GL_TIMESTAMP:
if (ctx->Driver.GetTimestamp) {
@@ -1049,6 +1087,60 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu
case GL_DISPATCH_INDIRECT_BUFFER_BINDING:
v->value_int = ctx->DispatchIndirectBuffer->Name;
break;
+ /* GL_ARB_multisample */
+ case GL_SAMPLES:
+ v->value_int = _mesa_geometric_samples(ctx->DrawBuffer);
+ break;
+ case GL_SAMPLE_BUFFERS:
+ v->value_int = _mesa_geometric_samples(ctx->DrawBuffer) > 0;
+ break;
+ /* GL_ATI_meminfo & GL_NVX_gpu_memory_info */
+ case GL_VBO_FREE_MEMORY_ATI:
+ case GL_TEXTURE_FREE_MEMORY_ATI:
+ case GL_RENDERBUFFER_FREE_MEMORY_ATI:
+ case GL_GPU_MEMORY_INFO_DEDICATED_VIDMEM_NVX:
+ case GL_GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX:
+ case GL_GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX:
+ case GL_GPU_MEMORY_INFO_EVICTION_COUNT_NVX:
+ case GL_GPU_MEMORY_INFO_EVICTED_MEMORY_NVX:
+ {
+ struct gl_memory_info info;
+
+ ctx->Driver.QueryMemoryInfo(ctx, &info);
+
+ if (d->pname == GL_GPU_MEMORY_INFO_DEDICATED_VIDMEM_NVX)
+ v->value_int = info.total_device_memory;
+ else if (d->pname == GL_GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX)
+ v->value_int = info.total_device_memory +
+ info.total_staging_memory;
+ else if (d->pname == GL_GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX)
+ v->value_int = info.avail_device_memory;
+ else if (d->pname == GL_GPU_MEMORY_INFO_EVICTION_COUNT_NVX)
+ v->value_int = info.nr_device_memory_evictions;
+ else if (d->pname == GL_GPU_MEMORY_INFO_EVICTED_MEMORY_NVX)
+ v->value_int = info.device_memory_evicted;
+ else {
+ /* ATI free memory enums.
+ *
+ * Since the GPU memory is (usually) page-table based, every two
+ * consecutive elements are equal. From the GL_ATI_meminfo
+ * specification:
+ *
+ * "param[0] - total memory free in the pool
+ * param[1] - largest available free block in the pool
+ * param[2] - total auxiliary memory free
+ * param[3] - largest auxiliary free block"
+ *
+ * All three (VBO, TEXTURE, RENDERBUFFER) queries return
+ * the same numbers here.
+ */
+ v->value_int_4[0] = info.avail_device_memory;
+ v->value_int_4[1] = info.avail_device_memory;
+ v->value_int_4[2] = info.avail_staging_memory;
+ v->value_int_4[3] = info.avail_staging_memory;
+ }
+ }
+ break;
}
}
@@ -1154,20 +1246,23 @@ check_extra(struct gl_context *ctx, const char *func, const struct value_desc *d
if (ctx->Const.GLSLVersion >= 130)
api_found = GL_TRUE;
break;
- case EXTRA_EXT_UBO_GS4:
+ case EXTRA_EXT_UBO_GS:
api_check = GL_TRUE;
- api_found = (ctx->Extensions.ARB_uniform_buffer_object &&
- _mesa_has_geometry_shaders(ctx));
+ if (ctx->Extensions.ARB_uniform_buffer_object &&
+ _mesa_has_geometry_shaders(ctx))
+ api_found = GL_TRUE;
break;
- case EXTRA_EXT_ATOMICS_GS4:
+ case EXTRA_EXT_ATOMICS_GS:
api_check = GL_TRUE;
- api_found = (ctx->Extensions.ARB_shader_atomic_counters &&
- _mesa_has_geometry_shaders(ctx));
+ if (ctx->Extensions.ARB_shader_atomic_counters &&
+ _mesa_has_geometry_shaders(ctx))
+ api_found = GL_TRUE;
break;
- case EXTRA_EXT_SHADER_IMAGE_GS4:
+ case EXTRA_EXT_SHADER_IMAGE_GS:
api_check = GL_TRUE;
- api_found = (ctx->Extensions.ARB_shader_image_load_store &&
- _mesa_has_geometry_shaders(ctx));
+ if (ctx->Extensions.ARB_shader_image_load_store &&
+ _mesa_has_geometry_shaders(ctx))
+ api_found = GL_TRUE;
break;
case EXTRA_EXT_ATOMICS_TESS:
api_check = GL_TRUE;
@@ -1179,6 +1274,24 @@ check_extra(struct gl_context *ctx, const char *func, const struct value_desc *d
api_found = ctx->Extensions.ARB_shader_image_load_store &&
_mesa_has_tessellation(ctx);
break;
+ case EXTRA_EXT_SSBO_GS:
+ api_check = GL_TRUE;
+ if (ctx->Extensions.ARB_shader_storage_buffer_object &&
+ _mesa_has_geometry_shaders(ctx))
+ api_found = GL_TRUE;
+ break;
+ case EXTRA_EXT_FB_NO_ATTACH_GS:
+ api_check = GL_TRUE;
+ if (ctx->Extensions.ARB_framebuffer_no_attachments &&
+ (_mesa_is_desktop_gl(ctx) ||
+ _mesa_has_OES_geometry_shader(ctx)))
+ api_found = GL_TRUE;
+ break;
+ case EXTRA_EXT_ES_GS:
+ api_check = GL_TRUE;
+ if (_mesa_has_OES_geometry_shader(ctx))
+ api_found = GL_TRUE;
+ break;
case EXTRA_END:
break;
default: /* *e is a offset into the extension struct */
diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index af7a8f4a906..164095c103c 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -80,8 +80,8 @@ descriptor=[
[ "SAMPLE_COVERAGE_ARB", "CONTEXT_BOOL(Multisample.SampleCoverage), NO_EXTRA" ],
[ "SAMPLE_COVERAGE_VALUE_ARB", "CONTEXT_FLOAT(Multisample.SampleCoverageValue), NO_EXTRA" ],
[ "SAMPLE_COVERAGE_INVERT_ARB", "CONTEXT_BOOL(Multisample.SampleCoverageInvert), NO_EXTRA" ],
- [ "SAMPLE_BUFFERS_ARB", "BUFFER_INT(Visual.sampleBuffers), extra_new_buffers" ],
- [ "SAMPLES_ARB", "BUFFER_INT(Visual.samples), extra_new_buffers" ],
+ [ "SAMPLE_BUFFERS_ARB", "LOC_CUSTOM, TYPE_INT, 0, extra_new_buffers" ],
+ [ "SAMPLES_ARB", "LOC_CUSTOM, TYPE_INT, 0, extra_new_buffers" ],
# GL_ARB_sample_shading
[ "SAMPLE_SHADING_ARB", "CONTEXT_BOOL(Multisample.SampleShading), extra_gl40_ARB_sample_shading" ],
@@ -470,6 +470,9 @@ descriptor=[
["MAX_FRAMEBUFFER_HEIGHT", "CONTEXT_INT(Const.MaxFramebufferHeight), extra_ARB_framebuffer_no_attachments"],
["MAX_FRAMEBUFFER_SAMPLES", "CONTEXT_INT(Const.MaxFramebufferSamples), extra_ARB_framebuffer_no_attachments"],
+# GL_ARB_framebuffer_no_attachments / geometry shader
+ [ "MAX_FRAMEBUFFER_LAYERS", "CONTEXT_INT(Const.MaxFramebufferLayers), extra_ARB_framebuffer_no_attachments_and_geometry_shader" ],
+
# GL_ARB_explicit_uniform_location / GLES 3.1
[ "MAX_UNIFORM_LOCATIONS", "CONTEXT_INT(Const.MaxUserAssignableUniformLocations), extra_ARB_explicit_uniform_location" ],
@@ -499,6 +502,34 @@ descriptor=[
{ "apis": ["GL_CORE", "GLES31"], "params": [
# GL_ARB_draw_indirect / GLES 3.1
[ "DRAW_INDIRECT_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_draw_indirect" ],
+
+# GL 3.2 / GL OES_geometry_shader
+ [ "MAX_GEOMETRY_INPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents), extra_version_32_OES_geometry_shader" ],
+ [ "MAX_GEOMETRY_OUTPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents), extra_version_32_OES_geometry_shader" ],
+ [ "MAX_GEOMETRY_TEXTURE_IMAGE_UNITS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits), extra_version_32_OES_geometry_shader" ],
+ [ "MAX_GEOMETRY_OUTPUT_VERTICES", "CONTEXT_INT(Const.MaxGeometryOutputVertices), extra_version_32_OES_geometry_shader" ],
+ [ "MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS", "CONTEXT_INT(Const.MaxGeometryTotalOutputComponents), extra_version_32_OES_geometry_shader" ],
+ [ "MAX_GEOMETRY_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxUniformComponents), extra_version_32_OES_geometry_shader" ],
+
+# GL_ARB_shader_image_load_store / geometry shader
+ [ "MAX_GEOMETRY_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxImageUniforms), extra_ARB_shader_image_load_store_and_geometry_shader" ],
+
+# GL_ARB_shader_atomic_counters / geometry shader
+ [ "MAX_GEOMETRY_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers), extra_ARB_shader_atomic_counters_and_geometry_shader" ],
+ [ "MAX_GEOMETRY_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters), extra_ARB_shader_atomic_counters_and_geometry_shader" ],
+
+# GL_ARB_shader_storage_buffer_object / geometry shader
+ [ "MAX_GEOMETRY_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object_and_geometry_shader" ],
+
+# GL_ARB_uniform_buffer_object / geometry shader
+ [ "MAX_GEOMETRY_UNIFORM_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxUniformBlocks), extra_ARB_uniform_buffer_object_and_geometry_shader" ],
+ [ "MAX_COMBINED_GEOMETRY_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxCombinedUniformComponents), extra_ARB_uniform_buffer_object_and_geometry_shader" ],
+
+# GL_ARB_viewport_array / GL_OES_geometry_shader
+ [ "LAYER_PROVOKING_VERTEX", "CONTEXT_ENUM(Light.ProvokingVertex), extra_ARB_viewport_array_or_oes_geometry_shader" ],
+
+# GL_ARB_gpu_shader5 / GL_OES_geometry_shader
+ [ "MAX_GEOMETRY_SHADER_INVOCATIONS", "CONST(MAX_GEOMETRY_SHADER_INVOCATIONS), extra_ARB_gpu_shader5_or_oes_geometry_shader" ],
]},
# Remaining enums are only in OpenGL
@@ -790,21 +821,10 @@ descriptor=[
# GL 3.2
[ "CONTEXT_PROFILE_MASK", "CONTEXT_INT(Const.ProfileMask), extra_version_32" ],
- [ "MAX_GEOMETRY_INPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents), extra_version_32" ],
- [ "MAX_GEOMETRY_OUTPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents), extra_version_32" ],
- [ "MAX_GEOMETRY_TEXTURE_IMAGE_UNITS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits), extra_version_32" ],
- [ "MAX_GEOMETRY_OUTPUT_VERTICES", "CONTEXT_INT(Const.MaxGeometryOutputVertices), extra_version_32" ],
- [ "MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS", "CONTEXT_INT(Const.MaxGeometryTotalOutputComponents), extra_version_32" ],
- [ "MAX_GEOMETRY_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxUniformComponents), extra_version_32" ],
# GL_ARB_robustness
[ "RESET_NOTIFICATION_STRATEGY_ARB", "CONTEXT_ENUM(Const.ResetStrategy), NO_EXTRA" ],
-
-# GL_ARB_uniform_buffer_object
- [ "MAX_GEOMETRY_UNIFORM_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxUniformBlocks), extra_ARB_uniform_buffer_object_and_geometry_shader" ],
- [ "MAX_COMBINED_GEOMETRY_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxCombinedUniformComponents), extra_ARB_uniform_buffer_object_and_geometry_shader" ],
-
# GL_ARB_timer_query
[ "TIMESTAMP", "LOC_CUSTOM, TYPE_INT64, 0, extra_ARB_timer_query" ],
@@ -817,25 +837,31 @@ descriptor=[
# GL_ARB_texture_gather
[ "MAX_PROGRAM_TEXTURE_GATHER_COMPONENTS_ARB", "CONTEXT_INT(Const.MaxProgramTextureGatherComponents), extra_ARB_texture_gather"],
-# GL_ARB_shader_atomic_counters
- [ "MAX_GEOMETRY_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers), extra_ARB_shader_atomic_counters_and_geometry_shader" ],
- [ "MAX_GEOMETRY_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters), extra_ARB_shader_atomic_counters_and_geometry_shader" ],
-
# GL_ARB_shader_image_load_store
[ "MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS", "CONTEXT_INT(Const.MaxCombinedShaderOutputResources), extra_ARB_shader_image_load_store" ],
[ "MAX_IMAGE_SAMPLES", "CONTEXT_INT(Const.MaxImageSamples), extra_ARB_shader_image_load_store" ],
- [ "MAX_GEOMETRY_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxImageUniforms), extra_ARB_shader_image_load_store_and_geometry_shader"],
-
-# GL_ARB_framebuffer_no_attachments
- ["MAX_FRAMEBUFFER_LAYERS", "CONTEXT_INT(Const.MaxFramebufferLayers), extra_ARB_framebuffer_no_attachments"],
# GL_EXT_polygon_offset_clamp
[ "POLYGON_OFFSET_CLAMP_EXT", "CONTEXT_FLOAT(Polygon.OffsetClamp), extra_EXT_polygon_offset_clamp" ],
# GL_ARB_shader_storage_buffer_object
- [ "MAX_GEOMETRY_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object" ],
[ "MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object" ],
[ "MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object" ],
+
+# GL_ARB_query_buffer_object
+ [ "QUERY_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_query_buffer_object" ],
+
+# GL_ATI_meminfo
+ [ "VBO_FREE_MEMORY_ATI", "LOC_CUSTOM, TYPE_INT_4, NO_OFFSET, extra_ATI_meminfo" ],
+ [ "TEXTURE_FREE_MEMORY_ATI", "LOC_CUSTOM, TYPE_INT_4, NO_OFFSET, extra_ATI_meminfo" ],
+ [ "RENDERBUFFER_FREE_MEMORY_ATI", "LOC_CUSTOM, TYPE_INT_4, NO_OFFSET, extra_ATI_meminfo" ],
+
+# GL_NVX_gpu_memory_info
+ [ "GPU_MEMORY_INFO_DEDICATED_VIDMEM_NVX", "LOC_CUSTOM, TYPE_INT, NO_OFFSET, extra_NVX_gpu_memory_info" ],
+ [ "GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX", "LOC_CUSTOM, TYPE_INT, NO_OFFSET, extra_NVX_gpu_memory_info" ],
+ [ "GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX", "LOC_CUSTOM, TYPE_INT, NO_OFFSET, extra_NVX_gpu_memory_info" ],
+ [ "GPU_MEMORY_INFO_EVICTION_COUNT_NVX", "LOC_CUSTOM, TYPE_INT, NO_OFFSET, extra_NVX_gpu_memory_info" ],
+ [ "GPU_MEMORY_INFO_EVICTED_MEMORY_NVX", "LOC_CUSTOM, TYPE_INT, NO_OFFSET, extra_NVX_gpu_memory_info" ],
]},
# Enums restricted to OpenGL Core profile
@@ -847,7 +873,6 @@ descriptor=[
[ "MAX_VIEWPORTS", "CONTEXT_INT(Const.MaxViewports), extra_ARB_viewport_array" ],
[ "VIEWPORT_SUBPIXEL_BITS", "CONTEXT_INT(Const.ViewportSubpixelBits), extra_ARB_viewport_array" ],
[ "VIEWPORT_BOUNDS_RANGE", "CONTEXT_FLOAT2(Const.ViewportBounds), extra_ARB_viewport_array" ],
- [ "LAYER_PROVOKING_VERTEX", "CONTEXT_ENUM(Const.LayerAndVPIndexProvokingVertex), extra_ARB_viewport_array" ],
[ "VIEWPORT_INDEX_PROVOKING_VERTEX", "CONTEXT_ENUM(Const.LayerAndVPIndexProvokingVertex), extra_ARB_viewport_array" ],
# GL_ARB_gpu_shader5
diff --git a/src/mesa/main/hash.c b/src/mesa/main/hash.c
index 315b5d64004..ab1b9e907ae 100644
--- a/src/mesa/main/hash.c
+++ b/src/mesa/main/hash.c
@@ -496,14 +496,12 @@ _mesa_HashFindFreeKeyBlock(struct _mesa_HashTable *table, GLuint numKeys)
GLuint
_mesa_HashNumEntries(const struct _mesa_HashTable *table)
{
- struct hash_entry *entry;
GLuint count = 0;
if (table->deleted_key_data)
count++;
- hash_table_foreach(table->ht, entry)
- count++;
+ count += _mesa_hash_table_num_entries(table->ht);
return count;
}
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 56dce2d1b81..a66b56c62bf 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1253,6 +1253,9 @@ typedef enum {
USAGE_TEXTURE_BUFFER = 0x2,
USAGE_ATOMIC_COUNTER_BUFFER = 0x4,
USAGE_SHADER_STORAGE_BUFFER = 0x8,
+ USAGE_TRANSFORM_FEEDBACK_BUFFER = 0x10,
+ USAGE_PIXEL_PACK_BUFFER = 0x20,
+ USAGE_DISABLE_MINMAX_CACHE = 0x40,
} gl_buffer_usage;
@@ -1280,6 +1283,12 @@ struct gl_buffer_object
GLuint NumMapBufferWriteCalls;
struct gl_buffer_mapping Mappings[MAP_COUNT];
+
+ /** Memoization of min/max index computations for static index buffers */
+ struct hash_table *MinMaxCache;
+ unsigned MinMaxCacheHitIndices;
+ unsigned MinMaxCacheMissIndices;
+ bool MinMaxCacheDirty;
};
@@ -1861,6 +1870,8 @@ typedef enum
PROGRAM_SAMPLER, /**< for shader samplers, compile-time only */
PROGRAM_SYSTEM_VALUE,/**< InstanceId, PrimitiveID, etc. */
PROGRAM_UNDEFINED, /**< Invalid/TBD value */
+ PROGRAM_IMMEDIATE, /**< Immediate value, used by TGSI */
+ PROGRAM_BUFFER, /**< for shader buffers, compile-time only */
PROGRAM_FILE_MAX
} gl_register_file;
@@ -3217,6 +3228,10 @@ struct gl_framebuffer
struct {
GLuint Width, Height, Layers, NumSamples;
GLboolean FixedSampleLocations;
+ /* Derived from NumSamples by the driver so that it can choose a valid
+ * value for the hardware.
+ */
+ GLuint _NumSamples;
} DefaultGeometry;
/** \name Drawing bounds (Intersection of buffer size and scissor box)
@@ -3785,6 +3800,7 @@ struct gl_extensions
GLboolean ARB_occlusion_query2;
GLboolean ARB_pipeline_statistics_query;
GLboolean ARB_point_sprite;
+ GLboolean ARB_query_buffer_object;
GLboolean ARB_sample_shading;
GLboolean ARB_seamless_cube_map;
GLboolean ARB_shader_atomic_counters;
@@ -3880,6 +3896,7 @@ struct gl_extensions
GLboolean AMD_vertex_shader_layer;
GLboolean AMD_vertex_shader_viewport_index;
GLboolean APPLE_object_purgeable;
+ GLboolean ATI_meminfo;
GLboolean ATI_texture_compression_3dc;
GLboolean ATI_texture_mirror_once;
GLboolean ATI_texture_env_combine3;
@@ -3900,6 +3917,7 @@ struct gl_extensions
GLboolean NV_texture_env_combine4;
GLboolean NV_texture_rectangle;
GLboolean NV_vdpau_interop;
+ GLboolean NVX_gpu_memory_info;
GLboolean TDFX_texture_compression_FXT1;
GLboolean OES_EGL_image;
GLboolean OES_draw_texture;
@@ -4434,6 +4452,8 @@ struct gl_context
struct gl_buffer_object *CopyReadBuffer; /**< GL_ARB_copy_buffer */
struct gl_buffer_object *CopyWriteBuffer; /**< GL_ARB_copy_buffer */
+ struct gl_buffer_object *QueryBuffer; /**< GL_ARB_query_buffer_object */
+
/**
* Current GL_ARB_uniform_buffer_object binding referenced by
* GL_UNIFORM_BUFFER target for glBufferData, glMapBuffer, etc.
@@ -4576,6 +4596,18 @@ struct gl_context
GLboolean ShareGroupReset;
};
+/**
+ * Information about memory usage. All sizes are in kilobytes.
+ */
+struct gl_memory_info
+{
+ unsigned total_device_memory; /**< size of device memory, e.g. VRAM */
+ unsigned avail_device_memory; /**< free device memory at the moment */
+ unsigned total_staging_memory; /**< size of staging memory, e.g. GART */
+ unsigned avail_staging_memory; /**< free staging memory at the moment */
+ unsigned device_memory_evicted; /**< size of memory evicted (monotonic counter) */
+ unsigned nr_device_memory_evictions; /**< # of evictions (monotonic counter) */
+};
#ifdef DEBUG
extern int MESA_VERBOSE;
diff --git a/src/mesa/main/objectlabel.c b/src/mesa/main/objectlabel.c
index 41f370ce485..b622d6a2979 100644
--- a/src/mesa/main/objectlabel.c
+++ b/src/mesa/main/objectlabel.c
@@ -288,16 +288,18 @@ void GLAPIENTRY
_mesa_ObjectPtrLabel(const void *ptr, GLsizei length, const GLchar *label)
{
GET_CURRENT_CONTEXT(ctx);
- struct gl_sync_object *const syncObj = (struct gl_sync_object *) ptr;
+ struct gl_sync_object *syncObj;
const char *callerstr;
char **labelPtr;
+ syncObj = _mesa_get_and_ref_sync(ctx, (void*)ptr, true);
+
if (_mesa_is_desktop_gl(ctx))
callerstr = "glObjectPtrLabel";
else
callerstr = "glObjectPtrLabelKHR";
- if (!_mesa_validate_sync(ctx, syncObj)) {
+ if (!syncObj) {
_mesa_error(ctx, GL_INVALID_VALUE, "%s (not a valid sync object)",
callerstr);
return;
@@ -306,6 +308,7 @@ _mesa_ObjectPtrLabel(const void *ptr, GLsizei length, const GLchar *label)
labelPtr = &syncObj->Label;
set_label(ctx, labelPtr, label, length, callerstr);
+ _mesa_unref_sync_object(ctx, syncObj, 1);
}
void GLAPIENTRY
@@ -313,7 +316,7 @@ _mesa_GetObjectPtrLabel(const void *ptr, GLsizei bufSize, GLsizei *length,
GLchar *label)
{
GET_CURRENT_CONTEXT(ctx);
- struct gl_sync_object *const syncObj = (struct gl_sync_object *) ptr;
+ struct gl_sync_object *syncObj;
const char *callerstr;
char **labelPtr;
@@ -328,7 +331,8 @@ _mesa_GetObjectPtrLabel(const void *ptr, GLsizei bufSize, GLsizei *length,
return;
}
- if (!_mesa_validate_sync(ctx, syncObj)) {
+ syncObj = _mesa_get_and_ref_sync(ctx, (void*)ptr, true);
+ if (!syncObj) {
_mesa_error(ctx, GL_INVALID_VALUE, "%s (not a valid sync object)",
callerstr);
return;
@@ -337,4 +341,5 @@ _mesa_GetObjectPtrLabel(const void *ptr, GLsizei bufSize, GLsizei *length,
labelPtr = &syncObj->Label;
copy_label(*labelPtr, label, length, bufSize);
+ _mesa_unref_sync_object(ctx, syncObj, 1);
}
diff --git a/src/mesa/main/queryobj.c b/src/mesa/main/queryobj.c
index 98366857f62..b86692a5f7e 100644
--- a/src/mesa/main/queryobj.c
+++ b/src/mesa/main/queryobj.c
@@ -23,6 +23,7 @@
*/
+#include "bufferobj.h"
#include "glheader.h"
#include "context.h"
#include "enums.h"
@@ -732,14 +733,16 @@ _mesa_GetQueryiv(GLenum target, GLenum pname, GLint *params)
_mesa_GetQueryIndexediv(target, 0, pname, params);
}
-void GLAPIENTRY
-_mesa_GetQueryObjectiv(GLuint id, GLenum pname, GLint *params)
+static void
+get_query_object(struct gl_context *ctx, const char *func,
+ GLuint id, GLenum pname, GLenum ptype,
+ struct gl_buffer_object *buf, intptr_t offset)
{
struct gl_query_object *q = NULL;
- GET_CURRENT_CONTEXT(ctx);
+ uint64_t value;
if (MESA_VERBOSE & VERBOSE_API)
- _mesa_debug(ctx, "glGetQueryObjectiv(%u, %s)\n", id,
+ _mesa_debug(ctx, "%s(%u, %s)\n", func, id,
_mesa_enum_to_string(pname));
if (id)
@@ -747,96 +750,114 @@ _mesa_GetQueryObjectiv(GLuint id, GLenum pname, GLint *params)
if (!q || q->Active || !q->EverBound) {
_mesa_error(ctx, GL_INVALID_OPERATION,
- "glGetQueryObjectivARB(id=%d is invalid or active)", id);
+ "%s(id=%d is invalid or active)", func, id);
return;
}
- switch (pname) {
- case GL_QUERY_RESULT_ARB:
- if (!q->Ready)
- ctx->Driver.WaitQuery(ctx, q);
- /* if result is too large for returned type, clamp to max value */
- if (q->Target == GL_ANY_SAMPLES_PASSED
- || q->Target == GL_ANY_SAMPLES_PASSED_CONSERVATIVE) {
- if (q->Result)
- *params = GL_TRUE;
- else
- *params = GL_FALSE;
- } else {
- if (q->Result > 0x7fffffff) {
- *params = 0x7fffffff;
- }
- else {
- *params = (GLint)q->Result;
- }
- }
- break;
- case GL_QUERY_RESULT_AVAILABLE_ARB:
- if (!q->Ready)
- ctx->Driver.CheckQuery( ctx, q );
- *params = q->Ready;
- break;
+ if (buf && buf != ctx->Shared->NullBufferObj) {
+ bool is_64bit = ptype == GL_INT64_ARB ||
+ ptype == GL_UNSIGNED_INT64_ARB;
+ if (!ctx->Extensions.ARB_query_buffer_object) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s(not supported)", func);
+ return;
+ }
+ if (buf->Size < offset + 4 * (is_64bit ? 2 : 1)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s(out of bounds)", func);
+ return;
+ }
+
+ switch (pname) {
+ case GL_QUERY_RESULT:
+ case GL_QUERY_RESULT_NO_WAIT:
+ case GL_QUERY_RESULT_AVAILABLE:
case GL_QUERY_TARGET:
- *params = q->Target;
- break;
- default:
- _mesa_error(ctx, GL_INVALID_ENUM, "glGetQueryObjectivARB(pname)");
+ ctx->Driver.StoreQueryResult(ctx, q, buf, offset, pname, ptype);
return;
+ }
+
+ /* fall through to get error below */
}
-}
+ switch (pname) {
+ case GL_QUERY_RESULT:
+ if (!q->Ready)
+ ctx->Driver.WaitQuery(ctx, q);
+ value = q->Result;
+ break;
+ case GL_QUERY_RESULT_NO_WAIT:
+ if (!ctx->Extensions.ARB_query_buffer_object)
+ goto invalid_enum;
+ ctx->Driver.CheckQuery(ctx, q);
+ if (!q->Ready)
+ return;
+ value = q->Result;
+ break;
+ case GL_QUERY_RESULT_AVAILABLE:
+ if (!q->Ready)
+ ctx->Driver.CheckQuery(ctx, q);
+ value = q->Ready;
+ break;
+ case GL_QUERY_TARGET:
+ value = q->Target;
+ break;
+ default:
+invalid_enum:
+ _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=%s)",
+ func, _mesa_enum_to_string(pname));
+ return;
+ }
+
+ /* TODO: Have the driver be required to handle this fixup. */
+ if (q->Target == GL_ANY_SAMPLES_PASSED ||
+ q->Target == GL_ANY_SAMPLES_PASSED_CONSERVATIVE)
+ value = !!value;
+
+ switch (ptype) {
+ case GL_INT: {
+ GLint *param = (GLint *)offset;
+ if (value > 0x7fffffff)
+ *param = 0x7fffffff;
+ else
+ *param = value;
+ break;
+ }
+ case GL_UNSIGNED_INT: {
+ GLuint *param = (GLuint *)offset;
+ if (value > 0xffffffff)
+ *param = 0xffffffff;
+ else
+ *param = value;
+ break;
+ }
+ case GL_INT64_ARB:
+ case GL_UNSIGNED_INT64_ARB: {
+ GLuint64EXT *param = (GLuint64EXT *)offset;
+ *param = value;
+ break;
+ }
+ default:
+ unreachable("unexpected ptype");
+ }
+}
void GLAPIENTRY
-_mesa_GetQueryObjectuiv(GLuint id, GLenum pname, GLuint *params)
+_mesa_GetQueryObjectiv(GLuint id, GLenum pname, GLint *params)
{
- struct gl_query_object *q = NULL;
GET_CURRENT_CONTEXT(ctx);
- if (MESA_VERBOSE & VERBOSE_API)
- _mesa_debug(ctx, "glGetQueryObjectuiv(%u, %s)\n", id,
- _mesa_enum_to_string(pname));
+ get_query_object(ctx, "glGetQueryObjectiv",
+ id, pname, GL_INT, ctx->QueryBuffer, (intptr_t)params);
+}
- if (id)
- q = _mesa_lookup_query_object(ctx, id);
- if (!q || q->Active || !q->EverBound) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glGetQueryObjectuivARB(id=%d is invalid or active)", id);
- return;
- }
+void GLAPIENTRY
+_mesa_GetQueryObjectuiv(GLuint id, GLenum pname, GLuint *params)
+{
+ GET_CURRENT_CONTEXT(ctx);
- switch (pname) {
- case GL_QUERY_RESULT_ARB:
- if (!q->Ready)
- ctx->Driver.WaitQuery(ctx, q);
- /* if result is too large for returned type, clamp to max value */
- if (q->Target == GL_ANY_SAMPLES_PASSED
- || q->Target == GL_ANY_SAMPLES_PASSED_CONSERVATIVE) {
- if (q->Result)
- *params = GL_TRUE;
- else
- *params = GL_FALSE;
- } else {
- if (q->Result > 0xffffffff) {
- *params = 0xffffffff;
- }
- else {
- *params = (GLuint)q->Result;
- }
- }
- break;
- case GL_QUERY_RESULT_AVAILABLE_ARB:
- if (!q->Ready)
- ctx->Driver.CheckQuery( ctx, q );
- *params = q->Ready;
- break;
- case GL_QUERY_TARGET:
- *params = q->Target;
- break;
- default:
- _mesa_error(ctx, GL_INVALID_ENUM, "glGetQueryObjectuivARB(pname)");
- return;
- }
+ get_query_object(ctx, "glGetQueryObjectuiv",
+ id, pname, GL_UNSIGNED_INT,
+ ctx->QueryBuffer, (intptr_t)params);
}
@@ -846,40 +867,11 @@ _mesa_GetQueryObjectuiv(GLuint id, GLenum pname, GLuint *params)
void GLAPIENTRY
_mesa_GetQueryObjecti64v(GLuint id, GLenum pname, GLint64EXT *params)
{
- struct gl_query_object *q = NULL;
GET_CURRENT_CONTEXT(ctx);
- if (MESA_VERBOSE & VERBOSE_API)
- _mesa_debug(ctx, "glGetQueryObjecti64v(%u, %s)\n", id,
- _mesa_enum_to_string(pname));
-
- if (id)
- q = _mesa_lookup_query_object(ctx, id);
-
- if (!q || q->Active || !q->EverBound) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glGetQueryObjectui64vARB(id=%d is invalid or active)", id);
- return;
- }
-
- switch (pname) {
- case GL_QUERY_RESULT_ARB:
- if (!q->Ready)
- ctx->Driver.WaitQuery(ctx, q);
- *params = q->Result;
- break;
- case GL_QUERY_RESULT_AVAILABLE_ARB:
- if (!q->Ready)
- ctx->Driver.CheckQuery( ctx, q );
- *params = q->Ready;
- break;
- case GL_QUERY_TARGET:
- *params = q->Target;
- break;
- default:
- _mesa_error(ctx, GL_INVALID_ENUM, "glGetQueryObjecti64vARB(pname)");
- return;
- }
+ get_query_object(ctx, "glGetQueryObjecti64v",
+ id, pname, GL_INT64_ARB,
+ ctx->QueryBuffer, (intptr_t)params);
}
@@ -889,40 +881,11 @@ _mesa_GetQueryObjecti64v(GLuint id, GLenum pname, GLint64EXT *params)
void GLAPIENTRY
_mesa_GetQueryObjectui64v(GLuint id, GLenum pname, GLuint64EXT *params)
{
- struct gl_query_object *q = NULL;
GET_CURRENT_CONTEXT(ctx);
- if (MESA_VERBOSE & VERBOSE_API)
- _mesa_debug(ctx, "glGetQueryObjectui64v(%u, %s)\n", id,
- _mesa_enum_to_string(pname));
-
- if (id)
- q = _mesa_lookup_query_object(ctx, id);
-
- if (!q || q->Active || !q->EverBound) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glGetQueryObjectuui64vARB(id=%d is invalid or active)", id);
- return;
- }
-
- switch (pname) {
- case GL_QUERY_RESULT_ARB:
- if (!q->Ready)
- ctx->Driver.WaitQuery(ctx, q);
- *params = q->Result;
- break;
- case GL_QUERY_RESULT_AVAILABLE_ARB:
- if (!q->Ready)
- ctx->Driver.CheckQuery( ctx, q );
- *params = q->Ready;
- break;
- case GL_QUERY_TARGET:
- *params = q->Target;
- break;
- default:
- _mesa_error(ctx, GL_INVALID_ENUM, "glGetQueryObjectui64vARB(pname)");
- return;
- }
+ get_query_object(ctx, "glGetQueryObjectui64v",
+ id, pname, GL_UNSIGNED_INT64_ARB,
+ ctx->QueryBuffer, (intptr_t)params);
}
/**
@@ -932,8 +895,15 @@ void GLAPIENTRY
_mesa_GetQueryBufferObjectiv(GLuint id, GLuint buffer, GLenum pname,
GLintptr offset)
{
+ struct gl_buffer_object *buf;
GET_CURRENT_CONTEXT(ctx);
- _mesa_error(ctx, GL_INVALID_OPERATION, "glGetQueryBufferObjectiv");
+
+ buf = _mesa_lookup_bufferobj_err(ctx, buffer, "glGetQueryBufferObjectiv");
+ if (!buf)
+ return;
+
+ get_query_object(ctx, "glGetQueryBufferObjectiv",
+ id, pname, GL_INT, buf, offset);
}
@@ -941,8 +911,15 @@ void GLAPIENTRY
_mesa_GetQueryBufferObjectuiv(GLuint id, GLuint buffer, GLenum pname,
GLintptr offset)
{
+ struct gl_buffer_object *buf;
GET_CURRENT_CONTEXT(ctx);
- _mesa_error(ctx, GL_INVALID_OPERATION, "glGetQueryBufferObjectuiv");
+
+ buf = _mesa_lookup_bufferobj_err(ctx, buffer, "glGetQueryBufferObjectuiv");
+ if (!buf)
+ return;
+
+ get_query_object(ctx, "glGetQueryBufferObjectuiv",
+ id, pname, GL_UNSIGNED_INT, buf, offset);
}
@@ -950,8 +927,15 @@ void GLAPIENTRY
_mesa_GetQueryBufferObjecti64v(GLuint id, GLuint buffer, GLenum pname,
GLintptr offset)
{
+ struct gl_buffer_object *buf;
GET_CURRENT_CONTEXT(ctx);
- _mesa_error(ctx, GL_INVALID_OPERATION, "glGetQueryBufferObjecti64v");
+
+ buf = _mesa_lookup_bufferobj_err(ctx, buffer, "glGetQueryBufferObjecti64v");
+ if (!buf)
+ return;
+
+ get_query_object(ctx, "glGetQueryBufferObjecti64v",
+ id, pname, GL_INT64_ARB, buf, offset);
}
@@ -959,8 +943,15 @@ void GLAPIENTRY
_mesa_GetQueryBufferObjectui64v(GLuint id, GLuint buffer, GLenum pname,
GLintptr offset)
{
+ struct gl_buffer_object *buf;
GET_CURRENT_CONTEXT(ctx);
- _mesa_error(ctx, GL_INVALID_OPERATION, "glGetQueryBufferObjectui64v");
+
+ buf = _mesa_lookup_bufferobj_err(ctx, buffer, "glGetQueryBufferObjectui64v");
+ if (!buf)
+ return;
+
+ get_query_object(ctx, "glGetQueryBufferObjectui64v",
+ id, pname, GL_UNSIGNED_INT64_ARB, buf, offset);
}
diff --git a/src/mesa/main/shared.c b/src/mesa/main/shared.c
index c37b31d1753..b9f7bb65fb6 100644
--- a/src/mesa/main/shared.c
+++ b/src/mesa/main/shared.c
@@ -338,7 +338,7 @@ free_shared_state(struct gl_context *ctx, struct gl_shared_state *shared)
struct set_entry *entry;
set_foreach(shared->SyncObjects, entry) {
- _mesa_unref_sync_object(ctx, (struct gl_sync_object *) entry->key);
+ _mesa_unref_sync_object(ctx, (struct gl_sync_object *) entry->key, 1);
}
}
_mesa_set_destroy(shared->SyncObjects, NULL);
diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c
index 4043c4f2057..57f13411fdf 100644
--- a/src/mesa/main/state.c
+++ b/src/mesa/main/state.c
@@ -352,7 +352,7 @@ update_multisample(struct gl_context *ctx)
ctx->Multisample._Enabled = GL_FALSE;
if (ctx->Multisample.Enabled &&
ctx->DrawBuffer &&
- ctx->DrawBuffer->Visual.sampleBuffers)
+ _mesa_geometric_samples(ctx->DrawBuffer) > 0)
ctx->Multisample._Enabled = GL_TRUE;
}
diff --git a/src/mesa/main/syncobj.c b/src/mesa/main/syncobj.c
index c1b2d3bed54..be758dd1241 100644
--- a/src/mesa/main/syncobj.c
+++ b/src/mesa/main/syncobj.c
@@ -167,34 +167,42 @@ _mesa_free_sync_data(struct gl_context *ctx)
* - not in sync objects hash table
* - type is GL_SYNC_FENCE
* - not marked as deleted
+ *
+ * Returns the internal gl_sync_object pointer if the sync object is valid
+ * or NULL if it isn't.
+ *
+ * If "incRefCount" is true, the reference count is incremented, which is
+ * normally what you want; otherwise, a glDeleteSync from another thread
+ * could delete the sync object while you are still working on it.
*/
-bool
-_mesa_validate_sync(struct gl_context *ctx,
- const struct gl_sync_object *syncObj)
+struct gl_sync_object *
+_mesa_get_and_ref_sync(struct gl_context *ctx, GLsync sync, bool incRefCount)
{
- return (syncObj != NULL)
+ struct gl_sync_object *syncObj = (struct gl_sync_object *) sync;
+ mtx_lock(&ctx->Shared->Mutex);
+ if (syncObj != NULL
&& _mesa_set_search(ctx->Shared->SyncObjects, syncObj) != NULL
&& (syncObj->Type == GL_SYNC_FENCE)
- && !syncObj->DeletePending;
-}
-
-
-void
-_mesa_ref_sync_object(struct gl_context *ctx, struct gl_sync_object *syncObj)
-{
- mtx_lock(&ctx->Shared->Mutex);
- syncObj->RefCount++;
+ && !syncObj->DeletePending) {
+ if (incRefCount) {
+ syncObj->RefCount++;
+ }
+ } else {
+ syncObj = NULL;
+ }
mtx_unlock(&ctx->Shared->Mutex);
+ return syncObj;
}
void
-_mesa_unref_sync_object(struct gl_context *ctx, struct gl_sync_object *syncObj)
+_mesa_unref_sync_object(struct gl_context *ctx, struct gl_sync_object *syncObj,
+ int amount)
{
struct set_entry *entry;
mtx_lock(&ctx->Shared->Mutex);
- syncObj->RefCount--;
+ syncObj->RefCount -= amount;
if (syncObj->RefCount == 0) {
entry = _mesa_set_search(ctx->Shared->SyncObjects, syncObj);
assert (entry != NULL);
@@ -212,10 +220,9 @@ GLboolean GLAPIENTRY
_mesa_IsSync(GLsync sync)
{
GET_CURRENT_CONTEXT(ctx);
- struct gl_sync_object *const syncObj = (struct gl_sync_object *) sync;
ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, GL_FALSE);
- return _mesa_validate_sync(ctx, syncObj) ? GL_TRUE : GL_FALSE;
+ return _mesa_get_and_ref_sync(ctx, sync, false) ? GL_TRUE : GL_FALSE;
}
@@ -223,7 +230,7 @@ void GLAPIENTRY
_mesa_DeleteSync(GLsync sync)
{
GET_CURRENT_CONTEXT(ctx);
- struct gl_sync_object *const syncObj = (struct gl_sync_object *) sync;
+ struct gl_sync_object *syncObj;
/* From the GL_ARB_sync spec:
*
@@ -235,16 +242,19 @@ _mesa_DeleteSync(GLsync sync)
return;
}
- if (!_mesa_validate_sync(ctx, syncObj)) {
+ syncObj = _mesa_get_and_ref_sync(ctx, sync, true);
+ if (!syncObj) {
_mesa_error(ctx, GL_INVALID_VALUE, "glDeleteSync (not a valid sync object)");
return;
}
/* If there are no client-waits or server-waits pending on this sync, delete
- * the underlying object.
+ * the underlying object. Note that we double-unref the object, as
+ * _mesa_get_and_ref_sync above took an extra refcount to make sure the pointer
+ * is valid for us to manipulate.
*/
syncObj->DeletePending = GL_TRUE;
- _mesa_unref_sync_object(ctx, syncObj);
+ _mesa_unref_sync_object(ctx, syncObj, 2);
}
@@ -299,21 +309,20 @@ GLenum GLAPIENTRY
_mesa_ClientWaitSync(GLsync sync, GLbitfield flags, GLuint64 timeout)
{
GET_CURRENT_CONTEXT(ctx);
- struct gl_sync_object *const syncObj = (struct gl_sync_object *) sync;
+ struct gl_sync_object *syncObj;
GLenum ret;
ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, GL_WAIT_FAILED);
- if (!_mesa_validate_sync(ctx, syncObj)) {
- _mesa_error(ctx, GL_INVALID_VALUE, "glClientWaitSync (not a valid sync object)");
- return GL_WAIT_FAILED;
- }
-
if ((flags & ~GL_SYNC_FLUSH_COMMANDS_BIT) != 0) {
_mesa_error(ctx, GL_INVALID_VALUE, "glClientWaitSync(flags=0x%x)", flags);
return GL_WAIT_FAILED;
}
- _mesa_ref_sync_object(ctx, syncObj);
+ syncObj = _mesa_get_and_ref_sync(ctx, sync, true);
+ if (!syncObj) {
+ _mesa_error(ctx, GL_INVALID_VALUE, "glClientWaitSync (not a valid sync object)");
+ return GL_WAIT_FAILED;
+ }
/* From the GL_ARB_sync spec:
*
@@ -335,7 +344,7 @@ _mesa_ClientWaitSync(GLsync sync, GLbitfield flags, GLuint64 timeout)
}
}
- _mesa_unref_sync_object(ctx, syncObj);
+ _mesa_unref_sync_object(ctx, syncObj, 1);
return ret;
}
@@ -344,12 +353,7 @@ void GLAPIENTRY
_mesa_WaitSync(GLsync sync, GLbitfield flags, GLuint64 timeout)
{
GET_CURRENT_CONTEXT(ctx);
- struct gl_sync_object *const syncObj = (struct gl_sync_object *) sync;
-
- if (!_mesa_validate_sync(ctx, syncObj)) {
- _mesa_error(ctx, GL_INVALID_VALUE, "glWaitSync (not a valid sync object)");
- return;
- }
+ struct gl_sync_object *syncObj;
if (flags != 0) {
_mesa_error(ctx, GL_INVALID_VALUE, "glWaitSync(flags=0x%x)", flags);
@@ -362,7 +366,14 @@ _mesa_WaitSync(GLsync sync, GLbitfield flags, GLuint64 timeout)
return;
}
+ syncObj = _mesa_get_and_ref_sync(ctx, sync, true);
+ if (!syncObj) {
+ _mesa_error(ctx, GL_INVALID_VALUE, "glWaitSync (not a valid sync object)");
+ return;
+ }
+
ctx->Driver.ServerWaitSync(ctx, syncObj, flags, timeout);
+ _mesa_unref_sync_object(ctx, syncObj, 1);
}
@@ -371,11 +382,12 @@ _mesa_GetSynciv(GLsync sync, GLenum pname, GLsizei bufSize, GLsizei *length,
GLint *values)
{
GET_CURRENT_CONTEXT(ctx);
- struct gl_sync_object *const syncObj = (struct gl_sync_object *) sync;
+ struct gl_sync_object *syncObj;
GLsizei size = 0;
GLint v[1];
- if (!_mesa_validate_sync(ctx, syncObj)) {
+ syncObj = _mesa_get_and_ref_sync(ctx, sync, true);
+ if (!syncObj) {
_mesa_error(ctx, GL_INVALID_VALUE, "glGetSynciv (not a valid sync object)");
return;
}
@@ -409,6 +421,7 @@ _mesa_GetSynciv(GLsync sync, GLenum pname, GLsizei bufSize, GLsizei *length,
default:
_mesa_error(ctx, GL_INVALID_ENUM, "glGetSynciv(pname=0x%x)\n", pname);
+ _mesa_unref_sync_object(ctx, syncObj, 1);
return;
}
@@ -421,4 +434,6 @@ _mesa_GetSynciv(GLsync sync, GLenum pname, GLsizei bufSize, GLsizei *length,
if (length != NULL) {
*length = size;
}
+
+ _mesa_unref_sync_object(ctx, syncObj, 1);
}
diff --git a/src/mesa/main/syncobj.h b/src/mesa/main/syncobj.h
index 5d510e873a9..ea4a71222c0 100644
--- a/src/mesa/main/syncobj.h
+++ b/src/mesa/main/syncobj.h
@@ -47,15 +47,12 @@ _mesa_init_sync(struct gl_context *);
extern void
_mesa_free_sync_data(struct gl_context *);
-extern void
-_mesa_ref_sync_object(struct gl_context *ctx, struct gl_sync_object *syncObj);
+struct gl_sync_object *
+_mesa_get_and_ref_sync(struct gl_context *ctx, GLsync sync, bool incRefCount);
extern void
-_mesa_unref_sync_object(struct gl_context *ctx, struct gl_sync_object *syncObj);
-
-extern bool
-_mesa_validate_sync(struct gl_context *ctx,
- const struct gl_sync_object *syncObj);
+_mesa_unref_sync_object(struct gl_context *ctx, struct gl_sync_object *syncObj,
+ int amount);
extern GLboolean GLAPIENTRY
_mesa_IsSync(GLsync sync);
diff --git a/src/mesa/main/transformfeedback.h b/src/mesa/main/transformfeedback.h
index bb9729cdbde..eb274ad6540 100644
--- a/src/mesa/main/transformfeedback.h
+++ b/src/mesa/main/transformfeedback.h
@@ -145,6 +145,9 @@ _mesa_set_transform_feedback_binding(struct gl_context *ctx,
tfObj->BufferNames[index] = bufObj->Name;
tfObj->Offset[index] = offset;
tfObj->RequestedSize[index] = size;
+
+ if (bufObj != ctx->Shared->NullBufferObj)
+ bufObj->UsageHistory |= USAGE_TRANSFORM_FEEDBACK_BUFFER;
}
/*** GL_ARB_direct_state_access ***/
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 3c51d18ed62..0f17ed136da 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2293,6 +2293,10 @@ add_uniform_to_shader::visit_field(const glsl_type *type, const char *name,
(void) row_major;
+ /* atomics don't get real storage */
+ if (type->contains_atomic())
+ return;
+
if (type->is_vector() || type->is_scalar()) {
size = type->vector_elements;
if (type->is_double())
diff --git a/src/mesa/program/prog_parameter.c b/src/mesa/program/prog_parameter.c
index e98946b9387..34183d4d95f 100644
--- a/src/mesa/program/prog_parameter.c
+++ b/src/mesa/program/prog_parameter.c
@@ -454,73 +454,3 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
*posOut = -1;
return GL_FALSE;
}
-
-
-struct gl_program_parameter_list *
-_mesa_clone_parameter_list(const struct gl_program_parameter_list *list)
-{
- struct gl_program_parameter_list *clone;
- GLuint i;
-
- clone = _mesa_new_parameter_list();
- if (!clone)
- return NULL;
-
- /** Not too efficient, but correct */
- for (i = 0; i < list->NumParameters; i++) {
- struct gl_program_parameter *p = list->Parameters + i;
- struct gl_program_parameter *pCopy;
- GLuint size = MIN2(p->Size, 4);
- GLint j = _mesa_add_parameter(clone, p->Type, p->Name, size, p->DataType,
- list->ParameterValues[i], NULL);
- assert(j >= 0);
- pCopy = clone->Parameters + j;
- /* copy state indexes */
- if (p->Type == PROGRAM_STATE_VAR) {
- GLint k;
- for (k = 0; k < STATE_LENGTH; k++) {
- pCopy->StateIndexes[k] = p->StateIndexes[k];
- }
- }
- else {
- clone->Parameters[j].Size = p->Size;
- }
-
- }
-
- clone->StateFlags = list->StateFlags;
-
- return clone;
-}
-
-
-/**
- * Return a new parameter list which is listA + listB.
- */
-struct gl_program_parameter_list *
-_mesa_combine_parameter_lists(const struct gl_program_parameter_list *listA,
- const struct gl_program_parameter_list *listB)
-{
- struct gl_program_parameter_list *list;
-
- if (listA) {
- list = _mesa_clone_parameter_list(listA);
- if (list && listB) {
- GLuint i;
- for (i = 0; i < listB->NumParameters; i++) {
- struct gl_program_parameter *param = listB->Parameters + i;
- _mesa_add_parameter(list, param->Type, param->Name, param->Size,
- param->DataType,
- listB->ParameterValues[i],
- param->StateIndexes);
- }
- }
- }
- else if (listB) {
- list = _mesa_clone_parameter_list(listB);
- }
- else {
- list = NULL;
- }
- return list;
-}
diff --git a/src/mesa/program/prog_parameter.h b/src/mesa/program/prog_parameter.h
index 44700b710d7..c04d7a2e634 100644
--- a/src/mesa/program/prog_parameter.h
+++ b/src/mesa/program/prog_parameter.h
@@ -99,13 +99,6 @@ _mesa_new_parameter_list_sized(unsigned size);
extern void
_mesa_free_parameter_list(struct gl_program_parameter_list *paramList);
-extern struct gl_program_parameter_list *
-_mesa_clone_parameter_list(const struct gl_program_parameter_list *list);
-
-extern struct gl_program_parameter_list *
-_mesa_combine_parameter_lists(const struct gl_program_parameter_list *a,
- const struct gl_program_parameter_list *b);
-
static inline GLuint
_mesa_num_parameters(const struct gl_program_parameter_list *list)
{
diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c
index 12490d0c380..eed241271df 100644
--- a/src/mesa/program/prog_statevars.c
+++ b/src/mesa/program/prog_statevars.c
@@ -40,6 +40,7 @@
#include "prog_statevars.h"
#include "prog_parameter.h"
#include "main/samplerobj.h"
+#include "framebuffer.h"
#define ONE_DIV_SQRT_LN2 (1.201122408786449815)
@@ -352,7 +353,7 @@ _mesa_fetch_state(struct gl_context *ctx, const gl_state_index state[],
}
return;
case STATE_NUM_SAMPLES:
- ((int *)value)[0] = ctx->DrawBuffer->Visual.samples;
+ ((int *)value)[0] = _mesa_geometric_samples(ctx->DrawBuffer);
return;
case STATE_DEPTH_RANGE:
value[0] = ctx->ViewportArray[0].Near; /* near */
diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index 0e78e6ab25d..27867c48d52 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -31,6 +31,7 @@
#include "main/glheader.h"
#include "main/context.h"
+#include "main/framebuffer.h"
#include "main/hash.h"
#include "main/macros.h"
#include "program.h"
@@ -534,14 +535,14 @@ _mesa_get_min_invocations_per_fragment(struct gl_context *ctx,
* forces per-sample shading"
*/
if (prog->IsSample && !ignore_sample_qualifier)
- return MAX2(ctx->DrawBuffer->Visual.samples, 1);
+ return MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1);
if (prog->Base.SystemValuesRead & (SYSTEM_BIT_SAMPLE_ID |
SYSTEM_BIT_SAMPLE_POS))
- return MAX2(ctx->DrawBuffer->Visual.samples, 1);
+ return MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1);
else if (ctx->Multisample.SampleShading)
return MAX2(ceil(ctx->Multisample.MinSampleShadingValue *
- ctx->DrawBuffer->Visual.samples), 1);
+ _mesa_geometric_samples(ctx->DrawBuffer)), 1);
else
return 1;
}
diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c
index 03097225bb2..4b89ade1b15 100644
--- a/src/mesa/state_tracker/st_atom.c
+++ b/src/mesa/state_tracker/st_atom.c
@@ -75,6 +75,16 @@ static const struct st_tracked_state *atoms[] =
&st_bind_tes_ubos,
&st_bind_fs_ubos,
&st_bind_gs_ubos,
+ &st_bind_vs_atomics,
+ &st_bind_tcs_atomics,
+ &st_bind_tes_atomics,
+ &st_bind_fs_atomics,
+ &st_bind_gs_atomics,
+ &st_bind_vs_ssbos,
+ &st_bind_tcs_ssbos,
+ &st_bind_tes_ssbos,
+ &st_bind_fs_ssbos,
+ &st_bind_gs_ssbos,
&st_update_pixel_transfer,
&st_update_tess,
diff --git a/src/mesa/state_tracker/st_atom.h b/src/mesa/state_tracker/st_atom.h
index a24842baa4f..3a9153c80cb 100644
--- a/src/mesa/state_tracker/st_atom.h
+++ b/src/mesa/state_tracker/st_atom.h
@@ -78,6 +78,16 @@ extern const struct st_tracked_state st_bind_vs_ubos;
extern const struct st_tracked_state st_bind_gs_ubos;
extern const struct st_tracked_state st_bind_tcs_ubos;
extern const struct st_tracked_state st_bind_tes_ubos;
+extern const struct st_tracked_state st_bind_fs_atomics;
+extern const struct st_tracked_state st_bind_vs_atomics;
+extern const struct st_tracked_state st_bind_gs_atomics;
+extern const struct st_tracked_state st_bind_tcs_atomics;
+extern const struct st_tracked_state st_bind_tes_atomics;
+extern const struct st_tracked_state st_bind_fs_ssbos;
+extern const struct st_tracked_state st_bind_vs_ssbos;
+extern const struct st_tracked_state st_bind_gs_ssbos;
+extern const struct st_tracked_state st_bind_tcs_ssbos;
+extern const struct st_tracked_state st_bind_tes_ssbos;
extern const struct st_tracked_state st_update_pixel_transfer;
extern const struct st_tracked_state st_update_tess;
diff --git a/src/mesa/state_tracker/st_atom_atomicbuf.c b/src/mesa/state_tracker/st_atom_atomicbuf.c
new file mode 100644
index 00000000000..1c30d1fb701
--- /dev/null
+++ b/src/mesa/state_tracker/st_atom_atomicbuf.c
@@ -0,0 +1,158 @@
+/**************************************************************************
+ *
+ * Copyright 2014 Ilia Mirkin. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "main/imports.h"
+#include "program/prog_parameter.h"
+#include "program/prog_print.h"
+#include "compiler/glsl/ir_uniform.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_surface.h"
+
+#include "st_debug.h"
+#include "st_cb_bufferobjects.h"
+#include "st_context.h"
+#include "st_atom.h"
+#include "st_program.h"
+
+/**
+ * Bind every active atomic counter buffer of \p prog to the shader-buffer
+ * slots of the given pipe shader stage.
+ *
+ * Does nothing when \p prog is NULL or the driver provides no
+ * set_shader_buffers hook.  Each buffer is bound individually at the slot
+ * given by its GL binding index, covering the range from the binding offset
+ * to the end of the underlying resource.
+ *
+ * A binding point without buffer storage is passed to the driver as a
+ * zero-initialized descriptor (NULL buffer, zero offset and size) rather
+ * than dereferencing the missing resource.
+ */
+static void
+st_bind_atomics(struct st_context *st,
+                struct gl_shader_program *prog,
+                unsigned shader_type)
+{
+   unsigned i;
+
+   if (!prog || !st->pipe->set_shader_buffers)
+      return;
+
+   for (i = 0; i < prog->NumAtomicBuffers; i++) {
+      struct gl_active_atomic_buffer *atomic = &prog->AtomicBuffers[i];
+      struct gl_atomic_buffer_binding *binding =
+         &st->ctx->AtomicBufferBindings[atomic->Binding];
+      struct st_buffer_object *st_obj =
+         st_buffer_object(binding->BufferObject);
+      struct pipe_shader_buffer sb = { 0 };
+
+      /* Only fill in the descriptor when there is real storage behind the
+       * binding; otherwise leave it zeroed so the slot ends up unbound.
+       */
+      if (st_obj && st_obj->buffer) {
+         sb.buffer = st_obj->buffer;
+         sb.buffer_offset = binding->Offset;
+         sb.buffer_size = st_obj->buffer->width0 - binding->Offset;
+      }
+
+      st->pipe->set_shader_buffers(st->pipe, shader_type,
+                                   atomic->Binding, 1, &sb);
+   }
+}
+
+/**
+ * Update hook of st_bind_vs_atomics: binds the atomic buffers of the program
+ * currently selected for the vertex stage.
+ */
+static void
+bind_vs_atomics(struct st_context *st)
+{
+   st_bind_atomics(st,
+                   st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX],
+                   PIPE_SHADER_VERTEX);
+}
+
+/* Tracked-state entry whose update callback is bind_vs_atomics(). */
+const struct st_tracked_state st_bind_vs_atomics = {
+   "st_bind_vs_atomics",
+   { 0, ST_NEW_VERTEX_PROGRAM | ST_NEW_ATOMIC_BUFFER },
+   bind_vs_atomics
+};
+
+/**
+ * Update hook of st_bind_fs_atomics: binds the atomic buffers of the program
+ * currently selected for the fragment stage.
+ */
+static void
+bind_fs_atomics(struct st_context *st)
+{
+   st_bind_atomics(st,
+                   st->ctx->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT],
+                   PIPE_SHADER_FRAGMENT);
+}
+
+/* Tracked-state entry whose update callback is bind_fs_atomics(). */
+const struct st_tracked_state st_bind_fs_atomics = {
+   "st_bind_fs_atomics",
+   { 0, ST_NEW_FRAGMENT_PROGRAM | ST_NEW_ATOMIC_BUFFER },
+   bind_fs_atomics
+};
+
+/**
+ * Update hook of st_bind_gs_atomics: binds the atomic buffers of the program
+ * currently selected for the geometry stage.
+ */
+static void
+bind_gs_atomics(struct st_context *st)
+{
+   st_bind_atomics(st,
+                   st->ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY],
+                   PIPE_SHADER_GEOMETRY);
+}
+
+/* Tracked-state entry whose update callback is bind_gs_atomics(). */
+const struct st_tracked_state st_bind_gs_atomics = {
+   "st_bind_gs_atomics",
+   { 0, ST_NEW_GEOMETRY_PROGRAM | ST_NEW_ATOMIC_BUFFER },
+   bind_gs_atomics
+};
+
+/**
+ * Update hook of st_bind_tcs_atomics: binds the atomic buffers of the program
+ * currently selected for the tessellation control stage.
+ */
+static void
+bind_tcs_atomics(struct st_context *st)
+{
+   st_bind_atomics(st,
+                   st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL],
+                   PIPE_SHADER_TESS_CTRL);
+}
+
+/* Tracked-state entry whose update callback is bind_tcs_atomics(). */
+const struct st_tracked_state st_bind_tcs_atomics = {
+   "st_bind_tcs_atomics",
+   { 0, ST_NEW_TESSCTRL_PROGRAM | ST_NEW_ATOMIC_BUFFER },
+   bind_tcs_atomics
+};
+
+/**
+ * Update hook of st_bind_tes_atomics: binds the atomic buffers of the program
+ * currently selected for the tessellation evaluation stage.
+ */
+static void
+bind_tes_atomics(struct st_context *st)
+{
+   st_bind_atomics(st,
+                   st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL],
+                   PIPE_SHADER_TESS_EVAL);
+}
+
+/* Tracked-state entry whose update callback is bind_tes_atomics(). */
+const struct st_tracked_state st_bind_tes_atomics = {
+   "st_bind_tes_atomics",
+   { 0, ST_NEW_TESSEVAL_PROGRAM | ST_NEW_ATOMIC_BUFFER },
+   bind_tes_atomics
+};
diff --git a/src/mesa/state_tracker/st_atom_storagebuf.c b/src/mesa/state_tracker/st_atom_storagebuf.c
new file mode 100644
index 00000000000..f165cc3e0a1
--- /dev/null
+++ b/src/mesa/state_tracker/st_atom_storagebuf.c
@@ -0,0 +1,196 @@
+/**************************************************************************
+ *
+ * Copyright 2014 Ilia Mirkin. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "main/imports.h"
+#include "program/prog_parameter.h"
+#include "program/prog_print.h"
+#include "compiler/glsl/ir_uniform.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_surface.h"
+
+#include "st_debug.h"
+#include "st_cb_bufferobjects.h"
+#include "st_context.h"
+#include "st_atom.h"
+#include "st_program.h"
+
+/**
+ * Bind the shader storage blocks of \p shader to the shader-buffer slots of
+ * the given pipe shader stage.
+ *
+ * Storage buffers are placed after the stage's atomic buffer slots, i.e.
+ * starting at slot MaxAtomicBuffers.  Slots between the last used block and
+ * MaxShaderStorageBlocks are reset with a NULL descriptor array.
+ *
+ * Does nothing when \p shader is NULL or the driver provides no
+ * set_shader_buffers hook.
+ */
+static void
+st_bind_ssbos(struct st_context *st, struct gl_shader *shader,
+              unsigned shader_type)
+{
+   struct pipe_shader_buffer buffers[MAX_SHADER_STORAGE_BUFFERS];
+   struct gl_program_constants *limits;
+   unsigned blk;
+
+   if (!shader || !st->pipe->set_shader_buffers)
+      return;
+
+   limits = &st->ctx->Const.Program[shader->Stage];
+
+   for (blk = 0; blk < shader->NumShaderStorageBlocks; blk++) {
+      struct pipe_shader_buffer *dst = &buffers[blk];
+      struct gl_shader_storage_buffer_binding *binding =
+         &st->ctx->ShaderStorageBufferBindings[
+            shader->ShaderStorageBlocks[blk]->Binding];
+      struct st_buffer_object *st_obj =
+         st_buffer_object(binding->BufferObject);
+
+      dst->buffer = st_obj->buffer;
+
+      if (!dst->buffer) {
+         /* Nothing backs this binding: describe an empty range. */
+         dst->buffer_offset = 0;
+         dst->buffer_size = 0;
+         continue;
+      }
+
+      dst->buffer_offset = binding->Offset;
+      dst->buffer_size = dst->buffer->width0 - binding->Offset;
+
+      /* AutomaticSize is FALSE if the buffer was set with BindBufferRange.
+       * Take the minimum just to be sure.
+       */
+      if (!binding->AutomaticSize)
+         dst->buffer_size = MIN2(dst->buffer_size, (unsigned) binding->Size);
+   }
+
+   st->pipe->set_shader_buffers(st->pipe, shader_type,
+                                limits->MaxAtomicBuffers,
+                                shader->NumShaderStorageBlocks, buffers);
+
+   /* clear out any stale shader buffers */
+   if (shader->NumShaderStorageBlocks < limits->MaxShaderStorageBlocks) {
+      st->pipe->set_shader_buffers(
+            st->pipe, shader_type,
+            limits->MaxAtomicBuffers + shader->NumShaderStorageBlocks,
+            limits->MaxShaderStorageBlocks - shader->NumShaderStorageBlocks,
+            NULL);
+   }
+}
+
+/**
+ * Update hook of st_bind_vs_ssbos: binds the storage blocks of the linked
+ * vertex shader, if a program is current for that stage.
+ */
+static void bind_vs_ssbos(struct st_context *st)
+{
+   struct gl_shader_program *const current =
+      st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];
+
+   if (current) {
+      st_bind_ssbos(st, current->_LinkedShaders[MESA_SHADER_VERTEX],
+                    PIPE_SHADER_VERTEX);
+   }
+}
+
+/* Tracked-state entry whose update callback is bind_vs_ssbos(). */
+const struct st_tracked_state st_bind_vs_ssbos = {
+   "st_bind_vs_ssbos",
+   { 0, ST_NEW_VERTEX_PROGRAM | ST_NEW_STORAGE_BUFFER },
+   bind_vs_ssbos
+};
+
+/**
+ * Update hook of st_bind_fs_ssbos: binds the storage blocks of the linked
+ * fragment shader, if a program is current for that stage.
+ */
+static void bind_fs_ssbos(struct st_context *st)
+{
+   struct gl_shader_program *const current =
+      st->ctx->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT];
+
+   if (current) {
+      st_bind_ssbos(st, current->_LinkedShaders[MESA_SHADER_FRAGMENT],
+                    PIPE_SHADER_FRAGMENT);
+   }
+}
+
+/* Tracked-state entry whose update callback is bind_fs_ssbos(). */
+const struct st_tracked_state st_bind_fs_ssbos = {
+   "st_bind_fs_ssbos",
+   { 0, ST_NEW_FRAGMENT_PROGRAM | ST_NEW_STORAGE_BUFFER },
+   bind_fs_ssbos
+};
+
+/**
+ * Update hook of st_bind_gs_ssbos: binds the storage blocks of the linked
+ * geometry shader, if a program is current for that stage.
+ */
+static void bind_gs_ssbos(struct st_context *st)
+{
+   struct gl_shader_program *const current =
+      st->ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
+
+   if (current) {
+      st_bind_ssbos(st, current->_LinkedShaders[MESA_SHADER_GEOMETRY],
+                    PIPE_SHADER_GEOMETRY);
+   }
+}
+
+/* Tracked-state entry whose update callback is bind_gs_ssbos(). */
+const struct st_tracked_state st_bind_gs_ssbos = {
+   "st_bind_gs_ssbos",
+   { 0, ST_NEW_GEOMETRY_PROGRAM | ST_NEW_STORAGE_BUFFER },
+   bind_gs_ssbos
+};
+
+/**
+ * Update hook of st_bind_tcs_ssbos: binds the storage blocks of the linked
+ * tessellation control shader, if a program is current for that stage.
+ */
+static void bind_tcs_ssbos(struct st_context *st)
+{
+   struct gl_shader_program *const current =
+      st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL];
+
+   if (current) {
+      st_bind_ssbos(st, current->_LinkedShaders[MESA_SHADER_TESS_CTRL],
+                    PIPE_SHADER_TESS_CTRL);
+   }
+}
+
+/* Tracked-state entry whose update callback is bind_tcs_ssbos(). */
+const struct st_tracked_state st_bind_tcs_ssbos = {
+   "st_bind_tcs_ssbos",
+   { 0, ST_NEW_TESSCTRL_PROGRAM | ST_NEW_STORAGE_BUFFER },
+   bind_tcs_ssbos
+};
+
+/**
+ * Update hook of st_bind_tes_ssbos: binds the storage blocks of the linked
+ * tessellation evaluation shader, if a program is current for that stage.
+ */
+static void bind_tes_ssbos(struct st_context *st)
+{
+   struct gl_shader_program *const current =
+      st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL];
+
+   if (current) {
+      st_bind_ssbos(st, current->_LinkedShaders[MESA_SHADER_TESS_EVAL],
+                    PIPE_SHADER_TESS_EVAL);
+   }
+}
+
+/* Tracked-state entry whose update callback is bind_tes_ssbos(). */
+const struct st_tracked_state st_bind_tes_ssbos = {
+   "st_bind_tes_ssbos",
+   { 0, ST_NEW_TESSEVAL_PROGRAM | ST_NEW_STORAGE_BUFFER },
+   bind_tes_ssbos
+};
diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c
index 68be8ba64ac..202b4eeeefa 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -237,6 +237,13 @@ st_bufferobj_data(struct gl_context *ctx,
case GL_PARAMETER_BUFFER_ARB:
bind = PIPE_BIND_COMMAND_ARGS_BUFFER;
break;
+ case GL_ATOMIC_COUNTER_BUFFER:
+ case GL_SHADER_STORAGE_BUFFER:
+ bind = PIPE_BIND_SHADER_BUFFER;
+ break;
+ case GL_QUERY_BUFFER:
+ bind = PIPE_BIND_QUERY_BUFFER;
+ break;
default:
bind = 0;
}
diff --git a/src/mesa/state_tracker/st_cb_queryobj.c b/src/mesa/state_tracker/st_cb_queryobj.c
index aafae16b2df..fc239bc778c 100644
--- a/src/mesa/state_tracker/st_cb_queryobj.c
+++ b/src/mesa/state_tracker/st_cb_queryobj.c
@@ -39,9 +39,11 @@
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
+#include "util/u_inlines.h"
#include "st_context.h"
#include "st_cb_queryobj.h"
#include "st_cb_bitmap.h"
+#include "st_cb_bufferobjects.h"
static struct gl_query_object *
@@ -271,7 +273,7 @@ st_WaitQuery(struct gl_context *ctx, struct gl_query_object *q)
{
/* nothing */
}
-
+
q->Ready = GL_TRUE;
}
@@ -303,6 +305,98 @@ st_GetTimestamp(struct gl_context *ctx)
}
}
+/**
+ * Store a property of query \p q into buffer \p buf at byte \p offset.
+ *
+ * \p pname selects what is written:
+ *  - GL_QUERY_TARGET: the query target enum, written directly with
+ *    pipe_buffer_write();
+ *  - GL_QUERY_RESULT_AVAILABLE: forwarded to the driver with index -1;
+ *  - anything else: forwarded with the result index of the query, waiting
+ *    for completion only when \p pname is GL_QUERY_RESULT.
+ *
+ * \p ptype (GL_INT, GL_UNSIGNED_INT, GL_INT64_ARB, GL_UNSIGNED_INT64_ARB)
+ * selects the integer type of the stored value.
+ */
+static void
+st_StoreQueryResult(struct gl_context *ctx, struct gl_query_object *q,
+                    struct gl_buffer_object *buf, intptr_t offset,
+                    GLenum pname, GLenum ptype)
+{
+   struct pipe_context *pipe = st_context(ctx)->pipe;
+   struct st_query_object *stq = st_query_object(q);
+   struct st_buffer_object *stObj = st_buffer_object(buf);
+   const boolean wait_for_result = pname == GL_QUERY_RESULT;
+   enum pipe_query_value_type value_type;
+   int result_index;
+
+   /* GL_QUERY_TARGET is a bit of an extension since it has nothing to
+    * do with the GPU end of the query. Write it in "by hand".
+    */
+   if (pname == GL_QUERY_TARGET) {
+      /* Assume that the data must be LE. The endianness situation wrt CPU and
+       * GPU is incredibly confusing, but the vast majority of GPUs are
+       * LE. When a BE one comes along, this needs some form of resolution.
+       */
+      const boolean wide =
+         ptype == GL_INT64_ARB || ptype == GL_UNSIGNED_INT64_ARB;
+      unsigned payload[2] = { CPU_TO_LE32(q->Target), 0 };
+
+      pipe_buffer_write(pipe, stObj->buffer, offset, wide ? 8 : 4, payload);
+      return;
+   }
+
+   /* Translate the GL parameter type into the pipe result type. */
+   if (ptype == GL_INT)
+      value_type = PIPE_QUERY_TYPE_I32;
+   else if (ptype == GL_UNSIGNED_INT)
+      value_type = PIPE_QUERY_TYPE_U32;
+   else if (ptype == GL_INT64_ARB)
+      value_type = PIPE_QUERY_TYPE_I64;
+   else if (ptype == GL_UNSIGNED_INT64_ARB)
+      value_type = PIPE_QUERY_TYPE_U64;
+   else
+      unreachable("Unexpected result type");
+
+   if (pname == GL_QUERY_RESULT_AVAILABLE) {
+      result_index = -1;
+   } else if (stq->type != PIPE_QUERY_PIPELINE_STATISTICS) {
+      result_index = 0;
+   } else {
+      /* Pipeline statistics: pick the counter matching the GL target. */
+      switch (q->Target) {
+      case GL_VERTICES_SUBMITTED_ARB:                 result_index = 0;  break;
+      case GL_PRIMITIVES_SUBMITTED_ARB:               result_index = 1;  break;
+      case GL_VERTEX_SHADER_INVOCATIONS_ARB:          result_index = 2;  break;
+      case GL_GEOMETRY_SHADER_INVOCATIONS:            result_index = 3;  break;
+      case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB: result_index = 4;  break;
+      case GL_CLIPPING_INPUT_PRIMITIVES_ARB:          result_index = 5;  break;
+      case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB:         result_index = 6;  break;
+      case GL_FRAGMENT_SHADER_INVOCATIONS_ARB:        result_index = 7;  break;
+      case GL_TESS_CONTROL_SHADER_PATCHES_ARB:        result_index = 8;  break;
+      case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: result_index = 9;  break;
+      case GL_COMPUTE_SHADER_INVOCATIONS_ARB:         result_index = 10; break;
+      default:
+         unreachable("Unexpected target");
+      }
+   }
+
+   pipe->get_query_result_resource(pipe, stq->pq, wait_for_result,
+                                   value_type, result_index,
+                                   stObj->buffer, offset);
+}
void st_init_query_functions(struct dd_function_table *functions)
{
@@ -313,4 +407,5 @@ void st_init_query_functions(struct dd_function_table *functions)
functions->WaitQuery = st_WaitQuery;
functions->CheckQuery = st_CheckQuery;
functions->GetTimestamp = st_GetTimestamp;
+ functions->StoreQueryResult = st_StoreQueryResult;
}
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index 0ceb37027e1..f2b607c3a1d 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -60,6 +60,7 @@
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "util/u_inlines.h"
+#include "util/u_upload_mgr.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_tile.h"
#include "util/u_format.h"
@@ -67,6 +68,9 @@
#include "util/u_sampler.h"
#include "util/u_math.h"
#include "util/u_box.h"
+#include "util/u_simple_shaders.h"
+#include "cso_cache/cso_context.h"
+#include "tgsi/tgsi_ureg.h"
#define DBG if (0) printf
@@ -686,6 +690,999 @@ st_get_blit_mask(GLenum srcFormat, GLenum dstFormat)
}
}
+/**
+ * Probe the screen's capabilities and initialize st->pbo_upload.
+ *
+ * The path is enabled only when texture buffer objects are supported, the
+ * texture buffer offset alignment cap is at least 1 and the fragment stage
+ * supports integers.  When it is not enabled nothing else is initialized.
+ */
+void
+st_init_pbo_upload(struct st_context *st)
+{
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_screen *screen = pipe->screen;
+   bool usable;
+
+   /* Queries are issued in this order and stop at the first failure. */
+   usable = screen->get_param(screen, PIPE_CAP_TEXTURE_BUFFER_OBJECTS) != 0;
+   usable = usable &&
+      screen->get_param(screen, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT) >= 1;
+   usable = usable &&
+      screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
+                               PIPE_SHADER_CAP_INTEGERS) != 0;
+
+   st->pbo_upload.enabled = usable;
+   if (!usable)
+      return;
+
+   st->pbo_upload.rgba_only =
+      screen->get_param(screen, PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY);
+
+   /* Layered uploads need instance IDs plus either layer output from the
+    * vertex shader or a geometry shader able to emit at least 3 vertices.
+    */
+   if (screen->get_param(screen, PIPE_CAP_TGSI_INSTANCEID)) {
+      if (screen->get_param(screen, PIPE_CAP_TGSI_VS_LAYER_VIEWPORT)) {
+         st->pbo_upload.upload_layers = true;
+      } else if (screen->get_param(screen,
+                                   PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES) >= 3) {
+         st->pbo_upload.upload_layers = true;
+         st->pbo_upload.use_gs = true;
+      }
+   }
+
+   /* Blend state */
+   memset(&st->pbo_upload.blend, 0, sizeof(struct pipe_blend_state));
+   st->pbo_upload.blend.rt[0].colormask = PIPE_MASK_RGBA;
+
+   /* Rasterizer state */
+   memset(&st->pbo_upload.raster, 0, sizeof(struct pipe_rasterizer_state));
+   st->pbo_upload.raster.half_pixel_center = 1;
+}
+
+/**
+ * Delete the fragment, geometry and vertex shaders stored in
+ * st->pbo_upload (in that order) and reset their pointers to NULL.
+ * Shaders that are not set are skipped.
+ */
+void
+st_destroy_pbo_upload(struct st_context *st)
+{
+   void *shader;
+
+   shader = st->pbo_upload.fs;
+   if (shader) {
+      cso_delete_fragment_shader(st->cso_context, shader);
+      st->pbo_upload.fs = NULL;
+   }
+
+   shader = st->pbo_upload.gs;
+   if (shader) {
+      cso_delete_geometry_shader(st->cso_context, shader);
+      st->pbo_upload.gs = NULL;
+   }
+
+   shader = st->pbo_upload.vs;
+   if (shader) {
+      cso_delete_vertex_shader(st->cso_context, shader);
+      st->pbo_upload.vs = NULL;
+   }
+}
+
+/**
+ * Map a format with permuted components onto the format that has the same
+ * components, types and sizes laid out in RGBA order.  Formats without an
+ * entry below are returned unchanged.
+ */
+static enum pipe_format
+unswizzle_format(enum pipe_format format)
+{
+   switch (format) {
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
+   case PIPE_FORMAT_A8B8G8R8_UNORM:    return PIPE_FORMAT_R8G8B8A8_UNORM;
+   case PIPE_FORMAT_B10G10R10A2_UNORM: return PIPE_FORMAT_R10G10B10A2_UNORM;
+   case PIPE_FORMAT_B10G10R10A2_SNORM: return PIPE_FORMAT_R10G10B10A2_SNORM;
+   case PIPE_FORMAT_B10G10R10A2_UINT:  return PIPE_FORMAT_R10G10B10A2_UINT;
+   default:                            return format;
+   }
+}
+
+/**
+ * Map a PIPE_FORMAT_A* format onto the PIPE_FORMAT_R* format of the same
+ * size and type.  Any other format is returned unchanged.
+ */
+static enum pipe_format
+alpha_to_red(enum pipe_format format)
+{
+   switch (format) {
+   /* 8 bits */
+   case PIPE_FORMAT_A8_UNORM:  return PIPE_FORMAT_R8_UNORM;
+   case PIPE_FORMAT_A8_SNORM:  return PIPE_FORMAT_R8_SNORM;
+   case PIPE_FORMAT_A8_UINT:   return PIPE_FORMAT_R8_UINT;
+   case PIPE_FORMAT_A8_SINT:   return PIPE_FORMAT_R8_SINT;
+   /* 16 bits */
+   case PIPE_FORMAT_A16_UNORM: return PIPE_FORMAT_R16_UNORM;
+   case PIPE_FORMAT_A16_SNORM: return PIPE_FORMAT_R16_SNORM;
+   case PIPE_FORMAT_A16_UINT:  return PIPE_FORMAT_R16_UINT;
+   case PIPE_FORMAT_A16_SINT:  return PIPE_FORMAT_R16_SINT;
+   case PIPE_FORMAT_A16_FLOAT: return PIPE_FORMAT_R16_FLOAT;
+   /* 32 bits */
+   case PIPE_FORMAT_A32_UINT:  return PIPE_FORMAT_R32_UINT;
+   case PIPE_FORMAT_A32_SINT:  return PIPE_FORMAT_R32_SINT;
+   case PIPE_FORMAT_A32_FLOAT: return PIPE_FORMAT_R32_FLOAT;
+   default:                    return format;
+   }
+}
+
+/**
+ * Map a PIPE_FORMAT_R*A* format onto the PIPE_FORMAT_R*G* format of the
+ * same size and type.  Any other format is returned unchanged.
+ */
+static enum pipe_format
+red_alpha_to_red_green(enum pipe_format format)
+{
+   switch (format) {
+   /* 8 bits per channel */
+   case PIPE_FORMAT_R8A8_UNORM:   return PIPE_FORMAT_R8G8_UNORM;
+   case PIPE_FORMAT_R8A8_SNORM:   return PIPE_FORMAT_R8G8_SNORM;
+   case PIPE_FORMAT_R8A8_UINT:    return PIPE_FORMAT_R8G8_UINT;
+   case PIPE_FORMAT_R8A8_SINT:    return PIPE_FORMAT_R8G8_SINT;
+   /* 16 bits per channel */
+   case PIPE_FORMAT_R16A16_UNORM: return PIPE_FORMAT_R16G16_UNORM;
+   case PIPE_FORMAT_R16A16_SNORM: return PIPE_FORMAT_R16G16_SNORM;
+   case PIPE_FORMAT_R16A16_UINT:  return PIPE_FORMAT_R16G16_UINT;
+   case PIPE_FORMAT_R16A16_SINT:  return PIPE_FORMAT_R16G16_SINT;
+   case PIPE_FORMAT_R16A16_FLOAT: return PIPE_FORMAT_R16G16_FLOAT;
+   /* 32 bits per channel */
+   case PIPE_FORMAT_R32A32_UINT:  return PIPE_FORMAT_R32G32_UINT;
+   case PIPE_FORMAT_R32A32_SINT:  return PIPE_FORMAT_R32G32_SINT;
+   case PIPE_FORMAT_R32A32_FLOAT: return PIPE_FORMAT_R32G32_FLOAT;
+   default:                       return format;
+   }
+}
+
+/**
+ * Map a PIPE_FORMAT_L*A* format onto the PIPE_FORMAT_R*G* format of the
+ * same size and type.  Any other format is returned unchanged.
+ */
+static enum pipe_format
+luminance_alpha_to_red_green(enum pipe_format format)
+{
+   switch (format) {
+   /* 8 bits per channel */
+   case PIPE_FORMAT_L8A8_UNORM:   return PIPE_FORMAT_R8G8_UNORM;
+   case PIPE_FORMAT_L8A8_SNORM:   return PIPE_FORMAT_R8G8_SNORM;
+   case PIPE_FORMAT_L8A8_UINT:    return PIPE_FORMAT_R8G8_UINT;
+   case PIPE_FORMAT_L8A8_SINT:    return PIPE_FORMAT_R8G8_SINT;
+   /* 16 bits per channel */
+   case PIPE_FORMAT_L16A16_UNORM: return PIPE_FORMAT_R16G16_UNORM;
+   case PIPE_FORMAT_L16A16_SNORM: return PIPE_FORMAT_R16G16_SNORM;
+   case PIPE_FORMAT_L16A16_UINT:  return PIPE_FORMAT_R16G16_UINT;
+   case PIPE_FORMAT_L16A16_SINT:  return PIPE_FORMAT_R16G16_SINT;
+   case PIPE_FORMAT_L16A16_FLOAT: return PIPE_FORMAT_R16G16_FLOAT;
+   /* 32 bits per channel */
+   case PIPE_FORMAT_L32A32_UINT:  return PIPE_FORMAT_R32G32_UINT;
+   case PIPE_FORMAT_L32A32_SINT:  return PIPE_FORMAT_R32G32_SINT;
+   case PIPE_FORMAT_L32A32_FLOAT: return PIPE_FORMAT_R32G32_FLOAT;
+   default:                       return format;
+   }
+}
+
+/**
+ * Tell whether format is a PIPE_FORMAT_A* format: one channel, with a
+ * (0, 0, 0, X) swizzle.
+ */
+static bool
+format_is_alpha(enum pipe_format format)
+{
+   const struct util_format_description *info = util_format_description(format);
+
+   return info->nr_channels == 1 &&
+          info->swizzle[0] == UTIL_FORMAT_SWIZZLE_0 &&
+          info->swizzle[1] == UTIL_FORMAT_SWIZZLE_0 &&
+          info->swizzle[2] == UTIL_FORMAT_SWIZZLE_0 &&
+          info->swizzle[3] == UTIL_FORMAT_SWIZZLE_X;
+}
+
+/**
+ * Tell whether format is a PIPE_FORMAT_R* format: one channel, with an
+ * (X, 0, 0, 1) swizzle.
+ */
+static bool
+format_is_red(enum pipe_format format)
+{
+   const struct util_format_description *info = util_format_description(format);
+
+   return info->nr_channels == 1 &&
+          info->swizzle[0] == UTIL_FORMAT_SWIZZLE_X &&
+          info->swizzle[1] == UTIL_FORMAT_SWIZZLE_0 &&
+          info->swizzle[2] == UTIL_FORMAT_SWIZZLE_0 &&
+          info->swizzle[3] == UTIL_FORMAT_SWIZZLE_1;
+}
+
+
+/**
+ * Tell whether format is a PIPE_FORMAT_L* format: one channel, with an
+ * (X, X, X, 1) swizzle.
+ */
+static bool
+format_is_luminance(enum pipe_format format)
+{
+   const struct util_format_description *info = util_format_description(format);
+
+   return info->nr_channels == 1 &&
+          info->swizzle[0] == UTIL_FORMAT_SWIZZLE_X &&
+          info->swizzle[1] == UTIL_FORMAT_SWIZZLE_X &&
+          info->swizzle[2] == UTIL_FORMAT_SWIZZLE_X &&
+          info->swizzle[3] == UTIL_FORMAT_SWIZZLE_1;
+}
+
+/**
+ * Tell whether format is a PIPE_FORMAT_R*A* format: two channels, with an
+ * (X, 0, 0, Y) swizzle.
+ */
+static bool
+format_is_red_alpha(enum pipe_format format)
+{
+   const struct util_format_description *info = util_format_description(format);
+
+   return info->nr_channels == 2 &&
+          info->swizzle[0] == UTIL_FORMAT_SWIZZLE_X &&
+          info->swizzle[1] == UTIL_FORMAT_SWIZZLE_0 &&
+          info->swizzle[2] == UTIL_FORMAT_SWIZZLE_0 &&
+          info->swizzle[3] == UTIL_FORMAT_SWIZZLE_Y;
+}
+
+/**
+ * Returns false when every component of format is either in its natural
+ * position (X, Y, Z, W for the four swizzle entries) or the usual constant
+ * (0 for the first three entries, 1 for the last), and true otherwise.
+ *
+ * The description's swizzle array is compared against UTIL_FORMAT_SWIZZLE_*
+ * values only, matching the other format_is_*() predicates in this file.
+ */
+static bool
+format_is_swizzled_rgba(enum pipe_format format)
+{
+   const struct util_format_description *desc = util_format_description(format);
+
+   if ((desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_X || desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_0) &&
+       (desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_Y || desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_0) &&
+       (desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_Z || desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_0) &&
+       (desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_W || desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1))
+      return false;
+
+   return true;
+}
+
+/**
+ * One row of a swizzle lookup table: a four-entry swizzle paired with a
+ * pipe format.  swizzle_format() compares its swizzle argument entry by
+ * entry against the swizzle field and returns the format of the matching row.
+ */
+struct format_table
+{
+ unsigned char swizzle[4];
+ enum pipe_format format;
+};
+
+/* Rows searched by swizzle_format() for the 8-bit-per-channel UNORM formats. */
+static const struct format_table table_8888_unorm[] = {
+ { { 0, 1, 2, 3 }, PIPE_FORMAT_R8G8B8A8_UNORM },
+ { { 2, 1, 0, 3 }, PIPE_FORMAT_B8G8R8A8_UNORM },
+ { { 3, 0, 1, 2 }, PIPE_FORMAT_A8R8G8B8_UNORM },
+ { { 3, 2, 1, 0 }, PIPE_FORMAT_A8B8G8R8_UNORM }
+};
+
+/* Rows searched by swizzle_format() for the 10/10/10/2 UNORM formats. */
+static const struct format_table table_1010102_unorm[] = {
+ { { 0, 1, 2, 3 }, PIPE_FORMAT_R10G10B10A2_UNORM },
+ { { 2, 1, 0, 3 }, PIPE_FORMAT_B10G10R10A2_UNORM }
+};
+
+/* Rows searched by swizzle_format() for the 10/10/10/2 SNORM formats. */
+static const struct format_table table_1010102_snorm[] = {
+ { { 0, 1, 2, 3 }, PIPE_FORMAT_R10G10B10A2_SNORM },
+ { { 2, 1, 0, 3 }, PIPE_FORMAT_B10G10R10A2_SNORM }
+};
+
+/* Rows searched by swizzle_format() for the 10/10/10/2 UINT formats. */
+static const struct format_table table_1010102_uint[] = {
+ { { 0, 1, 2, 3 }, PIPE_FORMAT_R10G10B10A2_UINT },
+ { { 2, 1, 0, 3 }, PIPE_FORMAT_B10G10R10A2_UINT }
+};
+
+/**
+ * Search \p count entries of \p table for one whose swizzle equals
+ * \p swizzle in all four components.
+ *
+ * \return the matching entry's format, or PIPE_FORMAT_NONE if none matches.
+ */
+static enum pipe_format
+lookup_swizzled_format(const struct format_table *table, unsigned count,
+ const int * const swizzle)
+{
+ unsigned i;
+
+ for (i = 0; i < count; i++) {
+ if (swizzle[0] == table[i].swizzle[0] &&
+ swizzle[1] == table[i].swizzle[1] &&
+ swizzle[2] == table[i].swizzle[2] &&
+ swizzle[3] == table[i].swizzle[3])
+ return table[i].format;
+ }
+
+ return PIPE_FORMAT_NONE;
+}
+
+/**
+ * Return the format from the same family as \p format (same channel sizes
+ * and type) whose table entry matches \p swizzle.
+ *
+ * \param format   selects which lookup table is searched
+ * \param swizzle  four swizzle components to look up
+ * \return the matching format, or PIPE_FORMAT_NONE if \p format belongs to
+ *         no known family or the family has no entry for \p swizzle.
+ */
+static enum pipe_format
+swizzle_format(enum pipe_format format, const int * const swizzle)
+{
+ switch (format) {
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_A8B8G8R8_UNORM:
+ return lookup_swizzled_format(table_8888_unorm,
+ ARRAY_SIZE(table_8888_unorm), swizzle);
+
+ case PIPE_FORMAT_R10G10B10A2_UNORM:
+ case PIPE_FORMAT_B10G10R10A2_UNORM:
+ return lookup_swizzled_format(table_1010102_unorm,
+ ARRAY_SIZE(table_1010102_unorm), swizzle);
+
+ case PIPE_FORMAT_R10G10B10A2_SNORM:
+ case PIPE_FORMAT_B10G10R10A2_SNORM:
+ return lookup_swizzled_format(table_1010102_snorm,
+ ARRAY_SIZE(table_1010102_snorm), swizzle);
+
+ case PIPE_FORMAT_R10G10B10A2_UINT:
+ case PIPE_FORMAT_B10G10R10A2_UINT:
+ return lookup_swizzled_format(table_1010102_uint,
+ ARRAY_SIZE(table_1010102_uint), swizzle);
+
+ default:
+ break;
+ }
+
+ return PIPE_FORMAT_NONE;
+}
+
+/**
+ * Try to replace a source/destination format pair by an equivalent pair
+ * expressed with red/green/RGBA formats.
+ *
+ * Handled source classes, in order: alpha, luminance, luminance-alpha and
+ * swizzled RGBA. Any other source format leaves both formats untouched.
+ *
+ * \param src_format  in: source format; out: reinterpreted source format
+ * \param dst_format  in: destination format; out: reinterpreted destination
+ * \return true on success (outputs written), false if the pair cannot be
+ *         reinterpreted (outputs are left unmodified in that case)
+ */
+static bool
+reinterpret_formats(enum pipe_format *src_format, enum pipe_format *dst_format)
+{
+ enum pipe_format src = *src_format;
+ enum pipe_format dst = *dst_format;
+
+ /* Note: dst_format has already been transformed from luminance/intensity
+ * to red when this function is called. The source format will never
+ * be an intensity format, because GL_INTENSITY is not a legal value
+ * for the format parameter in glTex(Sub)Image(). */
+
+ if (format_is_alpha(src)) {
+ /* Alpha can only go to alpha; both sides are then treated as red. */
+ if (!format_is_alpha(dst))
+ return false;
+
+ src = alpha_to_red(src);
+ dst = alpha_to_red(dst);
+ } else if (format_is_luminance(src)) {
+ /* Only the source is rewritten here; dst must already be R or RA. */
+ if (!format_is_red(dst) && !format_is_red_alpha(dst))
+ return false;
+
+ src = util_format_luminance_to_red(src);
+ } else if (util_format_is_luminance_alpha(src)) {
+ src = luminance_alpha_to_red_green(src);
+
+ /* A red-alpha destination is rewritten to red-green to match the
+ * source; a plain red destination is accepted unchanged. */
+ if (format_is_red_alpha(dst)) {
+ dst = red_alpha_to_red_green(dst);
+ } else if (!format_is_red(dst))
+ return false;
+ } else if (format_is_swizzled_rgba(src)) {
+ const struct util_format_description *src_desc = util_format_description(src);
+ const struct util_format_description *dst_desc = util_format_description(dst);
+ int swizzle[4];
+ unsigned i;
+
+ /* Make sure the format is an RGBA and not an RGBX format */
+ if (src_desc->nr_channels != 4 || src_desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1)
+ return false;
+
+ if (dst_desc->nr_channels != 4 || dst_desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1)
+ return false;
+
+ /* Compose the two swizzles: look the source swizzle up in the
+ * destination swizzle. */
+ for (i = 0; i < 4; i++)
+ swizzle[i] = dst_desc->swizzle[src_desc->swizzle[i]];
+
+ /* Pick the destination-family format matching the composed swizzle;
+ * give up if the family has no such member. */
+ dst = swizzle_format(dst, swizzle);
+ if (dst == PIPE_FORMAT_NONE)
+ return false;
+
+ src = unswizzle_format(src);
+ }
+
+ *src_format = src;
+ *dst_format = dst;
+ return true;
+}
+
+/**
+ * Build the vertex shader used for PBO uploads.
+ *
+ * The shader copies the input position to the output position. When
+ * st->pbo_upload.upload_layers is set it additionally forwards the instance
+ * ID: either as an integer layer output, or (when a geometry shader is
+ * used) converted to float and stored in the Z component of the position.
+ *
+ * \return the driver shader handle, or NULL on failure
+ */
+static void *
+create_pbo_upload_vs(struct st_context *st)
+{
+ struct ureg_program *ureg;
+ struct ureg_src in_pos;
+ struct ureg_src in_instanceid;
+ struct ureg_dst out_pos;
+ struct ureg_dst out_layer;
+
+ ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
+ /* Same failure handling as the geometry and fragment shader builders. */
+ if (!ureg)
+ return NULL;
+
+ in_pos = ureg_DECL_vs_input(ureg, TGSI_SEMANTIC_POSITION);
+
+ out_pos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
+
+ if (st->pbo_upload.upload_layers) {
+ in_instanceid = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_INSTANCEID, 0);
+
+ /* The layer output is only declared here when no GS will write it. */
+ if (!st->pbo_upload.use_gs)
+ out_layer = ureg_DECL_output(ureg, TGSI_SEMANTIC_LAYER, 0);
+ }
+
+ /* out_pos = in_pos */
+ ureg_MOV(ureg, out_pos, in_pos);
+
+ if (st->pbo_upload.upload_layers) {
+ if (st->pbo_upload.use_gs) {
+ /* out_pos.z = i2f(gl_InstanceID) */
+ ureg_I2F(ureg, ureg_writemask(out_pos, TGSI_WRITEMASK_Z),
+ ureg_scalar(in_instanceid, TGSI_SWIZZLE_X));
+ } else {
+ /* out_layer = gl_InstanceID */
+ ureg_MOV(ureg, out_layer, in_instanceid);
+ }
+ }
+
+ ureg_END(ureg);
+
+ return ureg_create_shader_and_destroy(ureg, st->pipe);
+}
+
+/**
+ * Build the geometry shader used for layered PBO uploads.
+ *
+ * For each of the three input vertices it copies the position through and
+ * writes the layer output from the float-to-int conversion of the
+ * position's Z component, then emits the vertex.
+ *
+ * \return the driver shader handle, or NULL if the ureg program could not
+ *         be created
+ */
+static void *
+create_pbo_upload_gs(struct st_context *st)
+{
+ static const int zero = 0;
+ struct ureg_program *ureg;
+ struct ureg_dst out_pos;
+ struct ureg_dst out_layer;
+ struct ureg_src in_pos;
+ struct ureg_src imm;
+ unsigned i;
+
+ ureg = ureg_create(TGSI_PROCESSOR_GEOMETRY);
+ if (!ureg)
+ return NULL;
+
+ /* Triangles in, triangle strip of at most three vertices out. */
+ ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM, PIPE_PRIM_TRIANGLES);
+ ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, PIPE_PRIM_TRIANGLE_STRIP);
+ ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES, 3);
+
+ out_pos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
+ out_layer = ureg_DECL_output(ureg, TGSI_SEMANTIC_LAYER, 0);
+
+ in_pos = ureg_DECL_input(ureg, TGSI_SEMANTIC_POSITION, 0, 0, 1);
+
+ /* Integer immediate 0, used as the operand of EMIT below. */
+ imm = ureg_DECL_immediate_int(ureg, &zero, 1);
+
+ for (i = 0; i < 3; ++i) {
+ struct ureg_src in_pos_vertex = ureg_src_dimension(in_pos, i);
+
+ /* out_pos = in_pos[i] */
+ ureg_MOV(ureg, out_pos, in_pos_vertex);
+
+ /* out_layer.x = f2i(in_pos[i].z) */
+ ureg_F2I(ureg, ureg_writemask(out_layer, TGSI_WRITEMASK_X),
+ ureg_scalar(in_pos_vertex, TGSI_SWIZZLE_Z));
+
+ ureg_EMIT(ureg, ureg_scalar(imm, TGSI_SWIZZLE_X));
+ }
+
+ ureg_END(ureg);
+
+ return ureg_create_shader_and_destroy(ureg, st->pipe);
+}
+
+/**
+ * Build the fragment shader used for PBO uploads.
+ *
+ * The shader turns the fragment position (and, for layered uploads, the
+ * layer) into a linear element index using the values in constant 0 and
+ * writes the result of a buffer texel fetch at that index to the color
+ * output.
+ *
+ * \return the driver shader handle, or NULL if the ureg program could not
+ *         be created
+ */
+static void *
+create_pbo_upload_fs(struct st_context *st)
+{
+ struct pipe_context *pipe = st->pipe;
+ struct pipe_screen *screen = pipe->screen;
+ struct ureg_program *ureg;
+ struct ureg_dst out;
+ struct ureg_src sampler;
+ struct ureg_src pos;
+ struct ureg_src layer;
+ struct ureg_src const0;
+ struct ureg_dst temp0;
+
+ ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+ if (!ureg)
+ return NULL;
+
+ out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
+ sampler = ureg_DECL_sampler(ureg, 0);
+ /* The position is declared as a system value or as a regular input,
+ * depending on what the driver reports. */
+ if (screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL)) {
+ pos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
+ } else {
+ pos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,
+ TGSI_INTERPOLATE_LINEAR);
+ }
+ if (st->pbo_upload.upload_layers) {
+ layer = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_LAYER, 0,
+ TGSI_INTERPOLATE_CONSTANT);
+ }
+ const0 = ureg_DECL_constant(ureg, 0);
+ temp0 = ureg_DECL_temporary(ureg);
+
+ /* Note: const0 = [ -xoffset + skip_pixels, -yoffset, stride,
+ * stride * image_height ] */
+
+ /* temp0.xy = f2i(pos.xy) */
+ ureg_F2I(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY),
+ ureg_swizzle(pos,
+ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
+ TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));
+
+ /* temp0.xy = temp0.xy + const0.xy */
+ ureg_UADD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY),
+ ureg_swizzle(ureg_src(temp0),
+ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
+ TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y),
+ ureg_swizzle(const0,
+ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
+ TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));
+
+ /* temp0.x = const0.z * temp0.y + temp0.x */
+ ureg_UMAD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_X),
+ ureg_scalar(const0, TGSI_SWIZZLE_Z),
+ ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_Y),
+ ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X));
+
+ if (st->pbo_upload.upload_layers) {
+ /* temp0.x = const0.w * layer + temp0.x */
+ ureg_UMAD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_X),
+ ureg_scalar(const0, TGSI_SWIZZLE_W),
+ ureg_scalar(layer, TGSI_SWIZZLE_X),
+ ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X));
+ }
+
+ /* out = txf(sampler, temp0.x) */
+ ureg_TXF(ureg, out, TGSI_TEXTURE_BUFFER,
+ ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X),
+ sampler);
+
+ ureg_release_temporary(ureg, temp0);
+
+ ureg_END(ureg);
+
+ return ureg_create_shader_and_destroy(ureg, pipe);
+}
+
+/**
+ * Upload data from a buffer into \p surface by drawing a quad whose
+ * fragment shader fetches each element from a buffer sampler view.
+ *
+ * All cso state touched here is saved before being changed and restored
+ * before returning.
+ *
+ * \param surface        render target; its layer range gives the depth
+ * \param xoffset        destination X offset, in surface coordinates
+ * \param yoffset        destination Y offset, in surface coordinates
+ * \param upload_width   width of the destination region
+ * \param upload_height  height of the destination region
+ * \param buffer         source buffer
+ * \param src_format     format of the buffer sampler view
+ * \param buf_offset     start of the data, in units of bytes_per_pixel
+ * \param bytes_per_pixel  size of one source element in bytes
+ * \param stride         distance between source rows, in elements
+ * \param image_height   number of source rows per layer
+ * \return true if the draw was issued, false otherwise
+ */
+static bool
+try_pbo_upload_common(struct gl_context *ctx,
+ struct pipe_surface *surface,
+ int xoffset, int yoffset,
+ unsigned upload_width, unsigned upload_height,
+ struct pipe_resource *buffer,
+ enum pipe_format src_format,
+ intptr_t buf_offset,
+ unsigned bytes_per_pixel,
+ unsigned stride,
+ unsigned image_height)
+{
+ struct st_context *st = st_context(ctx);
+ struct pipe_context *pipe = st->pipe;
+ unsigned depth = surface->u.tex.last_layer - surface->u.tex.first_layer + 1;
+ unsigned skip_pixels = 0;
+ bool success = false;
+
+ /* Check alignment. */
+ {
+ /* Byte misalignment of the start relative to the texture buffer offset
+ * alignment. If it is a whole number of elements, the view is moved
+ * back by that many elements and the shader skips them instead (see
+ * constants.xoffset below). */
+ unsigned ofs = (buf_offset * bytes_per_pixel) % ctx->Const.TextureBufferOffsetAlignment;
+ if (ofs != 0) {
+ if (ofs % bytes_per_pixel != 0)
+ return false;
+
+ skip_pixels = ofs / bytes_per_pixel;
+ buf_offset -= skip_pixels;
+ }
+ }
+
+ /* Create the shaders */
+ if (!st->pbo_upload.vs) {
+ st->pbo_upload.vs = create_pbo_upload_vs(st);
+ if (!st->pbo_upload.vs)
+ return false;
+ }
+
+ /* The GS is only needed for multi-layer uploads on the use_gs path. */
+ if (depth != 1 && st->pbo_upload.use_gs && !st->pbo_upload.gs) {
+ st->pbo_upload.gs = create_pbo_upload_gs(st);
+ if (!st->pbo_upload.gs)
+ return false;
+ }
+
+ if (!st->pbo_upload.fs) {
+ st->pbo_upload.fs = create_pbo_upload_fs(st);
+ if (!st->pbo_upload.fs)
+ return false;
+ }
+
+ /* Set up the sampler_view */
+ {
+ unsigned first_element = buf_offset;
+ /* Index of the last element of the last row of the last layer. */
+ unsigned last_element = buf_offset + skip_pixels + upload_width - 1
+ + (upload_height - 1 + (depth - 1) * image_height) * stride;
+ struct pipe_sampler_view templ;
+ struct pipe_sampler_view *sampler_view;
+
+ /* This should be ensured by Mesa before calling our callbacks */
+ assert((last_element + 1) * bytes_per_pixel <= buffer->width0);
+
+ if (last_element - first_element > ctx->Const.MaxTextureBufferSize - 1)
+ return false;
+
+ memset(&templ, 0, sizeof(templ));
+ templ.format = src_format;
+ templ.u.buf.first_element = first_element;
+ templ.u.buf.last_element = last_element;
+ templ.swizzle_r = PIPE_SWIZZLE_RED;
+ templ.swizzle_g = PIPE_SWIZZLE_GREEN;
+ templ.swizzle_b = PIPE_SWIZZLE_BLUE;
+ templ.swizzle_a = PIPE_SWIZZLE_ALPHA;
+
+ sampler_view = pipe->create_sampler_view(pipe, buffer, &templ);
+ if (sampler_view == NULL)
+ return false;
+
+ /* From here on state has been saved, so failures must leave through
+ * the labels at the end of the function instead of returning. */
+ cso_save_fragment_sampler_views(st->cso_context);
+ cso_set_sampler_views(st->cso_context, PIPE_SHADER_FRAGMENT, 1,
+ &sampler_view);
+
+ /* Drop the local reference taken by create_sampler_view(). */
+ pipe_sampler_view_reference(&sampler_view, NULL);
+ }
+
+ /* Upload vertices */
+ {
+ struct pipe_vertex_buffer vbo;
+ struct pipe_vertex_element velem;
+
+ /* Corners of the destination rectangle, mapped from surface
+ * coordinates to the [-1, 1] range. */
+ float x0 = (float) xoffset / surface->width * 2.0f - 1.0f;
+ float y0 = (float) yoffset / surface->height * 2.0f - 1.0f;
+ float x1 = (float) (xoffset + upload_width) / surface->width * 2.0f - 1.0f;
+ float y1 = (float) (yoffset + upload_height) / surface->height * 2.0f - 1.0f;
+
+ float *verts = NULL;
+
+ vbo.user_buffer = NULL;
+ vbo.buffer = NULL;
+ vbo.stride = 2 * sizeof(float);
+
+ u_upload_alloc(st->uploader, 0, 8 * sizeof(float), 4,
+ &vbo.buffer_offset, &vbo.buffer, (void **) &verts);
+ if (!verts)
+ goto fail_vertex_upload;
+
+ /* Four (x, y) pairs in triangle-strip order. */
+ verts[0] = x0;
+ verts[1] = y0;
+ verts[2] = x0;
+ verts[3] = y1;
+ verts[4] = x1;
+ verts[5] = y0;
+ verts[6] = x1;
+ verts[7] = y1;
+
+ u_upload_unmap(st->uploader);
+
+ velem.src_offset = 0;
+ velem.instance_divisor = 0;
+ velem.vertex_buffer_index = cso_get_aux_vertex_buffer_slot(st->cso_context);
+ velem.src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+ cso_save_vertex_elements(st->cso_context);
+ cso_set_vertex_elements(st->cso_context, 1, &velem);
+
+ cso_save_aux_vertex_buffer_slot(st->cso_context);
+ cso_set_vertex_buffers(st->cso_context, velem.vertex_buffer_index,
+ 1, &vbo);
+
+ pipe_resource_reference(&vbo.buffer, NULL);
+ }
+
+ /* Upload constants */
+ {
+ struct pipe_constant_buffer cb;
+
+ /* Layout must match what create_pbo_upload_fs() reads from const0. */
+ struct {
+ int32_t xoffset;
+ int32_t yoffset;
+ int32_t stride;
+ int32_t image_size;
+ } constants;
+
+ constants.xoffset = -xoffset + skip_pixels;
+ constants.yoffset = -yoffset;
+ constants.stride = stride;
+ constants.image_size = stride * image_height;
+
+ /* Use the constant buffer uploader when there is one, otherwise hand
+ * the data over as a user buffer. */
+ if (st->constbuf_uploader) {
+ cb.buffer = NULL;
+ cb.user_buffer = NULL;
+ u_upload_data(st->constbuf_uploader, 0, sizeof(constants),
+ st->ctx->Const.UniformBufferOffsetAlignment,
+ &constants, &cb.buffer_offset, &cb.buffer);
+ if (!cb.buffer)
+ goto fail_constant_upload;
+
+ u_upload_unmap(st->constbuf_uploader);
+ } else {
+ cb.buffer = NULL;
+ cb.user_buffer = &constants;
+ cb.buffer_offset = 0;
+ }
+ cb.buffer_size = sizeof(constants);
+
+ cso_save_constant_buffer_slot0(st->cso_context, PIPE_SHADER_FRAGMENT);
+ cso_set_constant_buffer(st->cso_context, PIPE_SHADER_FRAGMENT, 0, &cb);
+
+ pipe_resource_reference(&cb.buffer, NULL);
+ }
+
+ /* Framebuffer_state */
+ {
+ struct pipe_framebuffer_state fb;
+ memset(&fb, 0, sizeof(fb));
+ fb.width = surface->width;
+ fb.height = surface->height;
+ fb.nr_cbufs = 1;
+ pipe_surface_reference(&fb.cbufs[0], surface);
+
+ cso_save_framebuffer(st->cso_context);
+ cso_set_framebuffer(st->cso_context, &fb);
+
+ pipe_surface_reference(&fb.cbufs[0], NULL);
+ }
+
+ /* Viewport state */
+ {
+ /* Viewport covering the whole surface. */
+ struct pipe_viewport_state vp;
+ vp.scale[0] = 0.5f * surface->width;
+ vp.scale[1] = 0.5f * surface->height;
+ vp.scale[2] = 1.0f;
+ vp.translate[0] = 0.5f * surface->width;
+ vp.translate[1] = 0.5f * surface->height;
+ vp.translate[2] = 0.0f;
+
+ cso_save_viewport(st->cso_context);
+ cso_set_viewport(st->cso_context, &vp);
+ }
+
+ /* Blend state */
+ cso_save_blend(st->cso_context);
+ cso_set_blend(st->cso_context, &st->pbo_upload.blend);
+
+ /* Rasterizer state */
+ cso_save_rasterizer(st->cso_context);
+ cso_set_rasterizer(st->cso_context, &st->pbo_upload.raster);
+
+ /* Set up the shaders */
+ cso_save_vertex_shader(st->cso_context);
+ cso_set_vertex_shader_handle(st->cso_context, st->pbo_upload.vs);
+
+ cso_save_geometry_shader(st->cso_context);
+ cso_set_geometry_shader_handle(st->cso_context,
+ depth != 1 ? st->pbo_upload.gs : NULL);
+
+ cso_save_tessctrl_shader(st->cso_context);
+ cso_set_tessctrl_shader_handle(st->cso_context, NULL);
+
+ cso_save_tesseval_shader(st->cso_context);
+ cso_set_tesseval_shader_handle(st->cso_context, NULL);
+
+ cso_save_fragment_shader(st->cso_context);
+ cso_set_fragment_shader_handle(st->cso_context, st->pbo_upload.fs);
+
+ /* Disable stream output */
+ cso_save_stream_outputs(st->cso_context);
+ cso_set_stream_outputs(st->cso_context, 0, NULL, 0);
+
+ /* One instance per layer; the shaders turn the instance ID into the
+ * layer index. */
+ if (depth == 1) {
+ cso_draw_arrays(st->cso_context, PIPE_PRIM_TRIANGLE_STRIP, 0, 4);
+ } else {
+ cso_draw_arrays_instanced(st->cso_context, PIPE_PRIM_TRIANGLE_STRIP,
+ 0, 4, 0, depth);
+ }
+
+ success = true;
+
+ /* Restore everything saved above. The failure labels are placed so that
+ * each one restores only the state that had been saved when the
+ * corresponding goto was taken. */
+ cso_restore_framebuffer(st->cso_context);
+ cso_restore_viewport(st->cso_context);
+ cso_restore_blend(st->cso_context);
+ cso_restore_rasterizer(st->cso_context);
+ cso_restore_vertex_shader(st->cso_context);
+ cso_restore_geometry_shader(st->cso_context);
+ cso_restore_tessctrl_shader(st->cso_context);
+ cso_restore_tesseval_shader(st->cso_context);
+ cso_restore_fragment_shader(st->cso_context);
+ cso_restore_stream_outputs(st->cso_context);
+ cso_restore_constant_buffer_slot0(st->cso_context, PIPE_SHADER_FRAGMENT);
+fail_constant_upload:
+ cso_restore_vertex_elements(st->cso_context);
+ cso_restore_aux_vertex_buffer_slot(st->cso_context);
+fail_vertex_upload:
+ cso_restore_fragment_sampler_views(st->cso_context);
+
+ return success;
+}
+
+/**
+ * Try to perform a glTex(Sub)Image upload from a pixel unpack buffer using
+ * try_pbo_upload_common().
+ *
+ * Picks a source format for the given GL format/type, validates it against
+ * the driver, derives the source offset and stride (in texels) from the
+ * pixel store state, creates a surface for the destination layers and
+ * issues the upload.
+ *
+ * \param dst_format  format to use for the destination surface
+ * \param pixels      offset of the data within the unpack buffer
+ * \param unpack      pixel store state; unpack->BufferObj is the source
+ * \return true if the upload was done, false if the caller must use
+ *         another path
+ */
+static bool
+try_pbo_upload(struct gl_context *ctx, GLuint dims,
+ struct gl_texture_image *texImage,
+ GLenum format, GLenum type,
+ enum pipe_format dst_format,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLint width, GLint height, GLint depth,
+ const void *pixels,
+ const struct gl_pixelstore_attrib *unpack)
+{
+ struct st_context *st = st_context(ctx);
+ struct st_texture_image *stImage = st_texture_image(texImage);
+ struct st_texture_object *stObj = st_texture_object(texImage->TexObject);
+ struct pipe_resource *texture = stImage->pt;
+ struct pipe_context *pipe = st->pipe;
+ struct pipe_screen *screen = pipe->screen;
+ struct pipe_surface *surface = NULL;
+ enum pipe_format src_format;
+ const struct util_format_description *desc;
+ GLenum gl_target = texImage->TexObject->Target;
+ intptr_t buf_offset;
+ unsigned bytes_per_pixel;
+ unsigned stride, image_height;
+ bool success;
+
+ if (!st->pbo_upload.enabled)
+ return false;
+
+ /* From now on, we need the gallium representation of dimensions. */
+ if (gl_target == GL_TEXTURE_1D_ARRAY) {
+ /* For 1D arrays the GL height/yoffset are the layer count/offset. */
+ depth = height;
+ height = 1;
+ zoffset = yoffset;
+ yoffset = 0;
+ image_height = 1;
+ } else {
+ image_height = unpack->ImageHeight > 0 ? unpack->ImageHeight : height;
+ }
+
+ if (depth != 1 && !st->pbo_upload.upload_layers)
+ return false;
+
+ /* Choose the source format. Initially, we do so without checking driver
+ * support at all because of the remapping we later perform and because
+ * at least the Radeon driver actually supports some formats for texture
+ * buffers which it doesn't support for regular textures. */
+ src_format = st_choose_matching_format(st, 0, format, type, unpack->SwapBytes);
+ if (!src_format) {
+ return false;
+ }
+
+ src_format = util_format_linear(src_format);
+ desc = util_format_description(src_format);
+
+ /* Only plain-layout, RGB-colorspace source formats are handled. */
+ if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
+ return false;
+
+ if (desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB)
+ return false;
+
+ if (st->pbo_upload.rgba_only) {
+ enum pipe_format orig_dst_format = dst_format;
+
+ if (!reinterpret_formats(&src_format, &dst_format)) {
+ return false;
+ }
+
+ /* A changed destination format has to be checked for render target
+ * support again. */
+ if (dst_format != orig_dst_format &&
+ !screen->is_format_supported(screen, dst_format, PIPE_TEXTURE_2D, 0,
+ PIPE_BIND_RENDER_TARGET)) {
+ return false;
+ }
+ }
+
+ if (!src_format ||
+ !screen->is_format_supported(screen, src_format, PIPE_BUFFER, 0,
+ PIPE_BIND_SAMPLER_VIEW)) {
+ return false;
+ }
+
+ /* Check if the offset satisfies the alignment requirements */
+ buf_offset = (intptr_t) pixels;
+ /* NOTE(review): desc was looked up before reinterpret_formats() may have
+ * replaced src_format; this presumably relies on the replacement having
+ * the same block size — confirm. */
+ bytes_per_pixel = desc->block.bits / 8;
+
+ if (buf_offset % bytes_per_pixel) {
+ return false;
+ }
+
+ /* Convert to texels */
+ buf_offset = buf_offset / bytes_per_pixel;
+
+ /* Compute the stride, taking unpack->Alignment into account */
+ {
+ unsigned pixels_per_row = unpack->RowLength > 0 ?
+ unpack->RowLength : width;
+ unsigned bytes_per_row = pixels_per_row * bytes_per_pixel;
+ unsigned remainder = bytes_per_row % unpack->Alignment;
+ unsigned offset_rows;
+
+ /* Round the row size up to a multiple of the unpack alignment. */
+ if (remainder > 0)
+ bytes_per_row += (unpack->Alignment - remainder);
+
+ /* The padded row must still be a whole number of texels. */
+ if (bytes_per_row % bytes_per_pixel) {
+ return false;
+ }
+
+ stride = bytes_per_row / bytes_per_pixel;
+
+ offset_rows = unpack->SkipRows;
+ if (dims == 3)
+ offset_rows += image_height * unpack->SkipImages;
+
+ buf_offset += unpack->SkipPixels + stride * offset_rows;
+ }
+
+ /* Set up the surface */
+ {
+ /* Level 0 is used when the image has its own resource rather than
+ * sharing the texture object's. */
+ unsigned level = stObj->pt != stImage->pt ? 0 : texImage->TexObject->MinLevel + texImage->Level;
+ unsigned max_layer = util_max_layer(texture, level);
+
+ zoffset += texImage->Face + texImage->TexObject->MinLayer;
+
+ struct pipe_surface templ;
+ memset(&templ, 0, sizeof(templ));
+ templ.format = dst_format;
+ templ.u.tex.level = level;
+ /* Clamp the layer range to what the resource has at this level. */
+ templ.u.tex.first_layer = MIN2(zoffset, max_layer);
+ templ.u.tex.last_layer = MIN2(zoffset + depth - 1, max_layer);
+
+ surface = pipe->create_surface(pipe, texture, &templ);
+ if (!surface)
+ return false;
+ }
+
+ success = try_pbo_upload_common(ctx, surface,
+ xoffset, yoffset, width, height,
+ st_buffer_object(unpack->BufferObj)->buffer,
+ src_format,
+ buf_offset,
+ bytes_per_pixel, stride, image_height);
+
+ pipe_surface_reference(&surface, NULL);
+
+ return success;
+}
static void
st_TexSubImage(struct gl_context *ctx, GLuint dims,
@@ -735,21 +1732,15 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims,
goto fallback;
}
- /* See if the texture format already matches the format and type,
- * in which case the memcpy-based fast path will likely be used and
- * we don't have to blit. */
- if (_mesa_format_matches_format_and_type(texImage->TexFormat, format,
- type, unpack->SwapBytes, NULL)) {
- goto fallback;
- }
+ /* See if the destination format is supported. */
if (format == GL_DEPTH_COMPONENT || format == GL_DEPTH_STENCIL)
bind = PIPE_BIND_DEPTH_STENCIL;
else
bind = PIPE_BIND_RENDER_TARGET;
- /* See if the destination format is supported.
- * For luminance and intensity, only the red channel is stored there. */
+ /* For luminance and intensity, only the red channel is stored
+ * in the destination. */
dst_format = util_format_linear(dst->format);
dst_format = util_format_luminance_to_red(dst_format);
dst_format = util_format_intensity_to_red(dst_format);
@@ -760,6 +1751,21 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims,
goto fallback;
}
+ if (_mesa_is_bufferobj(unpack->BufferObj)) {
+ if (try_pbo_upload(ctx, dims, texImage, format, type, dst_format,
+ xoffset, yoffset, zoffset,
+ width, height, depth, pixels, unpack))
+ return;
+ }
+
+ /* See if the texture format already matches the format and type,
+ * in which case the memcpy-based fast path will likely be used and
+ * we don't have to blit. */
+ if (_mesa_format_matches_format_and_type(texImage->TexFormat, format,
+ type, unpack->SwapBytes, NULL)) {
+ goto fallback;
+ }
+
/* Choose the source format. */
src_format = st_choose_matching_format(st, PIPE_BIND_SAMPLER_VIEW,
format, type, unpack->SwapBytes);
@@ -849,18 +1855,18 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims,
/* 1D array textures.
* We need to convert gallium coords to GL coords.
*/
- GLvoid *src = _mesa_image_address3d(unpack, pixels,
+ GLvoid *src = _mesa_image_address2d(unpack, pixels,
width, depth, format,
- type, 0, slice, 0);
+ type, slice, 0);
memcpy(map, src, bytesPerRow);
}
else {
ubyte *slice_map = map;
for (row = 0; row < (unsigned) height; row++) {
- GLvoid *src = _mesa_image_address3d(unpack, pixels,
- width, height, format,
- type, slice, row, 0);
+ GLvoid *src = _mesa_image_address(dims, unpack, pixels,
+ width, height, format,
+ type, slice, row, 0);
memcpy(slice_map, src, bytesPerRow);
slice_map += transfer->stride;
}
@@ -928,12 +1934,165 @@ st_TexImage(struct gl_context * ctx, GLuint dims,
+/**
+ * Driver hook for glCompressedTexSubImage*.
+ *
+ * When the data comes from a pixel unpack buffer and the driver can
+ * reinterpret the compressed texture's blocks as an uncompressed integer
+ * format, the blocks are uploaded with try_pbo_upload_common(), one
+ * "texel" per compressed block. In every other case, and whenever the
+ * upload fails, the call is forwarded to
+ * _mesa_store_compressed_texsubimage() with the caller's original
+ * arguments.
+ */
static void
+st_CompressedTexSubImage(struct gl_context *ctx, GLuint dims,
+ struct gl_texture_image *texImage,
+ GLint x, GLint y, GLint z,
+ GLsizei w, GLsizei h, GLsizei d,
+ GLenum format, GLsizei imageSize, const GLvoid *data)
+{
+ struct st_context *st = st_context(ctx);
+ struct st_texture_image *stImage = st_texture_image(texImage);
+ struct st_texture_object *stObj = st_texture_object(texImage->TexObject);
+ struct pipe_resource *texture = stImage->pt;
+ struct pipe_context *pipe = st->pipe;
+ struct pipe_screen *screen = pipe->screen;
+ struct pipe_resource *dst = stImage->pt;
+ struct pipe_surface *surface = NULL;
+ struct compressed_pixelstore store;
+ enum pipe_format copy_format;
+ unsigned bytes_per_block;
+ unsigned bw, bh;
+ intptr_t buf_offset;
+ bool success = false;
+
+ /* Check basic pre-conditions for PBO upload */
+ if (!st->prefer_blit_based_texture_transfer) {
+ goto fallback;
+ }
+
+ if (!_mesa_is_bufferobj(ctx->Unpack.BufferObj))
+ goto fallback;
+
+ if ((_mesa_is_format_etc2(texImage->TexFormat) && !st->has_etc2) ||
+ (texImage->TexFormat == MESA_FORMAT_ETC1_RGB8 && !st->has_etc1)) {
+ /* ETC isn't supported and is represented by uncompressed formats. */
+ goto fallback;
+ }
+
+ if (!dst) {
+ goto fallback;
+ }
+
+ if (!st->pbo_upload.enabled ||
+ !screen->get_param(screen, PIPE_CAP_SURFACE_REINTERPRET_BLOCKS)) {
+ goto fallback;
+ }
+
+ /* Choose the pipe format for the upload. */
+ bytes_per_block = util_format_get_blocksize(dst->format);
+ bw = util_format_get_blockwidth(dst->format);
+ bh = util_format_get_blockheight(dst->format);
+
+ /* Pick an integer format whose texel is as large as one block. */
+ switch (bytes_per_block) {
+ case 8:
+ copy_format = PIPE_FORMAT_R16G16B16A16_UINT;
+ break;
+ case 16:
+ copy_format = PIPE_FORMAT_R32G32B32A32_UINT;
+ break;
+ default:
+ goto fallback;
+ }
+
+ if (!screen->is_format_supported(screen, copy_format, PIPE_BUFFER, 0,
+ PIPE_BIND_SAMPLER_VIEW)) {
+ goto fallback;
+ }
+
+ if (!screen->is_format_supported(screen, copy_format, dst->target,
+ dst->nr_samples, PIPE_BIND_RENDER_TARGET)) {
+ goto fallback;
+ }
+
+ /* Interpret the pixelstore settings. */
+ _mesa_compute_compressed_pixelstore(dims, texImage->TexFormat, w, h, d,
+ &ctx->Unpack, &store);
+ assert(store.CopyBytesPerRow % bytes_per_block == 0);
+ assert(store.SkipBytes % bytes_per_block == 0);
+
+ /* Compute the offset into the buffer */
+ buf_offset = (intptr_t)data + store.SkipBytes;
+
+ if (buf_offset % bytes_per_block) {
+ goto fallback;
+ }
+
+ /* Convert to units of blocks. */
+ buf_offset = buf_offset / bytes_per_block;
+
+ /* Set up the surface. */
+ {
+ struct pipe_surface templ;
+ unsigned level = stObj->pt != stImage->pt ? 0 : texImage->TexObject->MinLevel + texImage->Level;
+ unsigned max_layer = util_max_layer(texture, level);
+ /* Keep the resource layer in its own variable: z must still hold the
+ * caller's value if we end up in the fallback path below. */
+ GLint layer = z + texImage->Face + texImage->TexObject->MinLayer;
+
+ memset(&templ, 0, sizeof(templ));
+ templ.format = copy_format;
+ templ.u.tex.level = level;
+ templ.u.tex.first_layer = MIN2(layer, max_layer);
+ templ.u.tex.last_layer = MIN2(layer + d - 1, max_layer);
+
+ surface = pipe->create_surface(pipe, texture, &templ);
+ if (!surface)
+ goto fallback;
+ }
+
+ /* All sizes and offsets are passed in units of blocks. */
+ success = try_pbo_upload_common(ctx, surface,
+ x / bw, y / bh,
+ store.CopyBytesPerRow / bytes_per_block,
+ store.CopyRowsPerSlice,
+ st_buffer_object(ctx->Unpack.BufferObj)->buffer,
+ copy_format,
+ buf_offset,
+ bytes_per_block,
+ store.TotalBytesPerRow / bytes_per_block,
+ store.TotalRowsPerSlice);
+
+ pipe_surface_reference(&surface, NULL);
+
+ if (success)
+ return;
+
+fallback:
+ _mesa_store_compressed_texsubimage(ctx, dims, texImage,
+ x, y, z, w, h, d,
+ format, imageSize, data);
+}
+
+static void
st_CompressedTexImage(struct gl_context *ctx, GLuint dims,
struct gl_texture_image *texImage,
GLsizei imageSize, const GLvoid *data)
{
prep_teximage(ctx, texImage, GL_NONE, GL_NONE);
- _mesa_store_compressed_teximage(ctx, dims, texImage, imageSize, data);
+
+ /* only 2D and 3D compressed images are supported at this time */
+ if (dims == 1) {
+ _mesa_problem(ctx, "Unexpected glCompressedTexImage1D call");
+ return;
+ }
+
+ /* This is pretty simple, because unlike the general texstore path we don't
+ * have to worry about the usual image unpacking or image transfer
+ * operations.
+ */
+ assert(texImage);
+ assert(texImage->Width > 0);
+ assert(texImage->Height > 0);
+ assert(texImage->Depth > 0);
+
+ /* allocate storage for texture data */
+ if (!st_AllocTextureImageBuffer(ctx, texImage)) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexImage%uD", dims);
+ return;
+ }
+
+ st_CompressedTexSubImage(ctx, dims, texImage,
+ 0, 0, 0,
+ texImage->Width, texImage->Height, texImage->Depth,
+ texImage->TexFormat,
+ imageSize, data);
}
@@ -1958,7 +3117,7 @@ st_init_texture_functions(struct dd_function_table *functions)
functions->QuerySamplesForFormat = st_QuerySamplesForFormat;
functions->TexImage = st_TexImage;
functions->TexSubImage = st_TexSubImage;
- functions->CompressedTexSubImage = _mesa_store_compressed_texsubimage;
+ functions->CompressedTexSubImage = st_CompressedTexSubImage;
functions->CopyTexSubImage = st_CopyTexSubImage;
functions->GenerateMipmap = st_generate_mipmap;
diff --git a/src/mesa/state_tracker/st_cb_texture.h b/src/mesa/state_tracker/st_cb_texture.h
index 1b685298c5f..55c86c401e2 100644
--- a/src/mesa/state_tracker/st_cb_texture.h
+++ b/src/mesa/state_tracker/st_cb_texture.h
@@ -53,5 +53,10 @@ st_finalize_texture(struct gl_context *ctx,
extern void
st_init_texture_functions(struct dd_function_table *functions);
+extern void
+st_init_pbo_upload(struct st_context *st);
+
+extern void
+st_destroy_pbo_upload(struct st_context *st);
#endif /* ST_CB_TEXTURE_H */
diff --git a/src/mesa/state_tracker/st_cb_texturebarrier.c b/src/mesa/state_tracker/st_cb_texturebarrier.c
index dd4dde74c86..2de150ba13a 100644
--- a/src/mesa/state_tracker/st_cb_texturebarrier.c
+++ b/src/mesa/state_tracker/st_cb_texturebarrier.c
@@ -65,6 +65,13 @@ st_MemoryBarrier(struct gl_context *ctx, GLbitfield barriers)
if (barriers & GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT)
flags |= PIPE_BARRIER_MAPPED_BUFFER;
+ if (barriers & GL_ATOMIC_COUNTER_BARRIER_BIT)
+ flags |= PIPE_BARRIER_SHADER_BUFFER;
+ if (barriers & GL_SHADER_STORAGE_BARRIER_BIT)
+ flags |= PIPE_BARRIER_SHADER_BUFFER;
+
+ if (barriers & GL_QUERY_BUFFER_BARRIER_BIT)
+ flags |= PIPE_BARRIER_QUERY_BUFFER;
if (flags && pipe->memory_barrier)
pipe->memory_barrier(pipe, flags);
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
index ce1e97aacb5..446ebfb563f 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -97,6 +97,30 @@ static void st_Enable(struct gl_context * ctx, GLenum cap, GLboolean state)
/**
+ * Called via ctx->Driver.QueryMemoryInfo()
+ */
+static void
+st_query_memory_info(struct gl_context *ctx, struct gl_memory_info *out)
+{
+ struct pipe_screen *screen = st_context(ctx)->pipe->screen;
+ struct pipe_memory_info info;
+
+ /* The hook is expected to exist; builds without assertions return
+ * without writing to *out when it does not. */
+ assert(screen->query_memory_info);
+ if (!screen->query_memory_info)
+ return;
+
+ screen->query_memory_info(screen, &info);
+
+ /* Copy the driver's answer field by field into the GL structure. */
+ out->total_device_memory = info.total_device_memory;
+ out->avail_device_memory = info.avail_device_memory;
+ out->total_staging_memory = info.total_staging_memory;
+ out->avail_staging_memory = info.avail_staging_memory;
+ out->device_memory_evicted = info.device_memory_evicted;
+ out->nr_device_memory_evictions = info.nr_device_memory_evictions;
+}
+
+
+/**
* Called via ctx->Driver.UpdateState()
*/
void st_invalidate_state(struct gl_context * ctx, GLbitfield new_state)
@@ -136,6 +160,7 @@ st_destroy_context_priv(struct st_context *st)
st_destroy_drawpix(st);
st_destroy_drawtex(st);
st_destroy_perfmon(st);
+ st_destroy_pbo_upload(st);
for (shader = 0; shader < ARRAY_SIZE(st->state.sampler_views); shader++) {
for (i = 0; i < ARRAY_SIZE(st->state.sampler_views[0]); i++) {
@@ -209,6 +234,7 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe,
st_init_bitmap(st);
st_init_clear(st);
st_init_draw( st );
+ st_init_pbo_upload(st);
/* Choose texture target for glDrawPixels, glBitmap, renderbuffers */
if (pipe->screen->get_param(pipe->screen, PIPE_CAP_NPOT_TEXTURES))
@@ -350,6 +376,8 @@ static void st_init_driver_flags(struct gl_driver_flags *f)
f->NewUniformBuffer = ST_NEW_UNIFORM_BUFFER;
f->NewDefaultTessLevels = ST_NEW_TESS_STATE;
f->NewTextureBuffer = ST_NEW_SAMPLER_VIEWS;
+ f->NewAtomicBuffer = ST_NEW_ATOMIC_BUFFER;
+ f->NewShaderStorageBuffer = ST_NEW_STORAGE_BUFFER;
}
struct st_context *st_create_context(gl_api api, struct pipe_context *pipe,
@@ -487,4 +515,5 @@ void st_init_driver_functions(struct pipe_screen *screen,
functions->Enable = st_Enable;
functions->UpdateState = st_invalidate_state;
+ functions->QueryMemoryInfo = st_query_memory_info;
}
diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h
index 9db5f11beb5..57076ad0d18 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -62,6 +62,8 @@ struct u_upload_mgr;
#define ST_NEW_TESSCTRL_PROGRAM (1 << 9)
#define ST_NEW_TESSEVAL_PROGRAM (1 << 10)
#define ST_NEW_SAMPLER_VIEWS (1 << 11)
+#define ST_NEW_ATOMIC_BUFFER (1 << 12)
+#define ST_NEW_STORAGE_BUFFER (1 << 13)
struct st_state_flags {
@@ -202,6 +204,19 @@ struct st_context
void *gs_layered;
} clear;
+ /* For gl(Compressed)Tex(Sub)Image */
+ struct {
+ struct pipe_rasterizer_state raster;
+ struct pipe_blend_state blend;
+ void *vs;
+ void *gs;
+ void *fs;
+ bool enabled;
+ bool rgba_only;
+ bool upload_layers;
+ bool use_gs;
+ } pbo_upload;
+
/** used for anything using util_draw_vertex_buffer */
struct pipe_vertex_element velems_util_draw[3];
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index 53ea6767395..f25bd742f79 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -218,6 +218,11 @@ void st_init_limits(struct pipe_screen *screen,
c->MaxUniformBlockSize / 4 *
pc->MaxUniformBlocks);
+ pc->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
+ pc->MaxAtomicBuffers = screen->get_shader_param(
+ screen, sh, PIPE_SHADER_CAP_MAX_SHADER_BUFFERS) / 2;
+ pc->MaxShaderStorageBlocks = pc->MaxAtomicBuffers;
+
/* Gallium doesn't really care about local vs. env parameters so use the
* same limits.
*/
@@ -333,6 +338,31 @@ void st_init_limits(struct pipe_screen *screen,
screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL);
c->GLSLFrontFacingIsSysVal =
screen->get_param(screen, PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL);
+
+ c->MaxAtomicBufferBindings =
+ c->Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers;
+ c->MaxCombinedAtomicBuffers =
+ c->Program[MESA_SHADER_VERTEX].MaxAtomicBuffers +
+ c->Program[MESA_SHADER_TESS_CTRL].MaxAtomicBuffers +
+ c->Program[MESA_SHADER_TESS_EVAL].MaxAtomicBuffers +
+ c->Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers +
+ c->Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers;
+ assert(c->MaxCombinedAtomicBuffers <= MAX_COMBINED_ATOMIC_BUFFERS);
+
+ if (c->MaxCombinedAtomicBuffers > 0)
+ extensions->ARB_shader_atomic_counters = GL_TRUE;
+
+ c->MaxCombinedShaderOutputResources = c->MaxDrawBuffers;
+ c->ShaderStorageBufferOffsetAlignment =
+ screen->get_param(screen, PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT);
+ if (c->ShaderStorageBufferOffsetAlignment) {
+ c->MaxCombinedShaderStorageBlocks = c->MaxShaderStorageBufferBindings =
+ c->MaxCombinedAtomicBuffers;
+ c->MaxCombinedShaderOutputResources +=
+ c->MaxCombinedShaderStorageBlocks;
+ c->MaxShaderStorageBlockSize = 1 << 27;
+ extensions->ARB_shader_storage_buffer_object = GL_TRUE;
+ }
}
@@ -465,6 +495,7 @@ void st_init_extensions(struct pipe_screen *screen,
{ o(ARB_occlusion_query2), PIPE_CAP_OCCLUSION_QUERY },
{ o(ARB_pipeline_statistics_query), PIPE_CAP_QUERY_PIPELINE_STATISTICS },
{ o(ARB_point_sprite), PIPE_CAP_POINT_SPRITE },
+ { o(ARB_query_buffer_object), PIPE_CAP_QUERY_BUFFER_OBJECT },
{ o(ARB_sample_shading), PIPE_CAP_SAMPLE_SHADING },
{ o(ARB_seamless_cube_map), PIPE_CAP_SEAMLESS_CUBE_MAP },
{ o(ARB_shader_draw_parameters), PIPE_CAP_DRAW_PARAMETERS },
@@ -496,12 +527,14 @@ void st_init_extensions(struct pipe_screen *screen,
{ o(EXT_transform_feedback), PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS },
{ o(AMD_pinned_memory), PIPE_CAP_RESOURCE_FROM_USER_MEMORY },
+ { o(ATI_meminfo), PIPE_CAP_QUERY_MEMORY_INFO },
{ o(AMD_seamless_cubemap_per_texture), PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE },
{ o(ATI_separate_stencil), PIPE_CAP_TWO_SIDED_STENCIL },
{ o(ATI_texture_mirror_once), PIPE_CAP_TEXTURE_MIRROR_CLAMP },
{ o(NV_conditional_render), PIPE_CAP_CONDITIONAL_RENDER },
{ o(NV_primitive_restart), PIPE_CAP_PRIMITIVE_RESTART },
{ o(NV_texture_barrier), PIPE_CAP_TEXTURE_BARRIER },
+ { o(NVX_gpu_memory_info), PIPE_CAP_QUERY_MEMORY_INFO },
/* GL_NV_point_sprite is not supported by gallium because we don't
* support the GL_POINT_SPRITE_R_MODE_NV option. */
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index cf91d39ff92..b8182de0be8 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -52,7 +52,6 @@
#include "st_mesa_to_tgsi.h"
-#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX
#define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) | \
(1 << PROGRAM_CONSTANT) | \
(1 << PROGRAM_UNIFORM))
@@ -267,6 +266,9 @@ public:
unsigned tex_offset_num_offset;
int dead_mask; /**< Used in dead code elimination */
+ st_src_reg buffer; /**< buffer register */
+ unsigned buffer_access; /**< buffer access type */
+
class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
const struct tgsi_opcode_info *info;
};
@@ -391,6 +393,7 @@ public:
int samplers_used;
glsl_base_type sampler_types[PIPE_MAX_SAMPLERS];
int sampler_targets[PIPE_MAX_SAMPLERS]; /**< One of TGSI_TEXTURE_* */
+ int buffers_used;
bool indirect_addr_consts;
int wpos_transform_const;
@@ -444,6 +447,10 @@ public:
virtual void visit(ir_barrier *);
/*@}*/
+ void visit_atomic_counter_intrinsic(ir_call *);
+ void visit_ssbo_intrinsic(ir_call *);
+ void visit_membar_intrinsic(ir_call *);
+
st_src_reg result;
/** List of variable_storage */
@@ -557,6 +564,28 @@ swizzle_for_size(int size)
return size_swizzles[size - 1];
}
+/**
+ * Report whether \p opcode is one of the TGSI opcodes listed below:
+ * RESQ, LOAD, or one of the ATOM* operations.
+ *
+ * TGSI_OPCODE_STORE is not part of this set.
+ */
+static bool
+is_resource_instruction(unsigned opcode)
+{
+   static const unsigned resource_opcodes[] = {
+      TGSI_OPCODE_RESQ,
+      TGSI_OPCODE_LOAD,
+      TGSI_OPCODE_ATOMUADD,
+      TGSI_OPCODE_ATOMXCHG,
+      TGSI_OPCODE_ATOMCAS,
+      TGSI_OPCODE_ATOMAND,
+      TGSI_OPCODE_ATOMOR,
+      TGSI_OPCODE_ATOMXOR,
+      TGSI_OPCODE_ATOMUMIN,
+      TGSI_OPCODE_ATOMUMAX,
+      TGSI_OPCODE_ATOMIMIN,
+      TGSI_OPCODE_ATOMIMAX,
+   };
+
+   for (unsigned k = 0; k < ARRAY_SIZE(resource_opcodes); k++) {
+      if (resource_opcodes[k] == opcode)
+         return true;
+   }
+
+   return false;
+}
+
static unsigned
num_inst_dst_regs(const glsl_to_tgsi_instruction *op)
{
@@ -566,7 +595,8 @@ num_inst_dst_regs(const glsl_to_tgsi_instruction *op)
static unsigned
num_inst_src_regs(const glsl_to_tgsi_instruction *op)
{
- return op->info->is_tex ? op->info->num_src - 1 : op->info->num_src;
+ return op->info->is_tex || is_resource_instruction(op->op) ?
+ op->info->num_src - 1 : op->info->num_src;
}
glsl_to_tgsi_instruction *
@@ -661,8 +691,6 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
}
}
- this->instructions.push_tail(inst);
-
/*
* This section contains the double processing.
* GLSL just represents doubles as single channel values,
@@ -698,7 +726,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
int initial_src_swz[4], initial_src_idx[4];
int initial_dst_idx[2], initial_dst_writemask[2];
/* select the writemask for dst0 or dst1 */
- unsigned writemask = inst->dst[0].file == PROGRAM_UNDEFINED ? inst->dst[1].writemask : inst->dst[0].writemask;
+ unsigned writemask = inst->dst[1].file == PROGRAM_UNDEFINED ? inst->dst[0].writemask : inst->dst[1].writemask;
/* copy out the writemask, index and swizzles for all src/dsts. */
for (j = 0; j < 2; j++) {
@@ -715,10 +743,22 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
* scan all the components in the dst writemask
* generate an instruction for each of them if required.
*/
+ st_src_reg addr;
while (writemask) {
int i = u_bit_scan(&writemask);
+ /* before emitting the instruction, see if we have to adjust store
+ * address */
+ if (i > 1 && inst->op == TGSI_OPCODE_STORE &&
+ addr.file == PROGRAM_UNDEFINED) {
+ /* We have to advance the buffer address by 16 */
+ addr = get_temp(glsl_type::uint_type);
+ emit_asm(ir, TGSI_OPCODE_UADD, st_dst_reg(addr),
+ inst->src[0], st_src_reg_for_int(16));
+ }
+
+
/* first time use previous instruction */
if (dinst == NULL) {
dinst = inst;
@@ -728,16 +768,21 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
*dinst = *inst;
dinst->next = NULL;
dinst->prev = NULL;
- this->instructions.push_tail(dinst);
}
+ this->instructions.push_tail(dinst);
/* modify the destination if we are splitting */
for (j = 0; j < 2; j++) {
if (dst_is_double[j]) {
dinst->dst[j].writemask = (i & 1) ? WRITEMASK_ZW : WRITEMASK_XY;
dinst->dst[j].index = initial_dst_idx[j];
- if (i > 1)
+ if (i > 1) {
+ if (dinst->op == TGSI_OPCODE_STORE) {
+ dinst->src[0] = addr;
+ } else {
dinst->dst[j].index++;
+ }
+ }
} else {
/* if we aren't writing to a double, just get the bit of the initial writemask
for this channel */
@@ -773,6 +818,8 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
}
}
inst = dinst;
+ } else {
+ this->instructions.push_tail(inst);
}
@@ -807,7 +854,9 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
assert(src1.type != GLSL_TYPE_ARRAY);
assert(src1.type != GLSL_TYPE_STRUCT);
- if (src0.type == GLSL_TYPE_DOUBLE || src1.type == GLSL_TYPE_DOUBLE)
+ if (is_resource_instruction(op))
+ type = src1.type;
+ else if (src0.type == GLSL_TYPE_DOUBLE || src1.type == GLSL_TYPE_DOUBLE)
type = GLSL_TYPE_DOUBLE;
else if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
type = GLSL_TYPE_FLOAT;
@@ -891,6 +940,9 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
case3fid(FLR, FLR, DFLR);
case3fid(ROUND, ROUND, DROUND);
+ case2iu(ATOMIMAX, ATOMUMAX);
+ case2iu(ATOMIMIN, ATOMUMIN);
+
default: break;
}
@@ -2170,6 +2222,22 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
emit_asm(ir, TGSI_OPCODE_UP2H, result_dst, op[0]);
break;
+ case ir_unop_get_buffer_size: {
+ ir_constant *const_offset = ir->operands[0]->as_constant();
+ st_src_reg buffer(
+ PROGRAM_BUFFER,
+ ctx->Const.Program[shader->Stage].MaxAtomicBuffers +
+ (const_offset ? const_offset->value.u[0] : 0),
+ GLSL_TYPE_UINT);
+ if (!const_offset) {
+ buffer.reladdr = ralloc(mem_ctx, st_src_reg);
+ memcpy(buffer.reladdr, &sampler_reladdr, sizeof(sampler_reladdr));
+ emit_arl(ir, sampler_reladdr, op[0]);
+ }
+ emit_asm(ir, TGSI_OPCODE_RESQ, result_dst)->buffer = buffer;
+ break;
+ }
+
case ir_unop_pack_snorm_2x16:
case ir_unop_pack_unorm_2x16:
case ir_unop_pack_snorm_4x8:
@@ -2190,10 +2258,6 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
*/
assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()");
break;
-
- case ir_unop_get_buffer_size:
- assert(!"Not implemented yet");
- break;
}
this->result = result_src;
@@ -3071,13 +3135,241 @@ glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
}
+/**
+ * Translate a call to one of the atomic counter intrinsics
+ * (__intrinsic_atomic_read, __intrinsic_atomic_increment,
+ * __intrinsic_atomic_predecrement) into a TGSI LOAD or ATOMUADD.
+ *
+ * The accessed buffer is PROGRAM_BUFFER at the counter variable's binding.
+ * The offset is the variable's data.offset, plus
+ * index * ATOMIC_COUNTER_SIZE when the counter is reached through an array
+ * dereference.  The result is written to the X channel of the call's
+ * return dereference.
+ */
void
+glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
+{
+   const char *callee = ir->callee->function_name();
+   ir_dereference *deref = static_cast<ir_dereference *>(
+      ir->actual_parameters.get_head());
+   ir_variable *location = deref->variable_referenced();
+
+   st_src_reg buffer(
+         PROGRAM_BUFFER, location->data.binding, GLSL_TYPE_ATOMIC_UINT);
+
+   /* Calculate the surface offset */
+   st_src_reg offset;
+   ir_dereference_array *deref_array = deref->as_dereference_array();
+
+   if (deref_array) {
+      offset = get_temp(glsl_type::uint_type);
+
+      deref_array->array_index->accept(this);
+
+      /* offset = index * ATOMIC_COUNTER_SIZE + base offset */
+      emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(offset),
+               this->result, st_src_reg_for_int(ATOMIC_COUNTER_SIZE));
+      emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(offset),
+               offset, st_src_reg_for_int(location->data.offset));
+   } else {
+      offset = st_src_reg_for_int(location->data.offset);
+   }
+
+   ir->return_deref->accept(this);
+   st_dst_reg dst(this->result);
+   dst.writemask = WRITEMASK_X;
+
+   glsl_to_tgsi_instruction *inst;
+
+   if (!strcmp("__intrinsic_atomic_read", callee)) {
+      inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, offset);
+      inst->buffer = buffer;
+   } else if (!strcmp("__intrinsic_atomic_increment", callee)) {
+      inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset,
+                      st_src_reg_for_int(1));
+      inst->buffer = buffer;
+   } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) {
+      inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset,
+                      st_src_reg_for_int(-1));
+      inst->buffer = buffer;
+      /* A second -1 is applied to the value ATOMUADD wrote; presumably
+       * ATOMUADD yields the pre-decrement value -- TODO confirm against the
+       * TGSI documentation.
+       */
+      emit_asm(ir, TGSI_OPCODE_ADD, dst, this->result, st_src_reg_for_int(-1));
+   } else {
+      /* Match the SSBO and memory barrier visitors: an unknown intrinsic
+       * name is a caller bug, so do not ignore it silently.
+       */
+      assert(!"Unexpected intrinsic");
+   }
+}
+
+/**
+ * Translate a call to one of the SSBO intrinsics (load, store, or one of the
+ * atomic operations) into the matching TGSI LOAD / STORE / ATOM* opcode.
+ *
+ * Actual parameters are consumed in order: block index, byte offset, then
+ * (depending on the intrinsic) the value, the write mask or second value,
+ * and finally an optional constant access qualifier.
+ *
+ * The buffer register is PROGRAM_BUFFER at MaxAtomicBuffers + block index;
+ * a non-constant block index is routed through sampler_reladdr.
+ */
+void
+glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir)
+{
+   const char *callee = ir->callee->function_name();
+   exec_node *param = ir->actual_parameters.get_head();
+
+   ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
+
+   param = param->get_next();
+   ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+   ir_constant *const_block = block->as_constant();
+
+   /* SSBO slots are numbered after this stage's atomic buffer slots. */
+   st_src_reg buffer(
+         PROGRAM_BUFFER,
+         ctx->Const.Program[shader->Stage].MaxAtomicBuffers +
+         (const_block ? const_block->value.u[0] : 0),
+         GLSL_TYPE_UINT);
+
+   if (!const_block) {
+      block->accept(this);
+      emit_arl(ir, sampler_reladdr, this->result);
+      buffer.reladdr = ralloc(mem_ctx, st_src_reg);
+      memcpy(buffer.reladdr, &sampler_reladdr, sizeof(sampler_reladdr));
+   }
+
+   /* Calculate the surface offset */
+   offset->accept(this);
+   st_src_reg off = this->result;
+
+   st_dst_reg dst = undef_dst;
+   if (ir->return_deref) {
+      ir->return_deref->accept(this);
+      dst = st_dst_reg(this->result);
+      dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1;
+   }
+
+   glsl_to_tgsi_instruction *inst;
+
+   if (!strcmp("__intrinsic_load_ssbo", callee)) {
+      inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off);
+      /* Boolean destinations are normalised with a != 0 comparison. */
+      if (dst.type == GLSL_TYPE_BOOL)
+         emit_asm(ir, TGSI_OPCODE_USNE, dst, st_src_reg(dst), st_src_reg_for_int(0));
+   } else if (!strcmp("__intrinsic_store_ssbo", callee)) {
+      param = param->get_next();
+      ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
+      val->accept(this);
+
+      param = param->get_next();
+      ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
+      assert(write_mask);
+      dst.writemask = write_mask->value.u[0];
+
+      dst.type = this->result.type;
+      inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result);
+   } else {
+      param = param->get_next();
+      ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
+      val->accept(this);
+
+      st_src_reg data = this->result, data2 = undef_src;
+      unsigned opcode;
+      if (!strcmp("__intrinsic_atomic_add_ssbo", callee))
+         opcode = TGSI_OPCODE_ATOMUADD;
+      else if (!strcmp("__intrinsic_atomic_min_ssbo", callee))
+         opcode = TGSI_OPCODE_ATOMIMIN;
+      else if (!strcmp("__intrinsic_atomic_max_ssbo", callee))
+         opcode = TGSI_OPCODE_ATOMIMAX;
+      else if (!strcmp("__intrinsic_atomic_and_ssbo", callee))
+         opcode = TGSI_OPCODE_ATOMAND;
+      else if (!strcmp("__intrinsic_atomic_or_ssbo", callee))
+         opcode = TGSI_OPCODE_ATOMOR;
+      else if (!strcmp("__intrinsic_atomic_xor_ssbo", callee))
+         opcode = TGSI_OPCODE_ATOMXOR;
+      else if (!strcmp("__intrinsic_atomic_exchange_ssbo", callee))
+         opcode = TGSI_OPCODE_ATOMXCHG;
+      else if (!strcmp("__intrinsic_atomic_comp_swap_ssbo", callee)) {
+         opcode = TGSI_OPCODE_ATOMCAS;
+         /* Compare-and-swap carries one more value operand. */
+         param = param->get_next();
+         val = ((ir_instruction *)param)->as_rvalue();
+         val->accept(this);
+         data2 = this->result;
+      } else {
+         assert(!"Unexpected intrinsic");
+         return;
+      }
+
+      inst = emit_asm(ir, opcode, dst, off, data, data2);
+   }
+
+   param = param->get_next();
+   ir_constant *access = NULL;
+   if (!param->is_tail_sentinel()) {
+      access = ((ir_instruction *)param)->as_constant();
+      assert(access);
+   }
+
+   /* The emit_asm() might have actually split the op into pieces, e.g. for
+    * double stores. We have to go back and fix up all the generated ops.
+    *
+    * Walk backwards from the last emitted instruction, skipping one
+    * interleaved UADD per step.  Stop at the list head: the head sentinel is
+    * not a glsl_to_tgsi_instruction, so its op/buffer fields must never be
+    * read.
+    */
+   unsigned op = inst->op;
+   do {
+      inst->buffer = buffer;
+      if (access)
+         inst->buffer_access = access->value.u[0];
+
+      exec_node *prev = inst->get_prev();
+      if (!prev || prev->is_head_sentinel())
+         break;
+      inst = (glsl_to_tgsi_instruction *)prev;
+
+      if (inst->op == TGSI_OPCODE_UADD) {
+         prev = inst->get_prev();
+         if (!prev || prev->is_head_sentinel())
+            break;
+         inst = (glsl_to_tgsi_instruction *)prev;
+      }
+   } while (inst->buffer.file == PROGRAM_UNDEFINED && inst->op == op);
+}
+
+/**
+ * Translate a memory barrier intrinsic call into one TGSI MEMBAR
+ * instruction.  The intrinsic name selects which TGSI_MEMBAR_* bits are
+ * passed as the immediate operand; an unknown name asserts and emits
+ * nothing.
+ */
+void
+glsl_to_tgsi_visitor::visit_membar_intrinsic(ir_call *ir)
+{
+   const char *name = ir->callee->function_name();
+   int membar_bits;
+
+   if (strcmp("__intrinsic_memory_barrier", name) == 0) {
+      membar_bits = TGSI_MEMBAR_SHADER_BUFFER |
+                    TGSI_MEMBAR_ATOMIC_BUFFER |
+                    TGSI_MEMBAR_SHADER_IMAGE |
+                    TGSI_MEMBAR_SHARED;
+   } else if (strcmp("__intrinsic_memory_barrier_atomic_counter", name) == 0) {
+      membar_bits = TGSI_MEMBAR_ATOMIC_BUFFER;
+   } else if (strcmp("__intrinsic_memory_barrier_buffer", name) == 0) {
+      membar_bits = TGSI_MEMBAR_SHADER_BUFFER;
+   } else if (strcmp("__intrinsic_memory_barrier_image", name) == 0) {
+      membar_bits = TGSI_MEMBAR_SHADER_IMAGE;
+   } else if (strcmp("__intrinsic_memory_barrier_shared", name) == 0) {
+      membar_bits = TGSI_MEMBAR_SHARED;
+   } else if (strcmp("__intrinsic_group_memory_barrier", name) == 0) {
+      membar_bits = TGSI_MEMBAR_SHADER_BUFFER |
+                    TGSI_MEMBAR_ATOMIC_BUFFER |
+                    TGSI_MEMBAR_SHADER_IMAGE |
+                    TGSI_MEMBAR_SHARED |
+                    TGSI_MEMBAR_THREAD_GROUP;
+   } else {
+      assert(!"Unexpected memory barrier intrinsic");
+      return;
+   }
+
+   emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
+            st_src_reg_for_int(membar_bits));
+}
+
+void
glsl_to_tgsi_visitor::visit(ir_call *ir)
{
glsl_to_tgsi_instruction *call_inst;
ir_function_signature *sig = ir->callee;
- function_entry *entry = get_function_signature(sig);
+ const char *callee = sig->function_name();
+ function_entry *entry;
int i;
+ /* Filter out intrinsics */
+ if (!strcmp("__intrinsic_atomic_read", callee) ||
+ !strcmp("__intrinsic_atomic_increment", callee) ||
+ !strcmp("__intrinsic_atomic_predecrement", callee)) {
+ visit_atomic_counter_intrinsic(ir);
+ return;
+ }
+
+ if (!strcmp("__intrinsic_load_ssbo", callee) ||
+ !strcmp("__intrinsic_store_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_add_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_min_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_max_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_and_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_or_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_xor_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_exchange_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_comp_swap_ssbo", callee)) {
+ visit_ssbo_intrinsic(ir);
+ return;
+ }
+
+ if (!strcmp("__intrinsic_memory_barrier", callee) ||
+ !strcmp("__intrinsic_memory_barrier_atomic_counter", callee) ||
+ !strcmp("__intrinsic_memory_barrier_buffer", callee) ||
+ !strcmp("__intrinsic_memory_barrier_image", callee) ||
+ !strcmp("__intrinsic_memory_barrier_shared", callee) ||
+ !strcmp("__intrinsic_group_memory_barrier", callee)) {
+ visit_membar_intrinsic(ir);
+ return;
+ }
+
+ entry = get_function_signature(sig);
/* Process in parameters. */
foreach_two_lists(formal_node, &sig->parameters,
actual_node, &ir->actual_parameters) {
@@ -3583,6 +3875,7 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
current_function = NULL;
num_address_regs = 0;
samplers_used = 0;
+ buffers_used = 0;
indirect_addr_consts = false;
wpos_transform_const = -1;
glsl_version = 0;
@@ -3617,6 +3910,7 @@ static void
count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
{
v->samplers_used = 0;
+ v->buffers_used = 0;
foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) {
if (inst->info->is_tex) {
@@ -3634,6 +3928,12 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
}
}
}
+ if (inst->buffer.file != PROGRAM_UNDEFINED && (
+ is_resource_instruction(inst->op) ||
+ inst->op == TGSI_OPCODE_STORE)) {
+ if (inst->buffer.file == PROGRAM_BUFFER)
+ v->buffers_used |= 1 << inst->buffer.index;
+ }
}
prog->SamplersUsed = v->samplers_used;
@@ -3822,9 +4122,11 @@ glsl_to_tgsi_visitor::get_last_temp_read_first_temp_write(int *last_reads, int *
last_reads[inst->src[j].index] = (depth == 0) ? i : -2;
}
for (j = 0; j < num_inst_dst_regs(inst); j++) {
- if (inst->dst[j].file == PROGRAM_TEMPORARY)
+ if (inst->dst[j].file == PROGRAM_TEMPORARY) {
if (first_writes[inst->dst[j].index] == -1)
first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start;
+ last_reads[inst->dst[j].index] = (depth == 0) ? i : -2;
+ }
}
for (j = 0; j < inst->tex_offset_num_offset; j++) {
if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY)
@@ -4229,7 +4531,11 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) {
if (!inst->dead_mask || !inst->dst[0].writemask)
continue;
- else if ((inst->dst[0].writemask & ~inst->dead_mask) == 0) {
+ /* No amount of dead masks should remove memory stores */
+ if (inst->info->is_store)
+ continue;
+
+ if ((inst->dst[0].writemask & ~inst->dead_mask) == 0) {
inst->remove();
delete inst;
removed++;
@@ -4338,6 +4644,7 @@ glsl_to_tgsi_visitor::merge_registers(void)
/* Update the first_writes and last_reads arrays with the new
* values for the merged register index, and mark the newly unused
* register index as such. */
+ assert(last_reads[j] >= last_reads[i]);
last_reads[i] = last_reads[j];
first_writes[j] = -1;
last_reads[j] = -1;
@@ -4407,6 +4714,7 @@ struct st_translate {
struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
struct ureg_dst address[3];
struct ureg_src samplers[PIPE_MAX_SAMPLERS];
+ struct ureg_src buffers[PIPE_MAX_SHADER_BUFFERS];
struct ureg_src systemValues[SYSTEM_VALUE_MAX];
struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
unsigned *array_sizes;
@@ -4814,13 +5122,13 @@ compile_tgsi_instruction(struct st_translate *t,
const glsl_to_tgsi_instruction *inst)
{
struct ureg_program *ureg = t->ureg;
- GLuint i;
+ int i;
struct ureg_dst dst[2];
struct ureg_src src[4];
struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET];
- unsigned num_dst;
- unsigned num_src;
+ int num_dst;
+ int num_src;
unsigned tex_target;
num_dst = num_inst_dst_regs(inst);
@@ -4868,7 +5176,7 @@ compile_tgsi_instruction(struct st_translate *t,
src[num_src] =
ureg_src_indirect(src[num_src], ureg_src(t->address[2]));
num_src++;
- for (i = 0; i < inst->tex_offset_num_offset; i++) {
+ for (i = 0; i < (int)inst->tex_offset_num_offset; i++) {
texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i], i);
}
tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);
@@ -4881,6 +5189,38 @@ compile_tgsi_instruction(struct st_translate *t,
src, num_src);
return;
+ case TGSI_OPCODE_RESQ:
+ case TGSI_OPCODE_LOAD:
+ case TGSI_OPCODE_ATOMUADD:
+ case TGSI_OPCODE_ATOMXCHG:
+ case TGSI_OPCODE_ATOMCAS:
+ case TGSI_OPCODE_ATOMAND:
+ case TGSI_OPCODE_ATOMOR:
+ case TGSI_OPCODE_ATOMXOR:
+ case TGSI_OPCODE_ATOMUMIN:
+ case TGSI_OPCODE_ATOMUMAX:
+ case TGSI_OPCODE_ATOMIMIN:
+ case TGSI_OPCODE_ATOMIMAX:
+ for (i = num_src - 1; i >= 0; i--)
+ src[i + 1] = src[i];
+ num_src++;
+ src[0] = t->buffers[inst->buffer.index];
+ if (inst->buffer.reladdr)
+ src[0] = ureg_src_indirect(src[0], ureg_src(t->address[2]));
+ assert(src[0].File != TGSI_FILE_NULL);
+ ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src,
+ inst->buffer_access);
+ break;
+
+ case TGSI_OPCODE_STORE:
+ dst[0] = ureg_writemask(ureg_dst(t->buffers[inst->buffer.index]), inst->dst[0].writemask);
+ if (inst->buffer.reladdr)
+ dst[0] = ureg_dst_indirect(dst[0], ureg_src(t->address[2]));
+ assert(dst[0].File != TGSI_FILE_NULL);
+ ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src,
+ inst->buffer_access);
+ break;
+
case TGSI_OPCODE_SCS:
dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
ureg_insn(ureg, inst->op, dst, num_dst, src, num_src);
@@ -5170,6 +5510,8 @@ st_translate_program(
{
struct st_translate *t;
unsigned i;
+ struct gl_program_constants *frag_const =
+ &ctx->Const.Program[MESA_SHADER_FRAGMENT];
enum pipe_error ret = PIPE_OK;
assert(numInputs <= ARRAY_SIZE(t->inputs));
@@ -5485,7 +5827,7 @@ st_translate_program(
assert(i == program->num_immediates);
/* texture samplers */
- for (i = 0; i < ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; i++) {
+ for (i = 0; i < frag_const->MaxTextureImageUnits; i++) {
if (program->samplers_used & (1 << i)) {
unsigned type;
@@ -5510,6 +5852,21 @@ st_translate_program(
}
}
+ for (i = 0; i < frag_const->MaxAtomicBuffers; i++) {
+ if (program->buffers_used & (1 << i)) {
+ t->buffers[i] = ureg_DECL_buffer(ureg, i, true);
+ }
+ }
+
+ for (; i < frag_const->MaxAtomicBuffers + frag_const->MaxShaderStorageBlocks;
+ i++) {
+ if (program->buffers_used & (1 << i)) {
+ t->buffers[i] = ureg_DECL_buffer(ureg, i, false);
+ }
+ }
+
+
+
/* Emit each instruction in turn:
*/
foreach_in_list(glsl_to_tgsi_instruction, inst, &program->instructions) {
diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h
index 0b8b6a9de56..6494aa518a2 100644
--- a/src/mesa/vbo/vbo.h
+++ b/src/mesa/vbo/vbo.h
@@ -181,6 +181,9 @@ vbo_sizeof_ib_type(GLenum type)
}
void
+vbo_delete_minmax_cache(struct gl_buffer_object *bufferObj);
+
+void
vbo_get_minmax_indices(struct gl_context *ctx, const struct _mesa_prim *prim,
const struct _mesa_index_buffer *ib,
GLuint *min_index, GLuint *max_index, GLuint nr_prims);
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index 02139ef881f..f0245fd08cc 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -37,8 +37,6 @@
#include "main/enums.h"
#include "main/macros.h"
#include "main/transformfeedback.h"
-#include "main/sse_minmax.h"
-#include "x86/common_x86_asm.h"
#include "vbo_context.h"
@@ -80,152 +78,6 @@ vbo_check_buffers_are_unmapped(struct gl_context *ctx)
}
-
-/**
- * Compute min and max elements by scanning the index buffer for
- * glDraw[Range]Elements() calls.
- * If primitive restart is enabled, we need to ignore restart
- * indexes when computing min/max.
- */
-static void
-vbo_get_minmax_index(struct gl_context *ctx,
- const struct _mesa_prim *prim,
- const struct _mesa_index_buffer *ib,
- GLuint *min_index, GLuint *max_index,
- const GLuint count)
-{
- const GLboolean restart = ctx->Array._PrimitiveRestart;
- const GLuint restartIndex = _mesa_primitive_restart_index(ctx, ib->type);
- const int index_size = vbo_sizeof_ib_type(ib->type);
- const char *indices;
- GLuint i;
-
- indices = (char *) ib->ptr + prim->start * index_size;
- if (_mesa_is_bufferobj(ib->obj)) {
- GLsizeiptr size = MIN2(count * index_size, ib->obj->Size);
- indices = ctx->Driver.MapBufferRange(ctx, (GLintptr) indices, size,
- GL_MAP_READ_BIT, ib->obj,
- MAP_INTERNAL);
- }
-
- switch (ib->type) {
- case GL_UNSIGNED_INT: {
- const GLuint *ui_indices = (const GLuint *)indices;
- GLuint max_ui = 0;
- GLuint min_ui = ~0U;
- if (restart) {
- for (i = 0; i < count; i++) {
- if (ui_indices[i] != restartIndex) {
- if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
- if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
- }
- }
- }
- else {
-#if defined(USE_SSE41)
- if (cpu_has_sse4_1) {
- _mesa_uint_array_min_max(ui_indices, &min_ui, &max_ui, count);
- }
- else
-#endif
- for (i = 0; i < count; i++) {
- if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
- if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
- }
- }
- *min_index = min_ui;
- *max_index = max_ui;
- break;
- }
- case GL_UNSIGNED_SHORT: {
- const GLushort *us_indices = (const GLushort *)indices;
- GLuint max_us = 0;
- GLuint min_us = ~0U;
- if (restart) {
- for (i = 0; i < count; i++) {
- if (us_indices[i] != restartIndex) {
- if (us_indices[i] > max_us) max_us = us_indices[i];
- if (us_indices[i] < min_us) min_us = us_indices[i];
- }
- }
- }
- else {
- for (i = 0; i < count; i++) {
- if (us_indices[i] > max_us) max_us = us_indices[i];
- if (us_indices[i] < min_us) min_us = us_indices[i];
- }
- }
- *min_index = min_us;
- *max_index = max_us;
- break;
- }
- case GL_UNSIGNED_BYTE: {
- const GLubyte *ub_indices = (const GLubyte *)indices;
- GLuint max_ub = 0;
- GLuint min_ub = ~0U;
- if (restart) {
- for (i = 0; i < count; i++) {
- if (ub_indices[i] != restartIndex) {
- if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
- if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
- }
- }
- }
- else {
- for (i = 0; i < count; i++) {
- if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
- if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
- }
- }
- *min_index = min_ub;
- *max_index = max_ub;
- break;
- }
- default:
- unreachable("not reached");
- }
-
- if (_mesa_is_bufferobj(ib->obj)) {
- ctx->Driver.UnmapBuffer(ctx, ib->obj, MAP_INTERNAL);
- }
-}
-
-/**
- * Compute min and max elements for nr_prims
- */
-void
-vbo_get_minmax_indices(struct gl_context *ctx,
- const struct _mesa_prim *prims,
- const struct _mesa_index_buffer *ib,
- GLuint *min_index,
- GLuint *max_index,
- GLuint nr_prims)
-{
- GLuint tmp_min, tmp_max;
- GLuint i;
- GLuint count;
-
- *min_index = ~0;
- *max_index = 0;
-
- for (i = 0; i < nr_prims; i++) {
- const struct _mesa_prim *start_prim;
-
- start_prim = &prims[i];
- count = start_prim->count;
- /* Do combination if possible to reduce map/unmap count */
- while ((i + 1 < nr_prims) &&
- (prims[i].start + prims[i].count == prims[i+1].start)) {
- count += prims[i+1].count;
- i++;
- }
- vbo_get_minmax_index(ctx, start_prim, ib, &tmp_min, &tmp_max, count);
- *min_index = MIN2(*min_index, tmp_min);
- *max_index = MAX2(*max_index, tmp_max);
- }
-}
-
-
/**
* Check that element 'j' of the array has reasonable data.
* Map VBO if needed.
diff --git a/src/mesa/vbo/vbo_minmax_index.c b/src/mesa/vbo/vbo_minmax_index.c
new file mode 100644
index 00000000000..0f75a87f3f3
--- /dev/null
+++ b/src/mesa/vbo/vbo_minmax_index.c
@@ -0,0 +1,378 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright 2003 VMware, Inc.
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/varray.h"
+#include "main/macros.h"
+#include "main/sse_minmax.h"
+#include "x86/common_x86_asm.h"
+#include "util/hash_table.h"
+
+
+struct minmax_cache_key { /* identifies one cached index range; hashed byte-wise by vbo_minmax_cache_hash() */
+ GLintptr offset; /* offset of the first index, as supplied to the lookup/store helpers */
+ GLuint count; /* number of indices in the range */
+ GLenum type; /* index type (GL_UNSIGNED_INT, GL_UNSIGNED_SHORT or GL_UNSIGNED_BYTE) */
+};
+
+
+struct minmax_cache_entry { /* heap-allocated hash table payload; released by vbo_minmax_cache_delete_entry() */
+ struct minmax_cache_key key; /* embedded key: the table stores a pointer to this member */
+ GLuint min; /* smallest index found in the range */
+ GLuint max; /* largest index found in the range */
+};
+
+
+/**
+ * Hash callback for the min/max cache: hashes the raw bytes of \p key.
+ */
+static uint32_t
+vbo_minmax_cache_hash(const struct minmax_cache_key *key)
+{
+   const size_t key_size = sizeof *key;
+
+   return _mesa_hash_data(key, key_size);
+}
+
+
+/**
+ * Equality callback for the min/max cache: two keys match when their
+ * offset, count and index type are all identical.
+ */
+static bool
+vbo_minmax_cache_key_equal(const struct minmax_cache_key *a,
+                           const struct minmax_cache_key *b)
+{
+   if (a->offset != b->offset)
+      return false;
+
+   if (a->count != b->count)
+      return false;
+
+   return a->type == b->type;
+}
+
+
+/**
+ * Per-entry destructor handed to the hash table: frees the entry's data
+ * pointer.
+ */
+static void
+vbo_minmax_cache_delete_entry(struct hash_entry *entry)
+{
+   void *payload = entry->data;
+
+   free(payload);
+}
+
+
+/**
+ * Decide whether min/max results for \p bufferObj may be cached.
+ *
+ * \return GL_FALSE when the buffer's usage history contains any of the usage
+ *         bits listed below (including USAGE_DISABLE_MINMAX_CACHE), or when
+ *         the user mapping has both GL_MAP_PERSISTENT_BIT and
+ *         GL_MAP_WRITE_BIT set; GL_TRUE otherwise.
+ */
+static GLboolean
+vbo_use_minmax_cache(struct gl_buffer_object *bufferObj)
+{
+   const GLbitfield persistent_write =
+      GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT;
+
+   if (bufferObj->UsageHistory & (USAGE_TEXTURE_BUFFER |
+                                  USAGE_ATOMIC_COUNTER_BUFFER |
+                                  USAGE_SHADER_STORAGE_BUFFER |
+                                  USAGE_TRANSFORM_FEEDBACK_BUFFER |
+                                  USAGE_PIXEL_PACK_BUFFER |
+                                  USAGE_DISABLE_MINMAX_CACHE))
+      return GL_FALSE;
+
+   /* Both bits must be set for the user mapping to rule out caching. */
+   if ((bufferObj->Mappings[MAP_USER].AccessFlags & persistent_write) ==
+       persistent_write)
+      return GL_FALSE;
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Destroy the min/max cache of \p bufferObj, freeing every entry, and reset
+ * the buffer's cache pointer to NULL.
+ */
+void
+vbo_delete_minmax_cache(struct gl_buffer_object *bufferObj)
+{
+   struct hash_table *cache = bufferObj->MinMaxCache;
+
+   bufferObj->MinMaxCache = NULL;
+   _mesa_hash_table_destroy(cache, vbo_minmax_cache_delete_entry);
+}
+
+
+/**
+ * Look up a previously stored min/max pair for an index range.
+ *
+ * \param bufferObj  buffer object whose cache is consulted
+ * \param type       index type, part of the cache key
+ * \param offset     offset of the range, part of the cache key
+ * \param count      number of indices, part of the cache key; also the
+ *                   amount added to the hit or miss statistics
+ * \param min_index  receives the cached minimum on a hit
+ * \param max_index  receives the cached maximum on a hit
+ *
+ * \return GL_TRUE on a cache hit (outputs written), GL_FALSE otherwise
+ *         (outputs left untouched).
+ */
+static GLboolean
+vbo_get_minmax_cached(struct gl_buffer_object *bufferObj,
+                      GLenum type, GLintptr offset, GLuint count,
+                      GLuint *min_index, GLuint *max_index)
+{
+   GLboolean hit = GL_FALSE;
+
+   if (!bufferObj->MinMaxCache || !vbo_use_minmax_cache(bufferObj))
+      return GL_FALSE;
+
+   mtx_lock(&bufferObj->Mutex);
+
+   if (bufferObj->MinMaxCacheDirty) {
+      /* Disable the cache permanently for this BO if the number of hits
+       * is asymptotically less than the number of misses. This happens when
+       * applications use the BO for streaming.
+       *
+       * However, some initial optimism allows applications that interleave
+       * draw calls with glBufferSubData during warmup.
+       */
+      const unsigned optimism = bufferObj->Size;
+
+      if (bufferObj->MinMaxCacheMissIndices > optimism &&
+          bufferObj->MinMaxCacheHitIndices <
+             bufferObj->MinMaxCacheMissIndices - optimism) {
+         bufferObj->UsageHistory |= USAGE_DISABLE_MINMAX_CACHE;
+         vbo_delete_minmax_cache(bufferObj);
+         /* Neither counter is updated on this path. */
+         mtx_unlock(&bufferObj->Mutex);
+         return GL_FALSE;
+      }
+
+      /* The cache was flagged dirty: drop every entry and treat this
+       * lookup as a miss.
+       */
+      _mesa_hash_table_clear(bufferObj->MinMaxCache,
+                             vbo_minmax_cache_delete_entry);
+      bufferObj->MinMaxCacheDirty = false;
+   } else {
+      struct minmax_cache_key key;
+      struct hash_entry *match;
+
+      key.type = type;
+      key.offset = offset;
+      key.count = count;
+
+      match = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache,
+                                                 vbo_minmax_cache_hash(&key),
+                                                 &key);
+      if (match) {
+         struct minmax_cache_entry *cached = match->data;
+
+         *min_index = cached->min;
+         *max_index = cached->max;
+         hit = GL_TRUE;
+      }
+   }
+
+   if (hit) {
+      /* The hit counter saturates so that we don't accidentally disable the
+       * cache in a long-running program.
+       */
+      const unsigned summed = bufferObj->MinMaxCacheHitIndices + count;
+
+      bufferObj->MinMaxCacheHitIndices =
+         (summed >= bufferObj->MinMaxCacheHitIndices) ? summed : ~(unsigned)0;
+   } else {
+      bufferObj->MinMaxCacheMissIndices += count;
+   }
+
+   mtx_unlock(&bufferObj->Mutex);
+   return hit;
+}
+
+
+/**
+ * Record the min/max pair computed for an index range in the buffer's cache.
+ *
+ * The hash table is created on the first store.  Nothing is stored when
+ * caching is not allowed for the buffer, when an allocation fails, or when
+ * an entry with the same (offset, count, type) key already exists.
+ *
+ * \param ctx        GL context, used for the debug message only
+ * \param bufferObj  buffer object owning the cache
+ * \param type       index type, part of the cache key
+ * \param offset     offset of the range, part of the cache key
+ * \param count      number of indices, part of the cache key
+ * \param min        minimum index to remember
+ * \param max        maximum index to remember
+ */
+static void
+vbo_minmax_cache_store(struct gl_context *ctx,
+                       struct gl_buffer_object *bufferObj,
+                       GLenum type, GLintptr offset, GLuint count,
+                       GLuint min, GLuint max)
+{
+   struct minmax_cache_entry *new_entry;
+   uint32_t key_hash;
+
+   if (!vbo_use_minmax_cache(bufferObj))
+      return;
+
+   mtx_lock(&bufferObj->Mutex);
+
+   /* Create the table lazily. */
+   if (!bufferObj->MinMaxCache) {
+      bufferObj->MinMaxCache =
+         _mesa_hash_table_create(NULL,
+                                 (uint32_t (*)(const void *))vbo_minmax_cache_hash,
+                                 (bool (*)(const void *, const void *))vbo_minmax_cache_key_equal);
+      if (!bufferObj->MinMaxCache)
+         goto unlock;
+   }
+
+   new_entry = MALLOC_STRUCT(minmax_cache_entry);
+   if (!new_entry)
+      goto unlock;
+
+   new_entry->min = min;
+   new_entry->max = max;
+   new_entry->key.type = type;
+   new_entry->key.count = count;
+   new_entry->key.offset = offset;
+   key_hash = vbo_minmax_cache_hash(&new_entry->key);
+
+   if (_mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache,
+                                          key_hash, &new_entry->key)) {
+      /* It seems like this could happen when two contexts are rendering using
+       * the same buffer object from multiple threads.
+       */
+      _mesa_debug(ctx, "duplicate entry in minmax cache\n");
+      free(new_entry);
+   } else if (!_mesa_hash_table_insert_pre_hashed(bufferObj->MinMaxCache,
+                                                  key_hash, &new_entry->key,
+                                                  new_entry)) {
+      /* Insertion failed, so the table did not take the entry. */
+      free(new_entry);
+   }
+
+unlock:
+   mtx_unlock(&bufferObj->Mutex);
+}
+
+
+/**
+ * Compute min and max elements by scanning the index buffer for
+ * glDraw[Range]Elements() calls.
+ * If primitive restart is enabled, we need to ignore restart
+ * indexes when computing min/max.
+ *
+ * When the indices live in a buffer object, the result is first looked up
+ * in the buffer's min/max cache and, after a scan, stored into it.  Lookup
+ * and store use the same key: the byte offset of the first index inside the
+ * buffer, the index count and the index type.
+ *
+ * NOTE(review): the cache key does not include the primitive restart state,
+ * although the scanned result depends on it -- confirm that the cache is
+ * invalidated elsewhere when that state changes.
+ *
+ * \param ctx        GL context (restart state, driver map/unmap hooks)
+ * \param prim       primitive whose \c start selects the first index
+ * \param ib         index buffer description (type, pointer/offset, object)
+ * \param min_index  receives the smallest index found
+ * \param max_index  receives the largest index found
+ * \param count      number of indices to examine, starting at prim->start
+ */
+static void
+vbo_get_minmax_index(struct gl_context *ctx,
+                     const struct _mesa_prim *prim,
+                     const struct _mesa_index_buffer *ib,
+                     GLuint *min_index, GLuint *max_index,
+                     const GLuint count)
+{
+   const GLboolean restart = ctx->Array._PrimitiveRestart;
+   const GLuint restartIndex = _mesa_primitive_restart_index(ctx, ib->type);
+   const int index_size = vbo_sizeof_ib_type(ib->type);
+   const char *indices;
+   GLintptr offset = 0;
+   GLuint i;
+
+   indices = (char *) ib->ptr + prim->start * index_size;
+   if (_mesa_is_bufferobj(ib->obj)) {
+      GLsizeiptr size = MIN2(count * index_size, ib->obj->Size);
+
+      /* For a buffer object "indices" is a byte offset into the buffer, not
+       * a dereferenceable pointer.  Remember it before it is replaced by
+       * the mapped address: it is the cache key for lookup and store alike.
+       */
+      offset = (GLintptr) indices;
+
+      if (vbo_get_minmax_cached(ib->obj, ib->type, offset, count,
+                                min_index, max_index))
+         return;
+
+      indices = ctx->Driver.MapBufferRange(ctx, offset, size,
+                                           GL_MAP_READ_BIT, ib->obj,
+                                           MAP_INTERNAL);
+   }
+
+   switch (ib->type) {
+   case GL_UNSIGNED_INT: {
+      const GLuint *ui_indices = (const GLuint *)indices;
+      GLuint max_ui = 0;
+      GLuint min_ui = ~0U;
+      if (restart) {
+         for (i = 0; i < count; i++) {
+            if (ui_indices[i] != restartIndex) {
+               if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
+               if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
+            }
+         }
+      }
+      else {
+#if defined(USE_SSE41)
+         if (cpu_has_sse4_1) {
+            _mesa_uint_array_min_max(ui_indices, &min_ui, &max_ui, count);
+         }
+         else
+#endif
+         {
+            for (i = 0; i < count; i++) {
+               if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
+               if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
+            }
+         }
+      }
+      *min_index = min_ui;
+      *max_index = max_ui;
+      break;
+   }
+   case GL_UNSIGNED_SHORT: {
+      const GLushort *us_indices = (const GLushort *)indices;
+      GLuint max_us = 0;
+      GLuint min_us = ~0U;
+      if (restart) {
+         for (i = 0; i < count; i++) {
+            if (us_indices[i] != restartIndex) {
+               if (us_indices[i] > max_us) max_us = us_indices[i];
+               if (us_indices[i] < min_us) min_us = us_indices[i];
+            }
+         }
+      }
+      else {
+         for (i = 0; i < count; i++) {
+            if (us_indices[i] > max_us) max_us = us_indices[i];
+            if (us_indices[i] < min_us) min_us = us_indices[i];
+         }
+      }
+      *min_index = min_us;
+      *max_index = max_us;
+      break;
+   }
+   case GL_UNSIGNED_BYTE: {
+      const GLubyte *ub_indices = (const GLubyte *)indices;
+      GLuint max_ub = 0;
+      GLuint min_ub = ~0U;
+      if (restart) {
+         for (i = 0; i < count; i++) {
+            if (ub_indices[i] != restartIndex) {
+               if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
+               if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
+            }
+         }
+      }
+      else {
+         for (i = 0; i < count; i++) {
+            if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
+            if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
+         }
+      }
+      *min_index = min_ub;
+      *max_index = max_ub;
+      break;
+   }
+   default:
+      unreachable("not reached");
+   }
+
+   if (_mesa_is_bufferobj(ib->obj)) {
+      /* Store under the same byte offset that the lookup used.  Keying on
+       * prim->start (an element index) would miss on later lookups and
+       * could collide with a different range whose byte offset happens to
+       * equal that element index.
+       */
+      vbo_minmax_cache_store(ctx, ib->obj, ib->type, offset, count,
+                             *min_index, *max_index);
+      ctx->Driver.UnmapBuffer(ctx, ib->obj, MAP_INTERNAL);
+   }
+}
+
+/**
+ * Compute the overall min and max index across \p nr_prims primitives.
+ *
+ * Runs of primitives whose index ranges are contiguous (each one starting
+ * where the previous one ends) are merged into a single scan to reduce the
+ * number of buffer map/unmap operations.  With nr_prims == 0 the outputs
+ * keep their initial values (~0 and 0).
+ */
+void
+vbo_get_minmax_indices(struct gl_context *ctx,
+                       const struct _mesa_prim *prims,
+                       const struct _mesa_index_buffer *ib,
+                       GLuint *min_index,
+                       GLuint *max_index,
+                       GLuint nr_prims)
+{
+   GLuint first = 0;
+
+   *min_index = ~0;
+   *max_index = 0;
+
+   while (first < nr_prims) {
+      GLuint last = first;
+      GLuint total = prims[first].count;
+      GLuint run_min, run_max;
+
+      /* Extend the run while the next primitive starts where this one ends. */
+      while ((last + 1 < nr_prims) &&
+             (prims[last].start + prims[last].count == prims[last + 1].start)) {
+         last++;
+         total += prims[last].count;
+      }
+
+      vbo_get_minmax_index(ctx, &prims[first], ib, &run_min, &run_max, total);
+      *min_index = MIN2(*min_index, run_min);
+      *max_index = MAX2(*max_index, run_max);
+
+      first = last + 1;
+   }
+}
diff --git a/src/mesa/x86-64/xform4.S b/src/mesa/x86-64/xform4.S
index c185f62099e..b0aca19c8b0 100644
--- a/src/mesa/x86-64/xform4.S
+++ b/src/mesa/x86-64/xform4.S
@@ -69,7 +69,7 @@ _mesa_x86_64_transform_points4_general:
movq V4F_START(%rdx), %rdx /* ptr to first src vertex */
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
- prefetch 16(%rdx)
+ prefetcht1 16(%rdx)
movaps 0(%rsi), %xmm4 /* m3 | m2 | m1 | m0 */
movaps 16(%rsi), %xmm5 /* m7 | m6 | m5 | m4 */
@@ -80,7 +80,7 @@ _mesa_x86_64_transform_points4_general:
p4_general_loop:
movups (%rdx), %xmm8 /* ox | oy | oz | ow */
- prefetchw 16(%rdi)
+ prefetcht1 16(%rdi)
pshufd $0x00, %xmm8, %xmm0 /* ox | ox | ox | ox */
addq %rax, %rdx
@@ -93,7 +93,7 @@ p4_general_loop:
addps %xmm1, %xmm0 /* ox*m3+oy*m7 | ... */
mulps %xmm7, %xmm3 /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */
addps %xmm2, %xmm0 /* ox*m3+oy*m7+oz*m11 | ... */
- prefetch 16(%rdx)
+ prefetcht1 16(%rdx)
addps %xmm3, %xmm0 /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */
movaps %xmm0, (%rdi) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */
@@ -150,7 +150,7 @@ _mesa_x86_64_transform_points4_3d:
movq V4F_START(%rdx), %rdx /* ptr to first src vertex */
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
- prefetch 16(%rdx)
+ prefetcht1 16(%rdx)
movaps 0(%rsi), %xmm4 /* m3 | m2 | m1 | m0 */
movaps 16(%rsi), %xmm5 /* m7 | m6 | m5 | m4 */
@@ -166,7 +166,7 @@ _mesa_x86_64_transform_points4_3d:
p4_3d_loop:
movups (%rdx), %xmm8 /* ox | oy | oz | ow */
- prefetchw 16(%rdi)
+ prefetcht1 16(%rdi)
pshufd $0x00, %xmm8, %xmm0 /* ox | ox | ox | ox */
addq %rax, %rdx
@@ -179,7 +179,7 @@ p4_3d_loop:
addps %xmm1, %xmm0 /* ox*m3+oy*m7 | ... */
mulps %xmm7, %xmm3 /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */
addps %xmm2, %xmm0 /* ox*m3+oy*m7+oz*m11 | ... */
- prefetch 16(%rdx)
+ prefetcht1 16(%rdx)
addps %xmm3, %xmm0 /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */
movaps %xmm0, (%rdi) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */
@@ -210,8 +210,8 @@ _mesa_x86_64_transform_points4_identity:
movq V4F_START(%rdx), %rsi /* ptr to first src vertex */
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
- prefetch 64(%rsi)
- prefetchw 64(%rdi)
+ prefetcht1 64(%rsi)
+ prefetcht1 64(%rdi)
add %ecx, %ecx
@@ -242,7 +242,7 @@ _mesa_3dnow_transform_points4_3d_no_rot:
movq V4F_START(%rdx), %rdx /* ptr to first src vertex */
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
- prefetch (%rdx)
+ prefetcht1 (%rdx)
movd (%rsi), %mm0 /* | m00 */
.byte 0x66, 0x66, 0x90 /* manual align += 3 */
@@ -255,7 +255,7 @@ _mesa_3dnow_transform_points4_3d_no_rot:
p4_3d_no_rot_loop:
- prefetchw 32(%rdi)
+ prefetcht1 32(%rdi)
movq (%rdx), %mm4 /* x1 | x0 */
movq 8(%rdx), %mm5 /* x3 | x2 */
@@ -279,7 +279,7 @@ p4_3d_no_rot_loop:
addq $16, %rdi
decl %ecx
- prefetch 32(%rdx)
+ prefetcht1 32(%rdx)
jnz p4_3d_no_rot_loop
p4_3d_no_rot_done:
@@ -311,7 +311,7 @@ _mesa_3dnow_transform_points4_perspective:
punpckldq 20(%rsi), %mm0 /* m11 | m00 */
movq 32(%rsi), %mm2 /* m21 | m20 */
- prefetch (%rdx)
+ prefetcht1 (%rdx)
movd 40(%rsi), %mm1 /* | m22 */
@@ -321,7 +321,7 @@ _mesa_3dnow_transform_points4_perspective:
p4_perspective_loop:
- prefetchw 32(%rdi) /* prefetch 2 vertices ahead */
+ prefetcht1 32(%rdi) /* prefetch 2 vertices ahead */
movq (%rdx), %mm4 /* x1 | x0 */
movq 8(%rdx), %mm5 /* x3 | x2 */
@@ -347,7 +347,7 @@ p4_perspective_loop:
addq $16, %rdi
decl %ecx
- prefetch 32(%rdx) /* hopefully stride is zero */
+ prefetcht1 32(%rdx) /* hopefully stride is zero */
jnz p4_perspective_loop
p4_perspective_done:
@@ -374,14 +374,14 @@ _mesa_3dnow_transform_points4_2d_no_rot:
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
movd (%rsi), %mm0 /* | m00 */
- prefetch (%rdx)
+ prefetcht1 (%rdx)
punpckldq 20(%rsi), %mm0 /* m11 | m00 */
movq 48(%rsi), %mm1 /* m31 | m30 */
p4_2d_no_rot_loop:
- prefetchw 32(%rdi) /* prefetch 2 vertices ahead */
+ prefetcht1 32(%rdi) /* prefetch 2 vertices ahead */
movq (%rdx), %mm4 /* x1 | x0 */
movq 8(%rdx), %mm5 /* x3 | x2 */
@@ -394,7 +394,7 @@ p4_2d_no_rot_loop:
addq %rax, %rdx
pfmul %mm1, %mm6 /* x3*m31 | x3*m30 */
- prefetch 32(%rdx) /* hopefully stride is zero */
+ prefetcht1 32(%rdx) /* hopefully stride is zero */
pfadd %mm4, %mm6 /* x1*m11+x3*m31 | x0*m00+x3*m30 */
movq %mm6, (%rdi) /* write r0, r1 */
@@ -433,7 +433,7 @@ _mesa_3dnow_transform_points4_2d:
movd (%rsi), %mm0 /* | m00 */
movd 4(%rsi), %mm1 /* | m01 */
- prefetch (%rdx)
+ prefetcht1 (%rdx)
punpckldq 16(%rsi), %mm0 /* m10 | m00 */
.byte 0x66, 0x66, 0x90 /* manual align += 4 */
@@ -443,7 +443,7 @@ _mesa_3dnow_transform_points4_2d:
p4_2d_loop:
- prefetchw 32(%rdi) /* prefetch 2 vertices ahead */
+ prefetcht1 32(%rdi) /* prefetch 2 vertices ahead */
movq (%rdx), %mm3 /* x1 | x0 */
movq 8(%rdx), %mm5 /* x3 | x2 */
@@ -460,7 +460,7 @@ p4_2d_loop:
pfacc %mm4, %mm3 /* x0*m01+x1*m11 | x0*m00+x1*m10 */
pfmul %mm2, %mm6 /* x3*m31 | x3*m30 */
- prefetch 32(%rdx) /* hopefully stride is zero */
+ prefetcht1 32(%rdx) /* hopefully stride is zero */
pfadd %mm6, %mm3 /* r1 | r0 */
diff --git a/src/util/hash_table.c b/src/util/hash_table.c
index 3247593c1f6..4cfe3d93251 100644
--- a/src/util/hash_table.c
+++ b/src/util/hash_table.c
@@ -163,6 +163,32 @@ _mesa_hash_table_destroy(struct hash_table *ht,
ralloc_free(ht);
}
+/**
+ * Deletes all entries of the given hash table without deleting the table
+ * itself or changing its structure.
+ *
+ * If delete_function is passed, it gets called on each entry present.
+ * Slots that hold the table's deleted-key marker are reset without calling
+ * it.  Passing a NULL table is a no-op.
+ */
+void
+_mesa_hash_table_clear(struct hash_table *ht,
+                       void (*delete_function)(struct hash_entry *entry))
+{
+   struct hash_entry *entry;
+
+   if (ht == NULL)
+      return;
+
+   for (entry = ht->table; entry != ht->table + ht->size; entry++) {
+      /* A NULL key marks a slot that is already free. */
+      if (entry->key == NULL)
+         continue;
+
+      if (delete_function != NULL && entry->key != ht->deleted_key)
+         delete_function(entry);
+
+      entry->key = NULL;
+   }
+
+   ht->entries = 0;
+   ht->deleted_entries = 0;
+}
+
/** Sets the value of the key pointer used for deleted entries in the table.
*
* The assumption is that usually keys are actual pointers, so we use a
@@ -300,7 +326,8 @@ hash_table_insert(struct hash_table *ht, uint32_t hash,
* required to avoid memory leaks, perform a search
* before inserting.
*/
- if (entry->hash == hash &&
+ if (!entry_is_deleted(ht, entry) &&
+ entry->hash == hash &&
ht->key_equals_function(key, entry->key)) {
entry->key = key;
entry->data = data;
diff --git a/src/util/hash_table.h b/src/util/hash_table.h
index eb9dbc333ec..85b013cac24 100644
--- a/src/util/hash_table.h
+++ b/src/util/hash_table.h
@@ -64,9 +64,16 @@ _mesa_hash_table_create(void *mem_ctx,
const void *b));
void _mesa_hash_table_destroy(struct hash_table *ht,
void (*delete_function)(struct hash_entry *entry));
+void _mesa_hash_table_clear(struct hash_table *ht,
+ void (*delete_function)(struct hash_entry *entry));
void _mesa_hash_table_set_deleted_key(struct hash_table *ht,
const void *deleted_key);
+/** Returns the value of the table's \c entries counter. */
+static inline uint32_t
+_mesa_hash_table_num_entries(struct hash_table *ht)
+{
+   return ht->entries;
+}
+
struct hash_entry *
_mesa_hash_table_insert(struct hash_table *ht, const void *key, void *data);
struct hash_entry *
diff --git a/src/util/set.c b/src/util/set.c
index f01f8699ac2..99abefd0632 100644
--- a/src/util/set.c
+++ b/src/util/set.c
@@ -282,7 +282,8 @@ set_add(struct set *ht, uint32_t hash, const void *key)
* If freeing of old keys is required to avoid memory leaks,
* perform a search before inserting.
*/
- if (entry->hash == hash &&
+ if (!entry_is_deleted(entry) &&
+ entry->hash == hash &&
ht->key_equals_function(key, entry->key)) {
entry->key = key;
return entry;
diff --git a/src/util/tests/hash_table/Makefile.am b/src/util/tests/hash_table/Makefile.am
index 04a77e30df1..8f12240cede 100644
--- a/src/util/tests/hash_table/Makefile.am
+++ b/src/util/tests/hash_table/Makefile.am
@@ -29,6 +29,7 @@ LDADD = \
$(DLOPEN_LIBS)
TESTS = \
+ clear \
collision \
delete_and_lookup \
delete_management \
diff --git a/src/util/tests/hash_table/clear.c b/src/util/tests/hash_table/clear.c
new file mode 100644
index 00000000000..526700bfb0f
--- /dev/null
+++ b/src/util/tests/hash_table/clear.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "hash_table.h"
+
+/* Turn index i into the pointer-sized key i + 1, so that index 0 does not
+ * produce a NULL key.
+ */
+static void *make_key(uint32_t i)
+{
+   const uintptr_t raw = 1 + i;
+
+   return (void *)raw;
+}
+
+/* Inverse of make_key(): recover the index from a key. */
+static uint32_t key_id(const void *key)
+{
+   const uintptr_t raw = (uintptr_t)key;
+
+   return raw - 1;
+}
+
+/* Hash callback: the key's integer value, converted to 32 bits. */
+static uint32_t key_hash(const void *key)
+{
+   const uintptr_t raw = (uintptr_t)key;
+
+   return (uint32_t)raw;
+}
+
+/* Equality callback: keys are equal when the pointers are identical. */
+static bool key_equal(const void *a, const void *b)
+{
+   if (a != b)
+      return false;
+
+   return true;
+}
+
+/* Delete callback: marks the bool behind entry->data as deleted and asserts
+ * that it had not been marked before, i.e. each entry is deleted only once.
+ */
+static void delete_function(struct hash_entry *entry)
+{
+   bool *flag = entry->data;
+
+   assert(!*flag);
+   *flag = true;
+}
+
+/* Fills a table, clears it, and checks that the delete callback ran once
+ * per entry and that the cleared table can be repopulated and searched.
+ */
+int main(void)
+{
+   /* An enum constant keeps flags[] a fixed-size array rather than a VLA. */
+   enum { size = 1000 };
+   struct hash_table *ht;
+   struct hash_entry *entry;
+   bool flags[size];
+   uint32_t i;
+
+   ht = _mesa_hash_table_create(NULL, key_hash, key_equal);
+   assert(ht != NULL);
+
+   for (i = 0; i < size; ++i) {
+      flags[i] = false;
+      _mesa_hash_table_insert(ht, make_key(i), &flags[i]);
+   }
+
+   _mesa_hash_table_clear(ht, delete_function);
+   assert(_mesa_hash_table_next_entry(ht, NULL) == NULL);
+
+   /* Check that delete_function was called and that repopulating the table
+    * works. */
+   for (i = 0; i < size; ++i) {
+      assert(flags[i]);
+      flags[i] = false;
+      _mesa_hash_table_insert(ht, make_key(i), &flags[i]);
+   }
+
+   /* Check that exactly the right set of entries is in the table. */
+   for (i = 0; i < size; ++i) {
+      assert(_mesa_hash_table_search(ht, make_key(i)));
+   }
+
+   hash_table_foreach(ht, entry) {
+      assert(key_id(entry->key) < size);
+   }
+
+   _mesa_hash_table_destroy(ht, NULL);
+
+   return 0;
+}