summary | refs | log | tree | commit | diff | stats
path: root/src/mesa
diff options
context:
space:
mode:
author: Jason Ekstrand <[email protected]> 2015-10-19 11:15:32 -0700
committer: Jason Ekstrand <[email protected]> 2015-10-19 14:14:21 -0700
commit: 958fc04dc51a2561c8598f42df59e3d9139e56a7 (patch)
tree: b6acf05aa073e97ae8e58647bf05c2c3e816f041 /src/mesa
parent: 995d9c4ac7fb046e01196cec308ebe10002a28da (diff)
parent: de862f03accb12b044ced60cb98f47a055457223 (diff)
Merge remote-tracking branch 'mesa-public/master' into vulkan
Diffstat (limited to 'src/mesa')
-rw-r--r--src/mesa/Android.libmesa_dricore.mk3
-rw-r--r--src/mesa/Android.libmesa_glsl_utils.mk2
-rw-r--r--src/mesa/Android.libmesa_st_mesa.mk1
-rw-r--r--src/mesa/Makefile.sources7
-rw-r--r--src/mesa/SConscript1
-rw-r--r--src/mesa/drivers/common/meta_copy_image.c5
-rw-r--r--src/mesa/drivers/dri/i915/i915_fragprog.c10
-rw-r--r--src/mesa/drivers/dri/i965/Android.mk1
-rw-r--r--src/mesa/drivers/dri/i965/Makefile.am1
-rw-r--r--src/mesa/drivers/dri/i965/Makefile.sources1
-rw-r--r--src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_cfg.cpp10
-rw-r--r--src/mesa/drivers/dri/i965/brw_clear.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_compiler.h661
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h358
-rw-r--r--src/mesa/drivers/dri/i965/brw_cs.c10
-rw-r--r--src/mesa/drivers/dri/i965/brw_cs.h15
-rw-r--r--src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c27
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp274
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h7
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp15
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_generator.cpp10
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp17
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_nir.cpp260
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp15
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_validate.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_visitor.cpp20
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs.c6
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs_surface_state.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_ir_fs.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_ir_vec4.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_lower_unnormalized_offset.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_meta_fast_clear.c5
-rw-r--r--src/mesa/drivers/dri/i965/brw_nir.c96
-rw-r--r--src/mesa/drivers/dri/i965/brw_program.c10
-rw-r--r--src/mesa/drivers/dri/i965/brw_program.h124
-rw-r--r--src/mesa/drivers/dri/i965/brw_sampler_state.c1
-rw-r--r--src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.cpp9
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.h62
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.cpp143
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.h12
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp16
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_generator.cpp26
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp66
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h32
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp16
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_nir.cpp54
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.c30
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.h10
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_surface_state.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.c114
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.h15
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_surface_state.c89
-rw-r--r--src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/gen6_gs_visitor.h2
-rw-r--r--src/mesa/drivers/dri/i965/gen7_gs_state.c4
-rw-r--r--src/mesa/drivers/dri/i965/gen7_vs_state.c4
-rw-r--r--src/mesa/drivers/dri/i965/gen7_wm_state.c12
-rw-r--r--src/mesa/drivers/dri/i965/gen8_gs_state.c4
-rw-r--r--src/mesa/drivers/dri/i965/gen8_ps_state.c32
-rw-r--r--src/mesa/drivers/dri/i965/gen8_surface_state.c8
-rw-r--r--src/mesa/drivers/dri/i965/gen8_vs_state.c4
-rw-r--r--src/mesa/drivers/dri/i965/intel_asm_annotation.c17
-rw-r--r--src/mesa/drivers/dri/i965/intel_asm_annotation.h3
-rw-r--r--src/mesa/drivers/dri/i965/intel_mipmap_tree.c8
-rw-r--r--src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp2
-rw-r--r--src/mesa/drivers/dri/r200/r200_vertprog.c17
-rw-r--r--src/mesa/drivers/x11/SConscript2
-rw-r--r--src/mesa/main/blend.c161
-rw-r--r--src/mesa/main/es1_conversion.c20
-rw-r--r--src/mesa/main/ff_fragment_shader.cpp12
-rw-r--r--src/mesa/main/ffvertex_prog.c3
-rw-r--r--src/mesa/main/format_utils.h1
-rw-r--r--src/mesa/main/imports.c148
-rw-r--r--src/mesa/main/imports.h7
-rw-r--r--src/mesa/main/matrix.c1
-rw-r--r--src/mesa/main/mipmap.c1
-rw-r--r--src/mesa/main/mtypes.h97
-rw-r--r--src/mesa/main/pack.c15
-rw-r--r--src/mesa/main/shader_query.cpp224
-rw-r--r--src/mesa/main/shaderapi.c10
-rw-r--r--src/mesa/main/shaderimage.c21
-rw-r--r--src/mesa/main/shaderimage.h12
-rw-r--r--src/mesa/main/shaderobj.c4
-rw-r--r--src/mesa/main/shared.c5
-rw-r--r--src/mesa/main/state.c6
-rw-r--r--src/mesa/main/texcompress_bptc.c1
-rw-r--r--src/mesa/main/texobj.c78
-rw-r--r--src/mesa/main/texobj.h3
-rw-r--r--src/mesa/main/texstate.c3
-rw-r--r--src/mesa/main/textureview.c2
-rw-r--r--src/mesa/main/uniform_query.cpp53
-rw-r--r--src/mesa/main/uniforms.c12
-rw-r--r--src/mesa/main/uniforms.h2
-rw-r--r--src/mesa/main/version.c19
-rw-r--r--src/mesa/program/Android.mk1
-rw-r--r--src/mesa/program/ir_to_mesa.cpp2
-rw-r--r--src/mesa/program/prog_to_nir.c32
-rw-r--r--src/mesa/program/program.c638
-rw-r--r--src/mesa/program/program.h87
-rw-r--r--src/mesa/program/sampler.cpp2
-rw-r--r--src/mesa/state_tracker/st_atom_clip.c5
-rw-r--r--src/mesa/state_tracker/st_atom_constbuf.c2
-rw-r--r--src/mesa/state_tracker/st_atom_pixeltransfer.c225
-rw-r--r--src/mesa/state_tracker/st_cb_bitmap.c145
-rw-r--r--src/mesa/state_tracker/st_cb_bitmap.h11
-rw-r--r--src/mesa/state_tracker/st_cb_bitmap_shader.c174
-rw-r--r--src/mesa/state_tracker/st_cb_drawpixels.c335
-rw-r--r--src/mesa/state_tracker/st_cb_drawpixels.h17
-rw-r--r--src/mesa/state_tracker/st_cb_drawpixels_shader.c278
-rw-r--r--src/mesa/state_tracker/st_cb_fbo.c2
-rw-r--r--src/mesa/state_tracker/st_cb_program.c27
-rw-r--r--src/mesa/state_tracker/st_context.c6
-rw-r--r--src/mesa/state_tracker/st_context.h9
-rw-r--r--src/mesa/state_tracker/st_debug.c2
-rw-r--r--src/mesa/state_tracker/st_glsl_to_tgsi.cpp260
-rw-r--r--src/mesa/state_tracker/st_glsl_to_tgsi.h10
-rw-r--r--src/mesa/state_tracker/st_mesa_to_tgsi.c50
-rw-r--r--src/mesa/state_tracker/st_mesa_to_tgsi.h4
-rw-r--r--src/mesa/state_tracker/st_program.c566
-rw-r--r--src/mesa/state_tracker/st_program.h47
-rw-r--r--src/mesa/tnl/t_draw.c1
-rw-r--r--src/mesa/vbo/vbo_context.c104
-rw-r--r--src/mesa/vbo/vbo_exec.h19
-rw-r--r--src/mesa/vbo/vbo_exec_api.c61
-rw-r--r--src/mesa/vbo/vbo_exec_draw.c25
-rw-r--r--src/mesa/vbo/vbo_save_api.c5
139 files changed, 2990 insertions, 3911 deletions
diff --git a/src/mesa/Android.libmesa_dricore.mk b/src/mesa/Android.libmesa_dricore.mk
index 2e308b83733..cd31e148222 100644
--- a/src/mesa/Android.libmesa_dricore.mk
+++ b/src/mesa/Android.libmesa_dricore.mk
@@ -50,7 +50,7 @@ endif # MESA_ENABLE_ASM
ifeq ($(ARCH_X86_HAVE_SSE4_1),true)
LOCAL_SRC_FILES += \
main/streaming-load-memcpy.c \
- mesa/main/sse_minmax.c
+ main/sse_minmax.c
LOCAL_CFLAGS := \
-msse4.1 \
-DUSE_SSE41
@@ -60,6 +60,7 @@ LOCAL_C_INCLUDES := \
$(MESA_TOP)/src/mapi \
$(MESA_TOP)/src/mesa/main \
$(MESA_TOP)/src/glsl \
+ $(MESA_TOP)/src/glsl/nir \
$(MESA_TOP)/src/gallium/include \
$(MESA_TOP)/src/gallium/auxiliary
diff --git a/src/mesa/Android.libmesa_glsl_utils.mk b/src/mesa/Android.libmesa_glsl_utils.mk
index ed620ac648c..9e150eaa3c0 100644
--- a/src/mesa/Android.libmesa_glsl_utils.mk
+++ b/src/mesa/Android.libmesa_glsl_utils.mk
@@ -37,6 +37,7 @@ LOCAL_MODULE := libmesa_glsl_utils
LOCAL_C_INCLUDES := \
$(MESA_TOP)/src/glsl \
+ $(MESA_TOP)/src/glsl/nir \
$(MESA_TOP)/src/mapi \
$(MESA_TOP)/src/gallium/include \
$(MESA_TOP)/src/gallium/auxiliary
@@ -62,6 +63,7 @@ LOCAL_CFLAGS := -D_POSIX_C_SOURCE=199309L
LOCAL_C_INCLUDES := \
$(MESA_TOP)/src/glsl \
+ $(MESA_TOP)/src/glsl/nir \
$(MESA_TOP)/src/mapi \
$(MESA_TOP)/src/gallium/include \
$(MESA_TOP)/src/gallium/auxiliary
diff --git a/src/mesa/Android.libmesa_st_mesa.mk b/src/mesa/Android.libmesa_st_mesa.mk
index b4b7fd97722..427a35f4f6e 100644
--- a/src/mesa/Android.libmesa_st_mesa.mk
+++ b/src/mesa/Android.libmesa_st_mesa.mk
@@ -55,6 +55,7 @@ LOCAL_C_INCLUDES := \
$(MESA_TOP)/src/mapi \
$(MESA_TOP)/src/mesa/main \
$(MESA_TOP)/src/glsl \
+ $(MESA_TOP)/src/glsl/nir \
$(MESA_TOP)/src/gallium/auxiliary \
$(MESA_TOP)/src/gallium/include
diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index 0915594cea6..34fb4461985 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -415,6 +415,7 @@ STATETRACKER_FILES = \
state_tracker/st_cache.h \
state_tracker/st_cb_bitmap.c \
state_tracker/st_cb_bitmap.h \
+ state_tracker/st_cb_bitmap_shader.c \
state_tracker/st_cb_blit.c \
state_tracker/st_cb_blit.h \
state_tracker/st_cb_bufferobjects.c \
@@ -425,6 +426,7 @@ STATETRACKER_FILES = \
state_tracker/st_cb_condrender.h \
state_tracker/st_cb_drawpixels.c \
state_tracker/st_cb_drawpixels.h \
+ state_tracker/st_cb_drawpixels_shader.c \
state_tracker/st_cb_drawtex.c \
state_tracker/st_cb_drawtex.h \
state_tracker/st_cb_eglimage.c \
@@ -525,9 +527,7 @@ PROGRAM_FILES = \
program/sampler.h \
program/string_to_uint_map.cpp \
program/symbol_table.c \
- program/symbol_table.h \
- ../glsl/shader_enums.c \
- ../glsl/shader_enums.h
+ program/symbol_table.h
PROGRAM_NIR_FILES = \
program/prog_to_nir.c \
@@ -620,6 +620,7 @@ INCLUDE_DIRS = \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src \
-I$(top_srcdir)/src/glsl \
+ -I$(top_srcdir)/src/glsl/nir \
-I$(top_builddir)/src/glsl \
-I$(top_builddir)/src/glsl/nir \
-I$(top_srcdir)/src/glsl/glcpp \
diff --git a/src/mesa/SConscript b/src/mesa/SConscript
index 5b80a216fef..c986326d2bf 100644
--- a/src/mesa/SConscript
+++ b/src/mesa/SConscript
@@ -16,6 +16,7 @@ env.Append(CPPPATH = [
'#/src',
'#/src/mapi',
'#/src/glsl',
+ '#/src/glsl/nir',
'#/src/mesa',
'#/src/gallium/include',
'#/src/gallium/auxiliary',
diff --git a/src/mesa/drivers/common/meta_copy_image.c b/src/mesa/drivers/common/meta_copy_image.c
index 33490ee6615..04b9cafe308 100644
--- a/src/mesa/drivers/common/meta_copy_image.c
+++ b/src/mesa/drivers/common/meta_copy_image.c
@@ -108,7 +108,11 @@ make_view(struct gl_context *ctx, struct gl_texture_image *tex_image,
return false;
}
+ assert(tex_obj->Target != 0);
+ assert(tex_obj->TargetIndex < NUM_TEXTURE_TARGETS);
+
view_tex_obj->Target = tex_obj->Target;
+ view_tex_obj->TargetIndex = tex_obj->TargetIndex;
*view_tex_image = _mesa_get_tex_image(ctx, view_tex_obj, tex_obj->Target, 0);
@@ -129,7 +133,6 @@ make_view(struct gl_context *ctx, struct gl_texture_image *tex_image,
view_tex_obj->NumLayers = tex_obj->NumLayers;
view_tex_obj->Immutable = tex_obj->Immutable;
view_tex_obj->ImmutableLevels = tex_obj->ImmutableLevels;
- view_tex_obj->Target = tex_obj->Target;
if (ctx->Driver.TextureView != NULL &&
!ctx->Driver.TextureView(ctx, view_tex_obj, tex_obj)) {
diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c
index 1a5943c87fb..59d795998c6 100644
--- a/src/mesa/drivers/dri/i915/i915_fragprog.c
+++ b/src/mesa/drivers/dri/i915/i915_fragprog.c
@@ -1315,9 +1315,10 @@ static struct gl_program *
i915NewProgram(struct gl_context * ctx, GLenum target, GLuint id)
{
switch (target) {
- case GL_VERTEX_PROGRAM_ARB:
- return _mesa_init_vertex_program(ctx, CALLOC_STRUCT(gl_vertex_program),
- target, id);
+ case GL_VERTEX_PROGRAM_ARB: {
+ struct gl_vertex_program *prog = CALLOC_STRUCT(gl_vertex_program);
+ return _mesa_init_gl_program(&prog->Base, target, id);
+ }
case GL_FRAGMENT_PROGRAM_ARB:{
struct i915_fragment_program *prog =
@@ -1325,8 +1326,7 @@ i915NewProgram(struct gl_context * ctx, GLenum target, GLuint id)
if (prog) {
i915_init_program(I915_CONTEXT(ctx), prog);
- return _mesa_init_fragment_program(ctx, &prog->FragProg,
- target, id);
+ return _mesa_init_gl_program(&prog->FragProg.Base, target, id);
}
else
return NULL;
diff --git a/src/mesa/drivers/dri/i965/Android.mk b/src/mesa/drivers/dri/i965/Android.mk
index a9b963a9eca..d30a053e10f 100644
--- a/src/mesa/drivers/dri/i965/Android.mk
+++ b/src/mesa/drivers/dri/i965/Android.mk
@@ -48,6 +48,7 @@ LOCAL_C_INCLUDES := \
$(MESA_DRI_C_INCLUDES)
LOCAL_SRC_FILES := \
+ $(i965_compiler_FILES) \
$(i965_FILES)
LOCAL_WHOLE_STATIC_LIBRARIES := \
diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am
index 2e241511049..04b3f9cc8ce 100644
--- a/src/mesa/drivers/dri/i965/Makefile.am
+++ b/src/mesa/drivers/dri/i965/Makefile.am
@@ -33,6 +33,7 @@ AM_CFLAGS = \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
-I$(top_srcdir)/src/mesa/drivers/dri/intel/server \
-I$(top_srcdir)/src/gtest/include \
+ -I$(top_srcdir)/src/glsl/nir \
-I$(top_builddir)/src/glsl/nir \
-I$(top_builddir)/src/mesa/drivers/dri/common \
$(DEFINES) \
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index b242ab55aae..ccd540dabca 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -1,6 +1,7 @@
i965_compiler_FILES = \
brw_cfg.cpp \
brw_cfg.h \
+ brw_compiler.h \
brw_cubemap_normalize.cpp \
brw_dead_control_flow.cpp \
brw_dead_control_flow.h \
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
index d458ad846bf..5308d175416 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
@@ -32,7 +32,7 @@ brw_blorp_eu_emitter::brw_blorp_eu_emitter(struct brw_context *brw,
generator(brw->intelScreen->compiler, brw,
mem_ctx, (void *) rzalloc(mem_ctx, struct brw_wm_prog_key),
(struct brw_stage_prog_data *) rzalloc(mem_ctx, struct brw_wm_prog_data),
- NULL, 0, false, "BLORP")
+ 0, false, "BLORP")
{
if (debug_flag)
generator.enable_debug("blorp");
diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp b/src/mesa/drivers/dri/i965/brw_cfg.cpp
index 91d53eff5a7..10bcd4bafd4 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp
@@ -305,6 +305,10 @@ cfg_t::cfg_t(exec_list *instructions)
assert(cur_do != NULL && cur_while != NULL);
cur->add_successor(mem_ctx, cur_do);
+
+ if (inst->predicate)
+ cur->add_successor(mem_ctx, cur_while);
+
set_next_block(&cur, cur_while, ip);
/* Pop the stack so we're in the previous loop */
@@ -422,7 +426,11 @@ cfg_t::dump(backend_shader *s)
calculate_idom();
foreach_block (block, this) {
- fprintf(stderr, "START B%d IDOM(B%d)", block->num, block->idom->num);
+ if (block->idom)
+ fprintf(stderr, "START B%d IDOM(B%d)", block->num, block->idom->num);
+ else
+ fprintf(stderr, "START B%d IDOM(none)", block->num);
+
foreach_list_typed(bblock_link, link, link, &block->parents) {
fprintf(stderr, " <-B%d",
link->block->num);
diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c
index 17a745d0373..b0119558c3a 100644
--- a/src/mesa/drivers/dri/i965/brw_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_clear.c
@@ -241,7 +241,7 @@ brw_clear(struct gl_context *ctx, GLbitfield mask)
}
/* Clear color buffers with fast clear or at least rep16 writes. */
- if (brw->gen >= 6 && brw->gen < 9 && (mask & BUFFER_BITS_COLOR)) {
+ if (brw->gen >= 6 && (mask & BUFFER_BITS_COLOR)) {
if (brw_meta_fast_clear(brw, fb, mask, partial_clear)) {
debug_mask("blorp color", mask & BUFFER_BITS_COLOR);
mask &= ~BUFFER_BITS_COLOR;
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
new file mode 100644
index 00000000000..11c485d2f08
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -0,0 +1,661 @@
+/*
+ * Copyright © 2010 - 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#pragma once
+
+#include "brw_device_info.h"
+#include "main/mtypes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct ra_regs;
+struct nir_shader;
+struct brw_geometry_program;
+union gl_constant_value;
+
+struct brw_compiler {
+ const struct brw_device_info *devinfo;
+
+ struct {
+ struct ra_regs *regs;
+
+ /**
+ * Array of the ra classes for the unaligned contiguous register
+ * block sizes used.
+ */
+ int *classes;
+
+ /**
+ * Mapping for register-allocated objects in *regs to the first
+ * GRF for that object.
+ */
+ uint8_t *ra_reg_to_grf;
+ } vec4_reg_set;
+
+ struct {
+ struct ra_regs *regs;
+
+ /**
+ * Array of the ra classes for the unaligned contiguous register
+ * block sizes used, indexed by register size.
+ */
+ int classes[16];
+
+ /**
+ * Mapping from classes to ra_reg ranges. Each of the per-size
+ * classes corresponds to a range of ra_reg nodes. This array stores
+ * those ranges in the form of first ra_reg in each class and the
+ * total number of ra_reg elements in the last array element. This
+ * way the range of the i'th class is given by:
+ * [ class_to_ra_reg_range[i], class_to_ra_reg_range[i+1] )
+ */
+ int class_to_ra_reg_range[17];
+
+ /**
+ * Mapping for register-allocated objects in *regs to the first
+ * GRF for that object.
+ */
+ uint8_t *ra_reg_to_grf;
+
+ /**
+ * ra class for the aligned pairs we use for PLN, which doesn't
+ * appear in *classes.
+ */
+ int aligned_pairs_class;
+ } fs_reg_sets[2];
+
+ void (*shader_debug_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
+ void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
+
+ bool scalar_vs;
+ struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES];
+};
+
+
+/**
+ * Program key structures.
+ *
+ * When drawing, we look for the currently bound shaders in the program
+ * cache. This is essentially a hash table lookup, and these are the keys.
+ *
+ * Sometimes OpenGL features specified as state need to be simulated via
+ * shader code, due to a mismatch between the API and the hardware. This
+ * is often referred to as "non-orthogonal state" or "NOS". We store NOS
+ * in the program key so it's considered when searching for a program. If
+ * we haven't seen a particular combination before, we have to recompile a
+ * new specialized version.
+ *
+ * Shader compilation should not look up state in gl_context directly, but
+ * instead use the copy in the program key. This guarantees recompiles will
+ * happen correctly.
+ *
+ * @{
+ */
+
+enum PACKED gen6_gather_sampler_wa {
+ WA_SIGN = 1, /* whether we need to sign extend */
+ WA_8BIT = 2, /* if we have an 8bit format needing wa */
+ WA_16BIT = 4, /* if we have a 16bit format needing wa */
+};
+
+/**
+ * Sampler information needed by VS, WM, and GS program cache keys.
+ */
+struct brw_sampler_prog_key_data {
+ /**
+ * EXT_texture_swizzle and DEPTH_TEXTURE_MODE swizzles.
+ */
+ uint16_t swizzles[MAX_SAMPLERS];
+
+ uint32_t gl_clamp_mask[3];
+
+ /**
+ * For RG32F, gather4's channel select is broken.
+ */
+ uint32_t gather_channel_quirk_mask;
+
+ /**
+ * Whether this sampler uses the compressed multisample surface layout.
+ */
+ uint32_t compressed_multisample_layout_mask;
+
+ /**
+ * For Sandybridge, which shader w/a we need for gather quirks.
+ */
+ enum gen6_gather_sampler_wa gen6_gather_wa[MAX_SAMPLERS];
+};
+
+
+/** The program key for Vertex Shaders. */
+struct brw_vs_prog_key {
+ unsigned program_string_id;
+
+ /*
+ * Per-attribute workaround flags
+ */
+ uint8_t gl_attrib_wa_flags[VERT_ATTRIB_MAX];
+
+ bool copy_edgeflag:1;
+
+ bool clamp_vertex_color:1;
+
+ /**
+ * How many user clipping planes are being uploaded to the vertex shader as
+ * push constants.
+ *
+ * These are used for lowering legacy gl_ClipVertex/gl_Position clipping to
+ * clip distances.
+ */
+ unsigned nr_userclip_plane_consts:4;
+
+ /**
+ * For pre-Gen6 hardware, a bitfield indicating which texture coordinates
+ * are going to be replaced with point coordinates (as a consequence of a
+ * call to glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)). Because
+ * our SF thread requires exact matching between VS outputs and FS inputs,
+ * these texture coordinates will need to be unconditionally included in
+ * the VUE, even if they aren't written by the vertex shader.
+ */
+ uint8_t point_coord_replace;
+
+ struct brw_sampler_prog_key_data tex;
+};
+
+/** The program key for Geometry Shaders. */
+struct brw_gs_prog_key
+{
+ unsigned program_string_id;
+
+ struct brw_sampler_prog_key_data tex;
+};
+
+/** The program key for Fragment/Pixel Shaders. */
+struct brw_wm_prog_key {
+ uint8_t iz_lookup;
+ bool stats_wm:1;
+ bool flat_shade:1;
+ bool persample_shading:1;
+ bool persample_2x:1;
+ unsigned nr_color_regions:5;
+ bool replicate_alpha:1;
+ bool render_to_fbo:1;
+ bool clamp_fragment_color:1;
+ bool compute_pos_offset:1;
+ bool compute_sample_id:1;
+ unsigned line_aa:2;
+ bool high_quality_derivatives:1;
+
+ uint16_t drawable_height;
+ uint64_t input_slots_valid;
+ unsigned program_string_id;
+ GLenum alpha_test_func; /* < For Gen4/5 MRT alpha test */
+ float alpha_test_ref;
+
+ struct brw_sampler_prog_key_data tex;
+};
+
+struct brw_cs_prog_key {
+ uint32_t program_string_id;
+ struct brw_sampler_prog_key_data tex;
+};
+
+/*
+ * Image metadata structure as laid out in the shader parameter
+ * buffer. Entries have to be 16B-aligned for the vec4 back-end to be
+ * able to use them. That's okay because the padding and any unused
+ * entries [most of them except when we're doing untyped surface
+ * access] will be removed by the uniform packing pass.
+ */
+#define BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET 0
+#define BRW_IMAGE_PARAM_OFFSET_OFFSET 4
+#define BRW_IMAGE_PARAM_SIZE_OFFSET 8
+#define BRW_IMAGE_PARAM_STRIDE_OFFSET 12
+#define BRW_IMAGE_PARAM_TILING_OFFSET 16
+#define BRW_IMAGE_PARAM_SWIZZLING_OFFSET 20
+#define BRW_IMAGE_PARAM_SIZE 24
+
+struct brw_image_param {
+ /** Surface binding table index. */
+ uint32_t surface_idx;
+
+ /** Offset applied to the X and Y surface coordinates. */
+ uint32_t offset[2];
+
+ /** Surface X, Y and Z dimensions. */
+ uint32_t size[3];
+
+ /** X-stride in bytes, Y-stride in pixels, horizontal slice stride in
+ * pixels, vertical slice stride in pixels.
+ */
+ uint32_t stride[4];
+
+ /** Log2 of the tiling modulus in the X, Y and Z dimension. */
+ uint32_t tiling[3];
+
+ /**
+ * Right shift to apply for bit 6 address swizzling. Two different
+ * swizzles can be specified and will be applied one after the other. The
+ * resulting address will be:
+ *
+ * addr' = addr ^ ((1 << 6) & ((addr >> swizzling[0]) ^
+ * (addr >> swizzling[1])))
+ *
+ * Use \c 0xff if any of the swizzles is not required.
+ */
+ uint32_t swizzling[2];
+};
+
+struct brw_stage_prog_data {
+ struct {
+ /** size of our binding table. */
+ uint32_t size_bytes;
+
+ /** @{
+ * surface indices for the various groups of surfaces
+ */
+ uint32_t pull_constants_start;
+ uint32_t texture_start;
+ uint32_t gather_texture_start;
+ uint32_t ubo_start;
+ uint32_t ssbo_start;
+ uint32_t abo_start;
+ uint32_t image_start;
+ uint32_t shader_time_start;
+ /** @} */
+ } binding_table;
+
+ GLuint nr_params; /**< number of float params/constants */
+ GLuint nr_pull_params;
+ unsigned nr_image_params;
+
+ unsigned curb_read_length;
+ unsigned total_scratch;
+
+ /**
+ * Register where the thread expects to find input data from the URB
+ * (typically uniforms, followed by vertex or fragment attributes).
+ */
+ unsigned dispatch_grf_start_reg;
+
+ bool use_alt_mode; /**< Use ALT floating point mode? Otherwise, IEEE. */
+
+ /* Pointers to tracked values (only valid once
+ * _mesa_load_state_parameters has been called at runtime).
+ */
+ const union gl_constant_value **param;
+ const union gl_constant_value **pull_param;
+
+ /** Image metadata passed to the shader as uniforms. */
+ struct brw_image_param *image_param;
+};
+
+/* Data about a particular attempt to compile a program. Note that
+ * there can be many of these, each in a different GL state
+ * corresponding to a different brw_wm_prog_key struct, with different
+ * compiled programs.
+ */
+struct brw_wm_prog_data {
+ struct brw_stage_prog_data base;
+
+ GLuint num_varying_inputs;
+
+ GLuint dispatch_grf_start_reg_16;
+ GLuint reg_blocks;
+ GLuint reg_blocks_16;
+
+ struct {
+ /** @{
+ * surface indices for the WM-specific surfaces
+ */
+ uint32_t render_target_start;
+ /** @} */
+ } binding_table;
+
+ uint8_t computed_depth_mode;
+
+ bool early_fragment_tests;
+ bool no_8;
+ bool dual_src_blend;
+ bool uses_pos_offset;
+ bool uses_omask;
+ bool uses_kill;
+ bool pulls_bary;
+ uint32_t prog_offset_16;
+
+ /**
+ * Mask of which interpolation modes are required by the fragment shader.
+ * Used in hardware setup on gen6+.
+ */
+ uint32_t barycentric_interp_modes;
+
+ /**
+ * Map from gl_varying_slot to the position within the FS setup data
+ * payload where the varying's attribute vertex deltas should be delivered.
+ * For varying slots that are not used by the FS, the value is -1.
+ */
+ int urb_setup[VARYING_SLOT_MAX];
+};
+
+struct brw_cs_prog_data {
+ struct brw_stage_prog_data base;
+
+ GLuint dispatch_grf_start_reg_16;
+ unsigned local_size[3];
+ unsigned simd_size;
+ bool uses_barrier;
+ bool uses_num_work_groups;
+ unsigned local_invocation_id_regs;
+
+ struct {
+ /** @{
+ * surface indices for the CS-specific surfaces
+ */
+ uint32_t work_groups_start;
+ /** @} */
+ } binding_table;
+};
+
+/**
+ * Enum representing the i965-specific vertex results that don't correspond
+ * exactly to any element of gl_varying_slot. The values of this enum are
+ * assigned such that they don't conflict with gl_varying_slot.
+ */
+typedef enum
+{
+ BRW_VARYING_SLOT_NDC = VARYING_SLOT_MAX,
+ BRW_VARYING_SLOT_PAD,
+ /**
+ * Technically this is not a varying but just a placeholder that
+ * compile_sf_prog() inserts into its VUE map to cause the gl_PointCoord
+ * builtin variable to be compiled correctly. see compile_sf_prog() for
+ * more info.
+ */
+ BRW_VARYING_SLOT_PNTC,
+ BRW_VARYING_SLOT_COUNT
+} brw_varying_slot;
+
+/**
+ * Data structure recording the relationship between the gl_varying_slot enum
+ * and "slots" within the vertex URB entry (VUE). A "slot" is defined as a
+ * single octaword within the VUE (128 bits).
+ *
+ * Note that each BRW register contains 256 bits (2 octawords), so when
+ * accessing the VUE in URB_NOSWIZZLE mode, each register corresponds to two
+ * consecutive VUE slots. When accessing the VUE in URB_INTERLEAVED mode (as
+ * in a vertex shader), each register corresponds to a single VUE slot, since
+ * it contains data for two separate vertices.
+ */
+struct brw_vue_map {
+ /**
+ * Bitfield representing all varying slots that are (a) stored in this VUE
+ * map, and (b) actually written by the shader. Does not include any of
+ * the additional varying slots defined in brw_varying_slot.
+ */
+ GLbitfield64 slots_valid;
+
+ /**
+ * Is this VUE map for a separate shader pipeline?
+ *
+ * Separable programs (GL_ARB_separate_shader_objects) can be mixed and matched
+ * without the linker having a chance to dead code eliminate unused varyings.
+ *
+ * This means that we have to use a fixed slot layout, based on the output's
+ * location field, rather than assigning slots in a compact contiguous block.
+ */
+ bool separate;
+
+ /**
+ * Map from gl_varying_slot value to VUE slot. For gl_varying_slots that are
+ * not stored in a slot (because they are not written, or because
+ * additional processing is applied before storing them in the VUE), the
+ * value is -1.
+ */
+ signed char varying_to_slot[BRW_VARYING_SLOT_COUNT];
+
+ /**
+ * Map from VUE slot to gl_varying_slot value. For slots that do not
+ * directly correspond to a gl_varying_slot, the value comes from
+ * brw_varying_slot.
+ *
+ * For slots that are not in use, the value is BRW_VARYING_SLOT_COUNT (this
+ * simplifies code that uses the value stored in slot_to_varying to
+ * create a bit mask).
+ */
+ signed char slot_to_varying[BRW_VARYING_SLOT_COUNT];
+
+ /**
+ * Total number of VUE slots in use
+ */
+ int num_slots;
+};
+
+/**
+ * Convert a VUE slot number into a byte offset within the VUE.
+ */
+static inline GLuint brw_vue_slot_to_offset(GLuint slot)
+{
+ return 16*slot;
+}
+
+/**
+ * Convert a vertex output (brw_varying_slot) into a byte offset within the
+ * VUE.
+ */
+static inline GLuint brw_varying_to_offset(struct brw_vue_map *vue_map,
+ GLuint varying)
+{
+ return brw_vue_slot_to_offset(vue_map->varying_to_slot[varying]);
+}
+
+void brw_compute_vue_map(const struct brw_device_info *devinfo,
+ struct brw_vue_map *vue_map,
+ GLbitfield64 slots_valid,
+ bool separate_shader);
+
+enum shader_dispatch_mode {
+ DISPATCH_MODE_4X1_SINGLE = 0,
+ DISPATCH_MODE_4X2_DUAL_INSTANCE = 1,
+ DISPATCH_MODE_4X2_DUAL_OBJECT = 2,
+ DISPATCH_MODE_SIMD8 = 3,
+};
+
+struct brw_vue_prog_data {
+ struct brw_stage_prog_data base;
+ struct brw_vue_map vue_map;
+
+ GLuint urb_read_length;
+ GLuint total_grf;
+
+ /* Used for calculating urb partitions. In the VS, this is the size of the
+ * URB entry used for both input and output to the thread. In the GS, this
+ * is the size of the URB entry used for output.
+ */
+ GLuint urb_entry_size;
+
+ enum shader_dispatch_mode dispatch_mode;
+};
+
+struct brw_vs_prog_data {
+ struct brw_vue_prog_data base;
+
+ GLbitfield64 inputs_read;
+
+ unsigned nr_attributes;
+
+ bool uses_vertexid;
+ bool uses_instanceid;
+};
+
+struct brw_gs_prog_data
+{
+ struct brw_vue_prog_data base;
+
+ /**
+ * Size of an output vertex, measured in HWORDS (32 bytes).
+ */
+ unsigned output_vertex_size_hwords;
+
+ unsigned output_topology;
+
+ /**
+ * Size of the control data (cut bits or StreamID bits), in hwords (32
+ * bytes). 0 if there is no control data.
+ */
+ unsigned control_data_header_size_hwords;
+
+ /**
+ * Format of the control data (either GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID
+ * if the control data is StreamID bits, or
+ * GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits).
+ * Ignored if control_data_header_size_hwords is 0.
+ */
+ unsigned control_data_format;
+
+ bool include_primitive_id;
+
+ /**
+ * The number of vertices emitted, if constant - otherwise -1.
+ */
+ int static_vertex_count;
+
+ int invocations;
+
+ /**
+ * Gen6 transform feedback enabled flag.
+ */
+ bool gen6_xfb_enabled;
+
+ /**
+ * Gen6: Provoking vertex convention for odd-numbered triangles
+ * in tristrips.
+ */
+ GLuint pv_first:1;
+
+ /**
+ * Gen6: Number of varyings that are output to transform feedback.
+ */
+ GLuint num_transform_feedback_bindings:7; /* 0-BRW_MAX_SOL_BINDINGS */
+
+ /**
+ * Gen6: Map from the index of a transform feedback binding table entry to the
+ * gl_varying_slot that should be streamed out through that binding table
+ * entry.
+ */
+ unsigned char transform_feedback_bindings[64 /* BRW_MAX_SOL_BINDINGS */];
+
+ /**
+ * Gen6: Map from the index of a transform feedback binding table entry to the
+ * swizzles that should be used when streaming out data through that
+ * binding table entry.
+ */
+ unsigned char transform_feedback_swizzles[64 /* BRW_MAX_SOL_BINDINGS */];
+};
+
+
+/** @} */
+
+/**
+ * Compile a vertex shader.
+ *
+ * Returns the final assembly and the program's size.
+ */
+const unsigned *
+brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
+ void *mem_ctx,
+ const struct brw_vs_prog_key *key,
+ struct brw_vs_prog_data *prog_data,
+ const struct nir_shader *shader,
+ gl_clip_plane *clip_planes,
+ bool use_legacy_snorm_formula,
+ int shader_time_index,
+ unsigned *final_assembly_size,
+ char **error_str);
+
+/**
+ * Scratch data used when compiling a GLSL geometry shader.
+ */
+struct brw_gs_compile
+{
+ struct brw_gs_prog_key key;
+ struct brw_gs_prog_data prog_data;
+ struct brw_vue_map input_vue_map;
+
+ struct brw_geometry_program *gp;
+
+ unsigned control_data_bits_per_vertex;
+ unsigned control_data_header_size_bits;
+};
+
+/**
+ * Compile a geometry shader.
+ *
+ * Returns the final assembly and the program's size.
+ */
+const unsigned *
+brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
+ struct brw_gs_compile *c,
+ const struct nir_shader *shader,
+ struct gl_shader_program *shader_prog,
+ void *mem_ctx,
+ int shader_time_index,
+ unsigned *final_assembly_size,
+ char **error_str);
+
+/**
+ * Compile a fragment shader.
+ *
+ * Returns the final assembly and the program's size.
+ */
+const unsigned *
+brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
+ void *mem_ctx,
+ const struct brw_wm_prog_key *key,
+ struct brw_wm_prog_data *prog_data,
+ const struct nir_shader *shader,
+ struct gl_program *prog,
+ int shader_time_index8,
+ int shader_time_index16,
+ bool use_rep_send,
+ unsigned *final_assembly_size,
+ char **error_str);
+
+/**
+ * Compile a compute shader.
+ *
+ * Returns the final assembly and the program's size.
+ */
+const unsigned *
+brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
+ void *mem_ctx,
+ const struct brw_cs_prog_key *key,
+ struct brw_cs_prog_data *prog_data,
+ const struct nir_shader *shader,
+ int shader_time_index,
+ unsigned *final_assembly_size,
+ char **error_str);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 6b2bbd21703..3b125448e14 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -51,7 +51,7 @@
#include "brw_context.h"
#include "brw_defines.h"
-#include "brw_shader.h"
+#include "brw_compiler.h"
#include "brw_draw.h"
#include "brw_state.h"
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index aa1284db3ce..4f503ae4869 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -40,6 +40,7 @@
#include "main/mm.h"
#include "main/mtypes.h"
#include "brw_structs.h"
+#include "brw_compiler.h"
#include "intel_aub.h"
#include "program/prog_parameter.h"
@@ -340,260 +341,6 @@ struct brw_shader {
bool compiled_once;
};
-struct brw_stage_prog_data {
- struct {
- /** size of our binding table. */
- uint32_t size_bytes;
-
- /** @{
- * surface indices for the various groups of surfaces
- */
- uint32_t pull_constants_start;
- uint32_t texture_start;
- uint32_t gather_texture_start;
- uint32_t ubo_start;
- uint32_t abo_start;
- uint32_t image_start;
- uint32_t shader_time_start;
- /** @} */
- } binding_table;
-
- GLuint nr_params; /**< number of float params/constants */
- GLuint nr_pull_params;
- unsigned nr_image_params;
-
- unsigned curb_read_length;
- unsigned total_scratch;
-
- /**
- * Register where the thread expects to find input data from the URB
- * (typically uniforms, followed by vertex or fragment attributes).
- */
- unsigned dispatch_grf_start_reg;
-
- bool use_alt_mode; /**< Use ALT floating point mode? Otherwise, IEEE. */
-
- /* Pointers to tracked values (only valid once
- * _mesa_load_state_parameters has been called at runtime).
- */
- const gl_constant_value **param;
- const gl_constant_value **pull_param;
-
- /** Image metadata passed to the shader as uniforms. */
- struct brw_image_param *image_param;
-};
-
-/*
- * Image metadata structure as laid out in the shader parameter
- * buffer. Entries have to be 16B-aligned for the vec4 back-end to be
- * able to use them. That's okay because the padding and any unused
- * entries [most of them except when we're doing untyped surface
- * access] will be removed by the uniform packing pass.
- */
-#define BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET 0
-#define BRW_IMAGE_PARAM_OFFSET_OFFSET 4
-#define BRW_IMAGE_PARAM_SIZE_OFFSET 8
-#define BRW_IMAGE_PARAM_STRIDE_OFFSET 12
-#define BRW_IMAGE_PARAM_TILING_OFFSET 16
-#define BRW_IMAGE_PARAM_SWIZZLING_OFFSET 20
-#define BRW_IMAGE_PARAM_SIZE 24
-
-struct brw_image_param {
- /** Surface binding table index. */
- uint32_t surface_idx;
-
- /** Offset applied to the X and Y surface coordinates. */
- uint32_t offset[2];
-
- /** Surface X, Y and Z dimensions. */
- uint32_t size[3];
-
- /** X-stride in bytes, Y-stride in pixels, horizontal slice stride in
- * pixels, vertical slice stride in pixels.
- */
- uint32_t stride[4];
-
- /** Log2 of the tiling modulus in the X, Y and Z dimension. */
- uint32_t tiling[3];
-
- /**
- * Right shift to apply for bit 6 address swizzling. Two different
- * swizzles can be specified and will be applied one after the other. The
- * resulting address will be:
- *
- * addr' = addr ^ ((1 << 6) & ((addr >> swizzling[0]) ^
- * (addr >> swizzling[1])))
- *
- * Use \c 0xff if any of the swizzles is not required.
- */
- uint32_t swizzling[2];
-};
-
-/* Data about a particular attempt to compile a program. Note that
- * there can be many of these, each in a different GL state
- * corresponding to a different brw_wm_prog_key struct, with different
- * compiled programs.
- */
-struct brw_wm_prog_data {
- struct brw_stage_prog_data base;
-
- GLuint num_varying_inputs;
-
- GLuint dispatch_grf_start_reg_16;
- GLuint reg_blocks;
- GLuint reg_blocks_16;
-
- struct {
- /** @{
- * surface indices the WM-specific surfaces
- */
- uint32_t render_target_start;
- /** @} */
- } binding_table;
-
- uint8_t computed_depth_mode;
-
- bool early_fragment_tests;
- bool no_8;
- bool dual_src_blend;
- bool uses_pos_offset;
- bool uses_omask;
- bool uses_kill;
- bool pulls_bary;
- uint32_t prog_offset_16;
-
- /**
- * Mask of which interpolation modes are required by the fragment shader.
- * Used in hardware setup on gen6+.
- */
- uint32_t barycentric_interp_modes;
-
- /**
- * Map from gl_varying_slot to the position within the FS setup data
- * payload where the varying's attribute vertex deltas should be delivered.
- * For varying slots that are not used by the FS, the value is -1.
- */
- int urb_setup[VARYING_SLOT_MAX];
-};
-
-struct brw_cs_prog_data {
- struct brw_stage_prog_data base;
-
- GLuint dispatch_grf_start_reg_16;
- unsigned local_size[3];
- unsigned simd_size;
- bool uses_barrier;
- bool uses_num_work_groups;
- unsigned local_invocation_id_regs;
-
- struct {
- /** @{
- * surface indices the CS-specific surfaces
- */
- uint32_t work_groups_start;
- /** @} */
- } binding_table;
-};
-
-/**
- * Enum representing the i965-specific vertex results that don't correspond
- * exactly to any element of gl_varying_slot. The values of this enum are
- * assigned such that they don't conflict with gl_varying_slot.
- */
-typedef enum
-{
- BRW_VARYING_SLOT_NDC = VARYING_SLOT_MAX,
- BRW_VARYING_SLOT_PAD,
- /**
- * Technically this is not a varying but just a placeholder that
- * compile_sf_prog() inserts into its VUE map to cause the gl_PointCoord
- * builtin variable to be compiled correctly. see compile_sf_prog() for
- * more info.
- */
- BRW_VARYING_SLOT_PNTC,
- BRW_VARYING_SLOT_COUNT
-} brw_varying_slot;
-
-
-/**
- * Data structure recording the relationship between the gl_varying_slot enum
- * and "slots" within the vertex URB entry (VUE). A "slot" is defined as a
- * single octaword within the VUE (128 bits).
- *
- * Note that each BRW register contains 256 bits (2 octawords), so when
- * accessing the VUE in URB_NOSWIZZLE mode, each register corresponds to two
- * consecutive VUE slots. When accessing the VUE in URB_INTERLEAVED mode (as
- * in a vertex shader), each register corresponds to a single VUE slot, since
- * it contains data for two separate vertices.
- */
-struct brw_vue_map {
- /**
- * Bitfield representing all varying slots that are (a) stored in this VUE
- * map, and (b) actually written by the shader. Does not include any of
- * the additional varying slots defined in brw_varying_slot.
- */
- GLbitfield64 slots_valid;
-
- /**
- * Is this VUE map for a separate shader pipeline?
- *
- * Separable programs (GL_ARB_separate_shader_objects) can be mixed and matched
- * without the linker having a chance to dead code eliminate unused varyings.
- *
- * This means that we have to use a fixed slot layout, based on the output's
- * location field, rather than assigning slots in a compact contiguous block.
- */
- bool separate;
-
- /**
- * Map from gl_varying_slot value to VUE slot. For gl_varying_slots that are
- * not stored in a slot (because they are not written, or because
- * additional processing is applied before storing them in the VUE), the
- * value is -1.
- */
- signed char varying_to_slot[BRW_VARYING_SLOT_COUNT];
-
- /**
- * Map from VUE slot to gl_varying_slot value. For slots that do not
- * directly correspond to a gl_varying_slot, the value comes from
- * brw_varying_slot.
- *
- * For slots that are not in use, the value is BRW_VARYING_SLOT_COUNT (this
- * simplifies code that uses the value stored in slot_to_varying to
- * create a bit mask).
- */
- signed char slot_to_varying[BRW_VARYING_SLOT_COUNT];
-
- /**
- * Total number of VUE slots in use
- */
- int num_slots;
-};
-
-/**
- * Convert a VUE slot number into a byte offset within the VUE.
- */
-static inline GLuint brw_vue_slot_to_offset(GLuint slot)
-{
- return 16*slot;
-}
-
-/**
- * Convert a vertex output (brw_varying_slot) into a byte offset within the
- * VUE.
- */
-static inline GLuint brw_varying_to_offset(struct brw_vue_map *vue_map,
- GLuint varying)
-{
- return brw_vue_slot_to_offset(vue_map->varying_to_slot[varying]);
-}
-
-void brw_compute_vue_map(const struct brw_device_info *devinfo,
- struct brw_vue_map *vue_map,
- GLbitfield64 slots_valid,
- bool separate_shader);
-
-
/**
* Bitmask indicating which fragment shader inputs represent varyings (and
* hence have to be delivered to the fragment shader by the SF/SBE stage).
@@ -670,39 +417,6 @@ struct brw_ff_gs_prog_data {
unsigned svbi_postincrement_value;
};
-enum shader_dispatch_mode {
- DISPATCH_MODE_4X1_SINGLE = 0,
- DISPATCH_MODE_4X2_DUAL_INSTANCE = 1,
- DISPATCH_MODE_4X2_DUAL_OBJECT = 2,
- DISPATCH_MODE_SIMD8 = 3,
-};
-
-struct brw_vue_prog_data {
- struct brw_stage_prog_data base;
- struct brw_vue_map vue_map;
-
- GLuint urb_read_length;
- GLuint total_grf;
-
- /* Used for calculating urb partitions. In the VS, this is the size of the
- * URB entry used for both input and output to the thread. In the GS, this
- * is the size of the URB entry used for output.
- */
- GLuint urb_entry_size;
-
- enum shader_dispatch_mode dispatch_mode;
-};
-
-
-struct brw_vs_prog_data {
- struct brw_vue_prog_data base;
-
- GLbitfield64 inputs_read;
-
- bool uses_vertexid;
- bool uses_instanceid;
-};
-
/** Number of texture sampler units */
#define BRW_MAX_TEX_UNIT 32
@@ -715,9 +429,6 @@ struct brw_vs_prog_data {
/** Max number of SSBOs in a shader */
#define BRW_MAX_SSBO 12
-/** Max number of combined UBOs and SSBOs in a shader */
-#define BRW_MAX_COMBINED_UBO_SSBO (BRW_MAX_UBO + BRW_MAX_SSBO)
-
/** Max number of atomic counter buffer objects in a shader */
#define BRW_MAX_ABO 16
@@ -763,71 +474,6 @@ struct brw_vs_prog_data {
#define SURF_INDEX_GEN6_SOL_BINDING(t) (t)
-struct brw_gs_prog_data
-{
- struct brw_vue_prog_data base;
-
- /**
- * Size of an output vertex, measured in HWORDS (32 bytes).
- */
- unsigned output_vertex_size_hwords;
-
- unsigned output_topology;
-
- /**
- * Size of the control data (cut bits or StreamID bits), in hwords (32
- * bytes). 0 if there is no control data.
- */
- unsigned control_data_header_size_hwords;
-
- /**
- * Format of the control data (either GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID
- * if the control data is StreamID bits, or
- * GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits).
- * Ignored if control_data_header_size is 0.
- */
- unsigned control_data_format;
-
- bool include_primitive_id;
-
- /**
- * The number of vertices emitted, if constant - otherwise -1.
- */
- int static_vertex_count;
-
- int invocations;
-
- /**
- * Gen6 transform feedback enabled flag.
- */
- bool gen6_xfb_enabled;
-
- /**
- * Gen6: Provoking vertex convention for odd-numbered triangles
- * in tristrips.
- */
- GLuint pv_first:1;
-
- /**
- * Gen6: Number of varyings that are output to transform feedback.
- */
- GLuint num_transform_feedback_bindings:7; /* 0-BRW_MAX_SOL_BINDINGS */
-
- /**
- * Gen6: Map from the index of a transform feedback binding table entry to the
- * gl_varying_slot that should be streamed out through that binding table
- * entry.
- */
- unsigned char transform_feedback_bindings[BRW_MAX_SOL_BINDINGS];
-
- /**
- * Gen6: Map from the index of a transform feedback binding table entry to the
- * swizzles that should be used when streaming out data through that
- * binding table entry.
- */
- unsigned char transform_feedback_swizzles[BRW_MAX_SOL_BINDINGS];
-};
-
/**
* Stride in bytes between shader_time entries.
*
@@ -953,6 +599,8 @@ struct intel_batchbuffer {
} saved;
};
+#define MAX_GS_INPUT_VERTICES 6
+
#define BRW_MAX_XFB_STREAMS 4
struct brw_transform_feedback_object {
diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c
index 45fb816c160..263d224e882 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.c
+++ b/src/mesa/drivers/dri/i965/brw_cs.c
@@ -105,9 +105,15 @@ brw_codegen_cs_prog(struct brw_context *brw,
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
st_index = brw_get_shader_time_index(brw, prog, &cp->program.Base, ST_CS);
- program = brw_cs_emit(brw, mem_ctx, key, &prog_data,
- &cp->program, prog, st_index, &program_size);
+ char *error_str;
+ program = brw_compile_cs(brw->intelScreen->compiler, brw, mem_ctx,
+ key, &prog_data, cp->program.Base.nir,
+ st_index, &program_size, &error_str);
if (program == NULL) {
+ prog->LinkStatus = false;
+ ralloc_strcat(&prog->InfoLog, error_str);
+ _mesa_problem(NULL, "Failed to compile compute shader: %s\n", error_str);
+
ralloc_free(mem_ctx);
return false;
}
diff --git a/src/mesa/drivers/dri/i965/brw_cs.h b/src/mesa/drivers/dri/i965/brw_cs.h
index 17c2ff9871a..899e340f14e 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.h
+++ b/src/mesa/drivers/dri/i965/brw_cs.h
@@ -27,11 +27,6 @@
#include "brw_program.h"
-struct brw_cs_prog_key {
- uint32_t program_string_id;
- struct brw_sampler_prog_key_data tex;
-};
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -39,16 +34,6 @@ extern "C" {
void
brw_upload_cs_prog(struct brw_context *brw);
-const unsigned *
-brw_cs_emit(struct brw_context *brw,
- void *mem_ctx,
- const struct brw_cs_prog_key *key,
- struct brw_cs_prog_data *prog_data,
- struct gl_compute_program *cp,
- struct gl_shader_program *prog,
- int shader_time_index,
- unsigned *final_assembly_size);
-
void
brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data,
void *buffer, uint32_t threads, uint32_t stride);
diff --git a/src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp b/src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp
index 33571292007..33d2048e657 100644
--- a/src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp
@@ -30,7 +30,7 @@
* \author Eric Anholt <[email protected]>
*/
-#include "glsl/glsl_types.h"
+#include "glsl/nir/glsl_types.h"
#include "glsl/ir.h"
#include "program/prog_instruction.h" /* For WRITEMASK_* */
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 761aa0ec5fa..0ac1ad9378b 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -461,7 +461,7 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
struct brw_reg mrf,
bool noperspective,
unsigned mode,
- unsigned data,
+ struct brw_reg data,
unsigned msg_length,
unsigned response_length);
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index dc699bb6321..bf2fee9ed48 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -3212,26 +3212,29 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
struct brw_reg mrf,
bool noperspective,
unsigned mode,
- unsigned data,
+ struct brw_reg data,
unsigned msg_length,
unsigned response_length)
{
const struct brw_device_info *devinfo = p->devinfo;
- struct brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
-
- brw_set_dest(p, insn, dest);
- brw_set_src0(p, insn, mrf);
- brw_set_message_descriptor(p, insn, GEN7_SFID_PIXEL_INTERPOLATOR,
- msg_length, response_length,
- false /* header is never present for PI */,
- false);
+ struct brw_inst *insn;
+ const uint16_t exec_size = brw_inst_exec_size(devinfo, p->current);
- brw_inst_set_pi_simd_mode(
- devinfo, insn, brw_inst_exec_size(devinfo, insn) == BRW_EXECUTE_16);
+ /* brw_send_indirect_message will automatically use a direct send message
+ * if data is actually immediate.
+ */
+ insn = brw_send_indirect_message(p,
+ GEN7_SFID_PIXEL_INTERPOLATOR,
+ dest,
+ mrf,
+ vec1(data));
+ brw_inst_set_mlen(devinfo, insn, msg_length);
+ brw_inst_set_rlen(devinfo, insn, response_length);
+
+ brw_inst_set_pi_simd_mode(devinfo, insn, exec_size == BRW_EXECUTE_16);
brw_inst_set_pi_slot_group(devinfo, insn, 0); /* zero unless 32/64px dispatch */
brw_inst_set_pi_nopersp(devinfo, insn, noperspective);
brw_inst_set_pi_message_type(devinfo, insn, mode);
- brw_inst_set_pi_message_data(devinfo, insn, data);
}
void
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 5049851c617..0562c5a9981 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -47,7 +47,7 @@
#include "brw_dead_control_flow.h"
#include "main/uniforms.h"
#include "brw_fs_live_variables.h"
-#include "glsl/glsl_types.h"
+#include "glsl/nir/glsl_types.h"
#include "program/sampler.h"
using namespace brw;
@@ -338,6 +338,18 @@ fs_inst::can_do_source_mods(const struct brw_device_info *devinfo)
}
bool
+fs_inst::can_change_types() const
+{
+ return dst.type == src[0].type &&
+ !src[0].abs && !src[0].negate && !saturate &&
+ (opcode == BRW_OPCODE_MOV ||
+ (opcode == BRW_OPCODE_SEL &&
+ dst.type == src[1].type &&
+ predicate != BRW_PREDICATE_NONE &&
+ !src[1].abs && !src[1].negate));
+}
+
+bool
fs_inst::has_side_effects() const
{
return this->eot || backend_instruction::has_side_effects();
@@ -1049,11 +1061,11 @@ fs_visitor::emit_general_interpolation(fs_reg attr, const char *name,
unsigned int array_elements;
if (type->is_array()) {
- array_elements = type->length;
+ array_elements = type->arrays_of_arrays_size();
if (array_elements == 0) {
fail("dereferenced array '%s' has length 0\n", name);
}
- type = type->fields.array;
+ type = type->without_array();
} else {
array_elements = 1;
}
@@ -1509,25 +1521,14 @@ void
fs_visitor::assign_vs_urb_setup()
{
brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data;
- int grf, count, slot, channel, attr;
assert(stage == MESA_SHADER_VERTEX);
- count = _mesa_bitcount_64(vs_prog_data->inputs_read);
+ int count = _mesa_bitcount_64(vs_prog_data->inputs_read);
if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid)
count++;
/* Each attribute is 4 regs. */
- this->first_non_payload_grf += count * 4;
-
- unsigned vue_entries =
- MAX2(count, vs_prog_data->base.vue_map.num_slots);
-
- /* URB entry size is counted in units of 64 bytes (for the 3DSTATE_URB_VS
- * command). Each attribute is 16 bytes (4 floats/dwords), so each unit
- * fits four attributes.
- */
- vs_prog_data->base.urb_entry_size = ALIGN(vue_entries, 4) / 4;
- vs_prog_data->base.urb_read_length = (count + 1) / 2;
+ this->first_non_payload_grf += 4 * vs_prog_data->nr_attributes;
assert(vs_prog_data->base.urb_read_length <= 15);
@@ -1535,25 +1536,10 @@ fs_visitor::assign_vs_urb_setup()
foreach_block_and_inst(block, fs_inst, inst, cfg) {
for (int i = 0; i < inst->sources; i++) {
if (inst->src[i].file == ATTR) {
-
- if (inst->src[i].reg == VERT_ATTRIB_MAX) {
- slot = count - 1;
- } else {
- /* Attributes come in in a contiguous block, ordered by their
- * gl_vert_attrib value. That means we can compute the slot
- * number for an attribute by masking out the enabled
- * attributes before it and counting the bits.
- */
- attr = inst->src[i].reg + inst->src[i].reg_offset / 4;
- slot = _mesa_bitcount_64(vs_prog_data->inputs_read &
- BITFIELD64_MASK(attr));
- }
-
- channel = inst->src[i].reg_offset & 3;
-
- grf = payload.num_regs +
- prog_data->curb_read_length +
- slot * 4 + channel;
+ int grf = payload.num_regs +
+ prog_data->curb_read_length +
+ inst->src[i].reg +
+ inst->src[i].reg_offset;
inst->src[i].file = HW_REG;
inst->src[i].fixed_hw_reg =
@@ -5134,41 +5120,140 @@ fs_visitor::run_cs()
return !failed;
}
+/**
+ * Return a bitfield where bit n is set if barycentric interpolation mode n
+ * (see enum brw_wm_barycentric_interp_mode) is needed by the fragment shader.
+ */
+static unsigned
+brw_compute_barycentric_interp_modes(const struct brw_device_info *devinfo,
+ bool shade_model_flat,
+ bool persample_shading,
+ const nir_shader *shader)
+{
+ unsigned barycentric_interp_modes = 0;
+
+ nir_foreach_variable(var, &shader->inputs) {
+ enum glsl_interp_qualifier interp_qualifier =
+ (enum glsl_interp_qualifier)var->data.interpolation;
+ bool is_centroid = var->data.centroid && !persample_shading;
+ bool is_sample = var->data.sample || persample_shading;
+ bool is_gl_Color = (var->data.location == VARYING_SLOT_COL0) ||
+ (var->data.location == VARYING_SLOT_COL1);
+
+ /* Ignore WPOS and FACE, because they don't require interpolation. */
+ if (var->data.location == VARYING_SLOT_POS ||
+ var->data.location == VARYING_SLOT_FACE)
+ continue;
+
+ /* Determine the set (or sets) of barycentric coordinates needed to
+ * interpolate this variable. Note that when
+ * devinfo->needs_unlit_centroid_workaround is set, centroid interpolation
+ * uses PIXEL interpolation for unlit pixels and CENTROID interpolation
+ * for lit pixels, so we need both sets of barycentric coordinates.
+ */
+ if (interp_qualifier == INTERP_QUALIFIER_NOPERSPECTIVE) {
+ if (is_centroid) {
+ barycentric_interp_modes |=
+ 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC;
+ } else if (is_sample) {
+ barycentric_interp_modes |=
+ 1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC;
+ }
+ if ((!is_centroid && !is_sample) ||
+ devinfo->needs_unlit_centroid_workaround) {
+ barycentric_interp_modes |=
+ 1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC;
+ }
+ } else if (interp_qualifier == INTERP_QUALIFIER_SMOOTH ||
+ (!(shade_model_flat && is_gl_Color) &&
+ interp_qualifier == INTERP_QUALIFIER_NONE)) {
+ if (is_centroid) {
+ barycentric_interp_modes |=
+ 1 << BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC;
+ } else if (is_sample) {
+ barycentric_interp_modes |=
+ 1 << BRW_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC;
+ }
+ if ((!is_centroid && !is_sample) ||
+ devinfo->needs_unlit_centroid_workaround) {
+ barycentric_interp_modes |=
+ 1 << BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
+ }
+ }
+ }
+
+ return barycentric_interp_modes;
+}
+
+static uint8_t
+computed_depth_mode(const nir_shader *shader)
+{
+ if (shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
+ switch (shader->info.fs.depth_layout) {
+ case FRAG_DEPTH_LAYOUT_NONE:
+ case FRAG_DEPTH_LAYOUT_ANY:
+ return BRW_PSCDEPTH_ON;
+ case FRAG_DEPTH_LAYOUT_GREATER:
+ return BRW_PSCDEPTH_ON_GE;
+ case FRAG_DEPTH_LAYOUT_LESS:
+ return BRW_PSCDEPTH_ON_LE;
+ case FRAG_DEPTH_LAYOUT_UNCHANGED:
+ return BRW_PSCDEPTH_OFF;
+ }
+ }
+ return BRW_PSCDEPTH_OFF;
+}
+
const unsigned *
-brw_wm_fs_emit(struct brw_context *brw,
+brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx,
const struct brw_wm_prog_key *key,
struct brw_wm_prog_data *prog_data,
- struct gl_fragment_program *fp,
- struct gl_shader_program *prog,
+ const nir_shader *shader,
+ struct gl_program *prog,
int shader_time_index8, int shader_time_index16,
- unsigned *final_assembly_size)
+ bool use_rep_send,
+ unsigned *final_assembly_size,
+ char **error_str)
{
- /* Now the main event: Visit the shader IR and generate our FS IR for it.
+ /* key->alpha_test_func means simulating alpha testing via discards,
+ * so the shader definitely kills pixels.
*/
- fs_visitor v(brw->intelScreen->compiler, brw, mem_ctx, key,
- &prog_data->base, &fp->Base, fp->Base.nir, 8, shader_time_index8);
+ prog_data->uses_kill = shader->info.fs.uses_discard || key->alpha_test_func;
+ prog_data->uses_omask =
+ shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
+ prog_data->computed_depth_mode = computed_depth_mode(shader);
+
+ prog_data->early_fragment_tests = shader->info.fs.early_fragment_tests;
+
+ prog_data->barycentric_interp_modes =
+ brw_compute_barycentric_interp_modes(compiler->devinfo,
+ key->flat_shade,
+ key->persample_shading,
+ shader);
+
+ fs_visitor v(compiler, log_data, mem_ctx, key,
+ &prog_data->base, prog, shader, 8,
+ shader_time_index8);
if (!v.run_fs(false /* do_rep_send */)) {
- if (prog) {
- prog->LinkStatus = false;
- ralloc_strcat(&prog->InfoLog, v.fail_msg);
- }
-
- _mesa_problem(NULL, "Failed to compile fragment shader: %s\n",
- v.fail_msg);
+ if (error_str)
+ *error_str = ralloc_strdup(mem_ctx, v.fail_msg);
return NULL;
}
cfg_t *simd16_cfg = NULL;
- fs_visitor v2(brw->intelScreen->compiler, brw, mem_ctx, key,
- &prog_data->base, &fp->Base, fp->Base.nir, 16, shader_time_index16);
- if (likely(!(INTEL_DEBUG & DEBUG_NO16) || brw->use_rep_send)) {
+ fs_visitor v2(compiler, log_data, mem_ctx, key,
+ &prog_data->base, prog, shader, 16,
+ shader_time_index16);
+ if (likely(!(INTEL_DEBUG & DEBUG_NO16) || use_rep_send)) {
if (!v.simd16_unsupported) {
/* Try a SIMD16 compile */
v2.import_uniforms(&v);
- if (!v2.run_fs(brw->use_rep_send)) {
- perf_debug("SIMD16 shader failed to compile: %s", v2.fail_msg);
+ if (!v2.run_fs(use_rep_send)) {
+ compiler->shader_perf_log(log_data,
+ "SIMD16 shader failed to compile: %s",
+ v2.fail_msg);
} else {
simd16_cfg = v2.cfg;
}
@@ -5176,8 +5261,8 @@ brw_wm_fs_emit(struct brw_context *brw,
}
cfg_t *simd8_cfg;
- int no_simd8 = (INTEL_DEBUG & DEBUG_NO8) || brw->no_simd8;
- if ((no_simd8 || brw->gen < 5) && simd16_cfg) {
+ int no_simd8 = (INTEL_DEBUG & DEBUG_NO8) || use_rep_send;
+ if ((no_simd8 || compiler->devinfo->gen < 5) && simd16_cfg) {
simd8_cfg = NULL;
prog_data->no_8 = true;
} else {
@@ -5185,20 +5270,14 @@ brw_wm_fs_emit(struct brw_context *brw,
prog_data->no_8 = false;
}
- fs_generator g(brw->intelScreen->compiler, brw,
- mem_ctx, (void *) key, &prog_data->base,
- &fp->Base, v.promoted_constants, v.runtime_check_aads_emit, "FS");
+ fs_generator g(compiler, log_data, mem_ctx, (void *) key, &prog_data->base,
+ v.promoted_constants, v.runtime_check_aads_emit, "FS");
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
- char *name;
- if (prog)
- name = ralloc_asprintf(mem_ctx, "%s fragment shader %d",
- prog->Label ? prog->Label : "unnamed",
- prog->Name);
- else
- name = ralloc_asprintf(mem_ctx, "fragment program %d", fp->Base.Id);
-
- g.enable_debug(name);
+ g.enable_debug(ralloc_asprintf(mem_ctx, "%s fragment shader %s",
+ shader->info.label ? shader->info.label :
+ "unnamed",
+ shader->info.name));
}
if (simd8_cfg)
@@ -5283,29 +5362,32 @@ fs_visitor::emit_cs_work_group_id_setup()
}
const unsigned *
-brw_cs_emit(struct brw_context *brw,
- void *mem_ctx,
- const struct brw_cs_prog_key *key,
- struct brw_cs_prog_data *prog_data,
- struct gl_compute_program *cp,
- struct gl_shader_program *prog,
- int shader_time_index,
- unsigned *final_assembly_size)
+brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
+ void *mem_ctx,
+ const struct brw_cs_prog_key *key,
+ struct brw_cs_prog_data *prog_data,
+ const nir_shader *shader,
+ int shader_time_index,
+ unsigned *final_assembly_size,
+ char **error_str)
{
- prog_data->local_size[0] = cp->LocalSize[0];
- prog_data->local_size[1] = cp->LocalSize[1];
- prog_data->local_size[2] = cp->LocalSize[2];
+ prog_data->local_size[0] = shader->info.cs.local_size[0];
+ prog_data->local_size[1] = shader->info.cs.local_size[1];
+ prog_data->local_size[2] = shader->info.cs.local_size[2];
unsigned local_workgroup_size =
- cp->LocalSize[0] * cp->LocalSize[1] * cp->LocalSize[2];
- unsigned max_cs_threads = brw->intelScreen->compiler->devinfo->max_cs_threads;
+ shader->info.cs.local_size[0] * shader->info.cs.local_size[1] *
+ shader->info.cs.local_size[2];
+
+ unsigned max_cs_threads = compiler->devinfo->max_cs_threads;
cfg_t *cfg = NULL;
const char *fail_msg = NULL;
/* Now the main event: Visit the shader IR and generate our CS IR for it.
*/
- fs_visitor v8(brw->intelScreen->compiler, brw, mem_ctx, key,
- &prog_data->base, &cp->Base, cp->Base.nir, 8, shader_time_index);
+ fs_visitor v8(compiler, log_data, mem_ctx, key, &prog_data->base,
+ NULL, /* Never used in core profile */
+ shader, 8, shader_time_index);
if (!v8.run_cs()) {
fail_msg = v8.fail_msg;
} else if (local_workgroup_size <= 8 * max_cs_threads) {
@@ -5313,15 +5395,18 @@ brw_cs_emit(struct brw_context *brw,
prog_data->simd_size = 8;
}
- fs_visitor v16(brw->intelScreen->compiler, brw, mem_ctx, key,
- &prog_data->base, &cp->Base, cp->Base.nir, 16, shader_time_index);
+ fs_visitor v16(compiler, log_data, mem_ctx, key, &prog_data->base,
+ NULL, /* Never used in core profile */
+ shader, 16, shader_time_index);
if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
!fail_msg && !v8.simd16_unsupported &&
local_workgroup_size <= 16 * max_cs_threads) {
/* Try a SIMD16 compile */
v16.import_uniforms(&v8);
if (!v16.run_cs()) {
- perf_debug("SIMD16 shader failed to compile: %s", v16.fail_msg);
+ compiler->shader_perf_log(log_data,
+ "SIMD16 shader failed to compile: %s",
+ v16.fail_msg);
if (!cfg) {
fail_msg =
"Couldn't generate SIMD16 program and not "
@@ -5335,20 +5420,19 @@ brw_cs_emit(struct brw_context *brw,
if (unlikely(cfg == NULL)) {
assert(fail_msg);
- prog->LinkStatus = false;
- ralloc_strcat(&prog->InfoLog, fail_msg);
- _mesa_problem(NULL, "Failed to compile compute shader: %s\n",
- fail_msg);
+ if (error_str)
+ *error_str = ralloc_strdup(mem_ctx, fail_msg);
+
return NULL;
}
- fs_generator g(brw->intelScreen->compiler, brw,
- mem_ctx, (void*) key, &prog_data->base, &cp->Base,
+ fs_generator g(compiler, log_data, mem_ctx, (void*) key, &prog_data->base,
v8.promoted_constants, v8.runtime_check_aads_emit, "CS");
if (INTEL_DEBUG & DEBUG_CS) {
- char *name = ralloc_asprintf(mem_ctx, "%s compute shader %d",
- prog->Label ? prog->Label : "unnamed",
- prog->Name);
+ char *name = ralloc_asprintf(mem_ctx, "%s compute shader %s",
+ shader->info.label ? shader->info.label :
+ "unnamed",
+ shader->info.name);
g.enable_debug(name);
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index e8b511f9ce6..171338dcc0b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -48,7 +48,7 @@ extern "C" {
#include "brw_wm.h"
#include "intel_asm_annotation.h"
}
-#include "glsl/glsl_types.h"
+#include "glsl/nir/glsl_types.h"
#include "glsl/ir.h"
#include "glsl/nir/nir.h"
#include "program/sampler.h"
@@ -96,7 +96,7 @@ public:
const void *key,
struct brw_stage_prog_data *prog_data,
struct gl_program *prog,
- nir_shader *shader,
+ const nir_shader *shader,
unsigned dispatch_width,
int shader_time_index);
@@ -400,7 +400,6 @@ public:
void *mem_ctx,
const void *key,
struct brw_stage_prog_data *prog_data,
- struct gl_program *fp,
unsigned promoted_constants,
bool runtime_check_aads_emit,
const char *stage_abbrev);
@@ -499,8 +498,6 @@ private:
const void * const key;
struct brw_stage_prog_data * const prog_data;
- const struct gl_program *prog;
-
unsigned dispatch_width; /**< 8 or 16 */
exec_list discard_halt_patches;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
index 277b6cc3a60..a13d001291c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
@@ -45,7 +45,7 @@
#include "brw_wm.h"
#include "glsl/ir.h"
#include "glsl/ir_expression_flattening.h"
-#include "glsl/glsl_types.h"
+#include "glsl/nir/glsl_types.h"
class ir_channel_expressions_visitor : public ir_hierarchical_visitor {
public:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 230b0caec47..5589716239a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -275,17 +275,6 @@ is_logic_op(enum opcode opcode)
opcode == BRW_OPCODE_NOT);
}
-static bool
-can_change_source_types(fs_inst *inst)
-{
- return !inst->src[0].abs && !inst->src[0].negate &&
- inst->dst.type == inst->src[0].type &&
- (inst->opcode == BRW_OPCODE_MOV ||
- (inst->opcode == BRW_OPCODE_SEL &&
- inst->predicate != BRW_PREDICATE_NONE &&
- !inst->src[1].abs && !inst->src[1].negate));
-}
-
bool
fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
{
@@ -368,7 +357,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
if (has_source_modifiers &&
entry->dst.type != inst->src[arg].type &&
- !can_change_source_types(inst))
+ !inst->can_change_types())
return false;
if (devinfo->gen >= 8 && (entry->src.negate || entry->src.abs) &&
@@ -438,7 +427,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
* type. If we got here, then we can just change the source and
* destination types of the instruction and keep going.
*/
- assert(can_change_source_types(inst));
+ assert(inst->can_change_types());
for (int i = 0; i < inst->sources; i++) {
inst->src[i].type = entry->dst.type;
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 6f8b75e339f..13c495cd395 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -131,7 +131,6 @@ fs_generator::fs_generator(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx,
const void *key,
struct brw_stage_prog_data *prog_data,
- struct gl_program *prog,
unsigned promoted_constants,
bool runtime_check_aads_emit,
const char *stage_abbrev)
@@ -139,7 +138,7 @@ fs_generator::fs_generator(const struct brw_compiler *compiler, void *log_data,
: compiler(compiler), log_data(log_data),
devinfo(compiler->devinfo), key(key),
prog_data(prog_data),
- prog(prog), promoted_constants(promoted_constants),
+ promoted_constants(promoted_constants),
runtime_check_aads_emit(runtime_check_aads_emit), debug_flag(false),
stage_abbrev(stage_abbrev), mem_ctx(mem_ctx)
{
@@ -1377,15 +1376,14 @@ fs_generator::generate_pixel_interpolator_query(fs_inst *inst,
struct brw_reg msg_data,
unsigned msg_type)
{
- assert(msg_data.file == BRW_IMMEDIATE_VALUE &&
- msg_data.type == BRW_REGISTER_TYPE_UD);
+ assert(msg_data.type == BRW_REGISTER_TYPE_UD);
brw_pixel_interpolator_query(p,
retype(dst, BRW_REGISTER_TYPE_UW),
src,
inst->pi_noperspective,
msg_type,
- msg_data.dw1.ud,
+ msg_data,
inst->mlen,
inst->regs_written);
}
@@ -2188,7 +2186,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
100.0f * (before_size - after_size) / before_size);
dump_assembly(p->store, annotation.ann_count, annotation.ann,
- p->devinfo, prog);
+ p->devinfo);
ralloc_free(annotation.ann);
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
index 19aec92fad1..ce066a9778e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
@@ -259,16 +259,15 @@ fs_live_variables::compute_start_end()
struct block_data *bd = &block_data[block->num];
for (int i = 0; i < num_vars; i++) {
- if (BITSET_TEST(bd->livein, i)) {
- start[i] = MIN2(start[i], block->start_ip);
- end[i] = MAX2(end[i], block->start_ip);
- }
-
- if (BITSET_TEST(bd->liveout, i)) {
- start[i] = MIN2(start[i], block->end_ip);
- end[i] = MAX2(end[i], block->end_ip);
- }
+ if (BITSET_TEST(bd->livein, i)) {
+ start[i] = MIN2(start[i], block->start_ip);
+ end[i] = MAX2(end[i], block->start_ip);
+ }
+ if (BITSET_TEST(bd->liveout, i)) {
+ start[i] = MIN2(start[i], block->end_ip);
+ end[i] = MAX2(end[i], block->end_ip);
+ }
}
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 45c3f4ef3b4..feedbfbb2e3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -56,61 +56,25 @@ fs_visitor::emit_nir_code()
void
fs_visitor::nir_setup_inputs()
{
+ if (stage != MESA_SHADER_FRAGMENT)
+ return;
+
nir_inputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_inputs);
nir_foreach_variable(var, &nir->inputs) {
- enum brw_reg_type type = brw_type_for_base_type(var->type);
fs_reg input = offset(nir_inputs, bld, var->data.driver_location);
fs_reg reg;
- switch (stage) {
- case MESA_SHADER_VERTEX: {
- /* Our ATTR file is indexed by VERT_ATTRIB_*, which is the value
- * stored in nir_variable::location.
- *
- * However, NIR's load_input intrinsics use a different index - an
- * offset into a single contiguous array containing all inputs.
- * This index corresponds to the nir_variable::driver_location field.
- *
- * So, we need to copy from fs_reg(ATTR, var->location) to
- * offset(nir_inputs, var->data.driver_location).
- */
- const glsl_type *const t = var->type->without_array();
- const unsigned components = t->components();
- const unsigned cols = t->matrix_columns;
- const unsigned elts = t->vector_elements;
- unsigned array_length = var->type->is_array() ? var->type->length : 1;
- for (unsigned i = 0; i < array_length; i++) {
- for (unsigned j = 0; j < cols; j++) {
- for (unsigned k = 0; k < elts; k++) {
- bld.MOV(offset(retype(input, type), bld,
- components * i + elts * j + k),
- offset(fs_reg(ATTR, var->data.location + i, type),
- bld, 4 * j + k));
- }
- }
- }
- break;
- }
- case MESA_SHADER_GEOMETRY:
- case MESA_SHADER_COMPUTE:
- case MESA_SHADER_TESS_CTRL:
- case MESA_SHADER_TESS_EVAL:
- unreachable("fs_visitor not used for these stages yet.");
- break;
- case MESA_SHADER_FRAGMENT:
- if (var->data.location == VARYING_SLOT_POS) {
- reg = *emit_fragcoord_interpolation(var->data.pixel_center_integer,
- var->data.origin_upper_left);
- emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(),
- input, reg), 0xF);
- } else {
- emit_general_interpolation(input, var->name, var->type,
- (glsl_interp_qualifier) var->data.interpolation,
- var->data.location, var->data.centroid,
- var->data.sample);
- }
- break;
+ if (var->data.location == VARYING_SLOT_POS) {
+ reg = *emit_fragcoord_interpolation(var->data.pixel_center_integer,
+ var->data.origin_upper_left);
+ emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(),
+ input, reg), 0xF);
+ } else {
+ emit_general_interpolation(input, var->name, var->type,
+ (glsl_interp_qualifier) var->data.interpolation,
+ var->data.location, var->data.centroid,
+ var->data.sample);
}
}
}
@@ -125,9 +89,7 @@ fs_visitor::nir_setup_outputs()
nir_foreach_variable(var, &nir->outputs) {
fs_reg reg = offset(nir_outputs, bld, var->data.driver_location);
- int vector_elements =
- var->type->is_array() ? var->type->fields.array->vector_elements
- : var->type->vector_elements;
+ int vector_elements = var->type->without_array()->vector_elements;
switch (stage) {
case MESA_SHADER_VERTEX:
@@ -1180,6 +1142,36 @@ get_image_atomic_op(nir_intrinsic_op op, const glsl_type *type)
}
}
+static fs_inst *
+emit_pixel_interpolater_send(const fs_builder &bld,
+ enum opcode opcode,
+ const fs_reg &dst,
+ const fs_reg &src,
+ const fs_reg &desc,
+ glsl_interp_qualifier interpolation)
+{
+ fs_inst *inst;
+ fs_reg payload;
+ int mlen;
+
+ if (src.file == BAD_FILE) {
+ /* Dummy payload */
+ payload = bld.vgrf(BRW_REGISTER_TYPE_F, 1);
+ mlen = 1;
+ } else {
+ payload = src;
+ mlen = 2 * bld.dispatch_width() / 8;
+ }
+
+ inst = bld.emit(opcode, dst, payload, desc);
+ inst->mlen = mlen;
+ /* 2 floats per slot returned */
+ inst->regs_written = 2 * bld.dispatch_width() / 8;
+ inst->pi_noperspective = interpolation == INTERP_QUALIFIER_NOPERSPECTIVE;
+
+ return inst;
+}
+
void
fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr)
{
@@ -1440,7 +1432,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
*/
brw_mark_surface_used(prog_data,
stage_prog_data->binding_table.ubo_start +
- nir->info.num_ssbos - 1);
+ nir->info.num_ubos - 1);
}
if (has_indirect) {
@@ -1488,21 +1480,21 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
fs_reg surf_index;
if (const_uniform_block) {
- unsigned index = stage_prog_data->binding_table.ubo_start +
+ unsigned index = stage_prog_data->binding_table.ssbo_start +
const_uniform_block->u[0];
surf_index = fs_reg(index);
brw_mark_surface_used(prog_data, index);
} else {
surf_index = vgrf(glsl_type::uint_type);
bld.ADD(surf_index, get_nir_src(instr->src[0]),
- fs_reg(stage_prog_data->binding_table.ubo_start));
+ fs_reg(stage_prog_data->binding_table.ssbo_start));
surf_index = bld.emit_uniformize(surf_index);
/* Assume this may touch any UBO. It would be nice to provide
* a tighter bound, but the array information is already lowered away.
*/
brw_mark_surface_used(prog_data,
- stage_prog_data->binding_table.ubo_start +
+ stage_prog_data->binding_table.ssbo_start +
nir->info.num_ssbos - 1);
}
@@ -1545,8 +1537,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
case nir_intrinsic_load_input: {
unsigned index = 0;
for (unsigned j = 0; j < instr->num_components; j++) {
- fs_reg src = offset(retype(nir_inputs, dest.type), bld,
- instr->const_index[0] + index);
+ fs_reg src;
+ if (stage == MESA_SHADER_VERTEX) {
+ src = offset(fs_reg(ATTR, instr->const_index[0], dest.type), bld, index);
+ } else {
+ src = offset(retype(nir_inputs, dest.type), bld,
+ instr->const_index[0] + index);
+ }
if (has_indirect)
src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
index++;
@@ -1583,28 +1580,81 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
((struct brw_wm_prog_data *) prog_data)->pulls_bary = true;
fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
-
- /* For most messages, we need one reg of ignored data; the hardware
- * requires mlen==1 even when there is no payload. in the per-slot
- * offset case, we'll replace this with the proper source data.
- */
- fs_reg src = vgrf(glsl_type::float_type);
- int mlen = 1; /* one reg unless overriden */
- fs_inst *inst;
+ const glsl_interp_qualifier interpolation =
+ (glsl_interp_qualifier) instr->variables[0]->var->data.interpolation;
switch (instr->intrinsic) {
case nir_intrinsic_interp_var_at_centroid:
- inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_CENTROID,
- dst_xy, src, fs_reg(0u));
+ emit_pixel_interpolater_send(bld,
+ FS_OPCODE_INTERPOLATE_AT_CENTROID,
+ dst_xy,
+ fs_reg(), /* src */
+ fs_reg(0u),
+ interpolation);
break;
case nir_intrinsic_interp_var_at_sample: {
- /* XXX: We should probably handle non-constant sample id's */
nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]);
- assert(const_sample);
- unsigned msg_data = const_sample ? const_sample->i[0] << 4 : 0;
- inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
- fs_reg(msg_data));
+
+ if (const_sample) {
+ unsigned msg_data = const_sample->i[0] << 4;
+
+ emit_pixel_interpolater_send(bld,
+ FS_OPCODE_INTERPOLATE_AT_SAMPLE,
+ dst_xy,
+ fs_reg(), /* src */
+ fs_reg(msg_data),
+ interpolation);
+ } else {
+ const fs_reg sample_src = retype(get_nir_src(instr->src[0]),
+ BRW_REGISTER_TYPE_UD);
+
+ if (nir_src_is_dynamically_uniform(instr->src[0])) {
+ const fs_reg sample_id = bld.emit_uniformize(sample_src);
+ const fs_reg msg_data = vgrf(glsl_type::uint_type);
+ bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u));
+ emit_pixel_interpolater_send(bld,
+ FS_OPCODE_INTERPOLATE_AT_SAMPLE,
+ dst_xy,
+ fs_reg(), /* src */
+ msg_data,
+ interpolation);
+ } else {
+ /* Make a loop that sends a message to the pixel interpolater
+ * for the sample number in each live channel. If there are
+ * multiple channels with the same sample number then these
+ * will be handled simultaneously with a single iteration of
+ * the loop.
+ */
+ bld.emit(BRW_OPCODE_DO);
+
+ /* Get the next live sample number into sample_id */
+ const fs_reg sample_id = bld.emit_uniformize(sample_src);
+
+ /* Set the flag register so that we can perform the send
+ * message on all channels that have the same sample number
+ */
+ bld.CMP(bld.null_reg_ud(),
+ sample_src, sample_id,
+ BRW_CONDITIONAL_EQ);
+ const fs_reg msg_data = vgrf(glsl_type::uint_type);
+ bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u));
+ fs_inst *inst =
+ emit_pixel_interpolater_send(bld,
+ FS_OPCODE_INTERPOLATE_AT_SAMPLE,
+ dst_xy,
+ fs_reg(), /* src */
+ msg_data,
+ interpolation);
+ set_predicate(BRW_PREDICATE_NORMAL, inst);
+
+ /* Continue the loop if there are any live channels left */
+ set_predicate_inv(BRW_PREDICATE_NORMAL,
+ true, /* inverse */
+ bld.emit(BRW_OPCODE_WHILE));
+ }
+ }
+
break;
}
@@ -1615,10 +1665,14 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf;
unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf;
- inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src,
- fs_reg(off_x | (off_y << 4)));
+ emit_pixel_interpolater_send(bld,
+ FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
+ dst_xy,
+ fs_reg(), /* src */
+ fs_reg(off_x | (off_y << 4)),
+ interpolation);
} else {
- src = vgrf(glsl_type::ivec2_type);
+ fs_reg src = vgrf(glsl_type::ivec2_type);
fs_reg offset_src = retype(get_nir_src(instr->src[0]),
BRW_REGISTER_TYPE_F);
for (int i = 0; i < 2; i++) {
@@ -1646,9 +1700,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
bld.SEL(offset(src, bld, i), itemp, fs_reg(7)));
}
- mlen = 2 * dispatch_width / 8;
- inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src,
- fs_reg(0u));
+ const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET;
+ emit_pixel_interpolater_send(bld,
+ opcode,
+ dst_xy,
+ src,
+ fs_reg(0u),
+ interpolation);
}
break;
}
@@ -1657,12 +1715,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
unreachable("Invalid intrinsic");
}
- inst->mlen = mlen;
- /* 2 floats per slot returned */
- inst->regs_written = 2 * dispatch_width / 8;
- inst->pi_noperspective = instr->variables[0]->var->data.interpolation ==
- INTERP_QUALIFIER_NOPERSPECTIVE;
-
for (unsigned j = 0; j < instr->num_components; j++) {
fs_reg src = interp_reg(instr->variables[0]->var->data.location, j);
src.type = dest.type;
@@ -1684,18 +1736,18 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
nir_const_value *const_uniform_block =
nir_src_as_const_value(instr->src[1]);
if (const_uniform_block) {
- unsigned index = stage_prog_data->binding_table.ubo_start +
+ unsigned index = stage_prog_data->binding_table.ssbo_start +
const_uniform_block->u[0];
surf_index = fs_reg(index);
brw_mark_surface_used(prog_data, index);
} else {
surf_index = vgrf(glsl_type::uint_type);
bld.ADD(surf_index, get_nir_src(instr->src[1]),
- fs_reg(stage_prog_data->binding_table.ubo_start));
+ fs_reg(stage_prog_data->binding_table.ssbo_start));
surf_index = bld.emit_uniformize(surf_index);
brw_mark_surface_used(prog_data,
- stage_prog_data->binding_table.ubo_start +
+ stage_prog_data->binding_table.ssbo_start +
nir->info.num_ssbos - 1);
}
@@ -1780,17 +1832,17 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
case nir_intrinsic_ssbo_atomic_add:
nir_emit_ssbo_atomic(bld, BRW_AOP_ADD, instr);
break;
- case nir_intrinsic_ssbo_atomic_min:
- if (dest.type == BRW_REGISTER_TYPE_D)
- nir_emit_ssbo_atomic(bld, BRW_AOP_IMIN, instr);
- else
- nir_emit_ssbo_atomic(bld, BRW_AOP_UMIN, instr);
+ case nir_intrinsic_ssbo_atomic_imin:
+ nir_emit_ssbo_atomic(bld, BRW_AOP_IMIN, instr);
break;
- case nir_intrinsic_ssbo_atomic_max:
- if (dest.type == BRW_REGISTER_TYPE_D)
- nir_emit_ssbo_atomic(bld, BRW_AOP_IMAX, instr);
- else
- nir_emit_ssbo_atomic(bld, BRW_AOP_UMAX, instr);
+ case nir_intrinsic_ssbo_atomic_umin:
+ nir_emit_ssbo_atomic(bld, BRW_AOP_UMIN, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_imax:
+ nir_emit_ssbo_atomic(bld, BRW_AOP_IMAX, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_umax:
+ nir_emit_ssbo_atomic(bld, BRW_AOP_UMAX, instr);
break;
case nir_intrinsic_ssbo_atomic_and:
nir_emit_ssbo_atomic(bld, BRW_AOP_AND, instr);
@@ -1810,7 +1862,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
case nir_intrinsic_get_buffer_size: {
nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
- unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
+ unsigned ssbo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
int reg_width = dispatch_width / 8;
/* Set LOD = 0 */
@@ -1821,7 +1873,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
BRW_REGISTER_TYPE_UD);
bld.LOAD_PAYLOAD(src_payload, &source, 1, 0);
- fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start + ubo_index);
+ fs_reg surf_index = fs_reg(prog_data->binding_table.ssbo_start + ssbo_index);
fs_inst *inst = bld.emit(FS_OPCODE_GET_BUFFER_SIZE, dest,
src_payload, surf_index);
inst->header_size = 0;
@@ -1874,20 +1926,20 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
fs_reg surface;
nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]);
if (const_surface) {
- unsigned surf_index = stage_prog_data->binding_table.ubo_start +
+ unsigned surf_index = stage_prog_data->binding_table.ssbo_start +
const_surface->u[0];
surface = fs_reg(surf_index);
brw_mark_surface_used(prog_data, surf_index);
} else {
surface = vgrf(glsl_type::uint_type);
bld.ADD(surface, get_nir_src(instr->src[0]),
- fs_reg(stage_prog_data->binding_table.ubo_start));
+ fs_reg(stage_prog_data->binding_table.ssbo_start));
- /* Assume this may touch any UBO. This is the same we do for other
+ /* Assume this may touch any SSBO. This is the same thing we do for other
* UBO/SSBO accesses with non-constant surface.
*/
brw_mark_surface_used(prog_data,
- stage_prog_data->binding_table.ubo_start +
+ stage_prog_data->binding_table.ssbo_start +
nir->info.num_ssbos - 1);
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index c3a037be4b1..36388fad98d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -27,7 +27,7 @@
#include "brw_fs.h"
#include "brw_cfg.h"
-#include "glsl/glsl_types.h"
+#include "glsl/nir/glsl_types.h"
#include "glsl/ir_optimization.h"
using namespace brw;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
index e406c2899e8..8792a8c7b1d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
@@ -52,11 +52,12 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block)
ip--;
if (inst->opcode != BRW_OPCODE_MOV ||
+ !inst->saturate ||
inst->dst.file != GRF ||
+ inst->dst.type != inst->src[0].type ||
inst->src[0].file != GRF ||
inst->src[0].abs ||
- inst->src[0].negate ||
- !inst->saturate)
+ inst->src[0].negate)
continue;
int src_var = v->live_intervals->var_from_reg(inst->src[0]);
@@ -65,7 +66,9 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block)
bool interfered = false;
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst, block) {
if (scan_inst->overwrites_reg(inst->src[0])) {
- if (scan_inst->is_partial_write())
+ if (scan_inst->is_partial_write() ||
+ (scan_inst->dst.type != inst->dst.type &&
+ !scan_inst->can_change_types()))
break;
if (scan_inst->saturate) {
@@ -73,6 +76,12 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block)
progress = true;
} else if (src_end_ip <= ip || inst->dst.equals(inst->src[0])) {
if (scan_inst->can_do_saturate()) {
+ if (scan_inst->dst.type != inst->dst.type) {
+ scan_inst->dst.type = inst->dst.type;
+ for (int i = 0; i < scan_inst->sources; i++) {
+ scan_inst->src[i].type = inst->dst.type;
+ }
+ }
scan_inst->saturate = true;
inst->saturate = false;
progress = true;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_validate.cpp b/src/mesa/drivers/dri/i965/brw_fs_validate.cpp
index d0e04f3bf47..814c551f1be 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_validate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_validate.cpp
@@ -32,7 +32,7 @@
#define fsv_assert(cond) \
if (!(cond)) { \
- fprintf(stderr, "ASSERT: FS validation failed!\n"); \
+ fprintf(stderr, "ASSERT: Scalar %s validation failed!\n", stage_abbrev); \
dump_instruction(inst, stderr); \
fprintf(stderr, "%s:%d: %s\n", __FILE__, __LINE__, #cond); \
abort(); \
diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
index 6000e35b9b9..cab5af318a2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
@@ -42,7 +42,7 @@
#include "glsl/ir.h"
#include "glsl/ir_visitor.h"
#include "glsl/ir_rvalue_visitor.h"
-#include "glsl/glsl_types.h"
+#include "glsl/nir/glsl_types.h"
#include "util/hash_table.h"
static bool debug = false;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index df1a7ed9b59..f825fed4daf 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -43,7 +43,7 @@
#include "brw_vec4.h"
#include "brw_fs.h"
#include "main/uniforms.h"
-#include "glsl/glsl_types.h"
+#include "glsl/nir/glsl_types.h"
#include "glsl/ir_optimization.h"
#include "program/sampler.h"
@@ -53,7 +53,8 @@ fs_reg *
fs_visitor::emit_vs_system_value(int location)
{
fs_reg *reg = new(this->mem_ctx)
- fs_reg(ATTR, VERT_ATTRIB_MAX, BRW_REGISTER_TYPE_D);
+ fs_reg(ATTR, 4 * _mesa_bitcount_64(nir->info.inputs_read),
+ BRW_REGISTER_TYPE_D);
brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data;
switch (location) {
@@ -903,12 +904,9 @@ fs_visitor::emit_urb_writes()
urb_offset = 0;
flush = false;
for (slot = 0; slot < vue_map->num_slots; slot++) {
- fs_reg reg, src, zero;
-
int varying = vue_map->slot_to_varying[slot];
switch (varying) {
- case VARYING_SLOT_PSIZ:
-
+ case VARYING_SLOT_PSIZ: {
/* The point size varying slot is the vue header and is always in the
* vue map. But often none of the special varyings that live there
* are written and in that case we can skip writing to the vue
@@ -920,7 +918,7 @@ fs_visitor::emit_urb_writes()
break;
}
- zero = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
+ fs_reg zero(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
bld.MOV(zero, fs_reg(0u));
sources[length++] = zero;
@@ -939,7 +937,7 @@ fs_visitor::emit_urb_writes()
else
sources[length++] = zero;
break;
-
+ }
case BRW_VARYING_SLOT_NDC:
case VARYING_SLOT_EDGE:
unreachable("unexpected scalar vs output");
@@ -972,8 +970,8 @@ fs_visitor::emit_urb_writes()
* temp register and use that for the payload.
*/
for (int i = 0; i < 4; i++) {
- reg = fs_reg(GRF, alloc.allocate(1), outputs[varying].type);
- src = offset(this->outputs[varying], bld, i);
+ fs_reg reg = fs_reg(GRF, alloc.allocate(1), outputs[varying].type);
+ fs_reg src = offset(this->outputs[varying], bld, i);
set_saturate(true, bld.MOV(reg, src));
sources[length++] = reg;
}
@@ -1069,7 +1067,7 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
const void *key,
struct brw_stage_prog_data *prog_data,
struct gl_program *prog,
- nir_shader *shader,
+ const nir_shader *shader,
unsigned dispatch_width,
int shader_time_index)
: backend_shader(compiler, log_data, mem_ctx, shader, prog_data),
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index e0165fb4a23..10a7f28fdab 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -57,6 +57,7 @@ brw_codegen_gs_prog(struct brw_context *brw,
struct brw_geometry_program *gp,
struct brw_gs_prog_key *key)
{
+ struct gl_shader *shader = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
struct brw_stage_state *stage_state = &brw->gs.base;
struct brw_gs_compile c;
memset(&c, 0, sizeof(c));
@@ -300,8 +301,11 @@ brw_codegen_gs_prog(struct brw_context *brw,
void *mem_ctx = ralloc_context(NULL);
unsigned program_size;
+ char *error_str;
const unsigned *program =
- brw_gs_emit(brw, prog, &c, mem_ctx, st_index, &program_size);
+ brw_compile_gs(brw->intelScreen->compiler, brw, &c,
+ shader->Program->nir, prog,
+ mem_ctx, st_index, &program_size, &error_str);
if (program == NULL) {
ralloc_free(mem_ctx);
return false;
diff --git a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
index 0bb307432d0..00125c0f405 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
@@ -129,7 +129,7 @@ brw_upload_gs_image_surfaces(struct brw_context *brw)
ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
if (prog) {
- /* BRW_NEW_GS_PROG_DATA, BRW_NEW_IMAGE_UNITS */
+ /* BRW_NEW_GS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_GEOMETRY],
&brw->gs.base, &brw->gs.prog_data->base.base);
}
@@ -137,6 +137,7 @@ brw_upload_gs_image_surfaces(struct brw_context *brw)
const struct brw_tracked_state brw_gs_image_surfaces = {
.dirty = {
+ .mesa = _NEW_TEXTURE,
.brw = BRW_NEW_BATCH |
BRW_NEW_GEOMETRY_PROGRAM |
BRW_NEW_GS_PROG_DATA |
diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h
index 97c6f8b2500..7726e4b78a0 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
@@ -204,6 +204,7 @@ public:
unsigned components_read(unsigned i) const;
int regs_read(int arg) const;
bool can_do_source_mods(const struct brw_device_info *devinfo);
+ bool can_change_types() const;
bool has_side_effects() const;
bool reads_flag() const;
diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
index 96dd633e117..1b57b65db27 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
@@ -179,6 +179,7 @@ public:
int swizzle, int swizzle_mask);
void reswizzle(int dst_writemask, int swizzle);
bool can_do_source_mods(const struct brw_device_info *devinfo);
+ bool can_change_types() const;
bool reads_flag()
{
diff --git a/src/mesa/drivers/dri/i965/brw_lower_unnormalized_offset.cpp b/src/mesa/drivers/dri/i965/brw_lower_unnormalized_offset.cpp
index 8c59b9e415b..4219d471def 100644
--- a/src/mesa/drivers/dri/i965/brw_lower_unnormalized_offset.cpp
+++ b/src/mesa/drivers/dri/i965/brw_lower_unnormalized_offset.cpp
@@ -31,7 +31,7 @@
* \author Chris Forbes <[email protected]>
*/
-#include "glsl/glsl_types.h"
+#include "glsl/nir/glsl_types.h"
#include "glsl/ir.h"
#include "glsl/ir_builder.h"
diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
index eb201736c6e..fbde3f04204 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
@@ -451,6 +451,11 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
if (irb->mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_NO_MCS)
clear_type = REP_CLEAR;
+ if (brw->gen >= 9 && clear_type == FAST_CLEAR) {
+ perf_debug("fast MCS clears are disabled on gen9");
+ clear_type = REP_CLEAR;
+ }
+
/* We can't do scissored fast clears because of the restrictions on the
* fast clear rectangle size.
*/
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 0a9c09f1075..dc497770914 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -27,30 +27,112 @@
#include "glsl/nir/glsl_to_nir.h"
#include "program/prog_to_nir.h"
+static bool
+remap_vs_attrs(nir_block *block, void *closure)
+{
+ GLbitfield64 inputs_read = *((GLbitfield64 *) closure);
+
+ nir_foreach_instr(block, instr) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+ /* We set EmitNoIndirect for VS inputs, so there are no indirects. */
+ assert(intrin->intrinsic != nir_intrinsic_load_input_indirect);
+
+ if (intrin->intrinsic == nir_intrinsic_load_input) {
+ /* Attributes come in a contiguous block, ordered by their
+ * gl_vert_attrib value. That means we can compute the slot
+ * number for an attribute by masking out the enabled attributes
+ * before it and counting the bits.
+ */
+ int attr = intrin->const_index[0];
+ int slot = _mesa_bitcount_64(inputs_read & BITFIELD64_MASK(attr));
+ intrin->const_index[0] = 4 * slot;
+ }
+ }
+ return true;
+}
+
static void
brw_nir_lower_inputs(nir_shader *nir, bool is_scalar)
{
switch (nir->stage) {
+ case MESA_SHADER_VERTEX:
+ /* For now, leave the vec4 backend doing the old method. */
+ if (!is_scalar) {
+ nir_assign_var_locations(&nir->inputs, &nir->num_inputs,
+ type_size_vec4);
+ break;
+ }
+
+ /* Start with the location of the variable's base. */
+ foreach_list_typed(nir_variable, var, node, &nir->inputs) {
+ var->data.driver_location = var->data.location;
+ }
+
+ /* Now use nir_lower_io to walk dereference chains. Attribute arrays
+ * are loaded as one vec4 per element (or matrix column), so we use
+ * type_size_vec4 here.
+ */
+ nir_lower_io(nir, nir_var_shader_in, type_size_vec4);
+
+ /* Finally, translate VERT_ATTRIB_* values into the actual registers.
+ *
+ * Note that we can use nir->info.inputs_read instead of key->inputs_read
+ * since the two are identical aside from Gen4-5 edge flag differences.
+ */
+ GLbitfield64 inputs_read = nir->info.inputs_read;
+ nir_foreach_overload(nir, overload) {
+ if (overload->impl) {
+ nir_foreach_block(overload->impl, remap_vs_attrs, &inputs_read);
+ }
+ }
+ break;
case MESA_SHADER_GEOMETRY:
foreach_list_typed(nir_variable, var, node, &nir->inputs) {
var->data.driver_location = var->data.location;
}
break;
- default:
+ case MESA_SHADER_FRAGMENT:
+ assert(is_scalar);
nir_assign_var_locations(&nir->inputs, &nir->num_inputs,
- is_scalar ? type_size_scalar : type_size_vec4);
+ type_size_scalar);
+ break;
+ case MESA_SHADER_COMPUTE:
+ /* Compute shaders have no inputs. */
+ assert(exec_list_is_empty(&nir->inputs));
break;
+ default:
+ unreachable("unsupported shader stage");
}
}
static void
brw_nir_lower_outputs(nir_shader *nir, bool is_scalar)
{
- if (is_scalar) {
- nir_assign_var_locations(&nir->outputs, &nir->num_outputs, type_size_scalar);
- } else {
- nir_foreach_variable(var, &nir->outputs)
- var->data.driver_location = var->data.location;
+ switch (nir->stage) {
+ case MESA_SHADER_VERTEX:
+ case MESA_SHADER_GEOMETRY:
+ if (is_scalar) {
+ nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
+ type_size_scalar);
+ } else {
+ nir_foreach_variable(var, &nir->outputs)
+ var->data.driver_location = var->data.location;
+ }
+ break;
+ case MESA_SHADER_FRAGMENT:
+ nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
+ type_size_scalar);
+ break;
+ case MESA_SHADER_COMPUTE:
+ /* Compute shaders have no outputs. */
+ assert(exec_list_is_empty(&nir->outputs));
+ break;
+ default:
+ unreachable("unsupported shader stage");
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index dbd0e50228b..22b0227756e 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -69,8 +69,7 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx,
if (prog) {
prog->id = get_new_program_id(brw->intelScreen);
- return _mesa_init_vertex_program( ctx, &prog->program,
- target, id );
+ return _mesa_init_gl_program(&prog->program.Base, target, id);
}
else
return NULL;
@@ -81,8 +80,7 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx,
if (prog) {
prog->id = get_new_program_id(brw->intelScreen);
- return _mesa_init_fragment_program( ctx, &prog->program,
- target, id );
+ return _mesa_init_gl_program(&prog->program.Base, target, id);
}
else
return NULL;
@@ -93,7 +91,7 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx,
if (prog) {
prog->id = get_new_program_id(brw->intelScreen);
- return _mesa_init_geometry_program(ctx, &prog->program, target, id);
+ return _mesa_init_gl_program(&prog->program, target, id);
} else {
return NULL;
}
@@ -104,7 +102,7 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx,
if (prog) {
prog->id = get_new_program_id(brw->intelScreen);
- return _mesa_init_compute_program(ctx, &prog->program, target, id);
+ return _mesa_init_gl_program(&prog->program.Base, target, id);
} else {
return NULL;
}
diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h
index cf0522a8b10..f8cf2b062c8 100644
--- a/src/mesa/drivers/dri/i965/brw_program.h
+++ b/src/mesa/drivers/dri/i965/brw_program.h
@@ -24,129 +24,7 @@
#ifndef BRW_PROGRAM_H
#define BRW_PROGRAM_H
-/**
- * Program key structures.
- *
- * When drawing, we look for the currently bound shaders in the program
- * cache. This is essentially a hash table lookup, and these are the keys.
- *
- * Sometimes OpenGL features specified as state need to be simulated via
- * shader code, due to a mismatch between the API and the hardware. This
- * is often referred to as "non-orthagonal state" or "NOS". We store NOS
- * in the program key so it's considered when searching for a program. If
- * we haven't seen a particular combination before, we have to recompile a
- * new specialized version.
- *
- * Shader compilation should not look up state in gl_context directly, but
- * instead use the copy in the program key. This guarantees recompiles will
- * happen correctly.
- *
- * @{
- */
-
-enum PACKED gen6_gather_sampler_wa {
- WA_SIGN = 1, /* whether we need to sign extend */
- WA_8BIT = 2, /* if we have an 8bit format needing wa */
- WA_16BIT = 4, /* if we have a 16bit format needing wa */
-};
-
-/**
- * Sampler information needed by VS, WM, and GS program cache keys.
- */
-struct brw_sampler_prog_key_data {
- /**
- * EXT_texture_swizzle and DEPTH_TEXTURE_MODE swizzles.
- */
- uint16_t swizzles[MAX_SAMPLERS];
-
- uint32_t gl_clamp_mask[3];
-
- /**
- * For RG32F, gather4's channel select is broken.
- */
- uint32_t gather_channel_quirk_mask;
-
- /**
- * Whether this sampler uses the compressed multisample surface layout.
- */
- uint32_t compressed_multisample_layout_mask;
-
- /**
- * For Sandybridge, which shader w/a we need for gather quirks.
- */
- enum gen6_gather_sampler_wa gen6_gather_wa[MAX_SAMPLERS];
-};
-
-
-/** The program key for Vertex Shaders. */
-struct brw_vs_prog_key {
- unsigned program_string_id;
-
- /*
- * Per-attribute workaround flags
- */
- uint8_t gl_attrib_wa_flags[VERT_ATTRIB_MAX];
-
- bool copy_edgeflag:1;
-
- bool clamp_vertex_color:1;
-
- /**
- * How many user clipping planes are being uploaded to the vertex shader as
- * push constants.
- *
- * These are used for lowering legacy gl_ClipVertex/gl_Position clipping to
- * clip distances.
- */
- unsigned nr_userclip_plane_consts:4;
-
- /**
- * For pre-Gen6 hardware, a bitfield indicating which texture coordinates
- * are going to be replaced with point coordinates (as a consequence of a
- * call to glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)). Because
- * our SF thread requires exact matching between VS outputs and FS inputs,
- * these texture coordinates will need to be unconditionally included in
- * the VUE, even if they aren't written by the vertex shader.
- */
- uint8_t point_coord_replace;
-
- struct brw_sampler_prog_key_data tex;
-};
-
-/** The program key for Geometry Shaders. */
-struct brw_gs_prog_key
-{
- unsigned program_string_id;
-
- struct brw_sampler_prog_key_data tex;
-};
-
-/** The program key for Fragment/Pixel Shaders. */
-struct brw_wm_prog_key {
- uint8_t iz_lookup;
- bool stats_wm:1;
- bool flat_shade:1;
- bool persample_shading:1;
- bool persample_2x:1;
- unsigned nr_color_regions:5;
- bool replicate_alpha:1;
- bool render_to_fbo:1;
- bool clamp_fragment_color:1;
- bool compute_pos_offset:1;
- bool compute_sample_id:1;
- unsigned line_aa:2;
- bool high_quality_derivatives:1;
-
- uint16_t drawable_height;
- uint64_t input_slots_valid;
- unsigned program_string_id;
- GLenum alpha_test_func; /* < For Gen4/5 MRT alpha test */
- float alpha_test_ref;
-
- struct brw_sampler_prog_key_data tex;
-};
-
-/** @} */
+#include "brw_compiler.h"
#ifdef __cplusplus
extern "C" {
diff --git a/src/mesa/drivers/dri/i965/brw_sampler_state.c b/src/mesa/drivers/dri/i965/brw_sampler_state.c
index c2db5f69560..6d73444dad0 100644
--- a/src/mesa/drivers/dri/i965/brw_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_sampler_state.c
@@ -44,6 +44,7 @@
#include "main/macros.h"
#include "main/samplerobj.h"
+#include "util/half_float.h"
/**
* Emit a 3DSTATE_SAMPLER_STATE_POINTERS_{VS,HS,GS,DS,PS} packet.
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 4e43e5ccdbd..b710c60148c 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -29,7 +29,7 @@
#include "brw_vec4.h"
#include "brw_cfg.h"
#include "brw_shader.h"
-#include "glsl/glsl_types.h"
+#include "glsl/nir/glsl_types.h"
#include "glsl/ir_optimization.h"
using namespace brw;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 3a58a58a00b..6be2a6e5b55 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -660,7 +660,7 @@ brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg)
backend_shader::backend_shader(const struct brw_compiler *compiler,
void *log_data,
void *mem_ctx,
- nir_shader *shader,
+ const nir_shader *shader,
struct brw_stage_prog_data *stage_prog_data)
: compiler(compiler),
log_data(log_data),
@@ -1131,11 +1131,16 @@ brw_assign_common_binding_table_offsets(gl_shader_stage stage,
next_binding_table_offset += num_textures;
if (shader) {
- assert(shader->NumUniformBlocks <= BRW_MAX_COMBINED_UBO_SSBO);
+ assert(shader->NumUniformBlocks <= BRW_MAX_UBO);
stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
next_binding_table_offset += shader->NumUniformBlocks;
+
+ assert(shader->NumShaderStorageBlocks <= BRW_MAX_SSBO);
+ stage_prog_data->binding_table.ssbo_start = next_binding_table_offset;
+ next_binding_table_offset += shader->NumShaderStorageBlocks;
} else {
stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
+ stage_prog_data->binding_table.ssbo_start = 0xd0d0d0d0;
}
if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index ad2de5eae2d..b33b08f40d7 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -38,64 +38,6 @@
#define MAX_SAMPLER_MESSAGE_SIZE 11
#define MAX_VGRF_SIZE 16
-struct brw_compiler {
- const struct brw_device_info *devinfo;
-
- struct {
- struct ra_regs *regs;
-
- /**
- * Array of the ra classes for the unaligned contiguous register
- * block sizes used.
- */
- int *classes;
-
- /**
- * Mapping for register-allocated objects in *regs to the first
- * GRF for that object.
- */
- uint8_t *ra_reg_to_grf;
- } vec4_reg_set;
-
- struct {
- struct ra_regs *regs;
-
- /**
- * Array of the ra classes for the unaligned contiguous register
- * block sizes used, indexed by register size.
- */
- int classes[16];
-
- /**
- * Mapping from classes to ra_reg ranges. Each of the per-size
- * classes corresponds to a range of ra_reg nodes. This array stores
- * those ranges in the form of first ra_reg in each class and the
- * total number of ra_reg elements in the last array element. This
- * way the range of the i'th class is given by:
- * [ class_to_ra_reg_range[i], class_to_ra_reg_range[i+1] )
- */
- int class_to_ra_reg_range[17];
-
- /**
- * Mapping for register-allocated objects in *regs to the first
- * GRF for that object.
- */
- uint8_t *ra_reg_to_grf;
-
- /**
- * ra class for the aligned pairs we use for PLN, which doesn't
- * appear in *classes.
- */
- int aligned_pairs_class;
- } fs_reg_sets[2];
-
- void (*shader_debug_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
- void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
-
- bool scalar_vs;
- struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES];
-};
-
enum PACKED register_file {
BAD_FILE,
GRF,
@@ -225,7 +167,7 @@ protected:
backend_shader(const struct brw_compiler *compiler,
void *log_data,
void *mem_ctx,
- nir_shader *shader,
+ const nir_shader *shader,
struct brw_stage_prog_data *stage_prog_data);
public:
@@ -234,7 +176,7 @@ public:
void *log_data; /* Passed to compiler->*_log functions */
const struct brw_device_info * const devinfo;
- nir_shader *nir;
+ const nir_shader *nir;
struct brw_stage_prog_data * const stage_prog_data;
/** ralloc context for temporary data used during compile */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index e966b96a5ca..befc92445d3 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -280,6 +280,18 @@ vec4_instruction::can_do_source_mods(const struct brw_device_info *devinfo)
return true;
}
+bool
+vec4_instruction::can_change_types() const
+{
+ return dst.type == src[0].type &&
+ !src[0].abs && !src[0].negate && !saturate &&
+ (opcode == BRW_OPCODE_MOV ||
+ (opcode == BRW_OPCODE_SEL &&
+ dst.type == src[1].type &&
+ predicate != BRW_PREDICATE_NONE &&
+ !src[1].abs && !src[1].negate));
+}
+
/**
* Returns how many MRFs an opcode will write over.
*
@@ -1632,28 +1644,11 @@ vec4_vs_visitor::setup_attributes(int payload_reg)
*/
if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) {
attribute_map[VERT_ATTRIB_MAX] = payload_reg + nr_attributes;
- nr_attributes++;
}
lower_attributes_to_hw_regs(attribute_map, false /* interleaved */);
- /* The BSpec says we always have to read at least one thing from
- * the VF, and it appears that the hardware wedges otherwise.
- */
- if (nr_attributes == 0)
- nr_attributes = 1;
-
- prog_data->urb_read_length = (nr_attributes + 1) / 2;
-
- unsigned vue_entries =
- MAX2(nr_attributes, prog_data->vue_map.num_slots);
-
- if (devinfo->gen == 6)
- prog_data->urb_entry_size = ALIGN(vue_entries, 8) / 8;
- else
- prog_data->urb_entry_size = ALIGN(vue_entries, 4) / 4;
-
- return payload_reg + nr_attributes;
+ return payload_reg + vs_prog_data->nr_attributes;
}
int
@@ -1937,51 +1932,76 @@ extern "C" {
* Returns the final assembly and the program's size.
*/
const unsigned *
-brw_vs_emit(struct brw_context *brw,
- void *mem_ctx,
- const struct brw_vs_prog_key *key,
- struct brw_vs_prog_data *prog_data,
- struct gl_vertex_program *vp,
- struct gl_shader_program *prog,
- int shader_time_index,
- unsigned *final_assembly_size)
+brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
+ void *mem_ctx,
+ const struct brw_vs_prog_key *key,
+ struct brw_vs_prog_data *prog_data,
+ const nir_shader *shader,
+ gl_clip_plane *clip_planes,
+ bool use_legacy_snorm_formula,
+ int shader_time_index,
+ unsigned *final_assembly_size,
+ char **error_str)
{
const unsigned *assembly = NULL;
- if (brw->intelScreen->compiler->scalar_vs) {
+ unsigned nr_attributes = _mesa_bitcount_64(prog_data->inputs_read);
+
+ /* gl_VertexID and gl_InstanceID are system values, but arrive via an
+ * incoming vertex attribute. So, add an extra slot.
+ */
+ if (shader->info.system_values_read &
+ (BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
+ BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))) {
+ nr_attributes++;
+ }
+
+ /* The 3DSTATE_VS documentation lists the lower bound on "Vertex URB Entry
+ * Read Length" as 1 in vec4 mode, and 0 in SIMD8 mode. Empirically, in
+ * vec4 mode, the hardware appears to wedge unless we read something.
+ */
+ if (compiler->scalar_vs)
+ prog_data->base.urb_read_length = DIV_ROUND_UP(nr_attributes, 2);
+ else
+ prog_data->base.urb_read_length = DIV_ROUND_UP(MAX2(nr_attributes, 1), 2);
+
+ prog_data->nr_attributes = nr_attributes;
+
+ /* Since vertex shaders reuse the same VUE entry for inputs and outputs
+ * (overwriting the original contents), we need to make sure the size is
+ * the larger of the two.
+ */
+ const unsigned vue_entries =
+ MAX2(nr_attributes, (unsigned)prog_data->base.vue_map.num_slots);
+
+ if (compiler->devinfo->gen == 6)
+ prog_data->base.urb_entry_size = DIV_ROUND_UP(vue_entries, 8);
+ else
+ prog_data->base.urb_entry_size = DIV_ROUND_UP(vue_entries, 4);
+
+ if (compiler->scalar_vs) {
prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
- fs_visitor v(brw->intelScreen->compiler, brw,
- mem_ctx, key, &prog_data->base.base,
+ fs_visitor v(compiler, log_data, mem_ctx, key, &prog_data->base.base,
NULL, /* prog; Only used for TEXTURE_RECTANGLE on gen < 8 */
- vp->Base.nir, 8, shader_time_index);
- if (!v.run_vs(brw_select_clip_planes(&brw->ctx))) {
- if (prog) {
- prog->LinkStatus = false;
- ralloc_strcat(&prog->InfoLog, v.fail_msg);
- }
-
- _mesa_problem(NULL, "Failed to compile vertex shader: %s\n",
- v.fail_msg);
+ shader, 8, shader_time_index);
+ if (!v.run_vs(clip_planes)) {
+ if (error_str)
+ *error_str = ralloc_strdup(mem_ctx, v.fail_msg);
return NULL;
}
- fs_generator g(brw->intelScreen->compiler, brw,
- mem_ctx, (void *) key, &prog_data->base.base,
- &vp->Base, v.promoted_constants,
+ fs_generator g(compiler, log_data, mem_ctx, (void *) key,
+ &prog_data->base.base, v.promoted_constants,
v.runtime_check_aads_emit, "VS");
if (INTEL_DEBUG & DEBUG_VS) {
- char *name;
- if (prog) {
- name = ralloc_asprintf(mem_ctx, "%s vertex shader %d",
- prog->Label ? prog->Label : "unnamed",
- prog->Name);
- } else {
- name = ralloc_asprintf(mem_ctx, "vertex program %d",
- vp->Base.Id);
- }
- g.enable_debug(name);
+ const char *debug_name =
+ ralloc_asprintf(mem_ctx, "%s vertex shader %s",
+ shader->info.label ? shader->info.label : "unnamed",
+ shader->info.name);
+
+ g.enable_debug(debug_name);
}
g.generate_code(v.cfg, 8);
assembly = g.get_assembly(final_assembly_size);
@@ -1990,26 +2010,19 @@ brw_vs_emit(struct brw_context *brw,
if (!assembly) {
prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
- vec4_vs_visitor v(brw->intelScreen->compiler, brw, key, prog_data,
- vp->Base.nir, brw_select_clip_planes(&brw->ctx),
- mem_ctx, shader_time_index,
- !_mesa_is_gles3(&brw->ctx));
+ vec4_vs_visitor v(compiler, log_data, key, prog_data,
+ shader, clip_planes, mem_ctx,
+ shader_time_index, use_legacy_snorm_formula);
if (!v.run()) {
- if (prog) {
- prog->LinkStatus = false;
- ralloc_strcat(&prog->InfoLog, v.fail_msg);
- }
-
- _mesa_problem(NULL, "Failed to compile vertex shader: %s\n",
- v.fail_msg);
+ if (error_str)
+ *error_str = ralloc_strdup(mem_ctx, v.fail_msg);
return NULL;
}
- vec4_generator g(brw->intelScreen->compiler, brw,
- prog, &vp->Base, &prog_data->base,
+ vec4_generator g(compiler, log_data, &prog_data->base,
mem_ctx, INTEL_DEBUG & DEBUG_VS, "vertex", "VS");
- assembly = g.generate_assembly(v.cfg, final_assembly_size);
+ assembly = g.generate_assembly(v.cfg, final_assembly_size, shader);
}
return assembly;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 5e3500c0c9a..d861b2e85df 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -72,7 +72,7 @@ public:
void *log_data,
const struct brw_sampler_prog_key_data *key,
struct brw_vue_prog_data *prog_data,
- nir_shader *shader,
+ const nir_shader *shader,
void *mem_ctx,
bool no_spills,
int shader_time_index);
@@ -391,8 +391,6 @@ class vec4_generator
{
public:
vec4_generator(const struct brw_compiler *compiler, void *log_data,
- struct gl_shader_program *shader_prog,
- struct gl_program *prog,
struct brw_vue_prog_data *prog_data,
void *mem_ctx,
bool debug_flag,
@@ -400,10 +398,11 @@ public:
const char *stage_abbrev);
~vec4_generator();
- const unsigned *generate_assembly(const cfg_t *cfg, unsigned *asm_size);
+ const unsigned *generate_assembly(const cfg_t *cfg, unsigned *asm_size,
+ const nir_shader *nir);
private:
- void generate_code(const cfg_t *cfg);
+ void generate_code(const cfg_t *cfg, const nir_shader *nir);
void generate_math1_gen4(vec4_instruction *inst,
struct brw_reg dst,
@@ -485,9 +484,6 @@ private:
struct brw_codegen *p;
- struct gl_shader_program *shader_prog;
- const struct gl_program *prog;
-
struct brw_vue_prog_data *prog_data;
void *mem_ctx;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index 610caef7dce..db99ecba35a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -256,18 +256,6 @@ try_constant_propagate(const struct brw_device_info *devinfo,
}
static bool
-can_change_source_types(vec4_instruction *inst)
-{
- return inst->dst.type == inst->src[0].type &&
- !inst->src[0].abs && !inst->src[0].negate && !inst->saturate &&
- (inst->opcode == BRW_OPCODE_MOV ||
- (inst->opcode == BRW_OPCODE_SEL &&
- inst->dst.type == inst->src[1].type &&
- inst->predicate != BRW_PREDICATE_NONE &&
- !inst->src[1].abs && !inst->src[1].negate));
-}
-
-static bool
try_copy_propagate(const struct brw_device_info *devinfo,
vec4_instruction *inst,
int arg, struct copy_entry *entry)
@@ -325,7 +313,7 @@ try_copy_propagate(const struct brw_device_info *devinfo,
if (has_source_modifiers &&
value.type != inst->src[arg].type &&
- !can_change_source_types(inst))
+ !inst->can_change_types())
return false;
if (has_source_modifiers &&
@@ -394,7 +382,7 @@ try_copy_propagate(const struct brw_device_info *devinfo,
value.swizzle = composed_swizzle;
if (has_source_modifiers &&
value.type != inst->src[arg].type) {
- assert(can_change_source_types(inst));
+ assert(inst->can_change_types());
for (int i = 0; i < 3; i++) {
inst->src[i].type = value.type;
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index dcacc900540..a84f6c47471 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -21,6 +21,7 @@
*/
#include <ctype.h>
+#include "glsl/glsl_parser_extras.h"
#include "brw_vec4.h"
#include "brw_cfg.h"
@@ -137,15 +138,13 @@ vec4_instruction::get_src(const struct brw_vue_prog_data *prog_data, int i)
vec4_generator::vec4_generator(const struct brw_compiler *compiler,
void *log_data,
- struct gl_shader_program *shader_prog,
- struct gl_program *prog,
struct brw_vue_prog_data *prog_data,
void *mem_ctx,
bool debug_flag,
const char *stage_name,
const char *stage_abbrev)
: compiler(compiler), log_data(log_data), devinfo(compiler->devinfo),
- shader_prog(shader_prog), prog(prog), prog_data(prog_data),
+ prog_data(prog_data),
mem_ctx(mem_ctx), stage_name(stage_name), stage_abbrev(stage_abbrev),
debug_flag(debug_flag)
{
@@ -1142,7 +1141,7 @@ vec4_generator::generate_set_simd4x2_header_gen9(vec4_instruction *inst,
}
void
-vec4_generator::generate_code(const cfg_t *cfg)
+vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir)
{
struct annotation_info annotation;
memset(&annotation, 0, sizeof(annotation));
@@ -1648,14 +1647,10 @@ vec4_generator::generate_code(const cfg_t *cfg)
int after_size = p->next_insn_offset;
if (unlikely(debug_flag)) {
- if (shader_prog) {
- fprintf(stderr, "Native code for %s %s shader %d:\n",
- shader_prog->Label ? shader_prog->Label : "unnamed",
- stage_name, shader_prog->Name);
- } else {
- fprintf(stderr, "Native code for %s program %d:\n", stage_name,
- prog->Id);
- }
+ fprintf(stderr, "Native code for %s %s shader %s:\n",
+ nir->info.label ? nir->info.label : "unnamed",
+ _mesa_shader_stage_to_string(nir->stage), nir->info.name);
+
fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. Compacted %d to %d"
" bytes (%.0f%%)\n",
stage_abbrev,
@@ -1663,7 +1658,7 @@ vec4_generator::generate_code(const cfg_t *cfg)
100.0f * (before_size - after_size) / before_size);
dump_assembly(p->store, annotation.ann_count, annotation.ann,
- p->devinfo, prog);
+ p->devinfo);
ralloc_free(annotation.ann);
}
@@ -1676,10 +1671,11 @@ vec4_generator::generate_code(const cfg_t *cfg)
const unsigned *
vec4_generator::generate_assembly(const cfg_t *cfg,
- unsigned *assembly_size)
+ unsigned *assembly_size,
+ const nir_shader *nir)
{
brw_set_default_access_mode(p, BRW_ALIGN_16);
- generate_code(cfg);
+ generate_code(cfg, nir);
return brw_get_program(p, assembly_size);
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 4ce471e0669..a715cf5a6cb 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -30,14 +30,12 @@
#include "brw_vec4_gs_visitor.h"
#include "gen6_gs_visitor.h"
-const unsigned MAX_GS_INPUT_VERTICES = 6;
-
namespace brw {
vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler,
void *log_data,
struct brw_gs_compile *c,
- nir_shader *shader,
+ const nir_shader *shader,
void *mem_ctx,
bool no_spills,
int shader_time_index)
@@ -598,32 +596,17 @@ vec4_gs_visitor::gs_end_primitive()
emit(OR(dst_reg(this->control_data_bits), this->control_data_bits, mask));
}
-static const unsigned *
-generate_assembly(struct brw_context *brw,
- struct gl_shader_program *shader_prog,
- struct gl_program *prog,
- struct brw_vue_prog_data *prog_data,
- void *mem_ctx,
- const cfg_t *cfg,
- unsigned *final_assembly_size)
-{
- vec4_generator g(brw->intelScreen->compiler, brw,
- shader_prog, prog, prog_data, mem_ctx,
- INTEL_DEBUG & DEBUG_GS, "geometry", "GS");
- return g.generate_assembly(cfg, final_assembly_size);
-}
-
extern "C" const unsigned *
-brw_gs_emit(struct brw_context *brw,
- struct gl_shader_program *prog,
- struct brw_gs_compile *c,
- void *mem_ctx,
- int shader_time_index,
- unsigned *final_assembly_size)
+brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
+ struct brw_gs_compile *c,
+ const nir_shader *shader,
+ struct gl_shader_program *shader_prog,
+ void *mem_ctx,
+ int shader_time_index,
+ unsigned *final_assembly_size,
+ char **error_str)
{
- struct gl_shader *shader = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
-
- if (brw->gen >= 7) {
+ if (compiler->devinfo->gen >= 7) {
/* Compile the geometry shader in DUAL_OBJECT dispatch mode, if we can do
* so without spilling. If the GS invocations count > 1, then we can't use
* dual object mode.
@@ -632,13 +615,12 @@ brw_gs_emit(struct brw_context *brw,
likely(!(INTEL_DEBUG & DEBUG_NO_DUAL_OBJECT_GS))) {
c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
- vec4_gs_visitor v(brw->intelScreen->compiler, brw,
- c, shader->Program->nir,
+ vec4_gs_visitor v(compiler, log_data, c, shader,
mem_ctx, true /* no_spills */, shader_time_index);
if (v.run()) {
- return generate_assembly(brw, prog, &c->gp->program.Base,
- &c->prog_data.base, mem_ctx, v.cfg,
- final_assembly_size);
+ vec4_generator g(compiler, log_data, &c->prog_data.base, mem_ctx,
+ INTEL_DEBUG & DEBUG_GS, "geometry", "GS");
+ return g.generate_assembly(v.cfg, final_assembly_size, shader);
}
}
}
@@ -666,7 +648,7 @@ brw_gs_emit(struct brw_context *brw,
* mode is more performant when invocations > 1. Gen6 only supports
* SINGLE mode.
*/
- if (c->prog_data.invocations <= 1 || brw->gen < 7)
+ if (c->prog_data.invocations <= 1 || compiler->devinfo->gen < 7)
c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X1_SINGLE;
else
c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_INSTANCE;
@@ -674,24 +656,22 @@ brw_gs_emit(struct brw_context *brw,
vec4_gs_visitor *gs = NULL;
const unsigned *ret = NULL;
- if (brw->gen >= 7)
- gs = new vec4_gs_visitor(brw->intelScreen->compiler, brw,
- c, shader->Program->nir,
+ if (compiler->devinfo->gen >= 7)
+ gs = new vec4_gs_visitor(compiler, log_data, c, shader,
mem_ctx, false /* no_spills */,
shader_time_index);
else
- gs = new gen6_gs_visitor(brw->intelScreen->compiler, brw,
- c, prog, shader->Program->nir,
+ gs = new gen6_gs_visitor(compiler, log_data, c, shader_prog, shader,
mem_ctx, false /* no_spills */,
shader_time_index);
if (!gs->run()) {
- prog->LinkStatus = false;
- ralloc_strcat(&prog->InfoLog, gs->fail_msg);
+ if (error_str)
+ *error_str = ralloc_strdup(mem_ctx, gs->fail_msg);
} else {
- ret = generate_assembly(brw, prog, &c->gp->program.Base,
- &c->prog_data.base, mem_ctx, gs->cfg,
- final_assembly_size);
+ vec4_generator g(compiler, log_data, &c->prog_data.base, mem_ctx,
+ INTEL_DEBUG & DEBUG_GS, "geometry", "GS");
+ ret = g.generate_assembly(gs->cfg, final_assembly_size, shader);
}
delete gs;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
index 3ff195c3e68..c52552768c8 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
@@ -32,36 +32,6 @@
#include "brw_vec4.h"
-/**
- * Scratch data used when compiling a GLSL geometry shader.
- */
-struct brw_gs_compile
-{
- struct brw_gs_prog_key key;
- struct brw_gs_prog_data prog_data;
- struct brw_vue_map input_vue_map;
-
- struct brw_geometry_program *gp;
-
- unsigned control_data_bits_per_vertex;
- unsigned control_data_header_size_bits;
-};
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-const unsigned *brw_gs_emit(struct brw_context *brw,
- struct gl_shader_program *prog,
- struct brw_gs_compile *c,
- void *mem_ctx,
- int shader_time_index,
- unsigned *final_assembly_size);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
#ifdef __cplusplus
namespace brw {
@@ -71,7 +41,7 @@ public:
vec4_gs_visitor(const struct brw_compiler *compiler,
void *log_data,
struct brw_gs_compile *c,
- nir_shader *shader,
+ const nir_shader *shader,
void *mem_ctx,
bool no_spills,
int shader_time_index);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
index cc688ef8083..678237901f2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
@@ -291,15 +291,15 @@ vec4_visitor::calculate_live_intervals()
struct block_data *bd = &live_intervals->block_data[block->num];
for (int i = 0; i < live_intervals->num_vars; i++) {
- if (BITSET_TEST(bd->livein, i)) {
- start[i] = MIN2(start[i], block->start_ip);
- end[i] = MAX2(end[i], block->start_ip);
- }
+ if (BITSET_TEST(bd->livein, i)) {
+ start[i] = MIN2(start[i], block->start_ip);
+ end[i] = MAX2(end[i], block->start_ip);
+ }
- if (BITSET_TEST(bd->liveout, i)) {
- start[i] = MIN2(start[i], block->end_ip);
- end[i] = MAX2(end[i], block->end_ip);
- }
+ if (BITSET_TEST(bd->liveout, i)) {
+ start[i] = MIN2(start[i], block->end_ip);
+ end[i] = MAX2(end[i], block->end_ip);
+ }
}
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 41bd80df377..ea1e3e7bbcf 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -423,10 +423,10 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
case nir_intrinsic_get_buffer_size: {
nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
- unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
+ unsigned ssbo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
- src_reg surf_index = src_reg(prog_data->base.binding_table.ubo_start +
- ubo_index);
+ src_reg surf_index = src_reg(prog_data->base.binding_table.ssbo_start +
+ ssbo_index);
dst_reg result_dst = get_nir_dest(instr->dest);
vec4_instruction *inst = new(mem_ctx)
vec4_instruction(VS_OPCODE_GET_BUFFER_SIZE, result_dst);
@@ -456,18 +456,18 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
nir_const_value *const_uniform_block =
nir_src_as_const_value(instr->src[1]);
if (const_uniform_block) {
- unsigned index = prog_data->base.binding_table.ubo_start +
+ unsigned index = prog_data->base.binding_table.ssbo_start +
const_uniform_block->u[0];
surf_index = src_reg(index);
brw_mark_surface_used(&prog_data->base, index);
} else {
surf_index = src_reg(this, glsl_type::uint_type);
emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[1], 1),
- src_reg(prog_data->base.binding_table.ubo_start)));
+ src_reg(prog_data->base.binding_table.ssbo_start)));
surf_index = emit_uniformize(surf_index);
brw_mark_surface_used(&prog_data->base,
- prog_data->base.binding_table.ubo_start +
+ prog_data->base.binding_table.ssbo_start +
nir->info.num_ssbos - 1);
}
@@ -599,7 +599,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
src_reg surf_index;
if (const_uniform_block) {
- unsigned index = prog_data->base.binding_table.ubo_start +
+ unsigned index = prog_data->base.binding_table.ssbo_start +
const_uniform_block->u[0];
surf_index = src_reg(index);
@@ -607,14 +607,14 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
} else {
surf_index = src_reg(this, glsl_type::uint_type);
emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[0], 1),
- src_reg(prog_data->base.binding_table.ubo_start)));
+ src_reg(prog_data->base.binding_table.ssbo_start)));
surf_index = emit_uniformize(surf_index);
/* Assume this may touch any UBO. It would be nice to provide
* a tighter bound, but the array information is already lowered away.
*/
brw_mark_surface_used(&prog_data->base,
- prog_data->base.binding_table.ubo_start +
+ prog_data->base.binding_table.ssbo_start +
nir->info.num_ssbos - 1);
}
@@ -645,17 +645,17 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
case nir_intrinsic_ssbo_atomic_add:
nir_emit_ssbo_atomic(BRW_AOP_ADD, instr);
break;
- case nir_intrinsic_ssbo_atomic_min:
- if (dest.type == BRW_REGISTER_TYPE_D)
- nir_emit_ssbo_atomic(BRW_AOP_IMIN, instr);
- else
- nir_emit_ssbo_atomic(BRW_AOP_UMIN, instr);
+ case nir_intrinsic_ssbo_atomic_imin:
+ nir_emit_ssbo_atomic(BRW_AOP_IMIN, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_umin:
+ nir_emit_ssbo_atomic(BRW_AOP_UMIN, instr);
break;
- case nir_intrinsic_ssbo_atomic_max:
- if (dest.type == BRW_REGISTER_TYPE_D)
- nir_emit_ssbo_atomic(BRW_AOP_IMAX, instr);
- else
- nir_emit_ssbo_atomic(BRW_AOP_UMAX, instr);
+ case nir_intrinsic_ssbo_atomic_imax:
+ nir_emit_ssbo_atomic(BRW_AOP_IMAX, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_umax:
+ nir_emit_ssbo_atomic(BRW_AOP_UMAX, instr);
break;
case nir_intrinsic_ssbo_atomic_and:
nir_emit_ssbo_atomic(BRW_AOP_AND, instr);
@@ -765,7 +765,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
*/
brw_mark_surface_used(&prog_data->base,
prog_data->base.binding_table.ubo_start +
- nir->info.num_ssbos - 1);
+ nir->info.num_ubos - 1);
}
unsigned const_offset = instr->const_index[0];
@@ -821,20 +821,20 @@ vec4_visitor::nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr)
src_reg surface;
nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]);
if (const_surface) {
- unsigned surf_index = prog_data->base.binding_table.ubo_start +
+ unsigned surf_index = prog_data->base.binding_table.ssbo_start +
const_surface->u[0];
surface = src_reg(surf_index);
brw_mark_surface_used(&prog_data->base, surf_index);
} else {
surface = src_reg(this, glsl_type::uint_type);
emit(ADD(dst_reg(surface), get_nir_src(instr->src[0]),
- src_reg(prog_data->base.binding_table.ubo_start)));
+ src_reg(prog_data->base.binding_table.ssbo_start)));
/* Assume this may touch any UBO. This is the same we do for other
* UBO/SSBO accesses with non-constant surface.
*/
brw_mark_surface_used(&prog_data->base,
- prog_data->base.binding_table.ubo_start +
+ prog_data->base.binding_table.ssbo_start +
nir->info.num_ssbos - 1);
}
@@ -1237,14 +1237,8 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
break;
case nir_op_b2i:
- emit(AND(dst, op[0], src_reg(1)));
- break;
-
case nir_op_b2f:
- op[0].type = BRW_REGISTER_TYPE_D;
- dst.type = BRW_REGISTER_TYPE_D;
- emit(AND(dst, op[0], src_reg(0x3f800000u)));
- dst.type = BRW_REGISTER_TYPE_F;
+ emit(MOV(dst, negate(op[0])));
break;
case nir_op_f2b:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 98ea9be6ee4..5be9c6a6b2d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1815,7 +1815,7 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
void *log_data,
const struct brw_sampler_prog_key_data *key_tex,
struct brw_vue_prog_data *prog_data,
- nir_shader *shader,
+ const nir_shader *shader,
void *mem_ctx,
bool no_spills,
int shader_time_index)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
index b6e1971c2ee..485a80ee2fc 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
@@ -301,7 +301,7 @@ vec4_vs_visitor::vec4_vs_visitor(const struct brw_compiler *compiler,
void *log_data,
const struct brw_vs_prog_key *key,
struct brw_vs_prog_data *vs_prog_data,
- nir_shader *shader,
+ const nir_shader *shader,
gl_clip_plane *clip_planes,
void *mem_ctx,
int shader_time_index,
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 38de98fab86..ba680a98f7e 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -31,6 +31,7 @@
#include "main/compiler.h"
+#include "main/context.h"
#include "brw_context.h"
#include "brw_vs.h"
#include "brw_util.h"
@@ -57,18 +58,6 @@ brw_codegen_vs_prog(struct brw_context *brw,
bool start_busy = false;
double start_time = 0;
- if (!vp->program.Base.nir) {
- /* Normally we generate NIR in LinkShader() or
- * ProgramStringNotify(), but Mesa's fixed-function vertex program
- * handling doesn't notify the driver at all. Just do it here, at
- * the last minute, even though it's lame.
- */
- assert(vp->program.Base.Id == 0 && prog == NULL);
- vp->program.Base.nir =
- brw_create_nir(brw, NULL, &vp->program.Base, MESA_SHADER_VERTEX,
- brw->intelScreen->compiler->scalar_vs);
- }
-
if (prog)
vs = (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
@@ -171,7 +160,7 @@ brw_codegen_vs_prog(struct brw_context *brw,
}
if (unlikely(INTEL_DEBUG & DEBUG_VS))
- brw_dump_ir("vertex", prog, &vs->base, &vp->program.Base);
+ brw_dump_ir("vertex", prog, vs ? &vs->base : NULL, &vp->program.Base);
int st_index = -1;
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
@@ -179,9 +168,20 @@ brw_codegen_vs_prog(struct brw_context *brw,
/* Emit GEN4 code.
*/
- program = brw_vs_emit(brw, mem_ctx, key, &prog_data,
- &vp->program, prog, st_index, &program_size);
+ char *error_str;
+ program = brw_compile_vs(brw->intelScreen->compiler, brw, mem_ctx, key,
+ &prog_data, vp->program.Base.nir,
+ brw_select_clip_planes(&brw->ctx),
+ !_mesa_is_gles3(&brw->ctx),
+ st_index, &program_size, &error_str);
if (program == NULL) {
+ if (prog) {
+ prog->LinkStatus = false;
+ ralloc_strcat(&prog->InfoLog, error_str);
+ }
+
+ _mesa_problem(NULL, "Failed to compile vertex shader: %s\n", error_str);
+
ralloc_free(mem_ctx);
return false;
}
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index f1242f61b33..bcb5e7b0b2a 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -54,14 +54,6 @@
extern "C" {
#endif
-const unsigned *brw_vs_emit(struct brw_context *brw,
- void *mem_ctx,
- const struct brw_vs_prog_key *key,
- struct brw_vs_prog_data *prog_data,
- struct gl_vertex_program *vp,
- struct gl_shader_program *shader_prog,
- int shader_time_index,
- unsigned *program_size);
void brw_vs_debug_recompile(struct brw_context *brw,
struct gl_shader_program *prog,
const struct brw_vs_prog_key *key);
@@ -88,7 +80,7 @@ public:
void *log_data,
const struct brw_vs_prog_key *key,
struct brw_vs_prog_data *vs_prog_data,
- nir_shader *shader,
+ const nir_shader *shader,
gl_clip_plane *clip_planes,
void *mem_ctx,
int shader_time_index,
diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
index 9bb48eb2e27..f65258a52a5 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -201,7 +201,7 @@ brw_upload_vs_image_surfaces(struct brw_context *brw)
ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];
if (prog) {
- /* BRW_NEW_VS_PROG_DATA, BRW_NEW_IMAGE_UNITS */
+ /* BRW_NEW_VS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_VERTEX],
&brw->vs.base, &brw->vs.prog_data->base.base);
}
@@ -209,6 +209,7 @@ brw_upload_vs_image_surfaces(struct brw_context *brw)
const struct brw_tracked_state brw_vs_image_surfaces = {
.dirty = {
+ .mesa = _NEW_TEXTURE,
.brw = BRW_NEW_BATCH |
BRW_NEW_IMAGE_UNITS |
BRW_NEW_VERTEX_PROGRAM |
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 4d5e7f67bd6..5c49db9e63e 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -39,89 +39,6 @@
#include "util/ralloc.h"
-/**
- * Return a bitfield where bit n is set if barycentric interpolation mode n
- * (see enum brw_wm_barycentric_interp_mode) is needed by the fragment shader.
- */
-static unsigned
-brw_compute_barycentric_interp_modes(const struct brw_device_info *devinfo,
- bool shade_model_flat,
- bool persample_shading,
- nir_shader *shader)
-{
- unsigned barycentric_interp_modes = 0;
-
- nir_foreach_variable(var, &shader->inputs) {
- enum glsl_interp_qualifier interp_qualifier = var->data.interpolation;
- bool is_centroid = var->data.centroid && !persample_shading;
- bool is_sample = var->data.sample || persample_shading;
- bool is_gl_Color = (var->data.location == VARYING_SLOT_COL0) ||
- (var->data.location == VARYING_SLOT_COL1);
-
- /* Ignore WPOS and FACE, because they don't require interpolation. */
- if (var->data.location == VARYING_SLOT_POS ||
- var->data.location == VARYING_SLOT_FACE)
- continue;
-
- /* Determine the set (or sets) of barycentric coordinates needed to
- * interpolate this variable. Note that when
- * brw->needs_unlit_centroid_workaround is set, centroid interpolation
- * uses PIXEL interpolation for unlit pixels and CENTROID interpolation
- * for lit pixels, so we need both sets of barycentric coordinates.
- */
- if (interp_qualifier == INTERP_QUALIFIER_NOPERSPECTIVE) {
- if (is_centroid) {
- barycentric_interp_modes |=
- 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC;
- } else if (is_sample) {
- barycentric_interp_modes |=
- 1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC;
- }
- if ((!is_centroid && !is_sample) ||
- devinfo->needs_unlit_centroid_workaround) {
- barycentric_interp_modes |=
- 1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC;
- }
- } else if (interp_qualifier == INTERP_QUALIFIER_SMOOTH ||
- (!(shade_model_flat && is_gl_Color) &&
- interp_qualifier == INTERP_QUALIFIER_NONE)) {
- if (is_centroid) {
- barycentric_interp_modes |=
- 1 << BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC;
- } else if (is_sample) {
- barycentric_interp_modes |=
- 1 << BRW_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC;
- }
- if ((!is_centroid && !is_sample) ||
- devinfo->needs_unlit_centroid_workaround) {
- barycentric_interp_modes |=
- 1 << BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
- }
- }
- }
-
- return barycentric_interp_modes;
-}
-
-static uint8_t
-computed_depth_mode(struct gl_fragment_program *fp)
-{
- if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
- switch (fp->FragDepthLayout) {
- case FRAG_DEPTH_LAYOUT_NONE:
- case FRAG_DEPTH_LAYOUT_ANY:
- return BRW_PSCDEPTH_ON;
- case FRAG_DEPTH_LAYOUT_GREATER:
- return BRW_PSCDEPTH_ON_GE;
- case FRAG_DEPTH_LAYOUT_LESS:
- return BRW_PSCDEPTH_ON_LE;
- case FRAG_DEPTH_LAYOUT_UNCHANGED:
- return BRW_PSCDEPTH_OFF;
- }
- }
- return BRW_PSCDEPTH_OFF;
-}
-
static void
assign_fs_binding_table_offsets(const struct brw_device_info *devinfo,
const struct gl_shader_program *shader_prog,
@@ -166,15 +83,6 @@ brw_codegen_wm_prog(struct brw_context *brw,
fs = (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
memset(&prog_data, 0, sizeof(prog_data));
- /* key->alpha_test_func means simulating alpha testing via discards,
- * so the shader definitely kills pixels.
- */
- prog_data.uses_kill = fp->program.UsesKill || key->alpha_test_func;
- prog_data.uses_omask =
- fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
- prog_data.computed_depth_mode = computed_depth_mode(&fp->program);
-
- prog_data.early_fragment_tests = fs && fs->base.EarlyFragmentTests;
/* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
if (!prog)
@@ -209,12 +117,6 @@ brw_codegen_wm_prog(struct brw_context *brw,
&prog_data.base);
}
- prog_data.barycentric_interp_modes =
- brw_compute_barycentric_interp_modes(brw->intelScreen->devinfo,
- key->flat_shade,
- key->persample_shading,
- fp->program.Base.nir);
-
if (unlikely(brw->perf_debug)) {
start_busy = (brw->batch.last_bo &&
drm_intel_bo_busy(brw->batch.last_bo));
@@ -222,7 +124,7 @@ brw_codegen_wm_prog(struct brw_context *brw,
}
if (unlikely(INTEL_DEBUG & DEBUG_WM))
- brw_dump_ir("fragment", prog, &fs->base, &fp->program.Base);
+ brw_dump_ir("fragment", prog, fs ? &fs->base : NULL, &fp->program.Base);
int st_index8 = -1, st_index16 = -1;
if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
@@ -230,9 +132,19 @@ brw_codegen_wm_prog(struct brw_context *brw,
st_index16 = brw_get_shader_time_index(brw, prog, &fp->program.Base, ST_FS16);
}
- program = brw_wm_fs_emit(brw, mem_ctx, key, &prog_data,
- &fp->program, prog, st_index8, st_index16, &program_size);
+ char *error_str = NULL;
+ program = brw_compile_fs(brw->intelScreen->compiler, brw, mem_ctx,
+ key, &prog_data, fp->program.Base.nir,
+ &fp->program.Base, st_index8, st_index16,
+ brw->use_rep_send, &program_size, &error_str);
if (program == NULL) {
+ if (prog) {
+ prog->LinkStatus = false;
+ ralloc_strcat(&prog->InfoLog, error_str);
+ }
+
+ _mesa_problem(NULL, "Failed to compile fragment shader: %s\n", error_str);
+
ralloc_free(mem_ctx);
return false;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index 6ee22b2f907..53a642ee8bb 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -61,21 +61,6 @@
extern "C" {
#endif
-/**
- * Compile a fragment shader.
- *
- * Returns the final assembly and the program's size.
- */
-const unsigned *brw_wm_fs_emit(struct brw_context *brw,
- void *mem_ctx,
- const struct brw_wm_prog_key *key,
- struct brw_wm_prog_data *prog_data,
- struct gl_fragment_program *fp,
- struct gl_shader_program *prog,
- int shader_time_index8,
- int shader_time_index16,
- unsigned *final_assembly_size);
-
GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog);
struct gl_shader *brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type);
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index c671e23827e..6ebe6481c32 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -34,6 +34,7 @@
#include "main/blend.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
+#include "main/shaderimage.h"
#include "program/prog_parameter.h"
#include "main/framebuffer.h"
@@ -925,54 +926,53 @@ brw_upload_ubo_surfaces(struct brw_context *brw,
if (!shader)
return;
- uint32_t *surf_offsets =
+ uint32_t *ubo_surf_offsets =
&stage_state->surf_offset[prog_data->binding_table.ubo_start];
for (int i = 0; i < shader->NumUniformBlocks; i++) {
- struct intel_buffer_object *intel_bo;
+ struct gl_uniform_buffer_binding *binding =
+ &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];
- /* Because behavior for referencing outside of the binding's size in the
- * glBindBufferRange case is undefined, we can just bind the whole buffer
- * glBindBufferBase wants and be a correct implementation.
- */
- if (!shader->UniformBlocks[i].IsShaderStorage) {
- struct gl_uniform_buffer_binding *binding;
- binding =
- &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
- if (binding->BufferObject == ctx->Shared->NullBufferObj) {
- brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &surf_offsets[i]);
- } else {
- intel_bo = intel_buffer_object(binding->BufferObject);
- drm_intel_bo *bo =
- intel_bufferobj_buffer(brw, intel_bo,
- binding->Offset,
- binding->BufferObject->Size - binding->Offset);
- brw_create_constant_surface(brw, bo, binding->Offset,
- binding->BufferObject->Size - binding->Offset,
- &surf_offsets[i],
- dword_pitch);
- }
+ if (binding->BufferObject == ctx->Shared->NullBufferObj) {
+ brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
} else {
- struct gl_shader_storage_buffer_binding *binding;
- binding =
- &ctx->ShaderStorageBufferBindings[shader->UniformBlocks[i].Binding];
- if (binding->BufferObject == ctx->Shared->NullBufferObj) {
- brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &surf_offsets[i]);
- } else {
- intel_bo = intel_buffer_object(binding->BufferObject);
- drm_intel_bo *bo =
- intel_bufferobj_buffer(brw, intel_bo,
- binding->Offset,
- binding->BufferObject->Size - binding->Offset);
- brw_create_buffer_surface(brw, bo, binding->Offset,
- binding->BufferObject->Size - binding->Offset,
- &surf_offsets[i],
- dword_pitch);
- }
+ struct intel_buffer_object *intel_bo =
+ intel_buffer_object(binding->BufferObject);
+ drm_intel_bo *bo =
+ intel_bufferobj_buffer(brw, intel_bo,
+ binding->Offset,
+ binding->BufferObject->Size - binding->Offset);
+ brw_create_constant_surface(brw, bo, binding->Offset,
+ binding->BufferObject->Size - binding->Offset,
+ &ubo_surf_offsets[i],
+ dword_pitch);
+ }
+ }
+
+ uint32_t *ssbo_surf_offsets =
+ &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
+
+ for (int i = 0; i < shader->NumShaderStorageBlocks; i++) {
+ struct gl_shader_storage_buffer_binding *binding =
+ &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding];
+
+ if (binding->BufferObject == ctx->Shared->NullBufferObj) {
+ brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
+ } else {
+ struct intel_buffer_object *intel_bo =
+ intel_buffer_object(binding->BufferObject);
+ drm_intel_bo *bo =
+ intel_bufferobj_buffer(brw, intel_bo,
+ binding->Offset,
+ binding->BufferObject->Size - binding->Offset);
+ brw_create_buffer_surface(brw, bo, binding->Offset,
+ binding->BufferObject->Size - binding->Offset,
+ &ssbo_surf_offsets[i],
+ dword_pitch);
}
}
- if (shader->NumUniformBlocks)
+ if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks)
brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}
@@ -1112,7 +1112,7 @@ brw_upload_cs_image_surfaces(struct brw_context *brw)
ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
if (prog) {
- /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS */
+ /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
&brw->cs.base, &brw->cs.prog_data->base);
}
@@ -1120,7 +1120,7 @@ brw_upload_cs_image_surfaces(struct brw_context *brw)
const struct brw_tracked_state brw_cs_image_surfaces = {
.dirty = {
- .mesa = _NEW_PROGRAM,
+ .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
.brw = BRW_NEW_BATCH |
BRW_NEW_CS_PROG_DATA |
BRW_NEW_IMAGE_UNITS
@@ -1253,7 +1253,7 @@ update_image_surface(struct brw_context *brw,
uint32_t *surf_offset,
struct brw_image_param *param)
{
- if (u->_Valid) {
+ if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
struct gl_texture_object *obj = u->TexObj;
const unsigned format = get_image_format(brw, u->_ActualFormat, access);
@@ -1338,7 +1338,7 @@ brw_upload_wm_image_surfaces(struct brw_context *brw)
struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
if (prog) {
- /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS */
+ /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
&brw->wm.base, &brw->wm.prog_data->base);
}
@@ -1346,6 +1346,7 @@ brw_upload_wm_image_surfaces(struct brw_context *brw)
const struct brw_tracked_state brw_wm_image_surfaces = {
.dirty = {
+ .mesa = _NEW_TEXTURE,
.brw = BRW_NEW_BATCH |
BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_FS_PROG_DATA |
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
index 59a76559103..671a535a5bd 100644
--- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
@@ -31,8 +31,6 @@
#include "gen6_gs_visitor.h"
-const unsigned MAX_GS_INPUT_VERTICES = 6;
-
namespace brw {
void
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
index e75d6aa10b8..d02c67d8a74 100644
--- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
+++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
@@ -39,7 +39,7 @@ public:
void *log_data,
struct brw_gs_compile *c,
struct gl_shader_program *prog,
- nir_shader *shader,
+ const nir_shader *shader,
void *mem_ctx,
bool no_spills,
int shader_time_index) :
diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c
index 497ecec8e45..8d6d3fe1d34 100644
--- a/src/mesa/drivers/dri/i965/gen7_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c
@@ -59,9 +59,7 @@ upload_gs_state(struct brw_context *brw)
OUT_BATCH(((ALIGN(stage_state->sampler_count, 4)/4) <<
GEN6_GS_SAMPLER_COUNT_SHIFT) |
((brw->gs.prog_data->base.base.binding_table.size_bytes / 4) <<
- GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
- (brw->is_haswell && prog_data->base.nr_image_params ?
- HSW_GS_UAV_ACCESS_ENABLE : 0));
+ GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
if (brw->gs.prog_data->base.base.total_scratch) {
OUT_RELOC(stage_state->scratch_bo,
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index b7e48585482..a18dc697651 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -126,9 +126,7 @@ upload_vs_state(struct brw_context *brw)
((ALIGN(stage_state->sampler_count, 4)/4) <<
GEN6_VS_SAMPLER_COUNT_SHIFT) |
((brw->vs.prog_data->base.base.binding_table.size_bytes / 4) <<
- GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
- (brw->is_haswell && prog_data->base.nr_image_params ?
- HSW_VS_UAV_ACCESS_ENABLE : 0));
+ GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
if (prog_data->base.total_scratch) {
OUT_RELOC(stage_state->scratch_bo,
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index fd6dab5be8b..06d5e65786b 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -113,7 +113,14 @@ upload_wm_state(struct brw_context *brw)
else if (prog_data->base.nr_image_params)
dw1 |= GEN7_WM_EARLY_DS_CONTROL_PSEXEC;
- /* _NEW_BUFFERS | _NEW_COLOR */
+ /* The "UAV access enable" bits are unnecessary on HSW because they only
+ * seem to have an effect on the HW-assisted coherency mechanism which we
+ * don't need, and the rasterization-related UAV_ONLY flag and the
+ * DISPATCH_ENABLE bit can be set independently from it.
+ * Cf. gen8_upload_ps_extra().
+ *
+ * BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA | _NEW_BUFFERS | _NEW_COLOR
+ */
if (brw->is_haswell &&
!(brw_color_buffer_write_enabled(brw) || writes_depth) &&
prog_data->base.nr_image_params)
@@ -221,9 +228,6 @@ gen7_upload_ps_state(struct brw_context *brw,
_mesa_get_min_invocations_per_fragment(ctx, fp, false);
assert(min_inv_per_frag >= 1);
- if (brw->is_haswell && prog_data->base.nr_image_params)
- dw4 |= HSW_PS_UAV_ACCESS_ENABLE;
-
if (prog_data->prog_offset_16 || prog_data->no_8) {
dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
if (!prog_data->no_8 && min_inv_per_frag == 1) {
diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c
index 4195f4cf4a7..d766ca7bebf 100644
--- a/src/mesa/drivers/dri/i965/gen8_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c
@@ -52,9 +52,7 @@ gen8_upload_gs_state(struct brw_context *brw)
((ALIGN(stage_state->sampler_count, 4)/4) <<
GEN6_GS_SAMPLER_COUNT_SHIFT) |
((prog_data->base.binding_table.size_bytes / 4) <<
- GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
- (prog_data->base.nr_image_params ?
- HSW_GS_UAV_ACCESS_ENABLE : 0));
+ GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
if (brw->gs.prog_data->base.base.total_scratch) {
OUT_RELOC64(stage_state->scratch_bo,
diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c
index a686fed704f..8f0507413a7 100644
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
@@ -25,6 +25,7 @@
#include "program/program.h"
#include "brw_state.h"
#include "brw_defines.h"
+#include "brw_wm.h"
#include "intel_batchbuffer.h"
void
@@ -65,8 +66,33 @@ gen8_upload_ps_extra(struct brw_context *brw,
if (brw->gen >= 9 && prog_data->pulls_bary)
dw1 |= GEN9_PSX_SHADER_PULLS_BARY;
- if (_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx) ||
- prog_data->base.nr_image_params)
+ /* The stricter cross-primitive coherency guarantees that the hardware
+ * gives us with the "Accesses UAV" bit set for at least one shader stage
+ * and the "UAV coherency required" bit set on the 3DPRIMITIVE command are
+ * redundant within the current image, atomic counter and SSBO GL APIs,
+ * which all have very loose ordering and coherency requirements and
+ * generally rely on the application to insert explicit barriers when a
+ * shader invocation is expected to see the memory writes performed by the
+ * invocations of some previous primitive. Regardless of the value of "UAV
+ * coherency required", the "Accesses UAV" bits will implicitly cause a DC
+ * flush (useless in most cases) when the lowermost stage with the bit set
+ * finishes execution.
+ *
+ * It would be nice to disable it, but in some cases we can't because on
+ * Gen8+ it also has an influence on rasterization via the PS UAV-only
+ * signal (which could be set independently from the coherency mechanism in
+ * the 3DSTATE_WM command on Gen7), and because in some cases it will
+ * determine whether the hardware skips execution of the fragment shader or
+ * not via the ThreadDispatchEnable signal. However if we know that
+ * GEN8_PS_BLEND_HAS_WRITEABLE_RT is going to be set and
+ * GEN8_PSX_PIXEL_SHADER_NO_RT_WRITE is not set it shouldn't make any
+ * difference so we may just disable it here.
+ *
+ * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS | _NEW_COLOR
+ */
+ if ((_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx) ||
+ prog_data->base.nr_image_params) &&
+ !brw_color_buffer_write_enabled(brw))
dw1 |= GEN8_PSX_SHADER_HAS_UAV;
BEGIN_BATCH(2);
@@ -91,7 +117,7 @@ upload_ps_extra(struct brw_context *brw)
const struct brw_tracked_state gen8_ps_extra = {
.dirty = {
- .mesa = 0,
+ .mesa = _NEW_BUFFERS | _NEW_COLOR,
.brw = BRW_NEW_CONTEXT |
BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_FS_PROG_DATA |
diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c
index e1e7704655d..18b86652fd2 100644
--- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
@@ -221,8 +221,8 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
* "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN
* 16 must be used."
*/
- assert(brw->gen < 9 || mt->halign == 16);
- assert(brw->gen < 8 || mt->num_samples > 1 || mt->halign == 16);
+ if (brw->gen >= 9 || mt->num_samples == 1)
+ assert(mt->halign == 16);
}
const uint32_t surf_type = translate_tex_target(target);
@@ -470,8 +470,8 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
* "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN
* 16 must be used."
*/
- assert(brw->gen < 9 || mt->halign == 16);
- assert(brw->gen < 8 || mt->num_samples > 1 || mt->halign == 16);
+ if (brw->gen >= 9 || mt->num_samples == 1)
+ assert(mt->halign == 16);
}
uint32_t *surf = allocate_surface_state(brw, &offset, surf_index);
diff --git a/src/mesa/drivers/dri/i965/gen8_vs_state.c b/src/mesa/drivers/dri/i965/gen8_vs_state.c
index 8b5048bee7e..28f5adddf14 100644
--- a/src/mesa/drivers/dri/i965/gen8_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_vs_state.c
@@ -53,9 +53,7 @@ upload_vs_state(struct brw_context *brw)
((ALIGN(stage_state->sampler_count, 4) / 4) <<
GEN6_VS_SAMPLER_COUNT_SHIFT) |
((prog_data->base.binding_table.size_bytes / 4) <<
- GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
- (prog_data->base.nr_image_params ?
- HSW_VS_UAV_ACCESS_ENABLE : 0));
+ GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
if (prog_data->base.total_scratch) {
OUT_RELOC64(stage_state->scratch_bo,
diff --git a/src/mesa/drivers/dri/i965/intel_asm_annotation.c b/src/mesa/drivers/dri/i965/intel_asm_annotation.c
index bb8bb8d38c9..b3d6324a5fe 100644
--- a/src/mesa/drivers/dri/i965/intel_asm_annotation.c
+++ b/src/mesa/drivers/dri/i965/intel_asm_annotation.c
@@ -33,8 +33,7 @@
void
dump_assembly(void *assembly, int num_annotations, struct annotation *annotation,
- const struct brw_device_info *devinfo,
- const struct gl_program *prog)
+ const struct brw_device_info *devinfo)
{
const char *last_annotation_string = NULL;
const void *last_annotation_ir = NULL;
@@ -57,19 +56,7 @@ dump_assembly(void *assembly, int num_annotations, struct annotation *annotation
last_annotation_ir = annotation[i].ir;
if (last_annotation_ir) {
fprintf(stderr, " ");
- if (prog->nir)
- nir_print_instr(annotation[i].ir, stderr);
- else if (!prog->Instructions)
- fprint_ir(stderr, annotation[i].ir);
- else {
- const struct prog_instruction *pi =
- (const struct prog_instruction *)annotation[i].ir;
- fprintf(stderr, "%d: ",
- (int)(pi - prog->Instructions));
- _mesa_fprint_instruction_opt(stderr,
- pi,
- 0, PROG_PRINT_DEBUG, NULL);
- }
+ nir_print_instr(annotation[i].ir, stderr);
fprintf(stderr, "\n");
}
}
diff --git a/src/mesa/drivers/dri/i965/intel_asm_annotation.h b/src/mesa/drivers/dri/i965/intel_asm_annotation.h
index d9c69bc41b0..6c72326f058 100644
--- a/src/mesa/drivers/dri/i965/intel_asm_annotation.h
+++ b/src/mesa/drivers/dri/i965/intel_asm_annotation.h
@@ -60,8 +60,7 @@ struct annotation_info {
void
dump_assembly(void *assembly, int num_annotations, struct annotation *annotation,
- const struct brw_device_info *devinfo,
- const struct gl_program *prog);
+ const struct brw_device_info *devinfo);
void
annotate(const struct brw_device_info *devinfo,
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index a169c41790e..b6e35205727 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -201,6 +201,14 @@ intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw,
if (brw->gen < 7)
return false;
+ if (brw->gen >= 9) {
+ /* FINISHME: Enable singlesample fast MCS clears on SKL after all GPU
+ * FINISHME: hangs are resolved.
+ */
+ perf_debug("singlesample fast MCS clears disabled on gen9");
+ return false;
+ }
+
if (mt->disable_aux_buffers)
return false;
diff --git a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
index 8adb626d420..5f80f90a91d 100644
--- a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
@@ -66,7 +66,7 @@ void cmod_propagation_test::SetUp()
v = new cmod_propagation_fs_visitor(compiler, prog_data, shader);
- _mesa_init_fragment_program(ctx, &fp->program, GL_FRAGMENT_SHADER, 0);
+ _mesa_init_gl_program(&fp->program.Base, GL_FRAGMENT_SHADER, 0);
devinfo->gen = 4;
}
diff --git a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp
index f77b18e7db8..32e8b8f8867 100644
--- a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp
@@ -66,7 +66,7 @@ void saturate_propagation_test::SetUp()
v = new saturate_propagation_fs_visitor(compiler, prog_data, shader);
- _mesa_init_fragment_program(ctx, &fp->program, GL_FRAGMENT_SHADER, 0);
+ _mesa_init_gl_program(&fp->program.Base, GL_FRAGMENT_SHADER, 0);
devinfo->gen = 4;
}
diff --git a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
index 40253961a65..e80b71b558d 100644
--- a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
@@ -98,7 +98,7 @@ void copy_propagation_test::SetUp()
v = new copy_propagation_vec4_visitor(compiler, shader);
- _mesa_init_vertex_program(ctx, &vp->program, GL_VERTEX_SHADER, 0);
+ _mesa_init_gl_program(&vp->program.Base, GL_VERTEX_SHADER, 0);
devinfo->gen = 4;
}
diff --git a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
index 76028d36311..2f824617454 100644
--- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
@@ -101,7 +101,7 @@ void register_coalesce_test::SetUp()
v = new register_coalesce_vec4_visitor(compiler, shader);
- _mesa_init_vertex_program(ctx, &vp->program, GL_VERTEX_SHADER, 0);
+ _mesa_init_gl_program(&vp->program.Base, GL_VERTEX_SHADER, 0);
devinfo->gen = 4;
}
diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c
index d43eaf977fc..628c5708090 100644
--- a/src/mesa/drivers/dri/r200/r200_vertprog.c
+++ b/src/mesa/drivers/dri/r200/r200_vertprog.c
@@ -1200,18 +1200,19 @@ r200BindProgram(struct gl_context *ctx, GLenum target, struct gl_program *prog)
static struct gl_program *
r200NewProgram(struct gl_context *ctx, GLenum target, GLuint id)
{
- struct r200_vertex_program *vp;
-
switch(target){
- case GL_VERTEX_PROGRAM_ARB:
- vp = CALLOC_STRUCT(r200_vertex_program);
- return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id);
- case GL_FRAGMENT_PROGRAM_ARB:
- return _mesa_init_fragment_program( ctx, CALLOC_STRUCT(gl_fragment_program), target, id );
+ case GL_VERTEX_PROGRAM_ARB: {
+ struct r200_vertex_program *vp = CALLOC_STRUCT(r200_vertex_program);
+ return _mesa_init_gl_program(&vp->mesa_program.Base, target, id);
+ }
+ case GL_FRAGMENT_PROGRAM_ARB: {
+ struct gl_fragment_program *prog = CALLOC_STRUCT(gl_fragment_program);
+ return _mesa_init_gl_program(&prog->Base, target, id);
+ }
default:
_mesa_problem(ctx, "Bad target in r200NewProgram");
+ return NULL;
}
- return NULL;
}
diff --git a/src/mesa/drivers/x11/SConscript b/src/mesa/drivers/x11/SConscript
index d29f9874f44..cd5cccda0d1 100644
--- a/src/mesa/drivers/x11/SConscript
+++ b/src/mesa/drivers/x11/SConscript
@@ -4,6 +4,8 @@ env = env.Clone()
env.Append(CPPPATH = [
'#/src',
+ '#/src/glsl',
+ '#/src/glsl/nir',
'#/src/mapi',
'#/src/mesa',
'#/src/mesa/main',
diff --git a/src/mesa/main/blend.c b/src/mesa/main/blend.c
index dee5e29d5b8..20aa4980935 100644
--- a/src/mesa/main/blend.c
+++ b/src/mesa/main/blend.c
@@ -190,6 +190,19 @@ update_uses_dual_src(struct gl_context *ctx, int buf)
blend_factor_is_dual_src(ctx->Color.Blend[buf].DstA));
}
+
+/**
+ * Return the number of per-buffer blend states to update in
+ * glBlendFunc, glBlendFuncSeparate, glBlendEquation, etc.
+ */
+static inline unsigned
+num_buffers(const struct gl_context *ctx)
+{
+ return ctx->Extensions.ARB_draw_buffers_blend
+ ? ctx->Const.MaxDrawBuffers : 1;
+}
+
+
/**
* Set the separate blend source/dest factors for all draw buffers.
*
@@ -202,9 +215,10 @@ void GLAPIENTRY
_mesa_BlendFuncSeparate( GLenum sfactorRGB, GLenum dfactorRGB,
GLenum sfactorA, GLenum dfactorA )
{
- GLuint buf, numBuffers;
- GLboolean changed;
GET_CURRENT_CONTEXT(ctx);
+ const unsigned numBuffers = num_buffers(ctx);
+ unsigned buf;
+ bool changed = false;
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "glBlendFuncSeparate %s %s %s %s\n",
@@ -213,28 +227,38 @@ _mesa_BlendFuncSeparate( GLenum sfactorRGB, GLenum dfactorRGB,
_mesa_enum_to_string(sfactorA),
_mesa_enum_to_string(dfactorA));
- if (!validate_blend_factors(ctx, "glBlendFuncSeparate",
- sfactorRGB, dfactorRGB,
- sfactorA, dfactorA)) {
- return;
+ /* Check if we're really changing any state. If not, return early. */
+ if (ctx->Color._BlendFuncPerBuffer) {
+ /* Check all per-buffer states */
+ for (buf = 0; buf < numBuffers; buf++) {
+ if (ctx->Color.Blend[buf].SrcRGB != sfactorRGB ||
+ ctx->Color.Blend[buf].DstRGB != dfactorRGB ||
+ ctx->Color.Blend[buf].SrcA != sfactorA ||
+ ctx->Color.Blend[buf].DstA != dfactorA) {
+ changed = true;
+ break;
+ }
+ }
}
-
- numBuffers = ctx->Extensions.ARB_draw_buffers_blend
- ? ctx->Const.MaxDrawBuffers : 1;
-
- changed = GL_FALSE;
- for (buf = 0; buf < numBuffers; buf++) {
- if (ctx->Color.Blend[buf].SrcRGB != sfactorRGB ||
- ctx->Color.Blend[buf].DstRGB != dfactorRGB ||
- ctx->Color.Blend[buf].SrcA != sfactorA ||
- ctx->Color.Blend[buf].DstA != dfactorA) {
- changed = GL_TRUE;
- break;
+ else {
+ /* only need to check 0th per-buffer state */
+ if (ctx->Color.Blend[0].SrcRGB != sfactorRGB ||
+ ctx->Color.Blend[0].DstRGB != dfactorRGB ||
+ ctx->Color.Blend[0].SrcA != sfactorA ||
+ ctx->Color.Blend[0].DstA != dfactorA) {
+ changed = true;
}
}
+
if (!changed)
return;
+ if (!validate_blend_factors(ctx, "glBlendFuncSeparate",
+ sfactorRGB, dfactorRGB,
+ sfactorA, dfactorA)) {
+ return;
+ }
+
FLUSH_VERTICES(ctx, _NEW_COLOR);
for (buf = 0; buf < numBuffers; buf++) {
@@ -242,8 +266,13 @@ _mesa_BlendFuncSeparate( GLenum sfactorRGB, GLenum dfactorRGB,
ctx->Color.Blend[buf].DstRGB = dfactorRGB;
ctx->Color.Blend[buf].SrcA = sfactorA;
ctx->Color.Blend[buf].DstA = dfactorA;
- update_uses_dual_src(ctx, buf);
}
+
+ update_uses_dual_src(ctx, 0);
+ for (buf = 1; buf < numBuffers; buf++) {
+ ctx->Color.Blend[buf]._UsesDualSrc = ctx->Color.Blend[0]._UsesDualSrc;
+ }
+
ctx->Color._BlendFuncPerBuffer = GL_FALSE;
if (ctx->Driver.BlendFuncSeparate) {
@@ -283,18 +312,18 @@ _mesa_BlendFuncSeparateiARB(GLuint buf, GLenum sfactorRGB, GLenum dfactorRGB,
return;
}
- if (!validate_blend_factors(ctx, "glBlendFuncSeparatei",
- sfactorRGB, dfactorRGB,
- sfactorA, dfactorA)) {
- return;
- }
-
if (ctx->Color.Blend[buf].SrcRGB == sfactorRGB &&
ctx->Color.Blend[buf].DstRGB == dfactorRGB &&
ctx->Color.Blend[buf].SrcA == sfactorA &&
ctx->Color.Blend[buf].DstA == dfactorA)
return; /* no change */
+ if (!validate_blend_factors(ctx, "glBlendFuncSeparatei",
+ sfactorRGB, dfactorRGB,
+ sfactorA, dfactorA)) {
+ return;
+ }
+
FLUSH_VERTICES(ctx, _NEW_COLOR);
ctx->Color.Blend[buf].SrcRGB = sfactorRGB;
@@ -331,34 +360,43 @@ legal_blend_equation(const struct gl_context *ctx, GLenum mode)
void GLAPIENTRY
_mesa_BlendEquation( GLenum mode )
{
- GLuint buf, numBuffers;
- GLboolean changed;
GET_CURRENT_CONTEXT(ctx);
+ const unsigned numBuffers = num_buffers(ctx);
+ unsigned buf;
+ bool changed = false;
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "glBlendEquation(%s)\n",
_mesa_enum_to_string(mode));
- if (!legal_blend_equation(ctx, mode)) {
- _mesa_error(ctx, GL_INVALID_ENUM, "glBlendEquation");
- return;
+ if (ctx->Color._BlendEquationPerBuffer) {
+ /* Check all per-buffer states */
+ for (buf = 0; buf < numBuffers; buf++) {
+ if (ctx->Color.Blend[buf].EquationRGB != mode ||
+ ctx->Color.Blend[buf].EquationA != mode) {
+ changed = true;
+ break;
+ }
+ }
}
-
- numBuffers = ctx->Extensions.ARB_draw_buffers_blend
- ? ctx->Const.MaxDrawBuffers : 1;
-
- changed = GL_FALSE;
- for (buf = 0; buf < numBuffers; buf++) {
- if (ctx->Color.Blend[buf].EquationRGB != mode ||
- ctx->Color.Blend[buf].EquationA != mode) {
- changed = GL_TRUE;
- break;
+ else {
+ /* only need to check 0th per-buffer state */
+ if (ctx->Color.Blend[0].EquationRGB != mode ||
+ ctx->Color.Blend[0].EquationA != mode) {
+ changed = true;
}
}
+
if (!changed)
return;
+ if (!legal_blend_equation(ctx, mode)) {
+ _mesa_error(ctx, GL_INVALID_ENUM, "glBlendEquation");
+ return;
+ }
+
FLUSH_VERTICES(ctx, _NEW_COLOR);
+
for (buf = 0; buf < numBuffers; buf++) {
ctx->Color.Blend[buf].EquationRGB = mode;
ctx->Color.Blend[buf].EquationA = mode;
@@ -383,7 +421,7 @@ _mesa_BlendEquationiARB(GLuint buf, GLenum mode)
buf, _mesa_enum_to_string(mode));
if (buf >= ctx->Const.MaxDrawBuffers) {
- _mesa_error(ctx, GL_INVALID_VALUE, "glBlendFuncSeparatei(buffer=%u)",
+ _mesa_error(ctx, GL_INVALID_VALUE, "glBlendEquationi(buffer=%u)",
buf);
return;
}
@@ -407,15 +445,37 @@ _mesa_BlendEquationiARB(GLuint buf, GLenum mode)
void GLAPIENTRY
_mesa_BlendEquationSeparate( GLenum modeRGB, GLenum modeA )
{
- GLuint buf, numBuffers;
- GLboolean changed;
GET_CURRENT_CONTEXT(ctx);
+ const unsigned numBuffers = num_buffers(ctx);
+ unsigned buf;
+ bool changed = false;
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "glBlendEquationSeparateEXT(%s %s)\n",
_mesa_enum_to_string(modeRGB),
_mesa_enum_to_string(modeA));
+ if (ctx->Color._BlendEquationPerBuffer) {
+ /* Check all per-buffer states */
+ for (buf = 0; buf < numBuffers; buf++) {
+ if (ctx->Color.Blend[buf].EquationRGB != modeRGB ||
+ ctx->Color.Blend[buf].EquationA != modeA) {
+ changed = true;
+ break;
+ }
+ }
+ }
+ else {
+ /* only need to check 0th per-buffer state */
+ if (ctx->Color.Blend[0].EquationRGB != modeRGB ||
+ ctx->Color.Blend[0].EquationA != modeA) {
+ changed = true;
+ }
+ }
+
+ if (!changed)
+ return;
+
if ( (modeRGB != modeA) && !ctx->Extensions.EXT_blend_equation_separate ) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"glBlendEquationSeparateEXT not supported by driver");
@@ -432,21 +492,8 @@ _mesa_BlendEquationSeparate( GLenum modeRGB, GLenum modeA )
return;
}
- numBuffers = ctx->Extensions.ARB_draw_buffers_blend
- ? ctx->Const.MaxDrawBuffers : 1;
-
- changed = GL_FALSE;
- for (buf = 0; buf < numBuffers; buf++) {
- if (ctx->Color.Blend[buf].EquationRGB != modeRGB ||
- ctx->Color.Blend[buf].EquationA != modeA) {
- changed = GL_TRUE;
- break;
- }
- }
- if (!changed)
- return;
-
FLUSH_VERTICES(ctx, _NEW_COLOR);
+
for (buf = 0; buf < numBuffers; buf++) {
ctx->Color.Blend[buf].EquationRGB = modeRGB;
ctx->Color.Blend[buf].EquationA = modeA;
diff --git a/src/mesa/main/es1_conversion.c b/src/mesa/main/es1_conversion.c
index b254a6ef1c7..1dfe8278e71 100644
--- a/src/mesa/main/es1_conversion.c
+++ b/src/mesa/main/es1_conversion.c
@@ -1,3 +1,4 @@
+
#include <stdbool.h>
#include "api_loopback.h"
@@ -326,7 +327,24 @@ _mesa_GetTexEnvxv(GLenum target, GLenum pname, GLfixed *params)
}
break;
case GL_TEXTURE_ENV:
- if (pname != GL_TEXTURE_ENV_COLOR && pname != GL_RGB_SCALE && pname != GL_ALPHA_SCALE && pname != GL_TEXTURE_ENV_MODE && pname != GL_COMBINE_RGB && pname != GL_COMBINE_ALPHA && pname != GL_SRC0_RGB && pname != GL_SRC1_RGB && pname != GL_SRC2_RGB && pname != GL_SRC0_ALPHA && pname != GL_SRC1_ALPHA && pname != GL_SRC2_ALPHA && pname != GL_OPERAND0_RGB && pname != GL_OPERAND1_RGB && pname != GL_OPERAND2_RGB && pname != GL_OPERAND0_ALPHA && pname != GL_OPERAND1_ALPHA && pname != GL_OPERAND2_ALPHA) {
+ if (pname != GL_TEXTURE_ENV_COLOR &&
+ pname != GL_RGB_SCALE &&
+ pname != GL_ALPHA_SCALE &&
+ pname != GL_TEXTURE_ENV_MODE &&
+ pname != GL_COMBINE_RGB &&
+ pname != GL_COMBINE_ALPHA &&
+ pname != GL_SRC0_RGB &&
+ pname != GL_SRC1_RGB &&
+ pname != GL_SRC2_RGB &&
+ pname != GL_SRC0_ALPHA &&
+ pname != GL_SRC1_ALPHA &&
+ pname != GL_SRC2_ALPHA &&
+ pname != GL_OPERAND0_RGB &&
+ pname != GL_OPERAND1_RGB &&
+ pname != GL_OPERAND2_RGB &&
+ pname != GL_OPERAND0_ALPHA &&
+ pname != GL_OPERAND1_ALPHA &&
+ pname != GL_OPERAND2_ALPHA) {
_mesa_error(_mesa_get_current_context(), GL_INVALID_ENUM,
"glGetTexEnvxv(target=0x%x)", target);
return;
diff --git a/src/mesa/main/ff_fragment_shader.cpp b/src/mesa/main/ff_fragment_shader.cpp
index e4e2a18c1da..e63d0f1ec55 100644
--- a/src/mesa/main/ff_fragment_shader.cpp
+++ b/src/mesa/main/ff_fragment_shader.cpp
@@ -40,7 +40,7 @@
#include "glsl/ir_optimization.h"
#include "glsl/glsl_parser_extras.h"
#include "glsl/glsl_symbol_table.h"
-#include "glsl/glsl_types.h"
+#include "glsl/nir/glsl_types.h"
#include "program/ir_to_mesa.h"
#include "program/program.h"
#include "program/programopt.h"
@@ -975,13 +975,11 @@ static void load_texture( texenv_fragment_program *p, GLuint unit )
ir_var_uniform);
p->top_instructions->push_head(sampler);
- /* Set the texture unit for this sampler. The linker will pick this value
- * up and do-the-right-thing.
- *
- * NOTE: The cast to int is important. Without it, the constant will have
- * type uint, and things later on may get confused.
+ /* Set the texture unit for this sampler in the same way that
+ * layout(binding=X) would.
*/
- sampler->constant_value = new(p->mem_ctx) ir_constant(int(unit));
+ sampler->data.explicit_binding = true;
+ sampler->data.binding = unit;
deref = new(p->mem_ctx) ir_dereference_variable(sampler);
tex->set_sampler(deref, glsl_type::vec4_type);
diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c
index a6183b47e2e..34cc9218add 100644
--- a/src/mesa/main/ffvertex_prog.c
+++ b/src/mesa/main/ffvertex_prog.c
@@ -1690,11 +1690,10 @@ _mesa_get_fixed_func_vertex_program(struct gl_context *ctx)
ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS,
ctx->Const.Program[MESA_SHADER_VERTEX].MaxTemps );
-#if 0
if (ctx->Driver.ProgramStringNotify)
ctx->Driver.ProgramStringNotify( ctx, GL_VERTEX_PROGRAM_ARB,
&prog->Base );
-#endif
+
_mesa_program_cache_insert(ctx, ctx->VertexProgram.Cache,
&key, sizeof(key), &prog->Base);
}
diff --git a/src/mesa/main/format_utils.h b/src/mesa/main/format_utils.h
index 618f43d0aaa..378997b38b2 100644
--- a/src/mesa/main/format_utils.h
+++ b/src/mesa/main/format_utils.h
@@ -34,6 +34,7 @@
#include "imports.h"
#include "macros.h"
#include "util/rounding.h"
+#include "util/half_float.h"
extern const mesa_array_format RGBA32_FLOAT;
extern const mesa_array_format RGBA8_UBYTE;
diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c
index 350e6752c8b..230ebbc67f4 100644
--- a/src/mesa/main/imports.c
+++ b/src/mesa/main/imports.c
@@ -307,154 +307,6 @@ _mesa_bitcount_64(uint64_t n)
}
#endif
-
-/**
- * Convert a 4-byte float to a 2-byte half float.
- *
- * Not all float32 values can be represented exactly as a float16 value. We
- * round such intermediate float32 values to the nearest float16. When the
- * float32 lies exactly between to float16 values, we round to the one with
- * an even mantissa.
- *
- * This rounding behavior has several benefits:
- * - It has no sign bias.
- *
- * - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's
- * GPU ISA.
- *
- * - By reproducing the behavior of the GPU (at least on Intel hardware),
- * compile-time evaluation of constant packHalf2x16 GLSL expressions will
- * result in the same value as if the expression were executed on the GPU.
- */
-GLhalfARB
-_mesa_float_to_half(float val)
-{
- const fi_type fi = {val};
- const int flt_m = fi.i & 0x7fffff;
- const int flt_e = (fi.i >> 23) & 0xff;
- const int flt_s = (fi.i >> 31) & 0x1;
- int s, e, m = 0;
- GLhalfARB result;
-
- /* sign bit */
- s = flt_s;
-
- /* handle special cases */
- if ((flt_e == 0) && (flt_m == 0)) {
- /* zero */
- /* m = 0; - already set */
- e = 0;
- }
- else if ((flt_e == 0) && (flt_m != 0)) {
- /* denorm -- denorm float maps to 0 half */
- /* m = 0; - already set */
- e = 0;
- }
- else if ((flt_e == 0xff) && (flt_m == 0)) {
- /* infinity */
- /* m = 0; - already set */
- e = 31;
- }
- else if ((flt_e == 0xff) && (flt_m != 0)) {
- /* NaN */
- m = 1;
- e = 31;
- }
- else {
- /* regular number */
- const int new_exp = flt_e - 127;
- if (new_exp < -14) {
- /* The float32 lies in the range (0.0, min_normal16) and is rounded
- * to a nearby float16 value. The result will be either zero, subnormal,
- * or normal.
- */
- e = 0;
- m = _mesa_lroundevenf((1 << 24) * fabsf(fi.f));
- }
- else if (new_exp > 15) {
- /* map this value to infinity */
- /* m = 0; - already set */
- e = 31;
- }
- else {
- /* The float32 lies in the range
- * [min_normal16, max_normal16 + max_step16)
- * and is rounded to a nearby float16 value. The result will be
- * either normal or infinite.
- */
- e = new_exp + 15;
- m = _mesa_lroundevenf(flt_m / (float) (1 << 13));
- }
- }
-
- assert(0 <= m && m <= 1024);
- if (m == 1024) {
- /* The float32 was rounded upwards into the range of the next exponent,
- * so bump the exponent. This correctly handles the case where f32
- * should be rounded up to float16 infinity.
- */
- ++e;
- m = 0;
- }
-
- result = (s << 15) | (e << 10) | m;
- return result;
-}
-
-
-/**
- * Convert a 2-byte half float to a 4-byte float.
- * Based on code from:
- * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html
- */
-float
-_mesa_half_to_float(GLhalfARB val)
-{
- /* XXX could also use a 64K-entry lookup table */
- const int m = val & 0x3ff;
- const int e = (val >> 10) & 0x1f;
- const int s = (val >> 15) & 0x1;
- int flt_m, flt_e, flt_s;
- fi_type fi;
- float result;
-
- /* sign bit */
- flt_s = s;
-
- /* handle special cases */
- if ((e == 0) && (m == 0)) {
- /* zero */
- flt_m = 0;
- flt_e = 0;
- }
- else if ((e == 0) && (m != 0)) {
- /* denorm -- denorm half will fit in non-denorm single */
- const float half_denorm = 1.0f / 16384.0f; /* 2^-14 */
- float mantissa = ((float) (m)) / 1024.0f;
- float sign = s ? -1.0f : 1.0f;
- return sign * mantissa * half_denorm;
- }
- else if ((e == 31) && (m == 0)) {
- /* infinity */
- flt_e = 0xff;
- flt_m = 0;
- }
- else if ((e == 31) && (m != 0)) {
- /* NaN */
- flt_e = 0xff;
- flt_m = 1;
- }
- else {
- /* regular */
- flt_e = e + 112;
- flt_m = m << 13;
- }
-
- fi.i = (flt_s << 31) | (flt_e << 23) | flt_m;
- result = fi.f;
- return result;
-}
-
/*@}*/
diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h
index 90247587be3..042147fd8bb 100644
--- a/src/mesa/main/imports.h
+++ b/src/mesa/main/imports.h
@@ -396,13 +396,6 @@ _mesa_flsll(uint64_t n)
#endif
}
-
-extern GLhalfARB
-_mesa_float_to_half(float f);
-
-extern float
-_mesa_half_to_float(GLhalfARB h);
-
static inline bool
_mesa_half_is_negative(GLhalfARB h)
{
diff --git a/src/mesa/main/matrix.c b/src/mesa/main/matrix.c
index 2b8016a4a72..5ff5ac5bfe1 100644
--- a/src/mesa/main/matrix.c
+++ b/src/mesa/main/matrix.c
@@ -151,7 +151,6 @@ _mesa_MatrixMode( GLenum mode )
if (ctx->Transform.MatrixMode == mode && mode != GL_TEXTURE)
return;
- FLUSH_VERTICES(ctx, _NEW_TRANSFORM);
switch (mode) {
case GL_MODELVIEW:
diff --git a/src/mesa/main/mipmap.c b/src/mesa/main/mipmap.c
index ab16c2854a8..50469956c6e 100644
--- a/src/mesa/main/mipmap.c
+++ b/src/mesa/main/mipmap.c
@@ -37,6 +37,7 @@
#include "texstore.h"
#include "image.h"
#include "macros.h"
+#include "util/half_float.h"
#include "../../gallium/auxiliary/util/u_format_rgb9e5.h"
#include "../../gallium/auxiliary/util/u_format_r11g11b10f.h"
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index cbfb15522f0..e57b98a412d 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -42,7 +42,7 @@
#include "main/config.h"
#include "glapi/glapi.h"
#include "math/m_matrix.h" /* GLmatrix */
-#include "glsl/shader_enums.h"
+#include "glsl/nir/shader_enums.h"
#include "main/formats.h" /* MESA_FORMAT_COUNT */
@@ -94,11 +94,6 @@ struct vbo_context;
#define PRIM_OUTSIDE_BEGIN_END (PRIM_MAX + 1)
#define PRIM_UNKNOWN (PRIM_MAX + 2)
-#define VARYING_SLOT_MAX (VARYING_SLOT_VAR0 + MAX_VARYING)
-#define VARYING_SLOT_PATCH0 (VARYING_SLOT_MAX)
-#define VARYING_SLOT_TESS_MAX (VARYING_SLOT_PATCH0 + MAX_VARYING)
-#define FRAG_RESULT_MAX (FRAG_RESULT_DATA0 + MAX_DRAW_BUFFERS)
-
/**
* Determine if the given gl_varying_slot appears in the fragment shader.
*/
@@ -487,26 +482,24 @@ struct gl_colorbuffer_attrib
struct gl_current_attrib
{
/**
- * \name Current vertex attributes.
+ * \name Current vertex attributes (color, texcoords, etc).
* \note Values are valid only after FLUSH_VERTICES has been called.
* \note Index and Edgeflag current values are stored as floats in the
* SIX and SEVEN attribute slots.
+ * \note We need double storage for 64-bit vertex attributes
*/
- /* we need double storage for this for vertex attrib 64bit */
- GLfloat Attrib[VERT_ATTRIB_MAX][4*2]; /**< Position, color, texcoords, etc */
+ GLfloat Attrib[VERT_ATTRIB_MAX][4*2];
/**
- * \name Current raster position attributes (always valid).
- * \note This set of attributes is very similar to the SWvertex struct.
+ * \name Current raster position attributes (always up to date after a
+ * glRasterPos call).
*/
- /*@{*/
GLfloat RasterPos[4];
GLfloat RasterDistance;
GLfloat RasterColor[4];
GLfloat RasterSecondaryColor[4];
GLfloat RasterTexCoords[MAX_TEXTURE_COORD_UNITS][4];
GLboolean RasterPosValid;
- /*@}*/
};
@@ -1866,24 +1859,6 @@ typedef enum
/**
- * \brief Layout qualifiers for gl_FragDepth.
- *
- * Extension AMD_conservative_depth allows gl_FragDepth to be redeclared with
- * a layout qualifier.
- *
- * \see enum ir_depth_layout
- */
-enum gl_frag_depth_layout
-{
- FRAG_DEPTH_LAYOUT_NONE, /**< No layout is specified. */
- FRAG_DEPTH_LAYOUT_ANY,
- FRAG_DEPTH_LAYOUT_GREATER,
- FRAG_DEPTH_LAYOUT_LESS,
- FRAG_DEPTH_LAYOUT_UNCHANGED
-};
-
-
-/**
* Base class for any kind of program object
*/
struct gl_program
@@ -2286,12 +2261,34 @@ struct gl_shader
unsigned num_combined_uniform_components;
/**
- * This shader's uniform block information.
+ * This shader's uniform/ssbo block information.
*
* These fields are only set post-linking.
+ *
+ * BufferInterfaceBlocks is a list containing both UBOs and SSBOs. This is
+ * useful during the linking process so that we don't have to handle SSBOs
+ * specifically.
+ *
+ * UniformBlocks is a list of UBOs. This is useful for backends that need
+ * or prefer to see separate index spaces for UBOs and SSBOs like the GL
+ * API specifies.
+ *
+ * ShaderStorageBlocks is a list of SSBOs. This is useful for backends that
+ * need or prefer to see separate index spaces for UBOs and SSBOs like the
+ * GL API specifies.
+ *
+ * UniformBlocks and ShaderStorageBlocks only have pointers into
+ * BufferInterfaceBlocks so the actual resource information is not
+ * duplicated.
*/
+ unsigned NumBufferInterfaceBlocks;
+ struct gl_uniform_block *BufferInterfaceBlocks;
+
unsigned NumUniformBlocks;
- struct gl_uniform_block *UniformBlocks;
+ struct gl_uniform_block **UniformBlocks;
+
+ unsigned NumShaderStorageBlocks;
+ struct gl_uniform_block **ShaderStorageBlocks;
struct exec_list *ir;
struct exec_list *packed_varyings;
@@ -2694,8 +2691,33 @@ struct gl_shader_program
*/
unsigned LastClipDistanceArraySize;
+ /**
+ * This shader program's uniform/SSBO block information.
+ *
+ * BufferInterfaceBlocks is a list containing both UBOs and SSBOs. This is
+ * useful during the linking process so that we don't have to handle SSBOs
+ * specifically.
+ *
+ * UniformBlocks is a list of UBOs. This is useful for backends that need
+ * or prefer to see separate index spaces for UBOs and SSBOs like the GL
+ * API specifies.
+ *
+ * ShaderStorageBlocks is a list of SSBOs. This is useful for backends that
+ * need or prefer to see separate index spaces for UBOs and SSBOs like the
+ * GL API specifies.
+ *
+ * UniformBlocks and ShaderStorageBlocks only have pointers into
+ * BufferInterfaceBlocks so the actual resource information is not
+ * duplicated and are only set after linking.
+ */
unsigned NumBufferInterfaceBlocks;
- struct gl_uniform_block *UniformBlocks;
+ struct gl_uniform_block *BufferInterfaceBlocks;
+
+ unsigned NumUniformBlocks;
+ struct gl_uniform_block **UniformBlocks;
+
+ unsigned NumShaderStorageBlocks;
+ struct gl_uniform_block **ShaderStorageBlocks;
/**
* Indices into the _LinkedShaders's UniformBlocks[] array for each stage
@@ -4076,13 +4098,6 @@ struct gl_image_unit
GLboolean Layered;
/**
- * GL_TRUE if the state of this image unit is valid and access from
- * the shader is allowed. Otherwise loads from this unit should
- * return zero and stores should have no effect.
- */
- GLboolean _Valid;
-
- /**
* Layer of the texture object bound to this unit as specified by the
* application.
*/
diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c
index 00e31b05c99..89faf515443 100644
--- a/src/mesa/main/pack.c
+++ b/src/mesa/main/pack.c
@@ -1073,6 +1073,21 @@ _mesa_pack_depth_span( struct gl_context *ctx, GLuint n, GLvoid *dest,
}
}
break;
+ case GL_UNSIGNED_INT_24_8:
+ {
+ const GLdouble scale = (GLdouble) 0xffffff;
+ GLuint *dst = (GLuint *) dest;
+ GLuint i;
+ for (i = 0; i < n; i++) {
+ GLuint z = (GLuint) (depthSpan[i] * scale);
+ assert(z <= 0xffffff);
+ dst[i] = (z << 8);
+ }
+ if (dstPacking->SwapBytes) {
+ _mesa_swap4( (GLuint *) dst, n );
+ }
+ break;
+ }
case GL_UNSIGNED_INT:
{
GLuint *dst = (GLuint *) dest;
diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp
index 6d73e3bdcf2..8182d3dcc04 100644
--- a/src/mesa/main/shader_query.cpp
+++ b/src/mesa/main/shader_query.cpp
@@ -485,8 +485,14 @@ _mesa_program_resource_array_size(struct gl_program_resource *res)
case GL_COMPUTE_SUBROUTINE_UNIFORM:
case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
- case GL_BUFFER_VARIABLE:
return RESOURCE_UNI(res)->array_elements;
+ case GL_BUFFER_VARIABLE:
+ /* Unsized arrays */
+ if (RESOURCE_UNI(res)->array_stride > 0 &&
+ RESOURCE_UNI(res)->array_elements == 0)
+ return 1;
+ else
+ return RESOURCE_UNI(res)->array_elements;
case GL_VERTEX_SUBROUTINE:
case GL_GEOMETRY_SUBROUTINE:
case GL_FRAGMENT_SUBROUTINE:
@@ -833,193 +839,6 @@ program_resource_location(struct gl_shader_program *shProg,
}
}
-static char*
-get_top_level_name(const char *name)
-{
- const char *first_dot = strchr(name, '.');
- const char *first_square_bracket = strchr(name, '[');
- int name_size = 0;
- /* From ARB_program_interface_query spec:
- *
- * "For the property TOP_LEVEL_ARRAY_SIZE, a single integer identifying the
- * number of active array elements of the top-level shader storage block
- * member containing to the active variable is written to <params>. If the
- * top-level block member is not declared as an array, the value one is
- * written to <params>. If the top-level block member is an array with no
- * declared size, the value zero is written to <params>.
- */
-
- /* The buffer variable is on top level.*/
- if (!first_square_bracket && !first_dot)
- name_size = strlen(name);
- else if ((!first_square_bracket ||
- (first_dot && first_dot < first_square_bracket)))
- name_size = first_dot - name;
- else
- name_size = first_square_bracket - name;
-
- return strndup(name, name_size);
-}
-
-static char*
-get_var_name(const char *name)
-{
- const char *first_dot = strchr(name, '.');
-
- if (!first_dot)
- return strdup(name);
-
- return strndup(first_dot+1, strlen(first_dot) - 1);
-}
-
-static GLint
-program_resource_top_level_array_size(struct gl_shader_program *shProg,
- struct gl_program_resource *res,
- const char *name)
-{
- int block_index = RESOURCE_UNI(res)->block_index;
- int array_size = -1;
- char *var_name = get_top_level_name(name);
- char *interface_name =
- get_top_level_name(shProg->UniformBlocks[block_index].Name);
-
- if (strcmp(var_name, interface_name) == 0) {
- /* Deal with instanced array of SSBOs */
- char *temp_name = get_var_name(name);
- free(var_name);
- var_name = get_top_level_name(temp_name);
- free(temp_name);
- }
-
- for (unsigned i = 0; i < shProg->NumShaders; i++) {
- if (shProg->Shaders[i] == NULL)
- continue;
-
- const gl_shader *stage = shProg->Shaders[i];
- foreach_in_list(ir_instruction, node, stage->ir) {
- ir_variable *var = node->as_variable();
- if (!var || !var->get_interface_type() ||
- var->data.mode != ir_var_shader_storage)
- continue;
-
- const glsl_type *interface = var->get_interface_type();
-
- if (strcmp(interface_name, interface->name) != 0)
- continue;
-
- for (unsigned i = 0; i < interface->length; i++) {
- const glsl_struct_field *field = &interface->fields.structure[i];
- if (strcmp(field->name, var_name) != 0)
- continue;
- /* From GL_ARB_program_interface_query spec:
- *
- * "For the property TOP_LEVEL_ARRAY_SIZE, a single integer
- * identifying the number of active array elements of the top-level
- * shader storage block member containing to the active variable is
- * written to <params>. If the top-level block member is not
- * declared as an array, the value one is written to <params>. If
- * the top-level block member is an array with no declared size,
- * the value zero is written to <params>.
- */
- if (field->type->is_unsized_array())
- array_size = 0;
- else if (field->type->is_array())
- array_size = field->type->length;
- else
- array_size = 1;
- goto found_top_level_array_size;
- }
- }
- }
-found_top_level_array_size:
- free(interface_name);
- free(var_name);
- return array_size;
-}
-
-static GLint
-program_resource_top_level_array_stride(struct gl_shader_program *shProg,
- struct gl_program_resource *res,
- const char *name)
-{
- int block_index = RESOURCE_UNI(res)->block_index;
- int array_stride = -1;
- char *var_name = get_top_level_name(name);
- char *interface_name =
- get_top_level_name(shProg->UniformBlocks[block_index].Name);
-
- if (strcmp(var_name, interface_name) == 0) {
- /* Deal with instanced array of SSBOs */
- char *temp_name = get_var_name(name);
- free(var_name);
- var_name = get_top_level_name(temp_name);
- free(temp_name);
- }
-
- for (unsigned i = 0; i < shProg->NumShaders; i++) {
- if (shProg->Shaders[i] == NULL)
- continue;
-
- const gl_shader *stage = shProg->Shaders[i];
- foreach_in_list(ir_instruction, node, stage->ir) {
- ir_variable *var = node->as_variable();
- if (!var || !var->get_interface_type() ||
- var->data.mode != ir_var_shader_storage)
- continue;
-
- const glsl_type *interface = var->get_interface_type();
-
- if (strcmp(interface_name, interface->name) != 0) {
- continue;
- }
-
- for (unsigned i = 0; i < interface->length; i++) {
- const glsl_struct_field *field = &interface->fields.structure[i];
- if (strcmp(field->name, var_name) != 0)
- continue;
- /* From GL_ARB_program_interface_query:
- *
- * "For the property TOP_LEVEL_ARRAY_STRIDE, a single integer
- * identifying the stride between array elements of the top-level
- * shader storage block member containing the active variable is
- * written to <params>. For top-level block members declared as
- * arrays, the value written is the difference, in basic machine
- * units, between the offsets of the active variable for
- * consecutive elements in the top-level array. For top-level
- * block members not declared as an array, zero is written to
- * <params>."
- */
- if (field->type->is_array()) {
- const enum glsl_matrix_layout matrix_layout =
- glsl_matrix_layout(field->matrix_layout);
- bool row_major = matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR;
- const glsl_type *array_type = field->type->fields.array;
-
- if (interface->interface_packing != GLSL_INTERFACE_PACKING_STD430) {
- if (array_type->is_record() || array_type->is_array()) {
- array_stride = array_type->std140_size(row_major);
- array_stride = glsl_align(array_stride, 16);
- } else {
- unsigned element_base_align = 0;
- element_base_align = array_type->std140_base_alignment(row_major);
- array_stride = MAX2(element_base_align, 16);
- }
- } else {
- array_stride = array_type->std430_array_stride(row_major);
- }
- } else {
- array_stride = 0;
- }
- goto found_top_level_array_size;
- }
- }
- }
-found_top_level_array_size:
- free(interface_name);
- free(var_name);
- return array_stride;
-}
-
/**
* Function implements following location queries:
* glGetUniformLocation
@@ -1133,7 +952,8 @@ get_buffer_property(struct gl_shader_program *shProg,
(*val)++;
}
return 1;
- case GL_ACTIVE_VARIABLES:
+ case GL_ACTIVE_VARIABLES: {
+ unsigned num_values = 0;
for (unsigned i = 0; i < RESOURCE_UBO(res)->NumUniforms; i++) {
const char *iname = RESOURCE_UBO(res)->Uniforms[i].IndexName;
struct gl_program_resource *uni =
@@ -1143,8 +963,10 @@ get_buffer_property(struct gl_shader_program *shProg,
continue;
*val++ =
_mesa_program_resource_index(shProg, uni);
+ num_values++;
}
- return RESOURCE_UBO(res)->NumUniforms;
+ return num_values;
+ }
}
} else if (res->Type == GL_SHADER_STORAGE_BLOCK) {
switch (prop) {
@@ -1166,7 +988,8 @@ get_buffer_property(struct gl_shader_program *shProg,
(*val)++;
}
return 1;
- case GL_ACTIVE_VARIABLES:
+ case GL_ACTIVE_VARIABLES: {
+ unsigned num_values = 0;
for (unsigned i = 0; i < RESOURCE_UBO(res)->NumUniforms; i++) {
const char *iname = RESOURCE_UBO(res)->Uniforms[i].IndexName;
struct gl_program_resource *uni =
@@ -1176,8 +999,10 @@ get_buffer_property(struct gl_shader_program *shProg,
continue;
*val++ =
_mesa_program_resource_index(shProg, uni);
+ num_values++;
}
- return RESOURCE_UBO(res)->NumUniforms;
+ return num_values;
+ }
}
} else if (res->Type == GL_ATOMIC_COUNTER_BUFFER) {
switch (prop) {
@@ -1251,8 +1076,15 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg,
switch (res->Type) {
case GL_UNIFORM:
case GL_BUFFER_VARIABLE:
+ /* Test if a buffer variable is an array or an unsized array.
+ * Unsized arrays return zero as array size.
+ */
+ if (RESOURCE_UNI(res)->is_shader_storage &&
+ RESOURCE_UNI(res)->array_stride > 0)
+ *val = RESOURCE_UNI(res)->array_elements;
+ else
*val = MAX2(RESOURCE_UNI(res)->array_elements, 1);
- return 1;
+ return 1;
case GL_PROGRAM_INPUT:
case GL_PROGRAM_OUTPUT:
*val = MAX2(_mesa_program_resource_array_size(res), 1);
@@ -1374,14 +1206,12 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg,
case GL_TOP_LEVEL_ARRAY_SIZE:
VALIDATE_TYPE(GL_BUFFER_VARIABLE);
- *val = program_resource_top_level_array_size(shProg, res,
- _mesa_program_resource_name(res));
+ *val = RESOURCE_UNI(res)->top_level_array_size;
return 1;
case GL_TOP_LEVEL_ARRAY_STRIDE:
VALIDATE_TYPE(GL_BUFFER_VARIABLE);
- *val = program_resource_top_level_array_stride(shProg, res,
- _mesa_program_resource_name(res));
+ *val = RESOURCE_UNI(res)->top_level_array_stride;
return 1;
/* GL_ARB_tessellation_shader */
diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index 9dd1054c8ee..18e463d4ccc 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -713,10 +713,10 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname,
if (!has_ubo)
break;
- for (i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
+ for (i = 0; i < shProg->NumUniformBlocks; i++) {
/* Add one for the terminating NUL character.
*/
- const GLint len = strlen(shProg->UniformBlocks[i].Name) + 1;
+ const GLint len = strlen(shProg->UniformBlocks[i]->Name) + 1;
if (len > max_len)
max_len = len;
@@ -729,11 +729,7 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname,
if (!has_ubo)
break;
- *params = 0;
- for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
- if (!shProg->UniformBlocks[i].IsShaderStorage)
- (*params)++;
- }
+ *params = shProg->NumUniformBlocks;
return;
case GL_PROGRAM_BINARY_RETRIEVABLE_HINT:
/* This enum isn't part of the OES extension for OpenGL ES 2.0. It is
diff --git a/src/mesa/main/shaderimage.c b/src/mesa/main/shaderimage.c
index bd4b7c7be3b..c4ebf4201fb 100644
--- a/src/mesa/main/shaderimage.c
+++ b/src/mesa/main/shaderimage.c
@@ -415,8 +415,8 @@ _mesa_init_image_units(struct gl_context *ctx)
ctx->ImageUnits[i] = _mesa_default_image_unit(ctx);
}
-static GLboolean
-validate_image_unit(struct gl_context *ctx, struct gl_image_unit *u)
+GLboolean
+_mesa_is_image_unit_valid(struct gl_context *ctx, struct gl_image_unit *u)
{
struct gl_texture_object *t = u->TexObj;
mesa_format tex_format;
@@ -424,7 +424,8 @@ validate_image_unit(struct gl_context *ctx, struct gl_image_unit *u)
if (!t)
return GL_FALSE;
- _mesa_test_texobj_completeness(ctx, t);
+ if (!t->_BaseComplete && !t->_MipmapComplete)
+ _mesa_test_texobj_completeness(ctx, t);
if (u->Level < t->BaseLevel ||
u->Level > t->_MaxLevel ||
@@ -473,17 +474,6 @@ validate_image_unit(struct gl_context *ctx, struct gl_image_unit *u)
return GL_TRUE;
}
-void
-_mesa_validate_image_units(struct gl_context *ctx)
-{
- unsigned i;
-
- for (i = 0; i < ctx->Const.MaxImageUnits; ++i) {
- struct gl_image_unit *u = &ctx->ImageUnits[i];
- u->_Valid = validate_image_unit(ctx, u);
- }
-}
-
static GLboolean
validate_bind_image_texture(struct gl_context *ctx, GLuint unit,
GLuint texture, GLint level, GLboolean layered,
@@ -567,7 +557,6 @@ _mesa_BindImageTexture(GLuint unit, GLuint texture, GLint level,
u->Access = access;
u->Format = format;
u->_ActualFormat = _mesa_get_shader_image_format(format);
- u->_Valid = validate_image_unit(ctx, u);
if (u->TexObj && _mesa_tex_target_is_layered(u->TexObj->Target)) {
u->Layered = layered;
@@ -703,7 +692,6 @@ _mesa_BindImageTextures(GLuint first, GLsizei count, const GLuint *textures)
u->Access = GL_READ_WRITE;
u->Format = tex_format;
u->_ActualFormat = _mesa_get_shader_image_format(tex_format);
- u->_Valid = validate_image_unit(ctx, u);
} else {
/* Unbind the texture from the unit */
_mesa_reference_texobj(&u->TexObj, NULL);
@@ -713,7 +701,6 @@ _mesa_BindImageTextures(GLuint first, GLsizei count, const GLuint *textures)
u->Access = GL_READ_ONLY;
u->Format = GL_R8;
u->_ActualFormat = MESA_FORMAT_R_UNORM8;
- u->_Valid = GL_FALSE;
}
}
diff --git a/src/mesa/main/shaderimage.h b/src/mesa/main/shaderimage.h
index bbe088a2459..94ee814a716 100644
--- a/src/mesa/main/shaderimage.h
+++ b/src/mesa/main/shaderimage.h
@@ -55,13 +55,15 @@ void
_mesa_init_image_units(struct gl_context *ctx);
/**
- * Recalculate the \c _Valid flag of a context's shader image units.
+ * Return GL_TRUE if the state of the image unit passed as argument is valid
+ * and access from the shader is allowed. Otherwise loads from this unit
+ * should return zero and stores should have no effect.
*
- * To be called when the state of any texture bound to an image unit
- * changes.
+ * The result depends on context state other than the passed image unit, part
+ * of the _NEW_TEXTURE set.
*/
-void
-_mesa_validate_image_units(struct gl_context *ctx);
+GLboolean
+_mesa_is_image_unit_valid(struct gl_context *ctx, struct gl_image_unit *u);
void GLAPIENTRY
_mesa_BindImageTexture(GLuint unit, GLuint texture, GLint level,
diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c
index 4e85fda24b4..ffc71931fec 100644
--- a/src/mesa/main/shaderobj.c
+++ b/src/mesa/main/shaderobj.c
@@ -290,8 +290,8 @@ _mesa_clear_shader_program_data(struct gl_shader_program *shProg)
ralloc_free(shProg->InfoLog);
shProg->InfoLog = ralloc_strdup(shProg, "");
- ralloc_free(shProg->UniformBlocks);
- shProg->UniformBlocks = NULL;
+ ralloc_free(shProg->BufferInterfaceBlocks);
+ shProg->BufferInterfaceBlocks = NULL;
shProg->NumBufferInterfaceBlocks = 0;
for (i = 0; i < MESA_SHADER_STAGES; i++) {
ralloc_free(shProg->UniformBlockStageIndex[i]);
diff --git a/src/mesa/main/shared.c b/src/mesa/main/shared.c
index 1acaf59f432..c37b31d1753 100644
--- a/src/mesa/main/shared.c
+++ b/src/mesa/main/shared.c
@@ -107,6 +107,11 @@ _mesa_alloc_shared_state(struct gl_context *ctx)
};
STATIC_ASSERT(ARRAY_SIZE(targets) == NUM_TEXTURE_TARGETS);
shared->DefaultTex[i] = ctx->Driver.NewTextureObject(ctx, 0, targets[i]);
+ /* Need to explicitly set/overwrite the TargetIndex field here since
+ * the call to _mesa_tex_target_to_index() in NewTextureObject() may
+ * fail if the texture target is not supported.
+ */
+ shared->DefaultTex[i]->TargetIndex = i;
}
/* sanity check */
diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c
index d3b1c72b08d..4043c4f2057 100644
--- a/src/mesa/main/state.c
+++ b/src/mesa/main/state.c
@@ -391,8 +391,12 @@ _mesa_update_state_locked( struct gl_context *ctx )
GLbitfield new_state = ctx->NewState;
GLbitfield prog_flags = _NEW_PROGRAM;
GLbitfield new_prog_state = 0x0;
+ const GLbitfield computed_states = ~(_NEW_CURRENT_ATTRIB | _NEW_LINE);
- if (new_state == _NEW_CURRENT_ATTRIB)
+ /* we can skip a bunch of state validation checks if the dirty
+ * state contains none of the bits in 'computed_states'.
+ */
+ if ((new_state & computed_states) == 0)
goto out;
if (MESA_VERBOSE & VERBOSE_STATE)
diff --git a/src/mesa/main/texcompress_bptc.c b/src/mesa/main/texcompress_bptc.c
index f0f6553a01b..26e59158007 100644
--- a/src/mesa/main/texcompress_bptc.c
+++ b/src/mesa/main/texcompress_bptc.c
@@ -30,6 +30,7 @@
#include "texcompress.h"
#include "texcompress_bptc.h"
#include "util/format_srgb.h"
+#include "util/half_float.h"
#include "texstore.h"
#include "macros.h"
#include "image.h"
diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c
index 173e43c817c..547055ecf39 100644
--- a/src/mesa/main/texobj.c
+++ b/src/mesa/main/texobj.c
@@ -286,6 +286,12 @@ _mesa_initialize_texture_object( struct gl_context *ctx,
obj->RefCount = 1;
obj->Name = name;
obj->Target = target;
+ if (target != 0) {
+ obj->TargetIndex = _mesa_tex_target_to_index(ctx, target);
+ }
+ else {
+ obj->TargetIndex = NUM_TEXTURE_TARGETS; /* invalid/error value */
+ }
obj->Priority = 1.0F;
obj->BaseLevel = 0;
obj->MaxLevel = 1000;
@@ -340,6 +346,10 @@ finish_texture_init(struct gl_context *ctx, GLenum target,
GLenum filter = GL_LINEAR;
assert(obj->Target == 0);
+ obj->Target = target;
+ obj->TargetIndex = _mesa_tex_target_to_index(ctx, target);
+ assert(obj->TargetIndex < NUM_TEXTURE_TARGETS);
+
switch (target) {
case GL_TEXTURE_2D_MULTISAMPLE:
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
@@ -1185,46 +1195,26 @@ invalidate_tex_image_error_check(struct gl_context *ctx, GLuint texture,
return t;
}
-/**
- * Wrapper for the driver function. Need this because _mesa_new_texture_object
- * permits a target of 0 and does not initialize targetIndex.
- */
-struct gl_texture_object *
-_mesa_create_nameless_texture(struct gl_context *ctx, GLenum target)
-{
- struct gl_texture_object *texObj = NULL;
- GLint targetIndex;
-
- if (target == 0)
- return texObj;
-
- texObj = ctx->Driver.NewTextureObject(ctx, 0, target);
- targetIndex = _mesa_tex_target_to_index(ctx, texObj->Target);
- assert(targetIndex < NUM_TEXTURE_TARGETS);
- texObj->TargetIndex = targetIndex;
-
- return texObj;
-}
/**
* Helper function for glCreateTextures and glGenTextures. Need this because
* glCreateTextures should throw errors if target = 0. This is not exposed to
* the rest of Mesa to encourage Mesa internals to use nameless textures,
* which do not require expensive hash lookups.
+ * \param target either 0 or a valid / error-checked texture target enum
*/
static void
create_textures(struct gl_context *ctx, GLenum target,
- GLsizei n, GLuint *textures, bool dsa)
+ GLsizei n, GLuint *textures, const char *caller)
{
GLuint first;
GLint i;
- const char *func = dsa ? "Create" : "Gen";
if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
- _mesa_debug(ctx, "gl%sTextures %d\n", func, n);
+ _mesa_debug(ctx, "%s %d\n", caller, n);
if (n < 0) {
- _mesa_error( ctx, GL_INVALID_VALUE, "gl%sTextures(n < 0)", func );
+ _mesa_error(ctx, GL_INVALID_VALUE, "%s(n < 0)", caller);
return;
}
@@ -1241,28 +1231,14 @@ create_textures(struct gl_context *ctx, GLenum target,
/* Allocate new, empty texture objects */
for (i = 0; i < n; i++) {
struct gl_texture_object *texObj;
- GLint targetIndex;
GLuint name = first + i;
texObj = ctx->Driver.NewTextureObject(ctx, name, target);
if (!texObj) {
mtx_unlock(&ctx->Shared->Mutex);
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "gl%sTextures", func);
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "gl%sTextures", caller);
return;
}
- /* Initialize the target index if target is non-zero. */
- if (target != 0) {
- targetIndex = _mesa_tex_target_to_index(ctx, texObj->Target);
- if (targetIndex < 0) { /* Bad Target */
- mtx_unlock(&ctx->Shared->Mutex);
- _mesa_error(ctx, GL_INVALID_ENUM, "gl%sTextures(target = %s)",
- func, _mesa_enum_to_string(texObj->Target));
- return;
- }
- assert(targetIndex < NUM_TEXTURE_TARGETS);
- texObj->TargetIndex = targetIndex;
- }
-
/* insert into hash table */
_mesa_HashInsert(ctx->Shared->TexObjects, texObj->Name, texObj);
@@ -1296,7 +1272,7 @@ void GLAPIENTRY
_mesa_GenTextures(GLsizei n, GLuint *textures)
{
GET_CURRENT_CONTEXT(ctx);
- create_textures(ctx, 0, n, textures, false);
+ create_textures(ctx, 0, n, textures, "glGenTextures");
}
/**
@@ -1329,7 +1305,7 @@ _mesa_CreateTextures(GLenum target, GLsizei n, GLuint *textures)
return;
}
- create_textures(ctx, target, n, textures, true);
+ create_textures(ctx, target, n, textures, "glCreateTextures");
}
/**
@@ -1383,8 +1359,12 @@ unbind_texobj_from_texunits(struct gl_context *ctx,
const gl_texture_index index = texObj->TargetIndex;
GLuint u;
- if (texObj->Target == 0)
+ if (texObj->Target == 0) {
+ /* texture was never bound */
return;
+ }
+
+ assert(index < NUM_TEXTURE_TARGETS);
for (u = 0; u < ctx->Texture.NumCurrentTexUsed; u++) {
struct gl_texture_unit *unit = &ctx->Texture.Unit[u];
@@ -1752,10 +1732,11 @@ _mesa_BindTexture( GLenum target, GLuint texName )
_mesa_HashInsert(ctx->Shared->TexObjects, texName, newTexObj);
mtx_unlock(&ctx->Shared->Mutex);
}
- newTexObj->Target = target;
- newTexObj->TargetIndex = targetIndex;
}
+ assert(newTexObj->Target == target);
+ assert(newTexObj->TargetIndex == targetIndex);
+
bind_texture(ctx, ctx->Texture.CurrentUnit, newTexObj);
}
@@ -1778,19 +1759,12 @@ _mesa_BindTextureUnit(GLuint unit, GLuint texture)
{
GET_CURRENT_CONTEXT(ctx);
struct gl_texture_object *texObj;
- struct gl_texture_unit *texUnit;
if (unit >= _mesa_max_tex_unit(ctx)) {
_mesa_error(ctx, GL_INVALID_VALUE, "glBindTextureUnit(unit=%u)", unit);
return;
}
- texUnit = _mesa_get_tex_unit(ctx, unit);
- assert(texUnit);
- if (!texUnit) {
- return;
- }
-
if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
_mesa_debug(ctx, "glBindTextureUnit %s %d\n",
_mesa_enum_to_string(GL_TEXTURE0+unit), (GLint) texture);
@@ -1812,7 +1786,7 @@ _mesa_BindTextureUnit(GLuint unit, GLuint texture)
/* Error checking */
if (!texObj) {
_mesa_error(ctx, GL_INVALID_OPERATION,
- "glBindTextureUnit(non-gen name)");
+ "glBindTextureUnit(non-gen name)");
return;
}
if (texObj->Target == 0) {
diff --git a/src/mesa/main/texobj.h b/src/mesa/main/texobj.h
index 690878c85fc..8421337de4d 100644
--- a/src/mesa/main/texobj.h
+++ b/src/mesa/main/texobj.h
@@ -202,9 +202,6 @@ _mesa_unlock_context_textures( struct gl_context *ctx );
extern void
_mesa_lock_context_textures( struct gl_context *ctx );
-extern struct gl_texture_object *
-_mesa_create_nameless_texture(struct gl_context *ctx, GLenum target);
-
extern void
_mesa_delete_nameless_texture(struct gl_context *ctx,
struct gl_texture_object *texObj);
diff --git a/src/mesa/main/texstate.c b/src/mesa/main/texstate.c
index 9b5928c4306..cb147fac476 100644
--- a/src/mesa/main/texstate.c
+++ b/src/mesa/main/texstate.c
@@ -34,7 +34,6 @@
#include "context.h"
#include "enums.h"
#include "macros.h"
-#include "shaderimage.h"
#include "texobj.h"
#include "teximage.h"
#include "texstate.h"
@@ -741,8 +740,6 @@ update_texture_state( struct gl_context *ctx )
if (!prog[MESA_SHADER_FRAGMENT] || !prog[MESA_SHADER_VERTEX])
update_texgen(ctx);
-
- _mesa_validate_image_units(ctx);
}
diff --git a/src/mesa/main/textureview.c b/src/mesa/main/textureview.c
index 5a3282a40c1..04b7d73da5c 100644
--- a/src/mesa/main/textureview.c
+++ b/src/mesa/main/textureview.c
@@ -681,6 +681,8 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture,
texObj->Immutable = GL_TRUE;
texObj->ImmutableLevels = origTexObj->ImmutableLevels;
texObj->Target = target;
+ texObj->TargetIndex = _mesa_tex_target_to_index(ctx, target);
+ assert(texObj->TargetIndex < NUM_TEXTURE_TARGETS);
if (ctx->Driver.TextureView != NULL &&
!ctx->Driver.TextureView(ctx, texObj, origTexObj)) {
diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp
index d48729778ae..083087d6baa 100644
--- a/src/mesa/main/uniform_query.cpp
+++ b/src/mesa/main/uniform_query.cpp
@@ -318,19 +318,12 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location,
return;
}
- if ((uni->type->base_type == GLSL_TYPE_DOUBLE &&
- returnType != GLSL_TYPE_DOUBLE) ||
- (uni->type->base_type != GLSL_TYPE_DOUBLE &&
- returnType == GLSL_TYPE_DOUBLE)) {
- _mesa_error( ctx, GL_INVALID_OPERATION,
- "glGetnUniform*vARB(incompatible uniform types)");
- return;
- }
{
unsigned elements = (uni->type->is_sampler())
? 1 : uni->type->components();
const int dmul = uni->type->base_type == GLSL_TYPE_DOUBLE ? 2 : 1;
+ const int rmul = returnType == GLSL_TYPE_DOUBLE ? 2 : 1;
/* Calculate the source base address *BEFORE* modifying elements to
* account for the size of the user's buffer.
@@ -342,7 +335,7 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location,
returnType == GLSL_TYPE_UINT || returnType == GLSL_TYPE_DOUBLE);
/* doubles have a different size than the other 3 types */
- unsigned bytes = sizeof(src[0]) * elements * dmul;
+ unsigned bytes = sizeof(src[0]) * elements * rmul;
if (bufSize < 0 || bytes > (unsigned) bufSize) {
_mesa_error( ctx, GL_INVALID_OPERATION,
"glGetnUniform*vARB(out of bounds: bufSize is %d,"
@@ -366,32 +359,57 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location,
} else {
union gl_constant_value *const dst =
(union gl_constant_value *) paramsOut;
-
/* This code could be optimized by putting the loop inside the switch
* statements. However, this is not expected to be
* performance-critical code.
*/
for (unsigned i = 0; i < elements; i++) {
+ int sidx = i * dmul;
+ int didx = i * rmul;
+
switch (returnType) {
case GLSL_TYPE_FLOAT:
switch (uni->type->base_type) {
case GLSL_TYPE_UINT:
- dst[i].f = (float) src[i].u;
+ dst[didx].f = (float) src[sidx].u;
break;
case GLSL_TYPE_INT:
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_IMAGE:
- dst[i].f = (float) src[i].i;
+ dst[didx].f = (float) src[sidx].i;
break;
case GLSL_TYPE_BOOL:
- dst[i].f = src[i].i ? 1.0f : 0.0f;
+ dst[didx].f = src[sidx].i ? 1.0f : 0.0f;
+ break;
+ case GLSL_TYPE_DOUBLE:
+ dst[didx].f = *(double *)&src[sidx].f;
+ break;
+ default:
+ assert(!"Should not get here.");
+ break;
+ }
+ break;
+ case GLSL_TYPE_DOUBLE:
+ switch (uni->type->base_type) {
+ case GLSL_TYPE_UINT:
+ *(double *)&dst[didx].f = (double) src[sidx].u;
+ break;
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_SAMPLER:
+ case GLSL_TYPE_IMAGE:
+ *(double *)&dst[didx].f = (double) src[sidx].i;
+ break;
+ case GLSL_TYPE_BOOL:
+ *(double *)&dst[didx].f = src[sidx].i ? 1.0f : 0.0f;
+ break;
+ case GLSL_TYPE_FLOAT:
+ *(double *)&dst[didx].f = (double) src[sidx].f;
break;
default:
assert(!"Should not get here.");
break;
}
break;
-
case GLSL_TYPE_INT:
case GLSL_TYPE_UINT:
switch (uni->type->base_type) {
@@ -413,10 +431,13 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location,
* a floating-point value is rounded to the
* nearest integer..."
*/
- dst[i].i = IROUND(src[i].f);
+ dst[didx].i = IROUND(src[sidx].f);
break;
case GLSL_TYPE_BOOL:
- dst[i].i = src[i].i ? 1 : 0;
+ dst[didx].i = src[sidx].i ? 1 : 0;
+ break;
+ case GLSL_TYPE_DOUBLE:
+ dst[didx].i = *(double *)&src[sidx].f;
break;
default:
assert(!"Should not get here.");
diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c
index 04cc81f9809..bc235380d97 100644
--- a/src/mesa/main/uniforms.c
+++ b/src/mesa/main/uniforms.c
@@ -1016,21 +1016,21 @@ _mesa_UniformBlockBinding(GLuint program,
return;
}
- if (shProg->UniformBlocks[uniformBlockIndex].Binding !=
+ if (shProg->BufferInterfaceBlocks[uniformBlockIndex].Binding !=
uniformBlockBinding) {
int i;
FLUSH_VERTICES(ctx, 0);
ctx->NewDriverState |= ctx->DriverFlags.NewUniformBuffer;
- shProg->UniformBlocks[uniformBlockIndex].Binding = uniformBlockBinding;
+ shProg->BufferInterfaceBlocks[uniformBlockIndex].Binding = uniformBlockBinding;
for (i = 0; i < MESA_SHADER_STAGES; i++) {
int stage_index = shProg->UniformBlockStageIndex[i][uniformBlockIndex];
if (stage_index != -1) {
struct gl_shader *sh = shProg->_LinkedShaders[i];
- sh->UniformBlocks[stage_index].Binding = uniformBlockBinding;
+ sh->BufferInterfaceBlocks[stage_index].Binding = uniformBlockBinding;
}
}
}
@@ -1069,21 +1069,21 @@ _mesa_ShaderStorageBlockBinding(GLuint program,
return;
}
- if (shProg->UniformBlocks[shaderStorageBlockIndex].Binding !=
+ if (shProg->BufferInterfaceBlocks[shaderStorageBlockIndex].Binding !=
shaderStorageBlockBinding) {
int i;
FLUSH_VERTICES(ctx, 0);
ctx->NewDriverState |= ctx->DriverFlags.NewShaderStorageBuffer;
- shProg->UniformBlocks[shaderStorageBlockIndex].Binding = shaderStorageBlockBinding;
+ shProg->BufferInterfaceBlocks[shaderStorageBlockIndex].Binding = shaderStorageBlockBinding;
for (i = 0; i < MESA_SHADER_STAGES; i++) {
int stage_index = shProg->UniformBlockStageIndex[i][shaderStorageBlockIndex];
if (stage_index != -1) {
struct gl_shader *sh = shProg->_LinkedShaders[i];
- sh->UniformBlocks[stage_index].Binding = shaderStorageBlockBinding;
+ sh->BufferInterfaceBlocks[stage_index].Binding = shaderStorageBlockBinding;
}
}
}
diff --git a/src/mesa/main/uniforms.h b/src/mesa/main/uniforms.h
index bec035cdc97..2f88b65043d 100644
--- a/src/mesa/main/uniforms.h
+++ b/src/mesa/main/uniforms.h
@@ -27,7 +27,7 @@
#define UNIFORMS_H
#include "main/glheader.h"
-#include "glsl/glsl_types.h"
+#include "glsl/nir/glsl_types.h"
#include "glsl/ir_uniform.h"
#include "program/prog_parameter.h"
diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c
index 498b2f867d0..5635a643200 100644
--- a/src/mesa/main/version.c
+++ b/src/mesa/main/version.c
@@ -24,6 +24,7 @@
#include <stdio.h>
+#include "context.h"
#include "imports.h"
#include "mtypes.h"
#include "version.h"
@@ -181,7 +182,23 @@ _mesa_override_gl_version(struct gl_context *ctx)
{
if (_mesa_override_gl_version_contextless(&ctx->Const, &ctx->API,
&ctx->Version)) {
- create_version_string(ctx, "");
+ /* We need to include the API in the version string for OpenGL ES, otherwise
+ * applications cannot detect GLES via a glGetString(GL_VERSION) query.
+ *
+ * From OpenGL ES 3.2 spec, Page 436:
+ *
+ * "The VERSION string is laid out as follows:
+ *
+ * OpenGL ES N.M vendor-specific information"
+ *
+ * From OpenGL 4.5 spec, Page 538:
+ *
+ * "The VERSION and SHADING_LANGUAGE_VERSION strings are laid out as
+ * follows:
+ *
+ * <version number><space><vendor-specific information>"
+ */
+ create_version_string(ctx, _mesa_is_gles(ctx) ? "OpenGL ES " : "");
}
}
diff --git a/src/mesa/program/Android.mk b/src/mesa/program/Android.mk
index ccb0fa5f32b..cc67f8aeadd 100644
--- a/src/mesa/program/Android.mk
+++ b/src/mesa/program/Android.mk
@@ -75,6 +75,7 @@ LOCAL_C_INCLUDES := \
$(MESA_TOP)/src/mapi \
$(MESA_TOP)/src/mesa \
$(MESA_TOP)/src/glsl \
+ $(MESA_TOP)/src/glsl/nir \
$(MESA_TOP)/src/gallium/auxiliary \
$(MESA_TOP)/src/gallium/include
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 0214b8e684c..1099d79d834 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -42,7 +42,7 @@
#include "glsl/ir_optimization.h"
#include "glsl/ir_uniform.h"
#include "glsl/glsl_parser_extras.h"
-#include "glsl/glsl_types.h"
+#include "glsl/nir/glsl_types.h"
#include "glsl/linker.h"
#include "glsl/program.h"
#include "program/hash_table.h"
diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c
index fc00534028f..539e3c05312 100644
--- a/src/mesa/program/prog_to_nir.c
+++ b/src/mesa/program/prog_to_nir.c
@@ -923,7 +923,7 @@ ptn_add_output_stores(struct ptn_compile *c)
{
nir_builder *b = &c->build;
- foreach_list_typed(nir_variable, var, node, &b->shader->outputs) {
+ nir_foreach_variable(var, &b->shader->outputs) {
nir_intrinsic_instr *store =
nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
store->num_components = glsl_get_vector_elements(var->type);
@@ -958,11 +958,10 @@ setup_registers_and_variables(struct ptn_compile *c)
for (int i = 0; i < num_inputs; i++) {
if (!(c->prog->InputsRead & BITFIELD64_BIT(i)))
continue;
- nir_variable *var = rzalloc(shader, nir_variable);
- var->type = glsl_vec4_type();
- var->data.read_only = true;
- var->data.mode = nir_var_shader_in;
- var->name = ralloc_asprintf(var, "in_%d", i);
+
+ nir_variable *var =
+ nir_variable_create(shader, nir_var_shader_in, glsl_vec4_type(),
+ ralloc_asprintf(shader, "in_%d", i));
var->data.location = i;
var->data.index = 0;
@@ -992,12 +991,9 @@ setup_registers_and_variables(struct ptn_compile *c)
nir_ssa_def *f001 = nir_vec4(b, &load_x->dest.ssa, nir_imm_float(b, 0.0),
nir_imm_float(b, 0.0), nir_imm_float(b, 1.0));
- nir_variable *fullvar = rzalloc(shader, nir_variable);
- fullvar->type = glsl_vec4_type();
- fullvar->data.mode = nir_var_local;
- fullvar->name = "fogcoord_tmp";
- exec_list_push_tail(&b->impl->locals, &fullvar->node);
-
+ nir_variable *fullvar =
+ nir_local_variable_create(b->impl, glsl_vec4_type(),
+ "fogcoord_tmp");
nir_intrinsic_instr *store =
nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
store->num_components = 4;
@@ -1005,17 +1001,15 @@ setup_registers_and_variables(struct ptn_compile *c)
store->src[0] = nir_src_for_ssa(f001);
nir_builder_instr_insert(b, &store->instr);
- /* Insert the real input into the list so the driver has real
- * inputs, but set c->input_vars[i] to the temporary so we use
+ /* We inserted the real input into the list so the driver has real
+ * inputs, but we set c->input_vars[i] to the temporary so we use
* the splatted value.
*/
- exec_list_push_tail(&shader->inputs, &var->node);
c->input_vars[i] = fullvar;
continue;
}
}
- exec_list_push_tail(&shader->inputs, &var->node);
c->input_vars[i] = var;
}
@@ -1135,6 +1129,12 @@ prog_to_nir(const struct gl_program *prog,
s->info.uses_clip_distance_out = false;
s->info.separate_shader = false;
+ if (stage == MESA_SHADER_FRAGMENT) {
+ struct gl_fragment_program *fp = (struct gl_fragment_program *)prog;
+
+ s->info.fs.uses_discard = fp->UsesKill;
+ }
+
fail:
if (c->error) {
ralloc_free(s);
diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index e94c1021258..0e78e6ab25d 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -173,57 +173,15 @@ _mesa_set_program_error(struct gl_context *ctx, GLint pos, const char *string)
/**
- * Find the line number and column for 'pos' within 'string'.
- * Return a copy of the line which contains 'pos'. Free the line with
- * free().
- * \param string the program string
- * \param pos the position within the string
- * \param line returns the line number corresponding to 'pos'.
- * \param col returns the column number corresponding to 'pos'.
- * \return copy of the line containing 'pos'.
- */
-const GLubyte *
-_mesa_find_line_column(const GLubyte *string, const GLubyte *pos,
- GLint *line, GLint *col)
-{
- const GLubyte *lineStart = string;
- const GLubyte *p = string;
- GLubyte *s;
- int len;
-
- *line = 1;
-
- while (p != pos) {
- if (*p == (GLubyte) '\n') {
- (*line)++;
- lineStart = p + 1;
- }
- p++;
- }
-
- *col = (pos - lineStart) + 1;
-
- /* return copy of this line */
- while (*p != 0 && *p != '\n')
- p++;
- len = p - lineStart;
- s = malloc(len + 1);
- memcpy(s, lineStart, len);
- s[len] = 0;
-
- return s;
-}
-
-
-/**
* Initialize a new gl_program object.
*/
-static void
-init_program_struct(struct gl_program *prog, GLenum target, GLuint id)
+struct gl_program *
+_mesa_init_gl_program(struct gl_program *prog, GLenum target, GLuint id)
{
GLuint i;
- assert(prog);
+ if (!prog)
+ return NULL;
memset(prog, 0, sizeof(*prog));
mtx_init(&prog->Mutex, mtx_plain);
@@ -235,102 +193,8 @@ init_program_struct(struct gl_program *prog, GLenum target, GLuint id)
/* default mapping from samplers to texture units */
for (i = 0; i < MAX_SAMPLERS; i++)
prog->SamplerUnits[i] = i;
-}
-
-
-/**
- * Initialize a new fragment program object.
- */
-struct gl_program *
-_mesa_init_fragment_program(struct gl_context *ctx,
- struct gl_fragment_program *prog,
- GLenum target, GLuint id)
-{
- if (prog) {
- init_program_struct(&prog->Base, target, id);
- return &prog->Base;
- }
- return NULL;
-}
-
-
-/**
- * Initialize a new vertex program object.
- */
-struct gl_program *
-_mesa_init_vertex_program(struct gl_context *ctx,
- struct gl_vertex_program *prog,
- GLenum target, GLuint id)
-{
- if (prog) {
- init_program_struct(&prog->Base, target, id);
- return &prog->Base;
- }
- return NULL;
-}
-
-
-/**
- * Initialize a new compute program object.
- */
-struct gl_program *
-_mesa_init_compute_program(struct gl_context *ctx,
- struct gl_compute_program *prog,
- GLenum target, GLuint id)
-{
- if (prog) {
- init_program_struct(&prog->Base, target, id);
- return &prog->Base;
- }
- return NULL;
-}
-
-
-/**
- * Initialize a new tessellation control program object.
- */
-struct gl_program *
-_mesa_init_tess_ctrl_program(struct gl_context *ctx,
- struct gl_tess_ctrl_program *prog,
- GLenum target, GLuint id)
-{
- if (prog) {
- init_program_struct(&prog->Base, target, id);
- return &prog->Base;
- }
- return NULL;
-}
-
-/**
- * Initialize a new tessellation evaluation program object.
- */
-struct gl_program *
-_mesa_init_tess_eval_program(struct gl_context *ctx,
- struct gl_tess_eval_program *prog,
- GLenum target, GLuint id)
-{
- if (prog) {
- init_program_struct(&prog->Base, target, id);
- return &prog->Base;
- }
- return NULL;
-}
-
-
-/**
- * Initialize a new geometry program object.
- */
-struct gl_program *
-_mesa_init_geometry_program(struct gl_context *ctx,
- struct gl_geometry_program *prog,
- GLenum target, GLuint id)
-{
- if (prog) {
- init_program_struct(&prog->Base, target, id);
- return &prog->Base;
- }
- return NULL;
+ return prog;
}
@@ -349,43 +213,36 @@ _mesa_init_geometry_program(struct gl_context *ctx,
struct gl_program *
_mesa_new_program(struct gl_context *ctx, GLenum target, GLuint id)
{
- struct gl_program *prog;
switch (target) {
- case GL_VERTEX_PROGRAM_ARB: /* == GL_VERTEX_PROGRAM_NV */
- prog = _mesa_init_vertex_program(ctx, CALLOC_STRUCT(gl_vertex_program),
- target, id );
- break;
+ case GL_VERTEX_PROGRAM_ARB: { /* == GL_VERTEX_PROGRAM_NV */
+ struct gl_vertex_program *prog = CALLOC_STRUCT(gl_vertex_program);
+ return _mesa_init_gl_program(&prog->Base, target, id);
+ }
case GL_FRAGMENT_PROGRAM_NV:
- case GL_FRAGMENT_PROGRAM_ARB:
- prog =_mesa_init_fragment_program(ctx,
- CALLOC_STRUCT(gl_fragment_program),
- target, id );
- break;
- case GL_GEOMETRY_PROGRAM_NV:
- prog = _mesa_init_geometry_program(ctx,
- CALLOC_STRUCT(gl_geometry_program),
- target, id);
- break;
- case GL_TESS_CONTROL_PROGRAM_NV:
- prog = _mesa_init_tess_ctrl_program(ctx,
- CALLOC_STRUCT(gl_tess_ctrl_program),
- target, id);
- break;
- case GL_TESS_EVALUATION_PROGRAM_NV:
- prog = _mesa_init_tess_eval_program(ctx,
- CALLOC_STRUCT(gl_tess_eval_program),
- target, id);
- break;
- case GL_COMPUTE_PROGRAM_NV:
- prog = _mesa_init_compute_program(ctx,
- CALLOC_STRUCT(gl_compute_program),
- target, id);
- break;
+ case GL_FRAGMENT_PROGRAM_ARB: {
+ struct gl_fragment_program *prog = CALLOC_STRUCT(gl_fragment_program);
+ return _mesa_init_gl_program(&prog->Base, target, id);
+ }
+ case GL_GEOMETRY_PROGRAM_NV: {
+ struct gl_geometry_program *prog = CALLOC_STRUCT(gl_geometry_program);
+ return _mesa_init_gl_program(&prog->Base, target, id);
+ }
+ case GL_TESS_CONTROL_PROGRAM_NV: {
+ struct gl_tess_ctrl_program *prog = CALLOC_STRUCT(gl_tess_ctrl_program);
+ return _mesa_init_gl_program(&prog->Base, target, id);
+ }
+ case GL_TESS_EVALUATION_PROGRAM_NV: {
+ struct gl_tess_eval_program *prog = CALLOC_STRUCT(gl_tess_eval_program);
+ return _mesa_init_gl_program(&prog->Base, target, id);
+ }
+ case GL_COMPUTE_PROGRAM_NV: {
+ struct gl_compute_program *prog = CALLOC_STRUCT(gl_compute_program);
+ return _mesa_init_gl_program(&prog->Base, target, id);
+ }
default:
_mesa_problem(ctx, "bad target in _mesa_new_program");
- prog = NULL;
+ return NULL;
}
- return prog;
}
@@ -494,123 +351,6 @@ _mesa_reference_program_(struct gl_context *ctx,
/**
- * Return a copy of a program.
- * XXX Problem here if the program object is actually OO-derivation
- * made by a device driver.
- */
-struct gl_program *
-_mesa_clone_program(struct gl_context *ctx, const struct gl_program *prog)
-{
- struct gl_program *clone;
-
- clone = ctx->Driver.NewProgram(ctx, prog->Target, prog->Id);
- if (!clone)
- return NULL;
-
- assert(clone->Target == prog->Target);
- assert(clone->RefCount == 1);
-
- clone->String = (GLubyte *) strdup((char *) prog->String);
- clone->Format = prog->Format;
- clone->Instructions = _mesa_alloc_instructions(prog->NumInstructions);
- if (!clone->Instructions) {
- _mesa_reference_program(ctx, &clone, NULL);
- return NULL;
- }
- _mesa_copy_instructions(clone->Instructions, prog->Instructions,
- prog->NumInstructions);
- clone->InputsRead = prog->InputsRead;
- clone->OutputsWritten = prog->OutputsWritten;
- clone->SamplersUsed = prog->SamplersUsed;
- clone->ShadowSamplers = prog->ShadowSamplers;
- memcpy(clone->TexturesUsed, prog->TexturesUsed, sizeof(prog->TexturesUsed));
-
- if (prog->Parameters)
- clone->Parameters = _mesa_clone_parameter_list(prog->Parameters);
- if (prog->LocalParams) {
- clone->LocalParams = malloc(MAX_PROGRAM_LOCAL_PARAMS *
- sizeof(float[4]));
- if (!clone->LocalParams) {
- _mesa_reference_program(ctx, &clone, NULL);
- return NULL;
- }
- memcpy(clone->LocalParams, prog->LocalParams,
- MAX_PROGRAM_LOCAL_PARAMS * sizeof(float[4]));
- }
- clone->IndirectRegisterFiles = prog->IndirectRegisterFiles;
- clone->NumInstructions = prog->NumInstructions;
- clone->NumTemporaries = prog->NumTemporaries;
- clone->NumParameters = prog->NumParameters;
- clone->NumAttributes = prog->NumAttributes;
- clone->NumAddressRegs = prog->NumAddressRegs;
- clone->NumNativeInstructions = prog->NumNativeInstructions;
- clone->NumNativeTemporaries = prog->NumNativeTemporaries;
- clone->NumNativeParameters = prog->NumNativeParameters;
- clone->NumNativeAttributes = prog->NumNativeAttributes;
- clone->NumNativeAddressRegs = prog->NumNativeAddressRegs;
- clone->NumAluInstructions = prog->NumAluInstructions;
- clone->NumTexInstructions = prog->NumTexInstructions;
- clone->NumTexIndirections = prog->NumTexIndirections;
- clone->NumNativeAluInstructions = prog->NumNativeAluInstructions;
- clone->NumNativeTexInstructions = prog->NumNativeTexInstructions;
- clone->NumNativeTexIndirections = prog->NumNativeTexIndirections;
-
- switch (prog->Target) {
- case GL_VERTEX_PROGRAM_ARB:
- {
- const struct gl_vertex_program *vp = gl_vertex_program_const(prog);
- struct gl_vertex_program *vpc = gl_vertex_program(clone);
- vpc->IsPositionInvariant = vp->IsPositionInvariant;
- }
- break;
- case GL_FRAGMENT_PROGRAM_ARB:
- {
- const struct gl_fragment_program *fp = gl_fragment_program_const(prog);
- struct gl_fragment_program *fpc = gl_fragment_program(clone);
- fpc->UsesKill = fp->UsesKill;
- fpc->UsesDFdy = fp->UsesDFdy;
- fpc->OriginUpperLeft = fp->OriginUpperLeft;
- fpc->PixelCenterInteger = fp->PixelCenterInteger;
- }
- break;
- case GL_GEOMETRY_PROGRAM_NV:
- {
- const struct gl_geometry_program *gp = gl_geometry_program_const(prog);
- struct gl_geometry_program *gpc = gl_geometry_program(clone);
- gpc->VerticesOut = gp->VerticesOut;
- gpc->InputType = gp->InputType;
- gpc->Invocations = gp->Invocations;
- gpc->OutputType = gp->OutputType;
- gpc->UsesEndPrimitive = gp->UsesEndPrimitive;
- gpc->UsesStreams = gp->UsesStreams;
- }
- break;
- case GL_TESS_CONTROL_PROGRAM_NV:
- {
- const struct gl_tess_ctrl_program *tcp = gl_tess_ctrl_program_const(prog);
- struct gl_tess_ctrl_program *tcpc = gl_tess_ctrl_program(clone);
- tcpc->VerticesOut = tcp->VerticesOut;
- }
- break;
- case GL_TESS_EVALUATION_PROGRAM_NV:
- {
- const struct gl_tess_eval_program *tep = gl_tess_eval_program_const(prog);
- struct gl_tess_eval_program *tepc = gl_tess_eval_program(clone);
- tepc->PrimitiveMode = tep->PrimitiveMode;
- tepc->Spacing = tep->Spacing;
- tepc->VertexOrder = tep->VertexOrder;
- tepc->PointMode = tep->PointMode;
- }
- break;
- default:
- _mesa_problem(NULL, "Unexpected target in _mesa_clone_program");
- }
-
- return clone;
-}
-
-
-/**
* Insert 'count' NOP instructions at 'start' in the given program.
* Adjust branch targets accordingly.
*/
@@ -707,190 +447,6 @@ _mesa_delete_instructions(struct gl_program *prog, GLuint start, GLuint count)
/**
- * Search instructions for registers that match (oldFile, oldIndex),
- * replacing them with (newFile, newIndex).
- */
-static void
-replace_registers(struct prog_instruction *inst, GLuint numInst,
- GLuint oldFile, GLuint oldIndex,
- GLuint newFile, GLuint newIndex)
-{
- GLuint i, j;
- for (i = 0; i < numInst; i++) {
- /* src regs */
- for (j = 0; j < _mesa_num_inst_src_regs(inst[i].Opcode); j++) {
- if (inst[i].SrcReg[j].File == oldFile &&
- inst[i].SrcReg[j].Index == oldIndex) {
- inst[i].SrcReg[j].File = newFile;
- inst[i].SrcReg[j].Index = newIndex;
- }
- }
- /* dst reg */
- if (inst[i].DstReg.File == oldFile && inst[i].DstReg.Index == oldIndex) {
- inst[i].DstReg.File = newFile;
- inst[i].DstReg.Index = newIndex;
- }
- }
-}
-
-
-/**
- * Search instructions for references to program parameters. When found,
- * increment the parameter index by 'offset'.
- * Used when combining programs.
- */
-static void
-adjust_param_indexes(struct prog_instruction *inst, GLuint numInst,
- GLuint offset)
-{
- GLuint i, j;
- for (i = 0; i < numInst; i++) {
- for (j = 0; j < _mesa_num_inst_src_regs(inst[i].Opcode); j++) {
- GLuint f = inst[i].SrcReg[j].File;
- if (f == PROGRAM_CONSTANT ||
- f == PROGRAM_UNIFORM ||
- f == PROGRAM_STATE_VAR) {
- inst[i].SrcReg[j].Index += offset;
- }
- }
- }
-}
-
-
-/**
- * Combine two programs into one. Fix instructions so the outputs of
- * the first program go to the inputs of the second program.
- */
-struct gl_program *
-_mesa_combine_programs(struct gl_context *ctx,
- const struct gl_program *progA,
- const struct gl_program *progB)
-{
- struct prog_instruction *newInst;
- struct gl_program *newProg;
- const GLuint lenA = progA->NumInstructions - 1; /* omit END instr */
- const GLuint lenB = progB->NumInstructions;
- const GLuint numParamsA = _mesa_num_parameters(progA->Parameters);
- const GLuint newLength = lenA + lenB;
- GLboolean usedTemps[MAX_PROGRAM_TEMPS];
- GLuint firstTemp = 0;
- GLbitfield64 inputsB;
- GLuint i;
-
- assert(progA->Target == progB->Target);
-
- newInst = _mesa_alloc_instructions(newLength);
- if (!newInst)
- return GL_FALSE;
-
- _mesa_copy_instructions(newInst, progA->Instructions, lenA);
- _mesa_copy_instructions(newInst + lenA, progB->Instructions, lenB);
-
- /* adjust branch / instruction addresses for B's instructions */
- for (i = 0; i < lenB; i++) {
- newInst[lenA + i].BranchTarget += lenA;
- }
-
- newProg = ctx->Driver.NewProgram(ctx, progA->Target, 0);
- newProg->Instructions = newInst;
- newProg->NumInstructions = newLength;
-
- /* find used temp regs (we may need new temps below) */
- _mesa_find_used_registers(newProg, PROGRAM_TEMPORARY,
- usedTemps, MAX_PROGRAM_TEMPS);
-
- if (newProg->Target == GL_FRAGMENT_PROGRAM_ARB) {
- const struct gl_fragment_program *fprogA, *fprogB;
- struct gl_fragment_program *newFprog;
- GLbitfield64 progB_inputsRead = progB->InputsRead;
- GLint progB_colorFile, progB_colorIndex;
-
- fprogA = gl_fragment_program_const(progA);
- fprogB = gl_fragment_program_const(progB);
- newFprog = gl_fragment_program(newProg);
-
- newFprog->UsesKill = fprogA->UsesKill || fprogB->UsesKill;
- newFprog->UsesDFdy = fprogA->UsesDFdy || fprogB->UsesDFdy;
-
- /* We'll do a search and replace for instances
- * of progB_colorFile/progB_colorIndex below...
- */
- progB_colorFile = PROGRAM_INPUT;
- progB_colorIndex = VARYING_SLOT_COL0;
-
- /*
- * The fragment program may get color from a state var rather than
- * a fragment input (vertex output) if it's constant.
- * See the texenvprogram.c code.
- * So, search the program's parameter list now to see if the program
- * gets color from a state var instead of a conventional fragment
- * input register.
- */
- for (i = 0; i < progB->Parameters->NumParameters; i++) {
- struct gl_program_parameter *p = &progB->Parameters->Parameters[i];
- if (p->Type == PROGRAM_STATE_VAR &&
- p->StateIndexes[0] == STATE_INTERNAL &&
- p->StateIndexes[1] == STATE_CURRENT_ATTRIB &&
- (int) p->StateIndexes[2] == (int) VERT_ATTRIB_COLOR0) {
- progB_inputsRead |= VARYING_BIT_COL0;
- progB_colorFile = PROGRAM_STATE_VAR;
- progB_colorIndex = i;
- break;
- }
- }
-
- /* Connect color outputs of fprogA to color inputs of fprogB, via a
- * new temporary register.
- */
- if ((progA->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_COLOR)) &&
- (progB_inputsRead & VARYING_BIT_COL0)) {
- GLint tempReg = _mesa_find_free_register(usedTemps, MAX_PROGRAM_TEMPS,
- firstTemp);
- if (tempReg < 0) {
- _mesa_problem(ctx, "No free temp regs found in "
- "_mesa_combine_programs(), using 31");
- tempReg = 31;
- }
- firstTemp = tempReg + 1;
-
- /* replace writes to result.color[0] with tempReg */
- replace_registers(newInst, lenA,
- PROGRAM_OUTPUT, FRAG_RESULT_COLOR,
- PROGRAM_TEMPORARY, tempReg);
- /* replace reads from the input color with tempReg */
- replace_registers(newInst + lenA, lenB,
- progB_colorFile, progB_colorIndex, /* search for */
- PROGRAM_TEMPORARY, tempReg /* replace with */ );
- }
-
- /* compute combined program's InputsRead */
- inputsB = progB_inputsRead;
- if (progA->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_COLOR)) {
- inputsB &= ~(1 << VARYING_SLOT_COL0);
- }
- newProg->InputsRead = progA->InputsRead | inputsB;
- newProg->OutputsWritten = progB->OutputsWritten;
- newProg->SamplersUsed = progA->SamplersUsed | progB->SamplersUsed;
- }
- else {
- /* vertex program */
- assert(0); /* XXX todo */
- }
-
- /*
- * Merge parameters (uniforms, constants, etc)
- */
- newProg->Parameters = _mesa_combine_parameter_lists(progA->Parameters,
- progB->Parameters);
-
- adjust_param_indexes(newInst + lenA, lenB, numParamsA);
-
-
- return newProg;
-}
-
-
-/**
* Populate the 'used' array with flags indicating which registers (TEMPs,
* INPUTs, OUTPUTs, etc, are used by the given program.
* \param file type of register to scan for
@@ -952,140 +508,6 @@ _mesa_find_free_register(const GLboolean used[],
}
-
-/**
- * Check if the given register index is valid (doesn't exceed implementation-
- * dependent limits).
- * \return GL_TRUE if OK, GL_FALSE if bad index
- */
-GLboolean
-_mesa_valid_register_index(const struct gl_context *ctx,
- gl_shader_stage shaderType,
- gl_register_file file, GLint index)
-{
- const struct gl_program_constants *c;
-
- assert(0 <= shaderType && shaderType < MESA_SHADER_STAGES);
- c = &ctx->Const.Program[shaderType];
-
- switch (file) {
- case PROGRAM_UNDEFINED:
- return GL_TRUE; /* XXX or maybe false? */
-
- case PROGRAM_TEMPORARY:
- return index >= 0 && index < (GLint) c->MaxTemps;
-
- case PROGRAM_UNIFORM:
- case PROGRAM_STATE_VAR:
- /* aka constant buffer */
- return index >= 0 && index < (GLint) c->MaxUniformComponents / 4;
-
- case PROGRAM_CONSTANT:
- /* constant buffer w/ possible relative negative addressing */
- return (index > (int) c->MaxUniformComponents / -4 &&
- index < (int) c->MaxUniformComponents / 4);
-
- case PROGRAM_INPUT:
- if (index < 0)
- return GL_FALSE;
-
- switch (shaderType) {
- case MESA_SHADER_VERTEX:
- return index < VERT_ATTRIB_GENERIC0 + (GLint) c->MaxAttribs;
- case MESA_SHADER_FRAGMENT:
- return index < VARYING_SLOT_VAR0 + (GLint) ctx->Const.MaxVarying;
- case MESA_SHADER_GEOMETRY:
- return index < VARYING_SLOT_VAR0 + (GLint) ctx->Const.MaxVarying;
- default:
- return GL_FALSE;
- }
-
- case PROGRAM_OUTPUT:
- if (index < 0)
- return GL_FALSE;
-
- switch (shaderType) {
- case MESA_SHADER_VERTEX:
- return index < VARYING_SLOT_VAR0 + (GLint) ctx->Const.MaxVarying;
- case MESA_SHADER_FRAGMENT:
- return index < FRAG_RESULT_DATA0 + (GLint) ctx->Const.MaxDrawBuffers;
- case MESA_SHADER_GEOMETRY:
- return index < VARYING_SLOT_VAR0 + (GLint) ctx->Const.MaxVarying;
- default:
- return GL_FALSE;
- }
-
- case PROGRAM_ADDRESS:
- return index >= 0 && index < (GLint) c->MaxAddressRegs;
-
- default:
- _mesa_problem(ctx,
- "unexpected register file in _mesa_valid_register_index()");
- return GL_FALSE;
- }
-}
-
-
-
-/**
- * "Post-process" a GPU program. This is intended to be used for debugging.
- * Example actions include no-op'ing instructions or changing instruction
- * behaviour.
- */
-void
-_mesa_postprocess_program(struct gl_context *ctx, struct gl_program *prog)
-{
- static const GLfloat white[4] = { 0.5, 0.5, 0.5, 0.5 };
- GLuint i;
- GLuint whiteSwizzle;
- GLint whiteIndex = _mesa_add_unnamed_constant(prog->Parameters,
- (gl_constant_value *) white,
- 4, &whiteSwizzle);
-
- (void) whiteIndex;
-
- for (i = 0; i < prog->NumInstructions; i++) {
- struct prog_instruction *inst = prog->Instructions + i;
- const GLuint n = _mesa_num_inst_src_regs(inst->Opcode);
-
- (void) n;
-
- if (_mesa_is_tex_instruction(inst->Opcode)) {
-#if 0
- /* replace TEX/TXP/TXB with MOV */
- inst->Opcode = OPCODE_MOV;
- inst->DstReg.WriteMask = WRITEMASK_XYZW;
- inst->SrcReg[0].Swizzle = SWIZZLE_XYZW;
- inst->SrcReg[0].Negate = NEGATE_NONE;
-#endif
-
-#if 0
- /* disable shadow texture mode */
- inst->TexShadow = 0;
-#endif
- }
-
- if (inst->Opcode == OPCODE_TXP) {
-#if 0
- inst->Opcode = OPCODE_MOV;
- inst->DstReg.WriteMask = WRITEMASK_XYZW;
- inst->SrcReg[0].File = PROGRAM_CONSTANT;
- inst->SrcReg[0].Index = whiteIndex;
- inst->SrcReg[0].Swizzle = SWIZZLE_XYZW;
- inst->SrcReg[0].Negate = NEGATE_NONE;
-#endif
-#if 0
- inst->TexShadow = 0;
-#endif
-#if 0
- inst->Opcode = OPCODE_TEX;
- inst->TexShadow = 0;
-#endif
- }
-
- }
-}
-
/* Gets the minimum number of shader invocations per fragment.
* This function is useful to determine if we need to do per
* sample shading or per fragment shading.
diff --git a/src/mesa/program/program.h b/src/mesa/program/program.h
index a894147cafd..24e05974dc3 100644
--- a/src/mesa/program/program.h
+++ b/src/mesa/program/program.h
@@ -63,40 +63,8 @@ _mesa_update_default_objects_program(struct gl_context *ctx);
extern void
_mesa_set_program_error(struct gl_context *ctx, GLint pos, const char *string);
-extern const GLubyte *
-_mesa_find_line_column(const GLubyte *string, const GLubyte *pos,
- GLint *line, GLint *col);
-
-
-extern struct gl_program *
-_mesa_init_vertex_program(struct gl_context *ctx,
- struct gl_vertex_program *prog,
- GLenum target, GLuint id);
-
-extern struct gl_program *
-_mesa_init_fragment_program(struct gl_context *ctx,
- struct gl_fragment_program *prog,
- GLenum target, GLuint id);
-
extern struct gl_program *
-_mesa_init_tess_ctrl_program(struct gl_context *ctx,
- struct gl_tess_ctrl_program *prog,
- GLenum target, GLuint id);
-
-extern struct gl_program *
-_mesa_init_tess_eval_program(struct gl_context *ctx,
- struct gl_tess_eval_program *prog,
- GLenum target, GLuint id);
-
-extern struct gl_program *
-_mesa_init_geometry_program(struct gl_context *ctx,
- struct gl_geometry_program *prog,
- GLenum target, GLuint id);
-
-extern struct gl_program *
-_mesa_init_compute_program(struct gl_context *ctx,
- struct gl_compute_program *prog,
- GLenum target, GLuint id);
+_mesa_init_gl_program(struct gl_program *prog, GLenum target, GLuint id);
extern struct gl_program *
_mesa_new_program(struct gl_context *ctx, GLenum target, GLuint id);
@@ -176,56 +144,12 @@ _mesa_reference_tesseprog(struct gl_context *ctx,
(struct gl_program *) prog);
}
-extern struct gl_program *
-_mesa_clone_program(struct gl_context *ctx, const struct gl_program *prog);
-
-static inline struct gl_vertex_program *
-_mesa_clone_vertex_program(struct gl_context *ctx,
- const struct gl_vertex_program *prog)
-{
- return (struct gl_vertex_program *) _mesa_clone_program(ctx, &prog->Base);
-}
-
-static inline struct gl_tess_ctrl_program *
-_mesa_clone_tess_ctrl_program(struct gl_context *ctx,
- const struct gl_tess_ctrl_program *prog)
-{
- return (struct gl_tess_ctrl_program *) _mesa_clone_program(ctx, &prog->Base);
-}
-
-static inline struct gl_tess_eval_program *
-_mesa_clone_tess_eval_program(struct gl_context *ctx,
- const struct gl_tess_eval_program *prog)
-{
- return (struct gl_tess_eval_program *) _mesa_clone_program(ctx, &prog->Base);
-}
-
-static inline struct gl_geometry_program *
-_mesa_clone_geometry_program(struct gl_context *ctx,
- const struct gl_geometry_program *prog)
-{
- return (struct gl_geometry_program *) _mesa_clone_program(ctx, &prog->Base);
-}
-
-static inline struct gl_fragment_program *
-_mesa_clone_fragment_program(struct gl_context *ctx,
- const struct gl_fragment_program *prog)
-{
- return (struct gl_fragment_program *) _mesa_clone_program(ctx, &prog->Base);
-}
-
-
extern GLboolean
_mesa_insert_instructions(struct gl_program *prog, GLuint start, GLuint count);
extern GLboolean
_mesa_delete_instructions(struct gl_program *prog, GLuint start, GLuint count);
-extern struct gl_program *
-_mesa_combine_programs(struct gl_context *ctx,
- const struct gl_program *progA,
- const struct gl_program *progB);
-
extern void
_mesa_find_used_registers(const struct gl_program *prog,
gl_register_file file,
@@ -235,15 +159,6 @@ extern GLint
_mesa_find_free_register(const GLboolean used[],
GLuint maxRegs, GLuint firstReg);
-
-extern GLboolean
-_mesa_valid_register_index(const struct gl_context *ctx,
- gl_shader_stage shaderType,
- gl_register_file file, GLint index);
-
-extern void
-_mesa_postprocess_program(struct gl_context *ctx, struct gl_program *prog);
-
extern GLint
_mesa_get_min_invocations_per_fragment(struct gl_context *ctx,
const struct gl_fragment_program *prog,
diff --git a/src/mesa/program/sampler.cpp b/src/mesa/program/sampler.cpp
index 1198a3c45f1..84e2504baba 100644
--- a/src/mesa/program/sampler.cpp
+++ b/src/mesa/program/sampler.cpp
@@ -24,7 +24,7 @@
*/
#include "main/mtypes.h"
-#include "glsl/glsl_types.h"
+#include "glsl/nir/glsl_types.h"
#include "glsl/ir.h"
#include "glsl/ir_uniform.h"
#include "glsl/ir_visitor.h"
diff --git a/src/mesa/state_tracker/st_atom_clip.c b/src/mesa/state_tracker/st_atom_clip.c
index 506a770499f..b820d843385 100644
--- a/src/mesa/state_tracker/st_atom_clip.c
+++ b/src/mesa/state_tracker/st_atom_clip.c
@@ -56,6 +56,9 @@ static void update_clip( struct st_context *st )
use_eye = TRUE;
}
+ /* _ClipUserPlane = _NEW_TRANSFORM | _NEW_PROJECTION
+ * EyeUserPlane = _NEW_TRANSFORM
+ */
memcpy(clip.ucp,
use_eye ? ctx->Transform.EyeUserPlane
: ctx->Transform._ClipUserPlane, sizeof(clip.ucp));
@@ -70,7 +73,7 @@ static void update_clip( struct st_context *st )
const struct st_tracked_state st_update_clip = {
"st_update_clip", /* name */
{ /* dirty */
- _NEW_TRANSFORM, /* mesa */
+ _NEW_TRANSFORM | _NEW_PROJECTION, /* mesa */
ST_NEW_VERTEX_PROGRAM, /* st */
},
update_clip /* update */
diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c
index 6affb4d84d5..acaa85d9356 100644
--- a/src/mesa/state_tracker/st_atom_constbuf.c
+++ b/src/mesa/state_tracker/st_atom_constbuf.c
@@ -238,7 +238,7 @@ static void st_bind_ubos(struct st_context *st,
struct gl_uniform_buffer_binding *binding;
struct st_buffer_object *st_obj;
- binding = &st->ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
+ binding = &st->ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];
st_obj = st_buffer_object(binding->BufferObject);
cb.buffer = st_obj->buffer;
diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c
index a04163cc137..f94c358afba 100644
--- a/src/mesa/state_tracker/st_atom_pixeltransfer.c
+++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c
@@ -25,65 +25,17 @@
*
**************************************************************************/
-/*
- * Generate fragment programs to implement pixel transfer ops, such as
- * scale/bias, colortable, convolution...
- *
- * Authors:
+/* Authors:
* Brian Paul
*/
-#include "main/imports.h"
-#include "main/image.h"
-#include "main/macros.h"
-#include "program/program.h"
-#include "program/prog_cache.h"
-#include "program/prog_instruction.h"
-#include "program/prog_parameter.h"
-#include "program/prog_print.h"
-
#include "st_context.h"
-#include "st_format.h"
#include "st_texture.h"
-#include "pipe/p_screen.h"
-#include "pipe/p_context.h"
#include "util/u_inlines.h"
#include "util/u_pack_color.h"
-struct state_key
-{
- GLuint scaleAndBias:1;
- GLuint pixelMaps:1;
-
-#if 0
- GLfloat Maps[3][256][4];
- int NumMaps;
- GLint NumStages;
- pipeline_stage Stages[STAGE_MAX];
- GLboolean StagesUsed[STAGE_MAX];
- GLfloat Scale1[4], Bias1[4];
- GLfloat Scale2[4], Bias2[4];
-#endif
-};
-
-static void
-make_state_key(struct gl_context *ctx, struct state_key *key)
-{
- memset(key, 0, sizeof(*key));
-
- if (ctx->Pixel.RedBias != 0.0 || ctx->Pixel.RedScale != 1.0 ||
- ctx->Pixel.GreenBias != 0.0 || ctx->Pixel.GreenScale != 1.0 ||
- ctx->Pixel.BlueBias != 0.0 || ctx->Pixel.BlueScale != 1.0 ||
- ctx->Pixel.AlphaBias != 0.0 || ctx->Pixel.AlphaScale != 1.0) {
- key->scaleAndBias = 1;
- }
-
- key->pixelMaps = ctx->Pixel.MapColorFlag;
-}
-
-
/**
* Update the pixelmap texture with the contents of the R/G/B/A pixel maps.
*/
@@ -128,74 +80,15 @@ load_color_map_texture(struct gl_context *ctx, struct pipe_resource *pt)
pipe_transfer_unmap(pipe, transfer);
}
-
-
-#define MAX_INST 100
-
/**
- * Returns a fragment program which implements the current pixel transfer ops.
+ * Upload the pixel transfer color map texture.
*/
-static struct gl_fragment_program *
-get_pixel_transfer_program(struct gl_context *ctx, const struct state_key *key)
+static void
+update_pixel_transfer(struct st_context *st)
{
- struct st_context *st = st_context(ctx);
- struct prog_instruction inst[MAX_INST];
- struct gl_program_parameter_list *params;
- struct gl_fragment_program *fp;
- GLuint ic = 0;
- const GLuint colorTemp = 0;
-
- fp = (struct gl_fragment_program *)
- ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
- if (!fp)
- return NULL;
-
- params = _mesa_new_parameter_list();
-
- /*
- * Get initial pixel color from the texture.
- * TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
- */
- _mesa_init_instructions(inst + ic, 1);
- inst[ic].Opcode = OPCODE_TEX;
- inst[ic].DstReg.File = PROGRAM_TEMPORARY;
- inst[ic].DstReg.Index = colorTemp;
- inst[ic].SrcReg[0].File = PROGRAM_INPUT;
- inst[ic].SrcReg[0].Index = VARYING_SLOT_TEX0;
- inst[ic].TexSrcUnit = 0;
- inst[ic].TexSrcTarget = TEXTURE_2D_INDEX;
- ic++;
- fp->Base.InputsRead = BITFIELD64_BIT(VARYING_SLOT_TEX0);
- fp->Base.OutputsWritten = BITFIELD64_BIT(FRAG_RESULT_COLOR);
- fp->Base.SamplersUsed = 0x1; /* sampler 0 (bit 0) is used */
-
- if (key->scaleAndBias) {
- static const gl_state_index scale_state[STATE_LENGTH] =
- { STATE_INTERNAL, STATE_PT_SCALE, 0, 0, 0 };
- static const gl_state_index bias_state[STATE_LENGTH] =
- { STATE_INTERNAL, STATE_PT_BIAS, 0, 0, 0 };
- GLint scale_p, bias_p;
-
- scale_p = _mesa_add_state_reference(params, scale_state);
- bias_p = _mesa_add_state_reference(params, bias_state);
-
- /* MAD colorTemp, colorTemp, scale, bias; */
- _mesa_init_instructions(inst + ic, 1);
- inst[ic].Opcode = OPCODE_MAD;
- inst[ic].DstReg.File = PROGRAM_TEMPORARY;
- inst[ic].DstReg.Index = colorTemp;
- inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
- inst[ic].SrcReg[0].Index = colorTemp;
- inst[ic].SrcReg[1].File = PROGRAM_STATE_VAR;
- inst[ic].SrcReg[1].Index = scale_p;
- inst[ic].SrcReg[2].File = PROGRAM_STATE_VAR;
- inst[ic].SrcReg[2].Index = bias_p;
- ic++;
- }
-
- if (key->pixelMaps) {
- const GLuint temp = 1;
+ struct gl_context *ctx = st->ctx;
+ if (ctx->Pixel.MapColorFlag) {
/* create the colormap/texture now if not already done */
if (!st->pixel_xfer.pixelmap_texture) {
st->pixel_xfer.pixelmap_texture = st_create_color_map_texture(ctx);
@@ -203,117 +96,11 @@ get_pixel_transfer_program(struct gl_context *ctx, const struct state_key *key)
st_create_texture_sampler_view(st->pipe,
st->pixel_xfer.pixelmap_texture);
}
-
- /* with a little effort, we can do four pixel map look-ups with
- * two TEX instructions:
- */
-
- /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
- _mesa_init_instructions(inst + ic, 1);
- inst[ic].Opcode = OPCODE_TEX;
- inst[ic].DstReg.File = PROGRAM_TEMPORARY;
- inst[ic].DstReg.Index = temp;
- inst[ic].DstReg.WriteMask = WRITEMASK_XY; /* write R,G */
- inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
- inst[ic].SrcReg[0].Index = colorTemp;
- inst[ic].TexSrcUnit = 1;
- inst[ic].TexSrcTarget = TEXTURE_2D_INDEX;
- ic++;
-
- /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
- _mesa_init_instructions(inst + ic, 1);
- inst[ic].Opcode = OPCODE_TEX;
- inst[ic].DstReg.File = PROGRAM_TEMPORARY;
- inst[ic].DstReg.Index = temp;
- inst[ic].DstReg.WriteMask = WRITEMASK_ZW; /* write B,A */
- inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
- inst[ic].SrcReg[0].Index = colorTemp;
- inst[ic].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W,
- SWIZZLE_Z, SWIZZLE_W);
- inst[ic].TexSrcUnit = 1;
- inst[ic].TexSrcTarget = TEXTURE_2D_INDEX;
- ic++;
-
- /* MOV colorTemp, temp; */
- _mesa_init_instructions(inst + ic, 1);
- inst[ic].Opcode = OPCODE_MOV;
- inst[ic].DstReg.File = PROGRAM_TEMPORARY;
- inst[ic].DstReg.Index = colorTemp;
- inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
- inst[ic].SrcReg[0].Index = temp;
- ic++;
-
- fp->Base.SamplersUsed |= (1 << 1); /* sampler 1 is used */
- }
-
- /* Modify last instruction's dst reg to write to result.color */
- {
- struct prog_instruction *last = &inst[ic - 1];
- last->DstReg.File = PROGRAM_OUTPUT;
- last->DstReg.Index = FRAG_RESULT_COLOR;
- }
-
- /* END; */
- _mesa_init_instructions(inst + ic, 1);
- inst[ic].Opcode = OPCODE_END;
- ic++;
-
- assert(ic <= MAX_INST);
-
-
- fp->Base.Instructions = _mesa_alloc_instructions(ic);
- if (!fp->Base.Instructions) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY,
- "generating pixel transfer program");
- _mesa_free_parameter_list(params);
- return NULL;
- }
-
- _mesa_copy_instructions(fp->Base.Instructions, inst, ic);
- fp->Base.NumInstructions = ic;
- fp->Base.Parameters = params;
-
-#if 0
- printf("========= pixel transfer prog\n");
- _mesa_print_program(&fp->Base);
- _mesa_print_parameter_list(fp->Base.Parameters);
-#endif
-
- return fp;
-}
-
-
-
-/**
- * Update st->pixel_xfer.program in response to new pixel-transfer state.
- */
-static void
-update_pixel_transfer(struct st_context *st)
-{
- struct gl_context *ctx = st->ctx;
- struct state_key key;
- struct gl_fragment_program *fp;
-
- make_state_key(st->ctx, &key);
-
- fp = (struct gl_fragment_program *)
- _mesa_search_program_cache(st->pixel_xfer.cache, &key, sizeof(key));
- if (!fp) {
- fp = get_pixel_transfer_program(st->ctx, &key);
- _mesa_program_cache_insert(st->ctx, st->pixel_xfer.cache,
- &key, sizeof(key), &fp->Base);
- }
-
- if (ctx->Pixel.MapColorFlag) {
load_color_map_texture(ctx, st->pixel_xfer.pixelmap_texture);
}
- st->pixel_xfer.pixelmap_enabled = ctx->Pixel.MapColorFlag;
-
- st->pixel_xfer.program = (struct st_fragment_program *) fp;
}
-
const struct st_tracked_state st_update_pixel_transfer = {
"st_update_pixel_transfer", /* name */
{ /* dirty */
diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c
index 230eba8c4a5..bb6dfe85644 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -108,151 +108,6 @@ struct bitmap_cache
/**
- * Make fragment program for glBitmap:
- * Sample the texture and kill the fragment if the bit is 0.
- * This program will be combined with the user's fragment program.
- */
-static struct st_fragment_program *
-make_bitmap_fragment_program(struct gl_context *ctx, GLuint samplerIndex)
-{
- struct st_context *st = st_context(ctx);
- struct st_fragment_program *stfp;
- struct gl_program *p;
- GLuint ic = 0;
-
- p = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
- if (!p)
- return NULL;
-
- p->NumInstructions = 3;
-
- p->Instructions = _mesa_alloc_instructions(p->NumInstructions);
- if (!p->Instructions) {
- ctx->Driver.DeleteProgram(ctx, p);
- return NULL;
- }
- _mesa_init_instructions(p->Instructions, p->NumInstructions);
-
- /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
- p->Instructions[ic].Opcode = OPCODE_TEX;
- p->Instructions[ic].DstReg.File = PROGRAM_TEMPORARY;
- p->Instructions[ic].DstReg.Index = 0;
- p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT;
- p->Instructions[ic].SrcReg[0].Index = VARYING_SLOT_TEX0;
- p->Instructions[ic].TexSrcUnit = samplerIndex;
- p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX;
- ic++;
-
- /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */
- p->Instructions[ic].Opcode = OPCODE_KIL;
- p->Instructions[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
-
- if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
- p->Instructions[ic].SrcReg[0].Swizzle = SWIZZLE_XXXX;
-
- p->Instructions[ic].SrcReg[0].Index = 0;
- p->Instructions[ic].SrcReg[0].Negate = NEGATE_XYZW;
- ic++;
-
- /* END; */
- p->Instructions[ic++].Opcode = OPCODE_END;
-
- assert(ic == p->NumInstructions);
-
- p->InputsRead = VARYING_BIT_TEX0;
- p->OutputsWritten = 0x0;
- p->SamplersUsed = (1 << samplerIndex);
-
- stfp = (struct st_fragment_program *) p;
- stfp->Base.UsesKill = GL_TRUE;
-
- return stfp;
-}
-
-
-static struct gl_program *
-make_bitmap_fragment_program_glsl(struct st_context *st,
- struct st_fragment_program *orig,
- GLuint samplerIndex)
-{
- struct gl_context *ctx = st->ctx;
- struct st_fragment_program *fp = (struct st_fragment_program *)
- ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
-
- if (!fp)
- return NULL;
-
- get_bitmap_visitor(fp, orig->glsl_to_tgsi, samplerIndex);
- return &fp->Base.Base;
-}
-
-
-static int
-find_free_bit(uint bitfield)
-{
- int i;
- for (i = 0; i < 32; i++) {
- if ((bitfield & (1 << i)) == 0) {
- return i;
- }
- }
- return -1;
-}
-
-
-/**
- * Combine basic bitmap fragment program with the user-defined program.
- * \param st current context
- * \param fpIn the incoming fragment program
- * \param fpOut the new fragment program which does fragment culling
- * \param bitmap_sampler sampler number for the bitmap texture
- */
-void
-st_make_bitmap_fragment_program(struct st_context *st,
- struct gl_fragment_program *fpIn,
- struct gl_fragment_program **fpOut,
- GLuint *bitmap_sampler)
-{
- struct st_fragment_program *bitmap_prog;
- struct st_fragment_program *stfpIn = (struct st_fragment_program *) fpIn;
- struct gl_program *newProg;
- uint sampler;
-
- /*
- * Generate new program which is the user-defined program prefixed
- * with the bitmap sampler/kill instructions.
- */
- sampler = find_free_bit(fpIn->Base.SamplersUsed);
-
- if (stfpIn->glsl_to_tgsi)
- newProg = make_bitmap_fragment_program_glsl(st, stfpIn, sampler);
- else {
- bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler);
-
- newProg = _mesa_combine_programs(st->ctx,
- &bitmap_prog->Base.Base,
- &fpIn->Base);
- /* done with this after combining */
- st_reference_fragprog(st, &bitmap_prog, NULL);
- }
-
-#if 0
- {
- printf("Combined bitmap program:\n");
- _mesa_print_program(newProg);
- printf("InputsRead: 0x%x\n", newProg->InputsRead);
- printf("OutputsWritten: 0x%x\n", newProg->OutputsWritten);
- _mesa_print_parameter_list(newProg->Parameters);
- }
-#endif
-
- /* return results */
- *fpOut = (struct gl_fragment_program *) newProg;
- *bitmap_sampler = sampler;
-}
-
-
-/**
* Copy user-provide bitmap bits into texture buffer, expanding
* bits into texels.
* "On" bits will set texels to 0x0.
diff --git a/src/mesa/state_tracker/st_cb_bitmap.h b/src/mesa/state_tracker/st_cb_bitmap.h
index b4254ca1eeb..dc7e5cb5c9e 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.h
+++ b/src/mesa/state_tracker/st_cb_bitmap.h
@@ -31,6 +31,7 @@
#include "main/compiler.h"
+#include <stdbool.h>
struct dd_function_table;
struct st_context;
@@ -47,13 +48,11 @@ extern void
st_destroy_bitmap(struct st_context *st);
extern void
-st_make_bitmap_fragment_program(struct st_context *st,
- struct gl_fragment_program *fpIn,
- struct gl_fragment_program **fpOut,
- GLuint *bitmap_sampler);
-
-extern void
st_flush_bitmap_cache(struct st_context *st);
+extern const struct tgsi_token *
+st_get_bitmap_shader(const struct tgsi_token *tokens,
+ unsigned sampler_index,
+ bool use_texcoord, bool swizzle_xxxx);
#endif /* ST_CB_BITMAP_H */
diff --git a/src/mesa/state_tracker/st_cb_bitmap_shader.c b/src/mesa/state_tracker/st_cb_bitmap_shader.c
new file mode 100644
index 00000000000..cddea36d4f6
--- /dev/null
+++ b/src/mesa/state_tracker/st_cb_bitmap_shader.c
@@ -0,0 +1,174 @@
+/**************************************************************************
+ *
+ * Copyright (C) 2015 Advanced Micro Devices, Inc.
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "st_cb_bitmap.h"
+#include "tgsi/tgsi_transform.h"
+#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_dump.h"
+#include "util/u_debug.h"
+
+struct tgsi_bitmap_transform {
+ struct tgsi_transform_context base;
+ struct tgsi_shader_info info;
+ unsigned sampler_index;
+ bool use_texcoord;
+ bool swizzle_xxxx;
+ bool first_instruction_emitted;
+};
+
+static inline struct tgsi_bitmap_transform *
+tgsi_bitmap_transform(struct tgsi_transform_context *tctx)
+{
+ return (struct tgsi_bitmap_transform *)tctx;
+}
+
+static void
+transform_instr(struct tgsi_transform_context *tctx,
+ struct tgsi_full_instruction *current_inst)
+{
+ struct tgsi_bitmap_transform *ctx = tgsi_bitmap_transform(tctx);
+ struct tgsi_full_declaration decl;
+ struct tgsi_full_instruction inst;
+ unsigned i, semantic;
+ int texcoord_index = -1;
+
+ if (ctx->first_instruction_emitted) {
+ tctx->emit_instruction(tctx, current_inst);
+ return;
+ }
+
+ ctx->first_instruction_emitted = true;
+
+ /* Add TEMP[0] if it's missing. */
+ if (ctx->info.file_max[TGSI_FILE_TEMPORARY] == -1) {
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_TEMPORARY;
+ tctx->emit_declaration(tctx, &decl);
+ }
+
+ /* Add TEXCOORD[0] (GENERIC[0] if !use_texcoord) input if it's missing. */
+ semantic = ctx->use_texcoord ? TGSI_SEMANTIC_TEXCOORD :
+ TGSI_SEMANTIC_GENERIC;
+ for (i = 0; i < ctx->info.num_inputs; i++) {
+ if (ctx->info.input_semantic_name[i] == semantic &&
+ ctx->info.input_semantic_index[i] == 0) {
+ texcoord_index = i;
+ break;
+ }
+ }
+
+ if (texcoord_index == -1) {
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_INPUT;
+ decl.Declaration.Semantic = 1;
+ decl.Semantic.Name = semantic;
+ decl.Declaration.Interpolate = 1;
+ decl.Interp.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
+ decl.Range.First = decl.Range.Last = ctx->info.num_inputs;
+ texcoord_index = ctx->info.num_inputs;
+ tctx->emit_declaration(tctx, &decl);
+ }
+
+ /* Declare the sampler. */
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_SAMPLER;
+ decl.Range.First = decl.Range.Last = ctx->sampler_index;
+ tctx->emit_declaration(tctx, &decl);
+
+ /* TEX tmp0, fragment.texcoord[0], texture[sampler_index], 2D; */
+ inst = tgsi_default_full_instruction();
+ inst.Instruction.Opcode = TGSI_OPCODE_TEX;
+ inst.Instruction.Texture = 1;
+ inst.Texture.Texture = TGSI_TEXTURE_2D;
+
+ inst.Instruction.NumDstRegs = 1;
+ inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+ inst.Dst[0].Register.Index = 0;
+ inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
+
+ inst.Instruction.NumSrcRegs = 2;
+ inst.Src[0].Register.File = TGSI_FILE_INPUT;
+ inst.Src[0].Register.Index = texcoord_index;
+ inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
+ inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y;
+ inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Z;
+ inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_W;
+ inst.Src[1].Register.File = TGSI_FILE_SAMPLER;
+ inst.Src[1].Register.Index = ctx->sampler_index;
+
+ tctx->emit_instruction(tctx, &inst);
+
+ /* KIL if -tmp0 < 0 # texel=0 -> keep / texel>0 -> discard */
+ inst = tgsi_default_full_instruction();
+ inst.Instruction.Opcode = TGSI_OPCODE_KILL_IF;
+ inst.Instruction.NumDstRegs = 0;
+ inst.Instruction.NumSrcRegs = 1;
+
+ inst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
+ inst.Src[0].Register.Index = 0;
+ inst.Src[0].Register.Negate = 1;
+ inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
+ if (ctx->swizzle_xxxx) {
+ inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
+ inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
+ inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
+ } else {
+ inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y;
+ inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Z;
+ inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_W;
+ }
+ tctx->emit_instruction(tctx, &inst);
+
+ /* And emit the instruction we got. */
+ tctx->emit_instruction(tctx, current_inst);
+}
+
+const struct tgsi_token *
+st_get_bitmap_shader(const struct tgsi_token *tokens,
+ unsigned sampler_index,
+ bool use_texcoord, bool swizzle_xxxx)
+{
+ struct tgsi_bitmap_transform ctx;
+ struct tgsi_token *newtoks;
+ int newlen;
+
+ memset(&ctx, 0, sizeof(ctx));
+ ctx.base.transform_instruction = transform_instr;
+ ctx.sampler_index = sampler_index;
+ ctx.use_texcoord = use_texcoord;
+ ctx.swizzle_xxxx = swizzle_xxxx;
+ tgsi_scan_shader(tokens, &ctx.info);
+
+ newlen = tgsi_num_tokens(tokens) + 20;
+ newtoks = tgsi_alloc_tokens(newlen);
+ if (!newtoks)
+ return NULL;
+
+ tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
+ return newtoks;
+}
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index 152160e1dd2..7e8633edc1a 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -72,217 +72,74 @@
/**
- * Check if the given program is:
- * 0: MOVE result.color, fragment.color;
- * 1: END;
- */
-static GLboolean
-is_passthrough_program(const struct gl_fragment_program *prog)
-{
- if (prog->Base.NumInstructions == 2) {
- const struct prog_instruction *inst = prog->Base.Instructions;
- if (inst[0].Opcode == OPCODE_MOV &&
- inst[1].Opcode == OPCODE_END &&
- inst[0].DstReg.File == PROGRAM_OUTPUT &&
- inst[0].DstReg.Index == FRAG_RESULT_COLOR &&
- inst[0].DstReg.WriteMask == WRITEMASK_XYZW &&
- inst[0].SrcReg[0].File == PROGRAM_INPUT &&
- inst[0].SrcReg[0].Index == VARYING_SLOT_COL0 &&
- inst[0].SrcReg[0].Swizzle == SWIZZLE_XYZW) {
- return GL_TRUE;
- }
- }
- return GL_FALSE;
-}
-
-
-/**
- * Returns a fragment program which implements the current pixel transfer ops.
- */
-static struct gl_fragment_program *
-get_glsl_pixel_transfer_program(struct st_context *st,
- struct st_fragment_program *orig)
-{
- int pixelMaps = 0, scaleAndBias = 0;
- struct gl_context *ctx = st->ctx;
- struct st_fragment_program *fp = (struct st_fragment_program *)
- ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
-
- if (!fp)
- return NULL;
-
- if (ctx->Pixel.RedBias != 0.0 || ctx->Pixel.RedScale != 1.0 ||
- ctx->Pixel.GreenBias != 0.0 || ctx->Pixel.GreenScale != 1.0 ||
- ctx->Pixel.BlueBias != 0.0 || ctx->Pixel.BlueScale != 1.0 ||
- ctx->Pixel.AlphaBias != 0.0 || ctx->Pixel.AlphaScale != 1.0) {
- scaleAndBias = 1;
- }
-
- pixelMaps = ctx->Pixel.MapColorFlag;
-
- if (pixelMaps) {
- /* create the colormap/texture now if not already done */
- if (!st->pixel_xfer.pixelmap_texture) {
- st->pixel_xfer.pixelmap_texture = st_create_color_map_texture(ctx);
- st->pixel_xfer.pixelmap_sampler_view =
- st_create_texture_sampler_view(st->pipe,
- st->pixel_xfer.pixelmap_texture);
- }
- }
-
- get_pixel_transfer_visitor(fp, orig->glsl_to_tgsi,
- scaleAndBias, pixelMaps);
-
- return &fp->Base;
-}
-
-
-/**
- * Make fragment shader for glDraw/CopyPixels. This shader is made
- * by combining the pixel transfer shader with the user-defined shader.
- * \param fpIn the current/incoming fragment program
- * \param fpOut returns the combined fragment program
- */
-void
-st_make_drawpix_fragment_program(struct st_context *st,
- struct gl_fragment_program *fpIn,
- struct gl_fragment_program **fpOut)
-{
- struct gl_program *newProg;
- struct st_fragment_program *stfp = (struct st_fragment_program *) fpIn;
-
- if (is_passthrough_program(fpIn)) {
- newProg = (struct gl_program *) _mesa_clone_fragment_program(st->ctx,
- &st->pixel_xfer.program->Base);
- }
- else if (stfp->glsl_to_tgsi != NULL) {
- newProg = (struct gl_program *) get_glsl_pixel_transfer_program(st, stfp);
- }
- else {
-#if 0
- /* debug */
- printf("Base program:\n");
- _mesa_print_program(&fpIn->Base);
- printf("DrawPix program:\n");
- _mesa_print_program(&st->pixel_xfer.program->Base.Base);
-#endif
- newProg = _mesa_combine_programs(st->ctx,
- &st->pixel_xfer.program->Base.Base,
- &fpIn->Base);
- }
-
-#if 0
- /* debug */
- printf("Combined DrawPixels program:\n");
- _mesa_print_program(newProg);
- printf("InputsRead: 0x%x\n", newProg->InputsRead);
- printf("OutputsWritten: 0x%x\n", newProg->OutputsWritten);
- _mesa_print_parameter_list(newProg->Parameters);
-#endif
-
- *fpOut = (struct gl_fragment_program *) newProg;
-}
-
-
-/**
* Create fragment program that does a TEX() instruction to get a Z and/or
* stencil value value, then writes to FRAG_RESULT_DEPTH/FRAG_RESULT_STENCIL.
* Used for glDrawPixels(GL_DEPTH_COMPONENT / GL_STENCIL_INDEX).
* Pass fragment color through as-is.
- * \return pointer to the gl_fragment program
+ *
+ * \return CSO of the fragment shader.
*/
-struct gl_fragment_program *
-st_make_drawpix_z_stencil_program(struct st_context *st,
- GLboolean write_depth,
- GLboolean write_stencil)
+static void *
+get_drawpix_z_stencil_program(struct st_context *st,
+ GLboolean write_depth,
+ GLboolean write_stencil)
{
- struct gl_context *ctx = st->ctx;
- struct gl_program *p;
- struct gl_fragment_program *fp;
- GLuint ic = 0;
+ struct ureg_program *ureg;
+ struct ureg_src depth_sampler, stencil_sampler;
+ struct ureg_src texcoord, color;
+ struct ureg_dst out_color, out_depth, out_stencil;
const GLuint shaderIndex = write_depth * 2 + write_stencil;
+ void *cso;
- assert(shaderIndex < ARRAY_SIZE(st->drawpix.shaders));
+ assert(shaderIndex < ARRAY_SIZE(st->drawpix.zs_shaders));
- if (st->drawpix.shaders[shaderIndex]) {
+ if (st->drawpix.zs_shaders[shaderIndex]) {
/* already have the proper shader */
- return st->drawpix.shaders[shaderIndex];
+ return st->drawpix.zs_shaders[shaderIndex];
}
- /*
- * Create shader now
- */
- p = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
- if (!p)
+ ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+ if (ureg == NULL)
return NULL;
- p->NumInstructions = write_depth ? 3 : 1;
- p->NumInstructions += write_stencil ? 1 : 0;
-
- p->Instructions = _mesa_alloc_instructions(p->NumInstructions);
- if (!p->Instructions) {
- ctx->Driver.DeleteProgram(ctx, p);
- return NULL;
- }
- _mesa_init_instructions(p->Instructions, p->NumInstructions);
+ ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, TRUE);
if (write_depth) {
- /* TEX result.depth, fragment.texcoord[0], texture[0], 2D; */
- p->Instructions[ic].Opcode = OPCODE_TEX;
- p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT;
- p->Instructions[ic].DstReg.Index = FRAG_RESULT_DEPTH;
- p->Instructions[ic].DstReg.WriteMask = WRITEMASK_Z;
- p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT;
- p->Instructions[ic].SrcReg[0].Index = VARYING_SLOT_TEX0;
- p->Instructions[ic].TexSrcUnit = 0;
- p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX;
- ic++;
- /* MOV result.color, fragment.color; */
- p->Instructions[ic].Opcode = OPCODE_MOV;
- p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT;
- p->Instructions[ic].DstReg.Index = FRAG_RESULT_COLOR;
- p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT;
- p->Instructions[ic].SrcReg[0].Index = VARYING_SLOT_COL0;
- ic++;
+ color = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0,
+ TGSI_INTERPOLATE_COLOR);
+ out_color = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
+
+ depth_sampler = ureg_DECL_sampler(ureg, 0);
+ out_depth = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
}
if (write_stencil) {
- /* TEX result.stencil, fragment.texcoord[0], texture[0], 2D; */
- p->Instructions[ic].Opcode = OPCODE_TEX;
- p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT;
- p->Instructions[ic].DstReg.Index = FRAG_RESULT_STENCIL;
- p->Instructions[ic].DstReg.WriteMask = WRITEMASK_Y;
- p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT;
- p->Instructions[ic].SrcReg[0].Index = VARYING_SLOT_TEX0;
- p->Instructions[ic].TexSrcUnit = 1;
- p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX;
- ic++;
+ stencil_sampler = ureg_DECL_sampler(ureg, 1);
+ out_stencil = ureg_DECL_output(ureg, TGSI_SEMANTIC_STENCIL, 0);
}
- /* END; */
- p->Instructions[ic++].Opcode = OPCODE_END;
-
- assert(ic == p->NumInstructions);
+ texcoord = ureg_DECL_fs_input(ureg,
+ st->needs_texcoord_semantic ?
+ TGSI_SEMANTIC_TEXCOORD :
+ TGSI_SEMANTIC_GENERIC,
+ 0, TGSI_INTERPOLATE_LINEAR);
- p->InputsRead = VARYING_BIT_TEX0 | VARYING_BIT_COL0;
- p->OutputsWritten = 0;
if (write_depth) {
- p->OutputsWritten |= BITFIELD64_BIT(FRAG_RESULT_DEPTH);
- p->OutputsWritten |= BITFIELD64_BIT(FRAG_RESULT_COLOR);
+ ureg_TEX(ureg, ureg_writemask(out_depth, TGSI_WRITEMASK_Z),
+ TGSI_TEXTURE_2D, texcoord, depth_sampler);
+ ureg_MOV(ureg, out_color, color);
}
- if (write_stencil)
- p->OutputsWritten |= BITFIELD64_BIT(FRAG_RESULT_STENCIL);
- p->SamplersUsed = 0x1; /* sampler 0 (bit 0) is used */
if (write_stencil)
- p->SamplersUsed |= 1 << 1;
+ ureg_TEX(ureg, ureg_writemask(out_stencil, TGSI_WRITEMASK_Y),
+ TGSI_TEXTURE_2D, texcoord, stencil_sampler);
- fp = (struct gl_fragment_program *) p;
+ ureg_END(ureg);
+ cso = ureg_create_shader_and_destroy(ureg, st->pipe);
/* save the new shader */
- st->drawpix.shaders[shaderIndex] = fp;
-
- return fp;
+ st->drawpix.zs_shaders[shaderIndex] = cso;
+ return cso;
}
@@ -668,6 +525,7 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
int num_sampler_view,
void *driver_vp,
void *driver_fp,
+ struct st_fp_variant *fpv,
const GLfloat *color,
GLboolean invertTex,
GLboolean write_depth, GLboolean write_stencil)
@@ -755,10 +613,9 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
cso_set_tesseval_shader_handle(cso, NULL);
cso_set_geometry_shader_handle(cso, NULL);
- /* texture sampling state: */
+ /* user samplers, plus the drawpix samplers */
{
struct pipe_sampler_state sampler;
- const struct pipe_sampler_state *states[2] = {&sampler, &sampler};
memset(&sampler, 0, sizeof(sampler));
sampler.wrap_s = PIPE_TEX_WRAP_CLAMP;
@@ -769,8 +626,25 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
sampler.normalized_coords = normalized;
- cso_set_samplers(cso, PIPE_SHADER_FRAGMENT,
- num_sampler_view > 1 ? 2 : 1, states);
+ if (fpv) {
+ const struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS];
+ uint num = MAX2(MAX2(fpv->drawpix_sampler, fpv->pixelmap_sampler) + 1,
+ st->state.num_samplers[PIPE_SHADER_FRAGMENT]);
+ uint i;
+
+ for (i = 0; i < st->state.num_samplers[PIPE_SHADER_FRAGMENT]; i++)
+ samplers[i] = &st->state.samplers[PIPE_SHADER_FRAGMENT][i];
+
+ samplers[fpv->drawpix_sampler] = &sampler;
+ if (sv[1])
+ samplers[fpv->pixelmap_sampler] = &sampler;
+
+ cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, num, samplers);
+ } else {
+ const struct pipe_sampler_state *samplers[2] = {&sampler, &sampler};
+
+ cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, num_sampler_view, samplers);
+ }
}
/* viewport state: viewport matching window dims */
@@ -790,8 +664,21 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
cso_set_vertex_elements(cso, 3, st->velems_util_draw);
cso_set_stream_outputs(st->cso_context, 0, NULL, NULL);
- /* texture state: */
- cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, num_sampler_view, sv);
+ /* user textures, plus the drawpix textures */
+ if (fpv) {
+ struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS];
+ uint num = MAX2(MAX2(fpv->drawpix_sampler, fpv->pixelmap_sampler) + 1,
+ st->state.num_sampler_views[PIPE_SHADER_FRAGMENT]);
+
+ memcpy(sampler_views, st->state.sampler_views[PIPE_SHADER_FRAGMENT],
+ sizeof(sampler_views));
+
+ sampler_views[fpv->drawpix_sampler] = sv[0];
+ if (sv[1])
+ sampler_views[fpv->pixelmap_sampler] = sv[1];
+ cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, num, sampler_views);
+ } else
+ cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, num_sampler_view, sv);
/* Compute Gallium window coords (y=0=top) with pixel zoom.
* Recall that these coords are transformed by the current
@@ -1048,30 +935,6 @@ get_color_fp_variant(struct st_context *st)
/**
- * Get fragment program variant for a glDrawPixels or glCopyPixels
- * command for depth/stencil data.
- */
-static struct st_fp_variant *
-get_depth_stencil_fp_variant(struct st_context *st, GLboolean write_depth,
- GLboolean write_stencil)
-{
- struct st_fp_variant_key key;
- struct st_fp_variant *fpv;
-
- memset(&key, 0, sizeof(key));
-
- key.st = st;
- key.drawpixels = 1;
- key.drawpixels_z = write_depth;
- key.drawpixels_stencil = write_stencil;
-
- fpv = st_get_fp_variant(st, st->fp, &key);
-
- return fpv;
-}
-
-
-/**
* Clamp glDrawPixels width and height to the maximum texture size.
*/
static void
@@ -1109,8 +972,8 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
GLboolean write_stencil = GL_FALSE, write_depth = GL_FALSE;
struct pipe_sampler_view *sv[2] = { NULL };
int num_sampler_view = 1;
- struct st_fp_variant *fpv;
struct gl_pixelstore_attrib clippedUnpack;
+ struct st_fp_variant *fpv = NULL;
/* Mesa state should be up to date by now */
assert(ctx->NewState == 0x0);
@@ -1144,31 +1007,27 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
* Get vertex/fragment shaders
*/
if (write_depth || write_stencil) {
- fpv = get_depth_stencil_fp_variant(st, write_depth, write_stencil);
-
- driver_fp = fpv->driver_shader;
-
+ driver_fp = get_drawpix_z_stencil_program(st, write_depth,
+ write_stencil);
driver_vp = make_passthrough_vertex_shader(st, GL_TRUE);
-
color = ctx->Current.RasterColor;
}
else {
fpv = get_color_fp_variant(st);
driver_fp = fpv->driver_shader;
-
driver_vp = make_passthrough_vertex_shader(st, GL_FALSE);
color = NULL;
- if (st->pixel_xfer.pixelmap_enabled) {
+ if (ctx->Pixel.MapColorFlag) {
pipe_sampler_view_reference(&sv[1],
st->pixel_xfer.pixelmap_sampler_view);
num_sampler_view++;
}
- }
- /* update fragment program constants */
- st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT);
+ /* update fragment program constants */
+ st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT);
+ }
/* draw with textured quad */
{
@@ -1197,7 +1056,7 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
sv,
num_sampler_view,
driver_vp,
- driver_fp,
+ driver_fp, fpv,
color, GL_FALSE, write_depth, write_stencil);
pipe_sampler_view_reference(&sv[0], NULL);
if (num_sampler_view > 1)
@@ -1452,6 +1311,7 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
void *driver_vp, *driver_fp;
struct pipe_resource *pt;
struct pipe_sampler_view *sv[2] = { NULL };
+ struct st_fp_variant *fpv = NULL;
int num_sampler_view = 1;
GLfloat *color;
enum pipe_format srcFormat;
@@ -1459,7 +1319,6 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
GLboolean invertTex = GL_FALSE;
GLint readX, readY, readW, readH;
struct gl_pixelstore_attrib pack = ctx->DefaultPacking;
- struct st_fp_variant *fpv;
st_validate_state(st);
@@ -1491,19 +1350,22 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
* Get vertex/fragment shaders
*/
if (type == GL_COLOR) {
+ fpv = get_color_fp_variant(st);
+
rbRead = st_get_color_read_renderbuffer(ctx);
color = NULL;
- fpv = get_color_fp_variant(st);
driver_fp = fpv->driver_shader;
-
driver_vp = make_passthrough_vertex_shader(st, GL_FALSE);
- if (st->pixel_xfer.pixelmap_enabled) {
+ if (ctx->Pixel.MapColorFlag) {
pipe_sampler_view_reference(&sv[1],
st->pixel_xfer.pixelmap_sampler_view);
num_sampler_view++;
}
+
+ /* update fragment program constants */
+ st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT);
}
else {
assert(type == GL_DEPTH);
@@ -1511,15 +1373,10 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
Attachment[BUFFER_DEPTH].Renderbuffer);
color = ctx->Current.Attrib[VERT_ATTRIB_COLOR0];
- fpv = get_depth_stencil_fp_variant(st, GL_TRUE, GL_FALSE);
- driver_fp = fpv->driver_shader;
-
+ driver_fp = get_drawpix_z_stencil_program(st, GL_TRUE, GL_FALSE);
driver_vp = make_passthrough_vertex_shader(st, GL_TRUE);
}
- /* update fragment program constants */
- st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT);
-
/* Choose the format for the temporary texture. */
srcFormat = rbRead->texture->format;
srcBind = PIPE_BIND_SAMPLER_VIEW |
@@ -1645,7 +1502,7 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
sv,
num_sampler_view,
driver_vp,
- driver_fp,
+ driver_fp, fpv,
color, invertTex, GL_FALSE, GL_FALSE);
pipe_resource_reference(&pt, NULL);
@@ -1666,12 +1523,12 @@ st_destroy_drawpix(struct st_context *st)
{
GLuint i;
- for (i = 0; i < ARRAY_SIZE(st->drawpix.shaders); i++) {
- if (st->drawpix.shaders[i])
- _mesa_reference_fragprog(st->ctx, &st->drawpix.shaders[i], NULL);
+ for (i = 0; i < ARRAY_SIZE(st->drawpix.zs_shaders); i++) {
+ if (st->drawpix.zs_shaders[i])
+ cso_delete_fragment_shader(st->cso_context,
+ st->drawpix.zs_shaders[i]);
}
- st_reference_fragprog(st, &st->pixel_xfer.combined_prog, NULL);
if (st->drawpix.vert_shaders[0])
cso_delete_vertex_shader(st->cso_context, st->drawpix.vert_shaders[0]);
if (st->drawpix.vert_shaders[1])
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.h b/src/mesa/state_tracker/st_cb_drawpixels.h
index c707ace2f9f..f1fb32dd6cf 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.h
+++ b/src/mesa/state_tracker/st_cb_drawpixels.h
@@ -31,6 +31,7 @@
#include "main/compiler.h"
+#include <stdbool.h>
struct dd_function_table;
struct st_context;
@@ -40,15 +41,11 @@ extern void st_init_drawpixels_functions(struct dd_function_table *functions);
extern void
st_destroy_drawpix(struct st_context *st);
-extern void
-st_make_drawpix_fragment_program(struct st_context *st,
- struct gl_fragment_program *fpIn,
- struct gl_fragment_program **fpOut);
-
-extern struct gl_fragment_program *
-st_make_drawpix_z_stencil_program(struct st_context *st,
- GLboolean write_depth,
- GLboolean write_stencil);
-
+extern const struct tgsi_token *
+st_get_drawpix_shader(const struct tgsi_token *tokens, bool use_texcoord,
+ bool scale_and_bias, unsigned scale_const,
+ unsigned bias_const, bool pixel_maps,
+ unsigned drawpix_sampler, unsigned pixelmap_sampler,
+ unsigned texcoord_const);
#endif /* ST_CB_DRAWPIXELS_H */
diff --git a/src/mesa/state_tracker/st_cb_drawpixels_shader.c b/src/mesa/state_tracker/st_cb_drawpixels_shader.c
new file mode 100644
index 00000000000..749b46cfbf7
--- /dev/null
+++ b/src/mesa/state_tracker/st_cb_drawpixels_shader.c
@@ -0,0 +1,278 @@
+/**************************************************************************
+ *
+ * Copyright (C) 2015 Advanced Micro Devices, Inc.
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "st_cb_drawpixels.h"
+#include "tgsi/tgsi_transform.h"
+#include "tgsi/tgsi_scan.h"
+
+/**
+ * Context for the drawpix shader transform: the generic TGSI transform
+ * context followed by the options and register indices that
+ * transform_instr() reads.
+ */
+struct tgsi_drawpix_transform {
+ struct tgsi_transform_context base; /**< kept first: tgsi_drawpix_transform() casts a pointer to it */
+ struct tgsi_shader_info info; /**< filled by tgsi_scan_shader() for the input shader */
+ bool use_texcoord; /**< texcoord input uses TGSI_SEMANTIC_TEXCOORD, else GENERIC */
+ bool scale_and_bias; /**< emit the scale/bias MAD */
+ bool pixel_maps; /**< emit the pixel map lookups */
+ bool first_instruction_emitted; /**< set when the first instruction is visited */
+ unsigned scale_const; /**< constant index of the scale */
+ unsigned bias_const; /**< constant index of the bias */
+ unsigned color_temp; /**< temporary that receives the fetched color */
+ unsigned drawpix_sampler; /**< sampler index of the initial color fetch */
+ unsigned pixelmap_sampler; /**< sampler index of the pixel map lookups */
+ unsigned texcoord_const; /**< constant index that replaces texcoord input reads */
+};
+
+/**
+ * Recover the drawpix transform state from the generic transform context
+ * embedded as its first member.
+ */
+static inline struct tgsi_drawpix_transform *
+tgsi_drawpix_transform(struct tgsi_transform_context *tctx)
+{
+   struct tgsi_drawpix_transform *drawpix =
+      (struct tgsi_drawpix_transform *)tctx;
+
+   return drawpix;
+}
+
+/**
+ * Fill in source operand \p i of \p inst: its register file, its register
+ * index and the swizzle selected for each of the four channels.
+ */
+static void
+set_src(struct tgsi_full_instruction *inst, unsigned i, unsigned file, unsigned index,
+        unsigned x, unsigned y, unsigned z, unsigned w)
+{
+   struct tgsi_full_src_register *operand = &inst->Src[i];
+
+   operand->Register.File = file;
+   operand->Register.Index = index;
+
+   operand->Register.SwizzleX = x;
+   operand->Register.SwizzleY = y;
+   operand->Register.SwizzleZ = z;
+   operand->Register.SwizzleW = w;
+}
+
+/*
+ * Shorthand for set_src(): the last four arguments are bare channel
+ * letters that are pasted onto TGSI_SWIZZLE_, so X becomes TGSI_SWIZZLE_X.
+ */
+#define SET_SRC(inst, i, file, index, x, y, z, w) \
+ set_src(inst, i, file, index, TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, \
+ TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w)
+
+/**
+ * Emit a declaration covering the single register \p index of \p file,
+ * with every other declaration field left at its default.
+ */
+static void
+drawpix_declare_reg(struct tgsi_transform_context *tctx,
+                    unsigned file, unsigned index)
+{
+   struct tgsi_full_declaration decl = tgsi_default_full_declaration();
+
+   decl.Declaration.File = file;
+   decl.Range.First = decl.Range.Last = index;
+   tctx->emit_declaration(tctx, &decl);
+}
+
+/**
+ * Emit everything that has to precede the shader's first instruction:
+ * the declarations the input shader lacks, then the code that loads the
+ * pixel color into ctx->color_temp (TEX, optional scale/bias MAD, optional
+ * pixel map lookups).
+ *
+ * \param tctx          transform context used to emit tokens
+ * \param ctx           drawpix state; color_temp is assigned here
+ * \param sem_texcoord  semantic name that identifies the texcoord input
+ */
+static void
+drawpix_emit_preamble(struct tgsi_transform_context *tctx,
+                      struct tgsi_drawpix_transform *ctx,
+                      unsigned sem_texcoord)
+{
+   struct tgsi_full_declaration decl;
+   struct tgsi_full_instruction inst;
+   int texcoord_index = -1;
+   unsigned i;
+
+   /* Add scale and bias constants. */
+   if (ctx->scale_and_bias) {
+      if (ctx->info.const_file_max[0] < (int)ctx->scale_const)
+         drawpix_declare_reg(tctx, TGSI_FILE_CONSTANT, ctx->scale_const);
+
+      if (ctx->info.const_file_max[0] < (int)ctx->bias_const)
+         drawpix_declare_reg(tctx, TGSI_FILE_CONSTANT, ctx->bias_const);
+   }
+
+   /* Add the constant that stands in for the texcoord input. */
+   if (ctx->info.const_file_max[0] < (int)ctx->texcoord_const)
+      drawpix_declare_reg(tctx, TGSI_FILE_CONSTANT, ctx->texcoord_const);
+
+   /* Add a new temp, one past the highest temporary already in use. */
+   ctx->color_temp = ctx->info.file_max[TGSI_FILE_TEMPORARY] + 1;
+   drawpix_declare_reg(tctx, TGSI_FILE_TEMPORARY, ctx->color_temp);
+
+   /* Add the texcoord input (semantic index 0) if it's missing. */
+   for (i = 0; i < ctx->info.num_inputs; i++) {
+      if (ctx->info.input_semantic_name[i] == sem_texcoord &&
+          ctx->info.input_semantic_index[i] == 0) {
+         texcoord_index = i;
+         break;
+      }
+   }
+
+   if (texcoord_index == -1) {
+      decl = tgsi_default_full_declaration();
+      decl.Declaration.File = TGSI_FILE_INPUT;
+      decl.Declaration.Semantic = 1;
+      decl.Semantic.Name = sem_texcoord;
+      decl.Declaration.Interpolate = 1;
+      decl.Interp.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
+      decl.Range.First = decl.Range.Last = ctx->info.num_inputs;
+      texcoord_index = ctx->info.num_inputs;
+      tctx->emit_declaration(tctx, &decl);
+   }
+
+   /* Declare the drawpix sampler if it's missing.  The mask is built with
+    * an unsigned 1 so that sampler index 31 is not a signed-shift overflow.
+    */
+   if (!(ctx->info.samplers_declared & (1u << ctx->drawpix_sampler)))
+      drawpix_declare_reg(tctx, TGSI_FILE_SAMPLER, ctx->drawpix_sampler);
+
+   /* Declare the pixel map sampler if it's missing. */
+   if (ctx->pixel_maps &&
+       !(ctx->info.samplers_declared & (1u << ctx->pixelmap_sampler)))
+      drawpix_declare_reg(tctx, TGSI_FILE_SAMPLER, ctx->pixelmap_sampler);
+
+   /* Get initial pixel color from the texture.
+    * TEX color_temp, IN[texcoord_index], SAMP[drawpix_sampler], 2D;
+    */
+   inst = tgsi_default_full_instruction();
+   inst.Instruction.Opcode = TGSI_OPCODE_TEX;
+   inst.Instruction.Texture = 1;
+   inst.Texture.Texture = TGSI_TEXTURE_2D;
+
+   inst.Instruction.NumDstRegs = 1;
+   inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+   inst.Dst[0].Register.Index = ctx->color_temp;
+   inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
+
+   inst.Instruction.NumSrcRegs = 2;
+   SET_SRC(&inst, 0, TGSI_FILE_INPUT, texcoord_index, X, Y, Z, W);
+   inst.Src[1].Register.File = TGSI_FILE_SAMPLER;
+   inst.Src[1].Register.Index = ctx->drawpix_sampler;
+
+   tctx->emit_instruction(tctx, &inst);
+
+   /* Apply the scale and bias. */
+   if (ctx->scale_and_bias) {
+      /* MAD color_temp, color_temp, CONST[scale_const], CONST[bias_const]; */
+      inst = tgsi_default_full_instruction();
+      inst.Instruction.Opcode = TGSI_OPCODE_MAD;
+
+      inst.Instruction.NumDstRegs = 1;
+      inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+      inst.Dst[0].Register.Index = ctx->color_temp;
+      inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
+
+      inst.Instruction.NumSrcRegs = 3;
+      SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->color_temp, X, Y, Z, W);
+      SET_SRC(&inst, 1, TGSI_FILE_CONSTANT, ctx->scale_const, X, Y, Z, W);
+      SET_SRC(&inst, 2, TGSI_FILE_CONSTANT, ctx->bias_const, X, Y, Z, W);
+
+      tctx->emit_instruction(tctx, &inst);
+   }
+
+   if (ctx->pixel_maps) {
+      /* do four pixel map look-ups with two TEX instructions: */
+
+      /* TEX color_temp.xy, color_temp.xyyy, SAMP[pixelmap_sampler], 2D; */
+      inst = tgsi_default_full_instruction();
+      inst.Instruction.Opcode = TGSI_OPCODE_TEX;
+      inst.Instruction.Texture = 1;
+      inst.Texture.Texture = TGSI_TEXTURE_2D;
+
+      inst.Instruction.NumDstRegs = 1;
+      inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+      inst.Dst[0].Register.Index = ctx->color_temp;
+      inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XY;
+
+      inst.Instruction.NumSrcRegs = 2;
+      SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->color_temp, X, Y, Y, Y);
+      inst.Src[1].Register.File = TGSI_FILE_SAMPLER;
+      inst.Src[1].Register.Index = ctx->pixelmap_sampler;
+
+      tctx->emit_instruction(tctx, &inst);
+
+      /* TEX color_temp.zw, color_temp.zwww, SAMP[pixelmap_sampler], 2D;
+       * the instruction above is reused with a new writemask and source.
+       */
+      inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_ZW;
+      SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->color_temp, Z, W, W, W);
+      tctx->emit_instruction(tctx, &inst);
+   }
+}
+
+/**
+ * Per-instruction callback of the drawpix transform.
+ *
+ * When the first instruction is visited, the missing declarations and the
+ * color-fetch preamble are emitted ahead of it.  Every instruction, the
+ * first one included, then has its direct reads of IN:COLOR0 redirected to
+ * the color temporary and its direct reads of texcoord input 0 redirected
+ * to CONST[texcoord_const] before it is emitted.  Indirectly addressed
+ * inputs are left untouched.
+ *
+ * \param tctx          transform context; must be the base member of a
+ *                      struct tgsi_drawpix_transform
+ * \param current_inst  instruction being visited; its source registers are
+ *                      patched in place
+ */
+static void
+transform_instr(struct tgsi_transform_context *tctx,
+                struct tgsi_full_instruction *current_inst)
+{
+   struct tgsi_drawpix_transform *ctx = tgsi_drawpix_transform(tctx);
+   unsigned i, sem_texcoord = ctx->use_texcoord ? TGSI_SEMANTIC_TEXCOORD :
+                                                  TGSI_SEMANTIC_GENERIC;
+
+   if (!ctx->first_instruction_emitted) {
+      ctx->first_instruction_emitted = true;
+      drawpix_emit_preamble(tctx, ctx, sem_texcoord);
+   }
+
+   /* From here on, "color_temp" is used in place of IN:COLOR0,
+    * and CONST[texcoord_const] is used in place of texcoord input 0.
+    */
+   for (i = 0; i < current_inst->Instruction.NumSrcRegs; i++) {
+      struct tgsi_full_src_register *src = &current_inst->Src[i];
+      unsigned reg = src->Register.Index;
+
+      if (src->Register.File != TGSI_FILE_INPUT || src->Register.Indirect)
+         continue;
+
+      if (ctx->info.input_semantic_name[reg] == TGSI_SEMANTIC_COLOR &&
+          ctx->info.input_semantic_index[reg] == 0) {
+         src->Register.File = TGSI_FILE_TEMPORARY;
+         src->Register.Index = ctx->color_temp;
+      } else if (ctx->info.input_semantic_name[reg] == sem_texcoord &&
+                 ctx->info.input_semantic_index[reg] == 0) {
+         src->Register.File = TGSI_FILE_CONSTANT;
+         src->Register.Index = ctx->texcoord_const;
+      }
+   }
+
+   tctx->emit_instruction(tctx, current_inst);
+}
+
+/**
+ * Run the drawpix transform over a fragment shader and return the result
+ * as a new token array.
+ *
+ * \param tokens            TGSI shader to transform
+ * \param use_texcoord      identify the texcoord input by
+ *                          TGSI_SEMANTIC_TEXCOORD rather than
+ *                          TGSI_SEMANTIC_GENERIC
+ * \param scale_and_bias    emit a MAD with the scale and bias constants
+ * \param scale_const       constant index of the scale
+ * \param bias_const        constant index of the bias
+ * \param pixel_maps        emit the pixel map lookups
+ * \param drawpix_sampler   sampler index of the initial color fetch
+ * \param pixelmap_sampler  sampler index of the pixel map lookups
+ * \param texcoord_const    constant index substituted for reads of
+ *                          texcoord input 0
+ *
+ * \return token array obtained from tgsi_alloc_tokens(), or NULL if that
+ *         allocation failed.
+ */
+const struct tgsi_token *
+st_get_drawpix_shader(const struct tgsi_token *tokens, bool use_texcoord,
+                      bool scale_and_bias, unsigned scale_const,
+                      unsigned bias_const, bool pixel_maps,
+                      unsigned drawpix_sampler, unsigned pixelmap_sampler,
+                      unsigned texcoord_const)
+{
+   struct tgsi_drawpix_transform xform;
+   struct tgsi_token *result;
+   int capacity;
+
+   memset(&xform, 0, sizeof(xform));
+   xform.base.transform_instruction = transform_instr;
+   xform.use_texcoord = use_texcoord;
+   xform.scale_and_bias = scale_and_bias;
+   xform.scale_const = scale_const;
+   xform.bias_const = bias_const;
+   xform.pixel_maps = pixel_maps;
+   xform.drawpix_sampler = drawpix_sampler;
+   xform.pixelmap_sampler = pixelmap_sampler;
+   xform.texcoord_const = texcoord_const;
+   tgsi_scan_shader(tokens, &xform.info);
+
+   /* Output buffer: the input size plus 30 tokens of headroom. */
+   capacity = tgsi_num_tokens(tokens) + 30;
+   result = tgsi_alloc_tokens(capacity);
+   if (result != NULL)
+      tgsi_transform_shader(tokens, result, capacity, &xform.base);
+
+   return result;
+}
diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c
index ff703fa41cb..2a2eb0992c8 100644
--- a/src/mesa/state_tracker/st_cb_fbo.c
+++ b/src/mesa/state_tracker/st_cb_fbo.c
@@ -456,7 +456,7 @@ st_update_renderbuffer_surface(struct st_context *st,
surf_tmpl.u.tex.first_layer = first_layer;
surf_tmpl.u.tex.last_layer = last_layer;
- pipe_surface_reference(&strb->surface, NULL);
+ pipe_surface_release(pipe, &strb->surface);
strb->surface = pipe->create_surface(pipe, resource, &surf_tmpl);
}
diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c
index 3029909d12d..708bdf5011e 100644
--- a/src/mesa/state_tracker/st_cb_program.c
+++ b/src/mesa/state_tracker/st_cb_program.c
@@ -105,29 +105,24 @@ st_new_program(struct gl_context *ctx, GLenum target, GLuint id)
switch (target) {
case GL_VERTEX_PROGRAM_ARB: {
struct st_vertex_program *prog = ST_CALLOC_STRUCT(st_vertex_program);
- return _mesa_init_vertex_program(ctx, &prog->Base, target, id);
+ return _mesa_init_gl_program(&prog->Base.Base, target, id);
}
-
case GL_FRAGMENT_PROGRAM_ARB: {
struct st_fragment_program *prog = ST_CALLOC_STRUCT(st_fragment_program);
- return _mesa_init_fragment_program(ctx, &prog->Base, target, id);
+ return _mesa_init_gl_program(&prog->Base.Base, target, id);
}
-
case GL_GEOMETRY_PROGRAM_NV: {
struct st_geometry_program *prog = ST_CALLOC_STRUCT(st_geometry_program);
- return _mesa_init_geometry_program(ctx, &prog->Base, target, id);
+ return _mesa_init_gl_program(&prog->Base.Base, target, id);
}
-
case GL_TESS_CONTROL_PROGRAM_NV: {
struct st_tessctrl_program *prog = ST_CALLOC_STRUCT(st_tessctrl_program);
- return _mesa_init_tess_ctrl_program(ctx, &prog->Base, target, id);
+ return _mesa_init_gl_program(&prog->Base.Base, target, id);
}
-
case GL_TESS_EVALUATION_PROGRAM_NV: {
struct st_tesseval_program *prog = ST_CALLOC_STRUCT(st_tesseval_program);
- return _mesa_init_tess_eval_program(ctx, &prog->Base, target, id);
+ return _mesa_init_gl_program(&prog->Base.Base, target, id);
}
-
default:
assert(0);
return NULL;
@@ -234,6 +229,8 @@ st_program_string_notify( struct gl_context *ctx,
struct st_fragment_program *stfp = (struct st_fragment_program *) prog;
st_release_fp_variants(st, stfp);
+ if (!st_translate_fragment_program(st, stfp))
+ return false;
if (st->fp == stfp)
st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM;
@@ -242,6 +239,8 @@ st_program_string_notify( struct gl_context *ctx,
struct st_geometry_program *stgp = (struct st_geometry_program *) prog;
st_release_gp_variants(st, stgp);
+ if (!st_translate_geometry_program(st, stgp))
+ return false;
if (st->gp == stgp)
st->dirty.st |= ST_NEW_GEOMETRY_PROGRAM;
@@ -249,7 +248,9 @@ st_program_string_notify( struct gl_context *ctx,
else if (target == GL_VERTEX_PROGRAM_ARB) {
struct st_vertex_program *stvp = (struct st_vertex_program *) prog;
- st_release_vp_variants( st, stvp );
+ st_release_vp_variants(st, stvp);
+ if (!st_translate_vertex_program(st, stvp))
+ return false;
if (st->vp == stvp)
st->dirty.st |= ST_NEW_VERTEX_PROGRAM;
@@ -259,6 +260,8 @@ st_program_string_notify( struct gl_context *ctx,
(struct st_tessctrl_program *) prog;
st_release_tcp_variants(st, sttcp);
+ if (!st_translate_tessctrl_program(st, sttcp))
+ return false;
if (st->tcp == sttcp)
st->dirty.st |= ST_NEW_TESSCTRL_PROGRAM;
@@ -268,6 +271,8 @@ st_program_string_notify( struct gl_context *ctx,
(struct st_tesseval_program *) prog;
st_release_tep_variants(st, sttep);
+ if (!st_translate_tesseval_program(st, sttep))
+ return false;
if (st->tep == sttep)
st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM;
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
index a9ab5edcf49..bef7307bb27 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -224,8 +224,6 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe,
st->ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
- st->pixel_xfer.cache = _mesa_new_program_cache();
-
st->has_stencil_export =
screen->get_param(screen, PIPE_CAP_SHADER_STENCIL_EXPORT);
st->has_shader_model3 = screen->get_param(screen, PIPE_CAP_SM3);
@@ -386,8 +384,8 @@ void st_destroy_context( struct st_context *st )
pipe_surface_reference(&st->state.framebuffer.cbufs[i], NULL);
}
pipe_surface_reference(&st->state.framebuffer.zsbuf, NULL);
-
- _mesa_delete_program_cache(st->ctx, st->pixel_xfer.cache);
+ pipe_sampler_view_reference(&st->pixel_xfer.pixelmap_sampler_view, NULL);
+ pipe_resource_reference(&st->pixel_xfer.pixelmap_texture, NULL);
_vbo_DestroyContext(st->ctx);
diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h
index a4cda29059d..f187d82449b 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -162,15 +162,8 @@ struct st_context
struct gl_texture_object *default_texture;
struct {
- struct gl_program_cache *cache;
- struct st_fragment_program *program; /**< cur pixel transfer prog */
- GLuint xfer_prog_sn; /**< pixel xfer program serial no. */
- GLuint user_prog_sn; /**< user fragment program serial no. */
- struct st_fragment_program *combined_prog;
- GLuint combined_prog_sn;
struct pipe_resource *pixelmap_texture;
struct pipe_sampler_view *pixelmap_sampler_view;
- boolean pixelmap_enabled; /**< use the pixelmap texture? */
} pixel_xfer;
/** for glBitmap */
@@ -184,7 +177,7 @@ struct st_context
/** for glDraw/CopyPixels */
struct {
- struct gl_fragment_program *shaders[4];
+ void *zs_shaders[4];
void *vert_shaders[2]; /**< ureg shaders */
} drawpix;
diff --git a/src/mesa/state_tracker/st_debug.c b/src/mesa/state_tracker/st_debug.c
index 50891c112cb..6d859c6ab5b 100644
--- a/src/mesa/state_tracker/st_debug.c
+++ b/src/mesa/state_tracker/st_debug.c
@@ -98,7 +98,7 @@ st_print_current(void)
if (st->vp->Base.Base.Parameters)
_mesa_print_parameter_list(st->vp->Base.Base.Parameters);
- tgsi_dump( st->fp->variants[0].tgsi.tokens, 0 );
+ tgsi_dump(st->fp->tgsi.tokens, 0);
if (st->fp->Base.Base.Parameters)
_mesa_print_parameter_list(st->fp->Base.Base.Parameters);
}
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 633e90ffa38..f481e8902d8 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -4334,216 +4334,6 @@ glsl_to_tgsi_visitor::renumber_registers(void)
ralloc_free(first_reads);
}
-/**
- * Returns a fragment program which implements the current pixel transfer ops.
- * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c.
- */
-extern "C" void
-get_pixel_transfer_visitor(struct st_fragment_program *fp,
- glsl_to_tgsi_visitor *original,
- int scale_and_bias, int pixel_maps)
-{
- glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
- struct st_context *st = st_context(original->ctx);
- struct gl_program *prog = &fp->Base.Base;
- struct gl_program_parameter_list *params = _mesa_new_parameter_list();
- st_src_reg coord, src0;
- st_dst_reg dst0;
- glsl_to_tgsi_instruction *inst;
-
- /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
- v->ctx = original->ctx;
- v->prog = prog;
- v->shader_program = NULL;
- v->shader = NULL;
- v->glsl_version = original->glsl_version;
- v->native_integers = original->native_integers;
- v->options = original->options;
- v->next_temp = original->next_temp;
- v->num_address_regs = original->num_address_regs;
- v->samplers_used = prog->SamplersUsed = original->samplers_used;
- v->indirect_addr_consts = original->indirect_addr_consts;
- memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
- v->num_immediates = original->num_immediates;
-
- /*
- * Get initial pixel color from the texture.
- * TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
- */
- coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type);
- src0 = v->get_temp(glsl_type::vec4_type);
- dst0 = st_dst_reg(src0);
- inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, dst0, coord);
- inst->sampler_array_size = 1;
- inst->tex_target = TEXTURE_2D_INDEX;
-
- prog->InputsRead |= VARYING_BIT_TEX0;
- prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */
- v->samplers_used |= (1 << 0);
-
- if (scale_and_bias) {
- static const gl_state_index scale_state[STATE_LENGTH] =
- { STATE_INTERNAL, STATE_PT_SCALE,
- (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
- static const gl_state_index bias_state[STATE_LENGTH] =
- { STATE_INTERNAL, STATE_PT_BIAS,
- (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
- GLint scale_p, bias_p;
- st_src_reg scale, bias;
-
- scale_p = _mesa_add_state_reference(params, scale_state);
- bias_p = _mesa_add_state_reference(params, bias_state);
-
- /* MAD colorTemp, colorTemp, scale, bias; */
- scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT);
- bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT);
- inst = v->emit_asm(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias);
- }
-
- if (pixel_maps) {
- st_src_reg temp = v->get_temp(glsl_type::vec4_type);
- st_dst_reg temp_dst = st_dst_reg(temp);
-
- assert(st->pixel_xfer.pixelmap_texture);
- (void) st;
-
- /* With a little effort, we can do four pixel map look-ups with
- * two TEX instructions:
- */
-
- /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
- temp_dst.writemask = WRITEMASK_XY; /* write R,G */
- inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
- inst->sampler.index = 1;
- inst->sampler_array_size = 1;
- inst->tex_target = TEXTURE_2D_INDEX;
-
- /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
- src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
- temp_dst.writemask = WRITEMASK_ZW; /* write B,A */
- inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
- inst->sampler.index = 1;
- inst->sampler_array_size = 1;
- inst->tex_target = TEXTURE_2D_INDEX;
-
- prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */
- v->samplers_used |= (1 << 1);
-
- /* MOV colorTemp, temp; */
- inst = v->emit_asm(NULL, TGSI_OPCODE_MOV, dst0, temp);
- }
-
- /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
- * new visitor. */
- foreach_in_list(glsl_to_tgsi_instruction, inst, &original->instructions) {
- glsl_to_tgsi_instruction *newinst;
- st_src_reg src_regs[4];
-
- if (inst->dst[0].file == PROGRAM_OUTPUT)
- prog->OutputsWritten |= BITFIELD64_BIT(inst->dst[0].index);
-
- for (int i = 0; i < 4; i++) {
- src_regs[i] = inst->src[i];
- if (src_regs[i].file == PROGRAM_INPUT &&
- src_regs[i].index == VARYING_SLOT_COL0) {
- src_regs[i].file = PROGRAM_TEMPORARY;
- src_regs[i].index = src0.index;
- }
- else if (src_regs[i].file == PROGRAM_INPUT)
- prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
- }
-
- newinst = v->emit_asm(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2], src_regs[3]);
- newinst->tex_target = inst->tex_target;
- newinst->sampler_array_size = inst->sampler_array_size;
- }
-
- /* Make modifications to fragment program info. */
- prog->Parameters = _mesa_combine_parameter_lists(params,
- original->prog->Parameters);
- _mesa_free_parameter_list(params);
- count_resources(v, prog);
- fp->glsl_to_tgsi = v;
-}
-
-/**
- * Make fragment program for glBitmap:
- * Sample the texture and kill the fragment if the bit is 0.
- * This program will be combined with the user's fragment program.
- *
- * Based on make_bitmap_fragment_program in st_cb_bitmap.c.
- */
-extern "C" void
-get_bitmap_visitor(struct st_fragment_program *fp,
- glsl_to_tgsi_visitor *original, int samplerIndex)
-{
- glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
- struct st_context *st = st_context(original->ctx);
- struct gl_program *prog = &fp->Base.Base;
- st_src_reg coord, src0;
- st_dst_reg dst0;
- glsl_to_tgsi_instruction *inst;
-
- /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
- v->ctx = original->ctx;
- v->prog = prog;
- v->shader_program = NULL;
- v->shader = NULL;
- v->glsl_version = original->glsl_version;
- v->native_integers = original->native_integers;
- v->options = original->options;
- v->next_temp = original->next_temp;
- v->num_address_regs = original->num_address_regs;
- v->samplers_used = prog->SamplersUsed = original->samplers_used;
- v->indirect_addr_consts = original->indirect_addr_consts;
- memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
- v->num_immediates = original->num_immediates;
-
- /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
- coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type);
- src0 = v->get_temp(glsl_type::vec4_type);
- dst0 = st_dst_reg(src0);
- inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, dst0, coord);
- inst->sampler.index = samplerIndex;
- inst->sampler_array_size = 1;
- inst->tex_target = TEXTURE_2D_INDEX;
-
- prog->InputsRead |= VARYING_BIT_TEX0;
- prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */
- v->samplers_used |= (1 << samplerIndex);
-
- /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */
- src0.negate = NEGATE_XYZW;
- if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
- src0.swizzle = SWIZZLE_XXXX;
- inst = v->emit_asm(NULL, TGSI_OPCODE_KILL_IF, undef_dst, src0);
-
- /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
- * new visitor. */
- foreach_in_list(glsl_to_tgsi_instruction, inst, &original->instructions) {
- glsl_to_tgsi_instruction *newinst;
- st_src_reg src_regs[4];
-
- if (inst->dst[0].file == PROGRAM_OUTPUT)
- prog->OutputsWritten |= BITFIELD64_BIT(inst->dst[0].index);
-
- for (int i = 0; i < 4; i++) {
- src_regs[i] = inst->src[i];
- if (src_regs[i].file == PROGRAM_INPUT)
- prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
- }
-
- newinst = v->emit_asm(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2], src_regs[3]);
- newinst->tex_target = inst->tex_target;
- newinst->sampler_array_size = inst->sampler_array_size;
- }
-
- /* Make modifications to fragment program info. */
- prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters);
- count_resources(v, prog);
- fp->glsl_to_tgsi = v;
-}
-
/* ------------------------- TGSI conversion stuff -------------------------- */
struct label {
unsigned branch_target;
@@ -4852,7 +4642,7 @@ src_register(struct st_translate *t, const st_src_reg *reg)
static struct ureg_dst
translate_dst(struct st_translate *t,
const st_dst_reg *dst_reg,
- bool saturate, bool clamp_color)
+ bool saturate)
{
struct ureg_dst dst = dst_register(t, dst_reg->file, dst_reg->index,
dst_reg->array_id);
@@ -4864,28 +4654,6 @@ translate_dst(struct st_translate *t,
if (saturate)
dst = ureg_saturate(dst);
- else if (clamp_color && dst_reg->file == PROGRAM_OUTPUT) {
- /* Clamp colors for ARB_color_buffer_float. */
- switch (t->procType) {
- case TGSI_PROCESSOR_VERTEX:
- /* This can only occur with a compatibility profile, which doesn't
- * support geometry shaders. */
- if (dst_reg->index == VARYING_SLOT_COL0 ||
- dst_reg->index == VARYING_SLOT_COL1 ||
- dst_reg->index == VARYING_SLOT_BFC0 ||
- dst_reg->index == VARYING_SLOT_BFC1) {
- dst = ureg_saturate(dst);
- }
- break;
-
- case TGSI_PROCESSOR_FRAGMENT:
- if (dst_reg->index == FRAG_RESULT_COLOR ||
- dst_reg->index >= FRAG_RESULT_DATA0) {
- dst = ureg_saturate(dst);
- }
- break;
- }
- }
if (dst_reg->reladdr != NULL) {
assert(dst_reg->file != PROGRAM_TEMPORARY);
@@ -4991,8 +4759,7 @@ translate_tex_offset(struct st_translate *t,
static void
compile_tgsi_instruction(struct st_translate *t,
- const glsl_to_tgsi_instruction *inst,
- bool clamp_dst_color_output)
+ const glsl_to_tgsi_instruction *inst)
{
struct ureg_program *ureg = t->ureg;
GLuint i;
@@ -5010,8 +4777,7 @@ compile_tgsi_instruction(struct st_translate *t,
for (i = 0; i < num_dst; i++)
dst[i] = translate_dst(t,
&inst->dst[i],
- inst->saturate,
- clamp_dst_color_output);
+ inst->saturate);
for (i = 0; i < num_src; i++)
src[i] = translate_src(t, &inst->src[i]);
@@ -5286,16 +5052,6 @@ emit_face_var(struct gl_context *ctx, struct st_translate *t)
t->inputs[t->inputMapping[VARYING_SLOT_FACE]] = ureg_src(face_temp);
}
-static void
-emit_edgeflags(struct st_translate *t)
-{
- struct ureg_program *ureg = t->ureg;
- struct ureg_dst edge_dst = t->outputs[t->outputMapping[VARYING_SLOT_EDGE]];
- struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];
-
- ureg_MOV(ureg, edge_dst, edge_src);
-}
-
static bool
find_array(unsigned attr, struct array_decl *arrays, unsigned count,
unsigned *array_id, unsigned *array_size)
@@ -5353,9 +5109,7 @@ st_translate_program(
const GLuint outputMapping[],
const GLuint outputSlotToAttr[],
const ubyte outputSemanticName[],
- const ubyte outputSemanticIndex[],
- boolean passthrough_edgeflags,
- boolean clamp_color)
+ const ubyte outputSemanticIndex[])
{
struct st_translate *t;
unsigned i;
@@ -5544,8 +5298,6 @@ st_translate_program(
t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
}
}
- if (passthrough_edgeflags)
- emit_edgeflags(t);
}
/* Declare address register.
@@ -5639,7 +5391,7 @@ st_translate_program(
unsigned num_ubos = program->shader->NumUniformBlocks;
for (i = 0; i < num_ubos; i++) {
- unsigned size = program->shader->UniformBlocks[i].UniformBufferSize;
+ unsigned size = program->shader->UniformBlocks[i]->UniformBufferSize;
unsigned num_const_vecs = (size + 15) / 16;
unsigned first, last;
assert(num_const_vecs > 0);
@@ -5696,7 +5448,7 @@ st_translate_program(
*/
foreach_in_list(glsl_to_tgsi_instruction, inst, &program->instructions) {
set_insn_start(t, ureg_get_instruction_number(ureg));
- compile_tgsi_instruction(t, inst, clamp_color);
+ compile_tgsi_instruction(t, inst);
}
/* Fix up all emitted labels:
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h
index 4af747fa9de..729295bcb52 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.h
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h
@@ -52,17 +52,9 @@ enum pipe_error st_translate_program(
const GLuint outputMapping[],
const GLuint outputSlotToAttr[],
const ubyte outputSemanticName[],
- const ubyte outputSemanticIndex[],
- boolean passthrough_edgeflags,
- boolean clamp_color);
+ const ubyte outputSemanticIndex[]);
void free_glsl_to_tgsi_visitor(struct glsl_to_tgsi_visitor *v);
-void get_pixel_transfer_visitor(struct st_fragment_program *fp,
- struct glsl_to_tgsi_visitor *original,
- int scale_and_bias, int pixel_maps);
-void get_bitmap_visitor(struct st_fragment_program *fp,
- struct glsl_to_tgsi_visitor *original,
- int samplerIndex);
GLboolean st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog);
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index 896e239ee68..4b9dc994ea5 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -283,8 +283,7 @@ st_translate_texture_target( GLuint textarget,
static struct ureg_dst
translate_dst( struct st_translate *t,
const struct prog_dst_register *DstReg,
- boolean saturate,
- boolean clamp_color)
+ boolean saturate)
{
struct ureg_dst dst = dst_register( t,
DstReg->File,
@@ -295,27 +294,6 @@ translate_dst( struct st_translate *t,
if (saturate)
dst = ureg_saturate( dst );
- else if (clamp_color && DstReg->File == PROGRAM_OUTPUT) {
- /* Clamp colors for ARB_color_buffer_float. */
- switch (t->procType) {
- case TGSI_PROCESSOR_VERTEX:
- /* This can only occur with a compatibility profile, which doesn't
- * support geometry shaders. */
- if (DstReg->Index == VARYING_SLOT_COL0 ||
- DstReg->Index == VARYING_SLOT_COL1 ||
- DstReg->Index == VARYING_SLOT_BFC0 ||
- DstReg->Index == VARYING_SLOT_BFC1) {
- dst = ureg_saturate(dst);
- }
- break;
-
- case TGSI_PROCESSOR_FRAGMENT:
- if (DstReg->Index >= FRAG_RESULT_COLOR) {
- dst = ureg_saturate(dst);
- }
- break;
- }
- }
if (DstReg->RelAddr)
dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) );
@@ -649,8 +627,7 @@ static void
compile_instruction(
struct gl_context *ctx,
struct st_translate *t,
- const struct prog_instruction *inst,
- boolean clamp_dst_color_output)
+ const struct prog_instruction *inst)
{
struct ureg_program *ureg = t->ureg;
GLuint i;
@@ -665,8 +642,7 @@ compile_instruction(
if (num_dst)
dst[0] = translate_dst( t,
&inst->DstReg,
- inst->Saturate,
- clamp_dst_color_output);
+ inst->Saturate);
for (i = 0; i < num_src; i++)
src[i] = translate_src( t, &inst->SrcReg[i] );
@@ -974,18 +950,6 @@ emit_face_var( struct st_translate *t,
}
-static void
-emit_edgeflags( struct st_translate *t,
- const struct gl_program *program )
-{
- struct ureg_program *ureg = t->ureg;
- struct ureg_dst edge_dst = t->outputs[t->outputMapping[VARYING_SLOT_EDGE]];
- struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];
-
- ureg_MOV( ureg, edge_dst, edge_src );
-}
-
-
/**
* Translate Mesa program to TGSI format.
* \param program the program to translate
@@ -1019,9 +983,7 @@ st_translate_mesa_program(
GLuint numOutputs,
const GLuint outputMapping[],
const ubyte outputSemanticName[],
- const ubyte outputSemanticIndex[],
- boolean passthrough_edgeflags,
- boolean clamp_color)
+ const ubyte outputSemanticIndex[])
{
struct st_translate translate, *t;
unsigned i;
@@ -1125,8 +1087,6 @@ st_translate_mesa_program(
t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
}
}
- if (passthrough_edgeflags)
- emit_edgeflags( t, program );
}
/* Declare address register.
@@ -1231,7 +1191,7 @@ st_translate_mesa_program(
*/
for (i = 0; i < program->NumInstructions; i++) {
set_insn_start( t, ureg_get_instruction_number( ureg ));
- compile_instruction( ctx, t, &program->Instructions[i], clamp_color );
+ compile_instruction(ctx, t, &program->Instructions[i]);
}
/* Fix up all emitted labels:
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.h b/src/mesa/state_tracker/st_mesa_to_tgsi.h
index 62bb654e95a..ed7a3adfe1a 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.h
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.h
@@ -58,9 +58,7 @@ st_translate_mesa_program(
GLuint numOutputs,
const GLuint outputMapping[],
const ubyte outputSemanticName[],
- const ubyte outputSemanticIndex[],
- boolean passthrough_edgeflags,
- boolean clamp_color);
+ const ubyte outputSemanticIndex[]);
unsigned
st_translate_texture_target(GLuint textarget, GLboolean shadow);
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index a07f8fec309..6a69ba7aa26 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -43,6 +43,8 @@
#include "pipe/p_shader_tokens.h"
#include "draw/draw_context.h"
#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_emulate.h"
+#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_ureg.h"
#include "st_debug.h"
@@ -92,6 +94,11 @@ st_release_vp_variants( struct st_context *st,
}
stvp->variants = NULL;
+
+ if (stvp->tgsi.tokens) {
+ tgsi_free_tokens(stvp->tgsi.tokens);
+ stvp->tgsi.tokens = NULL;
+ }
}
@@ -107,8 +114,6 @@ delete_fp_variant(struct st_context *st, struct st_fp_variant *fpv)
cso_delete_fragment_shader(st->cso_context, fpv->driver_shader);
if (fpv->parameters)
_mesa_free_parameter_list(fpv->parameters);
- if (fpv->tgsi.tokens)
- ureg_free_tokens(fpv->tgsi.tokens);
free(fpv);
}
@@ -128,6 +133,11 @@ st_release_fp_variants(struct st_context *st, struct st_fragment_program *stfp)
}
stfp->variants = NULL;
+
+ if (stfp->tgsi.tokens) {
+ ureg_free_tokens(stfp->tgsi.tokens);
+ stfp->tgsi.tokens = NULL;
+ }
}
@@ -160,6 +170,11 @@ st_release_gp_variants(struct st_context *st, struct st_geometry_program *stgp)
}
stgp->variants = NULL;
+
+ if (stgp->tgsi.tokens) {
+ ureg_free_tokens(stgp->tgsi.tokens);
+ stgp->tgsi.tokens = NULL;
+ }
}
@@ -192,6 +207,11 @@ st_release_tcp_variants(struct st_context *st, struct st_tessctrl_program *sttcp
}
sttcp->variants = NULL;
+
+ if (sttcp->tgsi.tokens) {
+ ureg_free_tokens(sttcp->tgsi.tokens);
+ sttcp->tgsi.tokens = NULL;
+ }
}
@@ -224,28 +244,34 @@ st_release_tep_variants(struct st_context *st, struct st_tesseval_program *sttep
}
sttep->variants = NULL;
+
+ if (sttep->tgsi.tokens) {
+ ureg_free_tokens(sttep->tgsi.tokens);
+ sttep->tgsi.tokens = NULL;
+ }
}
/**
- * Translate a Mesa vertex shader into a TGSI shader.
- * \param outputMapping to map vertex program output registers (VARYING_SLOT_x)
- * to TGSI output slots
- * \param tokensOut destination for TGSI tokens
- * \return pointer to cached pipe_shader object.
+ * Translate a vertex program.
*/
-void
-st_prepare_vertex_program(struct gl_context *ctx,
+bool
+st_translate_vertex_program(struct st_context *st,
struct st_vertex_program *stvp)
{
- struct st_context *st = st_context(ctx);
- GLuint attr;
+ struct ureg_program *ureg;
+ enum pipe_error error;
+ unsigned num_outputs = 0;
+ unsigned attr;
+ unsigned input_to_index[VERT_ATTRIB_MAX] = {0};
+ unsigned output_slot_to_attr[VARYING_SLOT_MAX] = {0};
+ ubyte output_semantic_name[VARYING_SLOT_MAX] = {0};
+ ubyte output_semantic_index[VARYING_SLOT_MAX] = {0};
stvp->num_inputs = 0;
- stvp->num_outputs = 0;
if (stvp->Base.IsPositionInvariant)
- _mesa_insert_mvp_code(ctx, &stvp->Base);
+ _mesa_insert_mvp_code(st->ctx, &stvp->Base);
/*
* Determine number of inputs, the mappings between VERT_ATTRIB_x
@@ -253,7 +279,7 @@ st_prepare_vertex_program(struct gl_context *ctx,
*/
for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
if ((stvp->Base.Base.InputsRead & BITFIELD64_BIT(attr)) != 0) {
- stvp->input_to_index[attr] = stvp->num_inputs;
+ input_to_index[attr] = stvp->num_inputs;
stvp->index_to_input[stvp->num_inputs] = attr;
stvp->num_inputs++;
if ((stvp->Base.Base.DoubleInputsRead & BITFIELD64_BIT(attr)) != 0) {
@@ -264,7 +290,7 @@ st_prepare_vertex_program(struct gl_context *ctx,
}
}
/* bit of a hack, presetup potentially unused edgeflag input */
- stvp->input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
+ input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
stvp->index_to_input[stvp->num_inputs] = VERT_ATTRIB_EDGEFLAG;
/* Compute mapping of vertex program outputs to slots.
@@ -274,62 +300,62 @@ st_prepare_vertex_program(struct gl_context *ctx,
stvp->result_to_output[attr] = ~0;
}
else {
- unsigned slot = stvp->num_outputs++;
+ unsigned slot = num_outputs++;
stvp->result_to_output[attr] = slot;
- stvp->output_slot_to_attr[slot] = attr;
+ output_slot_to_attr[slot] = attr;
switch (attr) {
case VARYING_SLOT_POS:
- stvp->output_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
- stvp->output_semantic_index[slot] = 0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
+ output_semantic_index[slot] = 0;
break;
case VARYING_SLOT_COL0:
- stvp->output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
- stvp->output_semantic_index[slot] = 0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
+ output_semantic_index[slot] = 0;
break;
case VARYING_SLOT_COL1:
- stvp->output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
- stvp->output_semantic_index[slot] = 1;
+ output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
+ output_semantic_index[slot] = 1;
break;
case VARYING_SLOT_BFC0:
- stvp->output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
- stvp->output_semantic_index[slot] = 0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
+ output_semantic_index[slot] = 0;
break;
case VARYING_SLOT_BFC1:
- stvp->output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
- stvp->output_semantic_index[slot] = 1;
+ output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
+ output_semantic_index[slot] = 1;
break;
case VARYING_SLOT_FOGC:
- stvp->output_semantic_name[slot] = TGSI_SEMANTIC_FOG;
- stvp->output_semantic_index[slot] = 0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_FOG;
+ output_semantic_index[slot] = 0;
break;
case VARYING_SLOT_PSIZ:
- stvp->output_semantic_name[slot] = TGSI_SEMANTIC_PSIZE;
- stvp->output_semantic_index[slot] = 0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_PSIZE;
+ output_semantic_index[slot] = 0;
break;
case VARYING_SLOT_CLIP_DIST0:
- stvp->output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
- stvp->output_semantic_index[slot] = 0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
+ output_semantic_index[slot] = 0;
break;
case VARYING_SLOT_CLIP_DIST1:
- stvp->output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
- stvp->output_semantic_index[slot] = 1;
+ output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
+ output_semantic_index[slot] = 1;
break;
case VARYING_SLOT_EDGE:
assert(0);
break;
case VARYING_SLOT_CLIP_VERTEX:
- stvp->output_semantic_name[slot] = TGSI_SEMANTIC_CLIPVERTEX;
- stvp->output_semantic_index[slot] = 0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_CLIPVERTEX;
+ output_semantic_index[slot] = 0;
break;
case VARYING_SLOT_LAYER:
- stvp->output_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
- stvp->output_semantic_index[slot] = 0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
+ output_semantic_index[slot] = 0;
break;
case VARYING_SLOT_VIEWPORT:
- stvp->output_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
- stvp->output_semantic_index[slot] = 0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
+ output_semantic_index[slot] = 0;
break;
case VARYING_SLOT_TEX0:
@@ -341,8 +367,8 @@ st_prepare_vertex_program(struct gl_context *ctx,
case VARYING_SLOT_TEX6:
case VARYING_SLOT_TEX7:
if (st->needs_texcoord_semantic) {
- stvp->output_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
- stvp->output_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
+ output_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
break;
}
/* fall through */
@@ -350,55 +376,24 @@ st_prepare_vertex_program(struct gl_context *ctx,
default:
assert(attr >= VARYING_SLOT_VAR0 ||
(attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
- stvp->output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
- stvp->output_semantic_index[slot] =
+ output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
+ output_semantic_index[slot] =
st_get_generic_varying_index(st, attr);
break;
}
}
}
/* similar hack to above, presetup potentially unused edgeflag output */
- stvp->result_to_output[VARYING_SLOT_EDGE] = stvp->num_outputs;
- stvp->output_semantic_name[stvp->num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
- stvp->output_semantic_index[stvp->num_outputs] = 0;
-}
-
-
-/**
- * Translate a vertex program to create a new variant.
- */
-static struct st_vp_variant *
-st_translate_vertex_program(struct st_context *st,
- struct st_vertex_program *stvp,
- const struct st_vp_variant_key *key)
-{
- struct st_vp_variant *vpv = CALLOC_STRUCT(st_vp_variant);
- struct pipe_context *pipe = st->pipe;
- struct ureg_program *ureg;
- enum pipe_error error;
- unsigned num_outputs;
-
- st_prepare_vertex_program(st->ctx, stvp);
+ stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;
+ output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
+ output_semantic_index[num_outputs] = 0;
if (!stvp->glsl_to_tgsi)
- {
_mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT);
- }
ureg = ureg_create_with_screen(TGSI_PROCESSOR_VERTEX, st->pipe->screen);
- if (ureg == NULL) {
- free(vpv);
- return NULL;
- }
-
- vpv->key = *key;
-
- vpv->num_inputs = stvp->num_inputs;
- num_outputs = stvp->num_outputs;
- if (key->passthrough_edgeflags) {
- vpv->num_inputs++;
- num_outputs++;
- }
+ if (ureg == NULL)
+ return false;
if (ST_DEBUG & DEBUG_MESA) {
_mesa_print_program(&stvp->Base.Base);
@@ -406,15 +401,15 @@ st_translate_vertex_program(struct st_context *st,
debug_printf("\n");
}
- if (stvp->glsl_to_tgsi)
+ if (stvp->glsl_to_tgsi) {
error = st_translate_program(st->ctx,
TGSI_PROCESSOR_VERTEX,
ureg,
stvp->glsl_to_tgsi,
&stvp->Base.Base,
/* inputs */
- vpv->num_inputs,
- stvp->input_to_index,
+ stvp->num_inputs,
+ input_to_index,
NULL, /* inputSlotToAttr */
NULL, /* input semantic name */
NULL, /* input semantic index */
@@ -423,43 +418,75 @@ st_translate_vertex_program(struct st_context *st,
/* outputs */
num_outputs,
stvp->result_to_output,
- stvp->output_slot_to_attr,
- stvp->output_semantic_name,
- stvp->output_semantic_index,
- key->passthrough_edgeflags,
- key->clamp_color);
- else
+ output_slot_to_attr,
+ output_semantic_name,
+ output_semantic_index);
+
+ st_translate_stream_output_info(stvp->glsl_to_tgsi,
+ stvp->result_to_output,
+ &stvp->tgsi.stream_output);
+
+ free_glsl_to_tgsi_visitor(stvp->glsl_to_tgsi);
+ stvp->glsl_to_tgsi = NULL;
+ } else
error = st_translate_mesa_program(st->ctx,
TGSI_PROCESSOR_VERTEX,
ureg,
&stvp->Base.Base,
/* inputs */
- vpv->num_inputs,
- stvp->input_to_index,
+ stvp->num_inputs,
+ input_to_index,
NULL, /* input semantic name */
NULL, /* input semantic index */
NULL,
/* outputs */
num_outputs,
stvp->result_to_output,
- stvp->output_semantic_name,
- stvp->output_semantic_index,
- key->passthrough_edgeflags,
- key->clamp_color);
+ output_semantic_name,
+ output_semantic_index);
+
+ if (error) {
+ debug_printf("%s: failed to translate Mesa program:\n", __func__);
+ _mesa_print_program(&stvp->Base.Base);
+ debug_assert(0);
+ return false;
+ }
+
+ stvp->tgsi.tokens = ureg_get_tokens(ureg, NULL);
+ ureg_destroy(ureg);
+ return stvp->tgsi.tokens != NULL;
+}
- if (error)
- goto fail;
+static struct st_vp_variant *
+st_create_vp_variant(struct st_context *st,
+ struct st_vertex_program *stvp,
+ const struct st_vp_variant_key *key)
+{
+ struct st_vp_variant *vpv = CALLOC_STRUCT(st_vp_variant);
+ struct pipe_context *pipe = st->pipe;
- vpv->tgsi.tokens = ureg_get_tokens( ureg, NULL );
- if (!vpv->tgsi.tokens)
- goto fail;
+ vpv->key = *key;
+ vpv->tgsi.tokens = tgsi_dup_tokens(stvp->tgsi.tokens);
+ vpv->tgsi.stream_output = stvp->tgsi.stream_output;
+ vpv->num_inputs = stvp->num_inputs;
- ureg_destroy( ureg );
+ /* Emulate features. */
+ if (key->clamp_color || key->passthrough_edgeflags) {
+ const struct tgsi_token *tokens;
+ unsigned flags =
+ (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
+ (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);
- if (stvp->glsl_to_tgsi) {
- st_translate_stream_output_info(stvp->glsl_to_tgsi,
- stvp->result_to_output,
- &vpv->tgsi.stream_output);
+ tokens = tgsi_emulate(vpv->tgsi.tokens, flags);
+
+ if (tokens) {
+ tgsi_free_tokens(vpv->tgsi.tokens);
+ vpv->tgsi.tokens = tokens;
+
+ if (key->passthrough_edgeflags)
+ vpv->num_inputs++;
+ } else
+ fprintf(stderr, "mesa: cannot emulate deprecated features\n");
}
if (ST_DEBUG & DEBUG_TGSI) {
@@ -469,14 +496,6 @@ st_translate_vertex_program(struct st_context *st,
vpv->driver_shader = pipe->create_vs_state(pipe, &vpv->tgsi);
return vpv;
-
-fail:
- debug_printf("%s: failed to translate Mesa program:\n", __func__);
- _mesa_print_program(&stvp->Base.Base);
- debug_assert(0);
-
- ureg_destroy( ureg );
- return NULL;
}
@@ -499,7 +518,7 @@ st_get_vp_variant(struct st_context *st,
if (!vpv) {
/* create now */
- vpv = st_translate_vertex_program(st, stvp, key);
+ vpv = st_create_vp_variant(st, stvp, key);
if (vpv) {
/* insert into list */
vpv->next = stvp->variants;
@@ -533,19 +552,12 @@ st_translate_interp(enum glsl_interp_qualifier glsl_qual, bool is_color)
/**
- * Translate a Mesa fragment shader into a TGSI shader using extra info in
- * the key.
- * \return new fragment program variant
+ * Translate a Mesa fragment shader into a TGSI shader.
*/
-static struct st_fp_variant *
+bool
st_translate_fragment_program(struct st_context *st,
- struct st_fragment_program *stfp,
- const struct st_fp_variant_key *key)
+ struct st_fragment_program *stfp)
{
- struct pipe_context *pipe = st->pipe;
- struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
- GLboolean deleteFP = GL_FALSE;
-
GLuint outputMapping[FRAG_RESULT_MAX];
GLuint inputMapping[VARYING_SLOT_MAX];
GLuint inputSlotToAttr[VARYING_SLOT_MAX];
@@ -565,40 +577,8 @@ st_translate_fragment_program(struct st_context *st,
ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
uint fs_num_outputs = 0;
- if (!variant)
- return NULL;
-
- assert(!(key->bitmap && key->drawpixels));
memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));
- if (key->bitmap) {
- /* glBitmap drawing */
- struct gl_fragment_program *fp; /* we free this temp program below */
-
- st_make_bitmap_fragment_program(st, &stfp->Base,
- &fp, &variant->bitmap_sampler);
-
- variant->parameters = _mesa_clone_parameter_list(fp->Base.Parameters);
- stfp = st_fragment_program(fp);
- deleteFP = GL_TRUE;
- }
- else if (key->drawpixels) {
- /* glDrawPixels drawing */
- struct gl_fragment_program *fp; /* we free this temp program below */
-
- if (key->drawpixels_z || key->drawpixels_stencil) {
- fp = st_make_drawpix_z_stencil_program(st, key->drawpixels_z,
- key->drawpixels_stencil);
- }
- else {
- /* RGBA */
- st_make_drawpix_fragment_program(st, &stfp->Base, &fp);
- variant->parameters = _mesa_clone_parameter_list(fp->Base.Parameters);
- deleteFP = GL_TRUE;
- }
- stfp = st_fragment_program(fp);
- }
-
if (!stfp->glsl_to_tgsi)
_mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT);
@@ -620,8 +600,7 @@ st_translate_fragment_program(struct st_context *st,
interpLocation[slot] = TGSI_INTERPOLATE_LOC_CENTER;
if (stfp->Base.Base.SystemValuesRead & (SYSTEM_BIT_SAMPLE_ID |
- SYSTEM_BIT_SAMPLE_POS) ||
- key->persample_shading)
+ SYSTEM_BIT_SAMPLE_POS))
interpLocation[slot] = TGSI_INTERPOLATE_LOC_SAMPLE;
switch (attr) {
@@ -805,10 +784,8 @@ st_translate_fragment_program(struct st_context *st,
}
ureg = ureg_create_with_screen(TGSI_PROCESSOR_FRAGMENT, st->pipe->screen);
- if (ureg == NULL) {
- free(variant);
- return NULL;
- }
+ if (ureg == NULL)
+ return false;
if (ST_DEBUG & DEBUG_MESA) {
_mesa_print_program(&stfp->Base.Base);
@@ -841,7 +818,7 @@ st_translate_fragment_program(struct st_context *st,
}
}
- if (stfp->glsl_to_tgsi)
+ if (stfp->glsl_to_tgsi) {
st_translate_program(st->ctx,
TGSI_PROCESSOR_FRAGMENT,
ureg,
@@ -860,9 +837,11 @@ st_translate_fragment_program(struct st_context *st,
outputMapping,
NULL,
fs_output_semantic_name,
- fs_output_semantic_index, FALSE,
- key->clamp_color );
- else
+ fs_output_semantic_index);
+
+ free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
+ stfp->glsl_to_tgsi = NULL;
+ } else
st_translate_mesa_program(st->ctx,
TGSI_PROCESSOR_FRAGMENT,
ureg,
@@ -877,31 +856,134 @@ st_translate_fragment_program(struct st_context *st,
fs_num_outputs,
outputMapping,
fs_output_semantic_name,
- fs_output_semantic_index, FALSE,
- key->clamp_color);
+ fs_output_semantic_index);
+
+ stfp->tgsi.tokens = ureg_get_tokens(ureg, NULL);
+ ureg_destroy(ureg);
+ return stfp->tgsi.tokens != NULL;
+}
+
+static struct st_fp_variant *
+st_create_fp_variant(struct st_context *st,
+ struct st_fragment_program *stfp,
+ const struct st_fp_variant_key *key)
+{
+ struct pipe_context *pipe = st->pipe;
+ struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
+ struct pipe_shader_state tgsi = {0};
+
+ if (!variant)
+ return NULL;
+
+ tgsi.tokens = stfp->tgsi.tokens;
- variant->tgsi.tokens = ureg_get_tokens( ureg, NULL );
- ureg_destroy( ureg );
+ assert(!(key->bitmap && key->drawpixels));
+
+ /* Emulate features. */
+ if (key->clamp_color || key->persample_shading) {
+ const struct tgsi_token *tokens;
+ unsigned flags =
+ (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
+ (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);
+
+ tokens = tgsi_emulate(tgsi.tokens, flags);
+
+ if (tokens)
+ tgsi.tokens = tokens;
+ else
+ fprintf(stderr, "mesa: cannot emulate deprecated features\n");
+ }
+
+ /* glBitmap */
+ if (key->bitmap) {
+ const struct tgsi_token *tokens;
+
+ variant->bitmap_sampler = ffs(~stfp->Base.Base.SamplersUsed) - 1;
+
+ tokens = st_get_bitmap_shader(tgsi.tokens,
+ variant->bitmap_sampler,
+ st->needs_texcoord_semantic,
+ st->bitmap.tex_format ==
+ PIPE_FORMAT_L8_UNORM);
+
+ if (tokens) {
+ if (tgsi.tokens != stfp->tgsi.tokens)
+ tgsi_free_tokens(tgsi.tokens);
+ tgsi.tokens = tokens;
+ variant->parameters =
+ _mesa_clone_parameter_list(stfp->Base.Base.Parameters);
+ } else
+ fprintf(stderr, "mesa: cannot create a shader for glBitmap\n");
+ }
+
+ /* glDrawPixels (color only) */
+ if (key->drawpixels) {
+ const struct tgsi_token *tokens;
+ unsigned scale_const = 0, bias_const = 0, texcoord_const = 0;
+
+ /* Find the first unused slot. */
+ variant->drawpix_sampler = ffs(~stfp->Base.Base.SamplersUsed) - 1;
+
+ if (key->pixelMaps) {
+ unsigned samplers_used = stfp->Base.Base.SamplersUsed |
+ (1 << variant->drawpix_sampler);
+
+ variant->pixelmap_sampler = ffs(~samplers_used) - 1;
+ }
+
+ variant->parameters =
+ _mesa_clone_parameter_list(stfp->Base.Base.Parameters);
+
+ if (key->scaleAndBias) {
+ static const gl_state_index scale_state[STATE_LENGTH] =
+ { STATE_INTERNAL, STATE_PT_SCALE };
+ static const gl_state_index bias_state[STATE_LENGTH] =
+ { STATE_INTERNAL, STATE_PT_BIAS };
+
+ scale_const = _mesa_add_state_reference(variant->parameters,
+ scale_state);
+ bias_const = _mesa_add_state_reference(variant->parameters,
+ bias_state);
+ }
+
+ {
+ static const gl_state_index state[STATE_LENGTH] =
+ { STATE_INTERNAL, STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
+
+ texcoord_const = _mesa_add_state_reference(variant->parameters,
+ state);
+ }
+
+ tokens = st_get_drawpix_shader(tgsi.tokens,
+ st->needs_texcoord_semantic,
+ key->scaleAndBias, scale_const,
+ bias_const, key->pixelMaps,
+ variant->drawpix_sampler,
+ variant->pixelmap_sampler,
+ texcoord_const);
+
+ if (tokens) {
+ if (tgsi.tokens != stfp->tgsi.tokens)
+ tgsi_free_tokens(tgsi.tokens);
+ tgsi.tokens = tokens;
+ } else
+ fprintf(stderr, "mesa: cannot create a shader for glDrawPixels\n");
+ }
if (ST_DEBUG & DEBUG_TGSI) {
- tgsi_dump(variant->tgsi.tokens, 0/*TGSI_DUMP_VERBOSE*/);
+ tgsi_dump(tgsi.tokens, 0);
debug_printf("\n");
}
/* fill in variant */
- variant->driver_shader = pipe->create_fs_state(pipe, &variant->tgsi);
+ variant->driver_shader = pipe->create_fs_state(pipe, &tgsi);
variant->key = *key;
- if (deleteFP) {
- /* Free the temporary program made above */
- struct gl_fragment_program *fp = &stfp->Base;
- _mesa_reference_fragprog(st->ctx, &fp, NULL);
- }
-
+ if (tgsi.tokens != stfp->tgsi.tokens)
+ tgsi_free_tokens(tgsi.tokens);
return variant;
}
-
/**
* Translate fragment program if needed.
*/
@@ -921,7 +1003,7 @@ st_get_fp_variant(struct st_context *st,
if (!fpv) {
/* create new */
- fpv = st_translate_fragment_program(st, stfp, key);
+ fpv = st_create_fp_variant(st, stfp, key);
if (fpv) {
/* insert into list */
fpv->next = stfp->variants;
@@ -1191,9 +1273,7 @@ st_translate_program_common(struct st_context *st,
outputMapping,
outputSlotToAttr,
output_semantic_name,
- output_semantic_index,
- FALSE,
- FALSE);
+ output_semantic_index);
out_state->tokens = ureg_get_tokens(ureg, NULL);
ureg_destroy(ureg);
@@ -1217,19 +1297,15 @@ st_translate_program_common(struct st_context *st,
/**
* Translate a geometry program to create a new variant.
*/
-static struct st_gp_variant *
+bool
st_translate_geometry_program(struct st_context *st,
- struct st_geometry_program *stgp,
- const struct st_gp_variant_key *key)
+ struct st_geometry_program *stgp)
{
- struct pipe_context *pipe = st->pipe;
struct ureg_program *ureg;
- struct st_gp_variant *gpv;
- struct pipe_shader_state state;
ureg = ureg_create_with_screen(TGSI_PROCESSOR_GEOMETRY, st->pipe->screen);
if (ureg == NULL)
- return NULL;
+ return false;
ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM, stgp->Base.InputType);
ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, stgp->Base.OutputType);
@@ -1238,19 +1314,29 @@ st_translate_geometry_program(struct st_context *st,
ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS, stgp->Base.Invocations);
st_translate_program_common(st, &stgp->Base.Base, stgp->glsl_to_tgsi, ureg,
- TGSI_PROCESSOR_GEOMETRY, &state);
+ TGSI_PROCESSOR_GEOMETRY, &stgp->tgsi);
+
+ free_glsl_to_tgsi_visitor(stgp->glsl_to_tgsi);
+ stgp->glsl_to_tgsi = NULL;
+ return true;
+}
+
+
+static struct st_gp_variant *
+st_create_gp_variant(struct st_context *st,
+ struct st_geometry_program *stgp,
+ const struct st_gp_variant_key *key)
+{
+ struct pipe_context *pipe = st->pipe;
+ struct st_gp_variant *gpv;
gpv = CALLOC_STRUCT(st_gp_variant);
- if (!gpv) {
- ureg_free_tokens(state.tokens);
+ if (!gpv)
return NULL;
- }
/* fill in new variant */
- gpv->driver_shader = pipe->create_gs_state(pipe, &state);
+ gpv->driver_shader = pipe->create_gs_state(pipe, &stgp->tgsi);
gpv->key = *key;
-
- ureg_free_tokens(state.tokens);
return gpv;
}
@@ -1274,7 +1360,7 @@ st_get_gp_variant(struct st_context *st,
if (!gpv) {
/* create new */
- gpv = st_translate_geometry_program(st, stgp, key);
+ gpv = st_create_gp_variant(st, stgp, key);
if (gpv) {
/* insert into list */
gpv->next = stgp->variants;
@@ -1289,38 +1375,43 @@ st_get_gp_variant(struct st_context *st,
/**
* Translate a tessellation control program to create a new variant.
*/
-static struct st_tcp_variant *
+bool
st_translate_tessctrl_program(struct st_context *st,
- struct st_tessctrl_program *sttcp,
- const struct st_tcp_variant_key *key)
+ struct st_tessctrl_program *sttcp)
{
- struct pipe_context *pipe = st->pipe;
struct ureg_program *ureg;
- struct st_tcp_variant *tcpv;
- struct pipe_shader_state state;
- ureg = ureg_create_with_screen(TGSI_PROCESSOR_TESS_CTRL, pipe->screen);
- if (ureg == NULL) {
- return NULL;
- }
+ ureg = ureg_create_with_screen(TGSI_PROCESSOR_TESS_CTRL, st->pipe->screen);
+ if (ureg == NULL)
+ return false;
ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT,
sttcp->Base.VerticesOut);
st_translate_program_common(st, &sttcp->Base.Base, sttcp->glsl_to_tgsi,
- ureg, TGSI_PROCESSOR_TESS_CTRL, &state);
+ ureg, TGSI_PROCESSOR_TESS_CTRL, &sttcp->tgsi);
+
+ free_glsl_to_tgsi_visitor(sttcp->glsl_to_tgsi);
+ sttcp->glsl_to_tgsi = NULL;
+ return true;
+}
+
+
+static struct st_tcp_variant *
+st_create_tcp_variant(struct st_context *st,
+ struct st_tessctrl_program *sttcp,
+ const struct st_tcp_variant_key *key)
+{
+ struct pipe_context *pipe = st->pipe;
+ struct st_tcp_variant *tcpv;
tcpv = CALLOC_STRUCT(st_tcp_variant);
- if (!tcpv) {
- ureg_free_tokens(state.tokens);
+ if (!tcpv)
return NULL;
- }
/* fill in new variant */
- tcpv->driver_shader = pipe->create_tcs_state(pipe, &state);
+ tcpv->driver_shader = pipe->create_tcs_state(pipe, &sttcp->tgsi);
tcpv->key = *key;
-
- ureg_free_tokens(state.tokens);
return tcpv;
}
@@ -1344,7 +1435,7 @@ st_get_tcp_variant(struct st_context *st,
if (!tcpv) {
/* create new */
- tcpv = st_translate_tessctrl_program(st, sttcp, key);
+ tcpv = st_create_tcp_variant(st, sttcp, key);
if (tcpv) {
/* insert into list */
tcpv->next = sttcp->variants;
@@ -1359,20 +1450,15 @@ st_get_tcp_variant(struct st_context *st,
/**
* Translate a tessellation evaluation program to create a new variant.
*/
-static struct st_tep_variant *
+bool
st_translate_tesseval_program(struct st_context *st,
- struct st_tesseval_program *sttep,
- const struct st_tep_variant_key *key)
+ struct st_tesseval_program *sttep)
{
- struct pipe_context *pipe = st->pipe;
struct ureg_program *ureg;
- struct st_tep_variant *tepv;
- struct pipe_shader_state state;
- ureg = ureg_create_with_screen(TGSI_PROCESSOR_TESS_EVAL, pipe->screen);
- if (ureg == NULL) {
- return NULL;
- }
+ ureg = ureg_create_with_screen(TGSI_PROCESSOR_TESS_EVAL, st->pipe->screen);
+ if (ureg == NULL)
+ return false;
if (sttep->Base.PrimitiveMode == GL_ISOLINES)
ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES);
@@ -1400,19 +1486,29 @@ st_translate_tesseval_program(struct st_context *st,
ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE, sttep->Base.PointMode);
st_translate_program_common(st, &sttep->Base.Base, sttep->glsl_to_tgsi,
- ureg, TGSI_PROCESSOR_TESS_EVAL, &state);
+ ureg, TGSI_PROCESSOR_TESS_EVAL, &sttep->tgsi);
+
+ free_glsl_to_tgsi_visitor(sttep->glsl_to_tgsi);
+ sttep->glsl_to_tgsi = NULL;
+ return true;
+}
+
+
+static struct st_tep_variant *
+st_create_tep_variant(struct st_context *st,
+ struct st_tesseval_program *sttep,
+ const struct st_tep_variant_key *key)
+{
+ struct pipe_context *pipe = st->pipe;
+ struct st_tep_variant *tepv;
tepv = CALLOC_STRUCT(st_tep_variant);
- if (!tepv) {
- ureg_free_tokens(state.tokens);
+ if (!tepv)
return NULL;
- }
/* fill in new variant */
- tepv->driver_shader = pipe->create_tes_state(pipe, &state);
+ tepv->driver_shader = pipe->create_tes_state(pipe, &sttep->tgsi);
tepv->key = *key;
-
- ureg_free_tokens(state.tokens);
return tepv;
}
@@ -1436,7 +1532,7 @@ st_get_tep_variant(struct st_context *st,
if (!tepv) {
/* create new */
- tepv = st_translate_tesseval_program(st, sttep, key);
+ tepv = st_create_tep_variant(st, sttep, key);
if (tepv) {
/* insert into list */
tepv->next = sttep->variants;
diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h
index 7013993fe38..d9b53ac008c 100644
--- a/src/mesa/state_tracker/st_program.h
+++ b/src/mesa/state_tracker/st_program.h
@@ -59,8 +59,6 @@ struct st_fp_variant_key
GLuint drawpixels:1; /**< glDrawPixels variant */
GLuint scaleAndBias:1; /**< glDrawPixels w/ scale and/or bias? */
GLuint pixelMaps:1; /**< glDrawPixels w/ pixel lookup map? */
- GLuint drawpixels_z:1; /**< glDrawPixels(GL_DEPTH) */
- GLuint drawpixels_stencil:1; /**< glDrawPixels(GL_STENCIL) */
/** for ARB_color_buffer_float */
GLuint clamp_color:1;
@@ -78,8 +76,6 @@ struct st_fp_variant
/** Parameters which generated this version of fragment program */
struct st_fp_variant_key key;
- struct pipe_shader_state tgsi;
-
/** Driver's compiled shader */
void *driver_shader;
@@ -87,6 +83,10 @@ struct st_fp_variant
struct gl_program_parameter_list *parameters;
uint bitmap_sampler;
+ /** For glDrawPixels variants */
+ unsigned drawpix_sampler;
+ unsigned pixelmap_sampler;
+
/** next in linked list */
struct st_fp_variant *next;
};
@@ -98,6 +98,7 @@ struct st_fp_variant
struct st_fragment_program
{
struct gl_fragment_program Base;
+ struct pipe_shader_state tgsi;
struct glsl_to_tgsi_visitor* glsl_to_tgsi;
struct st_fp_variant *variants;
@@ -153,20 +154,16 @@ struct st_vp_variant
struct st_vertex_program
{
struct gl_vertex_program Base; /**< The Mesa vertex program */
+ struct pipe_shader_state tgsi;
struct glsl_to_tgsi_visitor* glsl_to_tgsi;
/** maps a Mesa VERT_ATTRIB_x to a packed TGSI input index */
- GLuint input_to_index[VERT_ATTRIB_MAX];
/** maps a TGSI input index back to a Mesa VERT_ATTRIB_x */
GLuint index_to_input[PIPE_MAX_SHADER_INPUTS];
GLuint num_inputs;
/** Maps VARYING_SLOT_x to slot */
GLuint result_to_output[VARYING_SLOT_MAX];
- GLuint output_slot_to_attr[VARYING_SLOT_MAX];
- ubyte output_semantic_name[VARYING_SLOT_MAX];
- ubyte output_semantic_index[VARYING_SLOT_MAX];
- GLuint num_outputs;
/** List of translated variants of this vertex program.
*/
@@ -203,6 +200,7 @@ struct st_gp_variant
struct st_geometry_program
{
struct gl_geometry_program Base; /**< The Mesa geometry program */
+ struct pipe_shader_state tgsi;
struct glsl_to_tgsi_visitor* glsl_to_tgsi;
struct st_gp_variant *variants;
@@ -238,6 +236,7 @@ struct st_tcp_variant
struct st_tessctrl_program
{
struct gl_tess_ctrl_program Base; /**< The Mesa tess ctrl program */
+ struct pipe_shader_state tgsi;
struct glsl_to_tgsi_visitor* glsl_to_tgsi;
struct st_tcp_variant *variants;
@@ -273,6 +272,7 @@ struct st_tep_variant
struct st_tesseval_program
{
struct gl_tess_eval_program Base; /**< The Mesa tess eval program */
+ struct pipe_shader_state tgsi;
struct glsl_to_tgsi_visitor* glsl_to_tgsi;
struct st_tep_variant *variants;
@@ -414,16 +414,6 @@ st_get_tep_variant(struct st_context *st,
struct st_tesseval_program *stgp,
const struct st_tep_variant_key *key);
-
-extern void
-st_prepare_vertex_program(struct gl_context *ctx,
- struct st_vertex_program *stvp);
-
-extern GLboolean
-st_prepare_fragment_program(struct gl_context *ctx,
- struct st_fragment_program *stfp);
-
-
extern void
st_release_vp_variants( struct st_context *st,
struct st_vertex_program *stvp );
@@ -447,6 +437,25 @@ st_release_tep_variants(struct st_context *st,
extern void
st_destroy_program_variants(struct st_context *st);
+extern bool
+st_translate_vertex_program(struct st_context *st,
+ struct st_vertex_program *stvp);
+
+extern bool
+st_translate_fragment_program(struct st_context *st,
+ struct st_fragment_program *stfp);
+
+extern bool
+st_translate_geometry_program(struct st_context *st,
+ struct st_geometry_program *stgp);
+
+extern bool
+st_translate_tessctrl_program(struct st_context *st,
+ struct st_tessctrl_program *sttcp);
+
+extern bool
+st_translate_tesseval_program(struct st_context *st,
+ struct st_tesseval_program *sttep);
extern void
st_print_current_vertex_program(void);
diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c
index c130ab3f93d..6f29abbe1ba 100644
--- a/src/mesa/tnl/t_draw.c
+++ b/src/mesa/tnl/t_draw.c
@@ -35,6 +35,7 @@
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/enums.h"
+#include "util/half_float.h"
#include "t_context.h"
#include "tnl.h"
diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c
index e3eb286e482..5e1a760eb2c 100644
--- a/src/mesa/vbo/vbo_context.c
+++ b/src/mesa/vbo/vbo_context.c
@@ -33,7 +33,6 @@
#include "vbo.h"
#include "vbo_context.h"
-#define NR_MAT_ATTRIBS 12
static GLuint check_size( const GLfloat *attr )
{
@@ -44,32 +43,47 @@ static GLuint check_size( const GLfloat *attr )
}
+/**
+ * Helper for initializing a vertex array.
+ */
+static void
+init_array(struct gl_context *ctx, struct gl_client_array *cl,
+ unsigned size, const void *pointer)
+{
+ memset(cl, 0, sizeof(*cl));
+
+ cl->Size = size;
+ cl->Type = GL_FLOAT;
+ cl->Format = GL_RGBA;
+ cl->Stride = 0;
+ cl->StrideB = 0;
+ cl->_ElementSize = cl->Size * sizeof(GLfloat);
+ cl->Ptr = pointer;
+ cl->Enabled = 1;
+
+ _mesa_reference_buffer_object(ctx, &cl->BufferObj,
+ ctx->Shared->NullBufferObj);
+}
+
+
+/**
+ * Set up the vbo->currval arrays to point at the context's current
+ * vertex attributes (with strides = 0).
+ */
static void init_legacy_currval(struct gl_context *ctx)
{
struct vbo_context *vbo = vbo_context(ctx);
- struct gl_client_array *arrays = &vbo->currval[VBO_ATTRIB_POS];
GLuint i;
- memset(arrays, 0, sizeof(*arrays) * VERT_ATTRIB_FF_MAX);
-
/* Set up a constant (StrideB == 0) array for each current
* attribute:
*/
for (i = 0; i < VERT_ATTRIB_FF_MAX; i++) {
- struct gl_client_array *cl = &arrays[i];
+ struct gl_client_array *cl = &vbo->currval[VERT_ATTRIB_FF(i)];
- /* Size will have to be determined at runtime:
- */
- cl->Size = check_size(ctx->Current.Attrib[i]);
- cl->Stride = 0;
- cl->StrideB = 0;
- cl->Enabled = 1;
- cl->Type = GL_FLOAT;
- cl->Format = GL_RGBA;
- cl->Ptr = (const void *)ctx->Current.Attrib[i];
- cl->_ElementSize = cl->Size * sizeof(GLfloat);
- _mesa_reference_buffer_object(ctx, &cl->BufferObj,
- ctx->Shared->NullBufferObj);
+ init_array(ctx, cl,
+ check_size(ctx->Current.Attrib[i]),
+ ctx->Current.Attrib[i]);
}
}
@@ -77,26 +91,12 @@ static void init_legacy_currval(struct gl_context *ctx)
static void init_generic_currval(struct gl_context *ctx)
{
struct vbo_context *vbo = vbo_context(ctx);
- struct gl_client_array *arrays = &vbo->currval[VBO_ATTRIB_GENERIC0];
GLuint i;
- memset(arrays, 0, sizeof(*arrays) * VERT_ATTRIB_GENERIC_MAX);
-
for (i = 0; i < VERT_ATTRIB_GENERIC_MAX; i++) {
- struct gl_client_array *cl = &arrays[i];
+ struct gl_client_array *cl = &vbo->currval[VBO_ATTRIB_GENERIC0 + i];
- /* This will have to be determined at runtime:
- */
- cl->Size = 1;
- cl->Type = GL_FLOAT;
- cl->Format = GL_RGBA;
- cl->Ptr = (const void *)ctx->Current.Attrib[VERT_ATTRIB_GENERIC0 + i];
- cl->Stride = 0;
- cl->StrideB = 0;
- cl->Enabled = 1;
- cl->_ElementSize = cl->Size * sizeof(GLfloat);
- _mesa_reference_buffer_object(ctx, &cl->BufferObj,
- ctx->Shared->NullBufferObj);
+ init_array(ctx, cl, 1, ctx->Current.Attrib[VERT_ATTRIB_GENERIC0 + i]);
}
}
@@ -104,46 +104,34 @@ static void init_generic_currval(struct gl_context *ctx)
static void init_mat_currval(struct gl_context *ctx)
{
struct vbo_context *vbo = vbo_context(ctx);
- struct gl_client_array *arrays =
- &vbo->currval[VBO_ATTRIB_MAT_FRONT_AMBIENT];
GLuint i;
- assert(NR_MAT_ATTRIBS == MAT_ATTRIB_MAX);
-
- memset(arrays, 0, sizeof(*arrays) * NR_MAT_ATTRIBS);
-
/* Set up a constant (StrideB == 0) array for each current
* attribute:
*/
- for (i = 0; i < NR_MAT_ATTRIBS; i++) {
- struct gl_client_array *cl = &arrays[i];
+ for (i = 0; i < MAT_ATTRIB_MAX; i++) {
+ struct gl_client_array *cl =
+ &vbo->currval[VBO_ATTRIB_MAT_FRONT_AMBIENT + i];
+ unsigned size;
/* Size is fixed for the material attributes, for others will
* be determined at runtime:
*/
- switch (i - VERT_ATTRIB_GENERIC0) {
+ switch (i) {
case MAT_ATTRIB_FRONT_SHININESS:
case MAT_ATTRIB_BACK_SHININESS:
- cl->Size = 1;
- break;
+ size = 1;
+ break;
case MAT_ATTRIB_FRONT_INDEXES:
case MAT_ATTRIB_BACK_INDEXES:
- cl->Size = 3;
- break;
+ size = 3;
+ break;
default:
- cl->Size = 4;
- break;
+ size = 4;
+ break;
}
- cl->Ptr = (const void *)ctx->Light.Material.Attrib[i];
- cl->Type = GL_FLOAT;
- cl->Format = GL_RGBA;
- cl->Stride = 0;
- cl->StrideB = 0;
- cl->Enabled = 1;
- cl->_ElementSize = cl->Size * sizeof(GLfloat);
- _mesa_reference_buffer_object(ctx, &cl->BufferObj,
- ctx->Shared->NullBufferObj);
+ init_array(ctx, cl, size, ctx->Light.Material.Attrib[i]);
}
}
@@ -175,7 +163,7 @@ GLboolean _vbo_CreateContext( struct gl_context *ctx )
for (i = 0; i < ARRAY_SIZE(vbo->map_vp_none); i++)
vbo->map_vp_none[i] = i;
/* map material attribs to generic slots */
- for (i = 0; i < NR_MAT_ATTRIBS; i++)
+ for (i = 0; i < MAT_ATTRIB_MAX; i++)
vbo->map_vp_none[VERT_ATTRIB_GENERIC(i)]
= VBO_ATTRIB_MAT_FRONT_AMBIENT + i;
diff --git a/src/mesa/vbo/vbo_exec.h b/src/mesa/vbo/vbo_exec.h
index 80f3015925d..00378eb7984 100644
--- a/src/mesa/vbo/vbo_exec.h
+++ b/src/mesa/vbo/vbo_exec.h
@@ -79,7 +79,7 @@ struct vbo_exec_copied_vtx {
struct vbo_exec_context
{
- struct gl_context *ctx;
+ struct gl_context *ctx;
GLvertexformat vtxfmt;
GLvertexformat vtxfmt_noop;
GLboolean validating; /**< if we're in the middle of state validation */
@@ -97,15 +97,17 @@ struct vbo_exec_context
GLuint buffer_used; /* in bytes */
fi_type vertex[VBO_ATTRIB_MAX*4]; /* current vertex */
- GLuint vert_count;
- GLuint max_vert;
+ GLuint vert_count; /**< Number of vertices currently in buffer */
+ GLuint max_vert; /**< Max number of vertices allowed in buffer */
struct vbo_exec_copied_vtx copied;
- GLubyte attrsz[VBO_ATTRIB_MAX];
- GLenum attrtype[VBO_ATTRIB_MAX];
- GLubyte active_sz[VBO_ATTRIB_MAX];
+ GLubyte attrsz[VBO_ATTRIB_MAX]; /**< nr. of attrib components (1..4) */
+ GLenum attrtype[VBO_ATTRIB_MAX]; /**< GL_FLOAT, GL_DOUBLE, GL_INT, etc */
+ GLubyte active_sz[VBO_ATTRIB_MAX]; /**< attrib size (nr. 32-bit words) */
+ /** pointers into the current 'vertex' array, declared above */
fi_type *attrptr[VBO_ATTRIB_MAX];
+
struct gl_client_array arrays[VERT_ATTRIB_MAX];
/* According to program mode, the values above plus current
@@ -115,7 +117,6 @@ struct vbo_exec_context
const struct gl_client_array *inputs[VERT_ATTRIB_MAX];
} vtx;
-
struct {
GLboolean recalculate_maps;
struct vbo_exec_eval1_map map1[VERT_ATTRIB_MAX];
@@ -131,7 +132,7 @@ struct vbo_exec_context
GLboolean recalculate_inputs;
} array;
- /* Which flags to set in vbo_exec_BeginVertices() */
+ /* Which flags to set in vbo_exec_begin_vertices() */
GLbitfield begin_vertices_flags;
#ifdef DEBUG
@@ -147,8 +148,6 @@ void vbo_exec_init( struct gl_context *ctx );
void vbo_exec_destroy( struct gl_context *ctx );
void vbo_exec_invalidate_state( struct gl_context *ctx, GLuint new_state );
-void vbo_exec_BeginVertices( struct gl_context *ctx );
-
/* Internal functions:
*/
diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c
index 583a2f9b79f..7ae08fe3062 100644
--- a/src/mesa/vbo/vbo_exec_api.c
+++ b/src/mesa/vbo/vbo_exec_api.c
@@ -375,13 +375,16 @@ vbo_exec_wrap_upgrade_vertex(struct vbo_exec_context *exec,
* This is when a vertex attribute transitions to a different size.
* For example, we saw a bunch of glTexCoord2f() calls and now we got a
* glTexCoord4f() call. We promote the array from size=2 to size=4.
+ * \param newSize new size of the attribute (number of 32-bit words).
*/
static void
-vbo_exec_fixup_vertex(struct gl_context *ctx, GLuint attr, GLuint newSize, GLenum newType)
+vbo_exec_fixup_vertex(struct gl_context *ctx, GLuint attr,
+ GLuint newSize, GLenum newType)
{
struct vbo_exec_context *exec = &vbo_context(ctx)->exec;
- if (newSize > exec->vtx.attrsz[attr] || newType != exec->vtx.attrtype[attr]) {
+ if (newSize > exec->vtx.attrsz[attr] ||
+ newType != exec->vtx.attrtype[attr]) {
/* New size is larger. Need to flush existing vertices and get
* an enlarged vertex format.
*/
@@ -411,20 +414,49 @@ vbo_exec_fixup_vertex(struct gl_context *ctx, GLuint attr, GLuint newSize, GLenu
/**
+ * Called upon first glVertex, glColor, glTexCoord, etc.
+ */
+static void
+vbo_exec_begin_vertices(struct gl_context *ctx)
+{
+ struct vbo_exec_context *exec = &vbo_context(ctx)->exec;
+
+ vbo_exec_vtx_map( exec );
+
+ assert((ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT) == 0);
+ assert(exec->begin_vertices_flags);
+
+ ctx->Driver.NeedFlush |= exec->begin_vertices_flags;
+}
+
+
+/**
* This macro is used to implement all the glVertex, glColor, glTexCoord,
* glVertexAttrib, etc functions.
+ * \param A attribute index
+ * \param N attribute size (1..4)
+ * \param T type (GL_FLOAT, GL_DOUBLE, GL_INT, GL_UNSIGNED_INT)
+ * \param C cast type (fi_type or double)
+ * \param V0, V1, V2, V3 attribute value
*/
#define ATTR_UNION( A, N, T, C, V0, V1, V2, V3 ) \
do { \
struct vbo_exec_context *exec = &vbo_context(ctx)->exec; \
int sz = (sizeof(C) / sizeof(GLfloat)); \
- if (unlikely(!(ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT))) \
- vbo_exec_BeginVertices(ctx); \
\
+ assert(sz == 1 || sz == 2); \
+ \
+ if (unlikely(!(ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT))) { \
+ vbo_exec_begin_vertices(ctx); \
+ } \
+ \
+ /* check if attribute size or type is changing */ \
if (unlikely(exec->vtx.active_sz[A] != N * sz) || \
- unlikely(exec->vtx.attrtype[A] != T)) \
+ unlikely(exec->vtx.attrtype[A] != T)) { \
vbo_exec_fixup_vertex(ctx, A, N * sz, T); \
+ } \
\
+ /* store vertex attribute in vertex buffer */ \
{ \
C *dest = (C *)exec->vtx.attrptr[A]; \
if (N>0) dest[0] = V0; \
@@ -438,6 +470,7 @@ do { \
/* This is a glVertex call */ \
GLuint i; \
\
+ /* copy 32-bit words */ \
for (i = 0; i < exec->vtx.vertex_size; i++) \
exec->vtx.buffer_ptr[i] = exec->vtx.vertex[i]; \
\
@@ -1149,22 +1182,6 @@ void vbo_exec_vtx_destroy( struct vbo_exec_context *exec )
/**
- * Called upon first glVertex, glColor, glTexCoord, etc.
- */
-void vbo_exec_BeginVertices( struct gl_context *ctx )
-{
- struct vbo_exec_context *exec = &vbo_context(ctx)->exec;
-
- vbo_exec_vtx_map( exec );
-
- assert((ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT) == 0);
- assert(exec->begin_vertices_flags);
-
- ctx->Driver.NeedFlush |= exec->begin_vertices_flags;
-}
-
-
-/**
* If inside glBegin()/glEnd(), it should assert(0). Otherwise, if
* FLUSH_STORED_VERTICES bit in \p flags is set flushes any buffered
* vertices, if FLUSH_UPDATE_CURRENT bit is set updates
@@ -1197,7 +1214,7 @@ void vbo_exec_FlushVertices( struct gl_context *ctx, GLuint flags )
/* Flush (draw), and make sure VBO is left unmapped when done */
vbo_exec_FlushVertices_internal(exec, GL_TRUE);
- /* Need to do this to ensure vbo_exec_BeginVertices gets called again:
+ /* Need to do this to ensure vbo_exec_begin_vertices gets called again:
*/
ctx->Driver.NeedFlush &= ~(FLUSH_UPDATE_CURRENT | flags);
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index 2bfb0c32b73..174cbc37c26 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -53,10 +53,10 @@ vbo_exec_debug_verts( struct vbo_exec_context *exec )
for (i = 0 ; i < exec->vtx.prim_count ; i++) {
struct _mesa_prim *prim = &exec->vtx.prim[i];
printf(" prim %d: %s%s %d..%d %s %s\n",
- i,
+ i,
_mesa_lookup_prim_by_nr(prim->mode),
prim->weak ? " (weak)" : "",
- prim->start,
+ prim->start,
prim->start + prim->count,
prim->begin ? "BEGIN" : "(wrap)",
prim->end ? "END" : "(wrap)");
@@ -79,7 +79,6 @@ vbo_copy_vertices( struct vbo_exec_context *exec )
exec->vtx.prim[exec->vtx.prim_count-1].start *
exec->vtx.vertex_size);
-
switch (exec->ctx->Driver.CurrentExecPrimitive) {
case GL_POINTS:
return 0;
@@ -219,7 +218,7 @@ vbo_exec_bind_arrays( struct gl_context *ctx )
exec->vtx.inputs[attr] = &arrays[attr];
if (_mesa_is_bufferobj(exec->vtx.bufferobj)) {
- /* a real buffer obj: Ptr is an offset, not a pointer*/
+ /* a real buffer obj: Ptr is an offset, not a pointer */
assert(exec->vtx.bufferobj->Mappings[MAP_INTERNAL].Pointer);
assert(offset >= 0);
arrays[attr].Ptr = (GLubyte *)
@@ -259,7 +258,7 @@ vbo_exec_vtx_unmap( struct vbo_exec_context *exec )
{
if (_mesa_is_bufferobj(exec->vtx.bufferobj)) {
struct gl_context *ctx = exec->ctx;
-
+
if (ctx->Driver.FlushMappedBufferRange) {
GLintptr offset = exec->vtx.buffer_used -
exec->vtx.bufferobj->Mappings[MAP_INTERNAL].Offset;
@@ -277,7 +276,7 @@ vbo_exec_vtx_unmap( struct vbo_exec_context *exec )
assert(exec->vtx.buffer_used <= VBO_VERT_BUFFER_SIZE);
assert(exec->vtx.buffer_ptr != NULL);
-
+
ctx->Driver.UnmapBuffer(ctx, exec->vtx.bufferobj, MAP_INTERNAL);
exec->vtx.buffer_map = NULL;
exec->vtx.buffer_ptr = NULL;
@@ -299,7 +298,7 @@ vbo_exec_vtx_map( struct vbo_exec_context *exec )
GL_MAP_FLUSH_EXPLICIT_BIT |
MESA_MAP_NOWAIT_BIT;
const GLenum usage = GL_STREAM_DRAW_ARB;
-
+
if (!_mesa_is_bufferobj(exec->vtx.bufferobj))
return;
@@ -323,7 +322,7 @@ vbo_exec_vtx_map( struct vbo_exec_context *exec )
exec->vtx.buffer_ptr = exec->vtx.buffer_map = NULL;
}
}
-
+
if (!exec->vtx.buffer_map) {
/* Need to allocate a new VBO */
exec->vtx.buffer_used = 0;
@@ -381,14 +380,14 @@ vbo_exec_vtx_flush(struct vbo_exec_context *exec, GLboolean keepUnmapped)
if (0)
vbo_exec_debug_verts( exec );
- if (exec->vtx.prim_count &&
+ if (exec->vtx.prim_count &&
exec->vtx.vert_count) {
- exec->vtx.copied.nr = vbo_copy_vertices( exec );
+ exec->vtx.copied.nr = vbo_copy_vertices( exec );
if (exec->vtx.copied.nr != exec->vtx.vert_count) {
struct gl_context *ctx = exec->ctx;
-
+
/* Before the update_state() as this may raise _NEW_VARYING_VP_INPUTS
* from _mesa_set_varying_vp_inputs().
*/
@@ -405,7 +404,7 @@ vbo_exec_vtx_flush(struct vbo_exec_context *exec, GLboolean keepUnmapped)
printf("%s %d %d\n", __func__, exec->vtx.prim_count,
exec->vtx.vert_count);
- vbo_context(ctx)->draw_prims( ctx,
+ vbo_context(ctx)->draw_prims( ctx,
exec->vtx.prim,
exec->vtx.prim_count,
NULL,
@@ -433,7 +432,7 @@ vbo_exec_vtx_flush(struct vbo_exec_context *exec, GLboolean keepUnmapped)
if (keepUnmapped || exec->vtx.vertex_size == 0)
exec->vtx.max_vert = 0;
else
- exec->vtx.max_vert = ((VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) /
+ exec->vtx.max_vert = ((VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) /
(exec->vtx.vertex_size * sizeof(GLfloat)));
exec->vtx.buffer_ptr = exec->vtx.buffer_map;
diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c
index 1a70d168c55..fdc677f9a07 100644
--- a/src/mesa/vbo/vbo_save_api.c
+++ b/src/mesa/vbo/vbo_save_api.c
@@ -648,7 +648,8 @@ _save_upgrade_vertex(struct gl_context *ctx, GLuint attr, GLuint newsz)
/* Recalculate all the attrptr[] values:
*/
- for (i = 0, tmp = save->vertex; i < VBO_ATTRIB_MAX; i++) {
+ tmp = save->vertex;
+ for (i = 0; i < VBO_ATTRIB_MAX; i++) {
if (save->attrsz[i]) {
save->attrptr[i] = tmp;
tmp += save->attrsz[i];
@@ -1543,7 +1544,7 @@ vbo_print_vertex_list(struct gl_context *ctx, void *data, FILE *f)
node->vertex_store->bufferobj : NULL;
(void) ctx;
- fprintf(f, "VBO-VERTEX-LIST, %u vertices %d primitives, %d vertsize "
+ fprintf(f, "VBO-VERTEX-LIST, %u vertices, %d primitives, %d vertsize, "
"buffer %p\n",
node->count, node->prim_count, node->vertex_size,
buffer);